Repository: alinaqi/claude-bootstrap
Branch: main
Commit: 57c5c839f18f
Files: 501
Total size: 2.7 MB

Directory structure:
gitextract_idptty0p/

├── .github/
│   └── workflows/
│       ├── skill-lint.yml
│       └── skill-review.yml
├── .gitignore
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── _project_specs/
│   ├── 00-autonomous-engineering-roadmap.md
│   ├── 01-runtime-observability.md
│   ├── 02-rollback-and-recovery.md
│   ├── 03-verifiable-contracts.md
│   ├── 04-multi-agent-coordination.md
│   ├── 05-confidence-calibration.md
│   ├── 06-cost-budget-awareness.md
│   ├── 07-human-escalation-protocol.md
│   ├── 08-auto-code-index.md
│   └── 09-multimodal-ingestion.md
├── commands/
│   ├── analyze-repo.md
│   ├── analyze-workspace.md
│   ├── check-contributors.md
│   ├── icpg-bootstrap.md
│   ├── icpg-drift.md
│   ├── icpg-impact.md
│   ├── icpg-why.md
│   ├── initialize-project.md
│   ├── maggy-init.md
│   ├── maggy.md
│   ├── mnemos-checkpoint.md
│   ├── mnemos-status.md
│   ├── polyphony-init.md
│   ├── polyphony-spawn.md
│   ├── polyphony-status.md
│   ├── spawn-team.md
│   ├── sync-agents.md
│   ├── sync-contracts.md
│   └── update-code-index.md
├── docs/
│   ├── architecture-v5.md
│   ├── benchmark-results.md
│   ├── mnemos-implementation.md
│   └── polyphony-spec.md
├── evals/
│   ├── README.md
│   ├── agent-teams/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── base/
│   │   ├── scenario-1/
│   │   │   ├── criteria.json
│   │   │   └── task.md
│   │   └── scenario-2/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── code-review/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── commit-hygiene/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── credentials/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── database-schema/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── existing-repo/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── llm-patterns/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── project-tooling/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── python/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── react-web/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── run-evals.sh
│   ├── security/
│   │   ├── scenario-1/
│   │   │   ├── criteria.json
│   │   │   └── task.md
│   │   └── scenario-2/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── session-management/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   ├── supabase/
│   │   └── scenario-1/
│   │       ├── criteria.json
│   │       └── task.md
│   └── typescript/
│       └── scenario-1/
│           ├── criteria.json
│           └── task.md
├── hooks/
│   ├── post-commit-graph
│   ├── pre-push
│   └── workspace/
│       ├── check-contract-freshness.sh
│       ├── check-graph-freshness.sh
│       ├── post-commit-contracts.sh
│       └── pre-push-contracts.sh
├── install.sh
├── maggy/
│   ├── .gitignore
│   ├── PLAN.md
│   ├── README.md
│   ├── config.example.yaml
│   ├── docs/
│   │   ├── benchmark-results.md
│   │   └── maggy-rfc.md
│   ├── install.sh
│   ├── maggy/
│   │   ├── __init__.py
│   │   ├── adapters/
│   │   │   ├── __init__.py
│   │   │   ├── cli_discovery.py
│   │   │   └── pi.py
│   │   ├── api/
│   │   │   ├── __init__.py
│   │   │   ├── auth.py
│   │   │   ├── routes.py
│   │   │   ├── routes_budget.py
│   │   │   ├── routes_chat.py
│   │   │   ├── routes_cikg.py
│   │   │   ├── routes_deploy.py
│   │   │   ├── routes_engram.py
│   │   │   ├── routes_escalation.py
│   │   │   ├── routes_events.py
│   │   │   ├── routes_forge.py
│   │   │   ├── routes_heartbeat.py
│   │   │   ├── routes_history.py
│   │   │   ├── routes_improve.py
│   │   │   ├── routes_lexon.py
│   │   │   ├── routes_mesh.py
│   │   │   ├── routes_mesh_admin.py
│   │   │   ├── routes_monitor.py
│   │   │   ├── routes_observability.py
│   │   │   ├── routes_planning.py
│   │   │   ├── routes_process.py
│   │   │   ├── routes_projects.py
│   │   │   ├── routes_routing.py
│   │   │   └── routes_setup.py
│   │   ├── budget.py
│   │   ├── calibration/
│   │   │   ├── __init__.py
│   │   │   └── tracker.py
│   │   ├── checkpoint.py
│   │   ├── cikg/
│   │   │   ├── __init__.py
│   │   │   ├── graph.py
│   │   │   ├── models.py
│   │   │   ├── queries.py
│   │   │   └── storage.py
│   │   ├── cli.py
│   │   ├── cli_chat.py
│   │   ├── cli_client.py
│   │   ├── cli_output.py
│   │   ├── cli_repl_cmds.py
│   │   ├── cli_sessions.py
│   │   ├── cli_welcome.py
│   │   ├── config.py
│   │   ├── contracts/
│   │   │   ├── __init__.py
│   │   │   └── generator.py
│   │   ├── coordination/
│   │   │   ├── __init__.py
│   │   │   └── lock_manager.py
│   │   ├── deploy.py
│   │   ├── discovery.py
│   │   ├── engram/
│   │   │   ├── __init__.py
│   │   │   ├── diagnostics.py
│   │   │   ├── record.py
│   │   │   ├── retrieval.py
│   │   │   ├── seed.py
│   │   │   └── store.py
│   │   ├── escalation/
│   │   │   ├── __init__.py
│   │   │   └── protocol.py
│   │   ├── event_spine/
│   │   │   ├── __init__.py
│   │   │   ├── emitter.py
│   │   │   ├── events.py
│   │   │   ├── header.py
│   │   │   └── store.py
│   │   ├── fatigue.py
│   │   ├── forge/
│   │   │   ├── __init__.py
│   │   │   ├── connector.py
│   │   │   ├── detector.py
│   │   │   └── registry.py
│   │   ├── heartbeat/
│   │   │   ├── __init__.py
│   │   │   ├── jobs.py
│   │   │   └── scheduler.py
│   │   ├── history/
│   │   │   ├── __init__.py
│   │   │   ├── analyzer.py
│   │   │   ├── models.py
│   │   │   ├── parsers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── claude.py
│   │   │   │   ├── codex.py
│   │   │   │   └── kimi.py
│   │   │   ├── service.py
│   │   │   └── store.py
│   │   ├── improve/
│   │   │   ├── __init__.py
│   │   │   ├── analyzer.py
│   │   │   ├── models.py
│   │   │   ├── service.py
│   │   │   └── signals.py
│   │   ├── lexon/
│   │   │   ├── __init__.py
│   │   │   ├── disambiguate.py
│   │   │   ├── personalization.py
│   │   │   ├── record.py
│   │   │   ├── router.py
│   │   │   └── terminology.py
│   │   ├── main.py
│   │   ├── mesh/
│   │   │   ├── __init__.py
│   │   │   ├── discovery.py
│   │   │   ├── git_discovery.py
│   │   │   ├── manager.py
│   │   │   ├── memory.py
│   │   │   ├── network.py
│   │   │   ├── org_scanner.py
│   │   │   ├── protocol.py
│   │   │   ├── provenance.py
│   │   │   ├── publisher.py
│   │   │   ├── quarantine.py
│   │   │   ├── store.py
│   │   │   ├── sync.py
│   │   │   ├── transport.py
│   │   │   ├── ws_client.py
│   │   │   └── ws_server.py
│   │   ├── mnemos/
│   │   │   ├── __init__.py
│   │   │   ├── fatigue.py
│   │   │   └── signals.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   └── plan.py
│   │   ├── observability/
│   │   │   ├── __init__.py
│   │   │   └── collector.py
│   │   ├── planning.py
│   │   ├── process/
│   │   │   ├── __init__.py
│   │   │   ├── discovery.py
│   │   │   ├── github_prs.py
│   │   │   ├── model_router.py
│   │   │   ├── models.py
│   │   │   ├── patterns.py
│   │   │   ├── report.py
│   │   │   ├── service.py
│   │   │   ├── signals.py
│   │   │   └── store.py
│   │   ├── providers/
│   │   │   ├── __init__.py
│   │   │   ├── asana.py
│   │   │   ├── base.py
│   │   │   ├── github_issues.py
│   │   │   └── monday.py
│   │   ├── recovery/
│   │   │   ├── __init__.py
│   │   │   └── rollback.py
│   │   ├── registry.py
│   │   ├── routing.py
│   │   ├── routing_rules.py
│   │   ├── routing_rules_defaults.py
│   │   ├── routing_rules_io.py
│   │   ├── scores.py
│   │   ├── services/
│   │   │   ├── __init__.py
│   │   │   ├── account_guide.py
│   │   │   ├── activity.py
│   │   │   ├── ai_client.py
│   │   │   ├── cascade.py
│   │   │   ├── chat.py
│   │   │   ├── chat_context.py
│   │   │   ├── chat_router.py
│   │   │   ├── chat_stream.py
│   │   │   ├── checkpoint.py
│   │   │   ├── competitor.py
│   │   │   ├── context_compactor.py
│   │   │   ├── convention_inferrer.py
│   │   │   ├── convention_scanner.py
│   │   │   ├── executor.py
│   │   │   ├── executor_helpers.py
│   │   │   ├── executor_prompts.py
│   │   │   ├── executor_types.py
│   │   │   ├── inbox.py
│   │   │   ├── monitor.py
│   │   │   ├── output_reviewer.py
│   │   │   ├── planner.py
│   │   │   ├── session_detect.py
│   │   │   ├── stakes.py
│   │   │   ├── tdd_verifier.py
│   │   │   └── vision.py
│   │   └── static/
│   │       ├── app.js
│   │       └── index.html
│   ├── pyproject.toml
│   └── tests/
│       ├── conftest.py
│       ├── integration/
│       │   ├── __init__.py
│       │   ├── test_full_task_flow.py
│       │   ├── test_model_fallback.py
│       │   └── test_process_loop.py
│       ├── test_account_guide.py
│       ├── test_activity.py
│       ├── test_api_endpoints.py
│       ├── test_benchmark_scenario.py
│       ├── test_bootstrap.py
│       ├── test_budget.py
│       ├── test_calibration.py
│       ├── test_cascade.py
│       ├── test_chat.py
│       ├── test_chat_context.py
│       ├── test_chat_routed.py
│       ├── test_chat_router.py
│       ├── test_chat_stream.py
│       ├── test_checkpoint.py
│       ├── test_checkpoint_mgr.py
│       ├── test_cikg.py
│       ├── test_cli.py
│       ├── test_cli_chat.py
│       ├── test_cli_discovery.py
│       ├── test_cli_sessions.py
│       ├── test_cli_welcome.py
│       ├── test_context_compactor.py
│       ├── test_contracts.py
│       ├── test_convention_inferrer.py
│       ├── test_convention_scanner.py
│       ├── test_coordination.py
│       ├── test_deploy.py
│       ├── test_discovery.py
│       ├── test_dual_planner.py
│       ├── test_engram.py
│       ├── test_escalation.py
│       ├── test_event_spine.py
│       ├── test_executor_routing.py
│       ├── test_fatigue.py
│       ├── test_forge.py
│       ├── test_heartbeat.py
│       ├── test_history.py
│       ├── test_history_parsers.py
│       ├── test_improve.py
│       ├── test_lexon.py
│       ├── test_mesh.py
│       ├── test_mesh_network.py
│       ├── test_mesh_store.py
│       ├── test_mesh_ws.py
│       ├── test_mnemos_fatigue.py
│       ├── test_monday_provider.py
│       ├── test_monitor.py
│       ├── test_multimodel_integration.py
│       ├── test_observability.py
│       ├── test_output_reviewer.py
│       ├── test_pi_adapter.py
│       ├── test_planning.py
│       ├── test_registry.py
│       ├── test_repl_cmds.py
│       ├── test_rollback.py
│       ├── test_routes_escalation.py
│       ├── test_routes_observability.py
│       ├── test_routes_projects.py
│       ├── test_routing_config.py
│       ├── test_routing_rules.py
│       ├── test_routing_service.py
│       ├── test_scores.py
│       ├── test_setup_routes.py
│       ├── test_stakes.py
│       ├── test_tdd_verifier.py
│       ├── test_vision.py
│       └── test_zero_config.py
├── rules/
│   ├── nodejs-backend.md
│   ├── python.md
│   ├── quality-gates.md
│   ├── react.md
│   ├── security.md
│   ├── tdd-workflow.md
│   └── typescript.md
├── scripts/
│   ├── convert-hooks-to-toml.sh
│   ├── convert-skills-structure.sh
│   ├── detect-agents.sh
│   ├── icpg/
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── bootstrap.py
│   │   ├── contracts.py
│   │   ├── drift.py
│   │   ├── models.py
│   │   ├── pyproject.toml
│   │   ├── store.py
│   │   ├── symbols.py
│   │   └── vectors.py
│   ├── install-graph-tools.sh
│   ├── install-hooks.sh
│   ├── install-skills.sh
│   ├── mnemos/
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── checkpoint.py
│   │   ├── consolidation.py
│   │   ├── fatigue.py
│   │   ├── models.py
│   │   ├── pyproject.toml
│   │   ├── signals.py
│   │   └── store.py
│   ├── polyphony/
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── adapters/
│   │   │   ├── __init__.py
│   │   │   ├── claude.py
│   │   │   ├── codex.py
│   │   │   └── kimi.py
│   │   ├── config.py
│   │   ├── events.py
│   │   ├── identity.py
│   │   ├── models.py
│   │   ├── orchestrator.py
│   │   ├── pyproject.toml
│   │   ├── router.py
│   │   ├── runtime.py
│   │   ├── scoring.py
│   │   ├── sources/
│   │   │   ├── __init__.py
│   │   │   ├── github.py
│   │   │   └── local.py
│   │   ├── state_machine.py
│   │   ├── store.py
│   │   └── workspace.py
│   └── skill_lint/
│       ├── __init__.py
│       ├── __main__.py
│       ├── content.py
│       ├── frontmatter.py
│       ├── pyproject.toml
│       ├── references.py
│       ├── report.py
│       └── spec.py
├── skills/
│   ├── aeo-optimization/
│   │   └── SKILL.md
│   ├── agent-teams/
│   │   ├── SKILL.md
│   │   └── agents/
│   │       ├── code-review.md
│   │       ├── feature.md
│   │       ├── merger.md
│   │       ├── quality.md
│   │       ├── security.md
│   │       └── team-lead.md
│   ├── agentic-development/
│   │   └── SKILL.md
│   ├── ai-models/
│   │   └── SKILL.md
│   ├── android-java/
│   │   └── SKILL.md
│   ├── android-kotlin/
│   │   └── SKILL.md
│   ├── aws-aurora/
│   │   └── SKILL.md
│   ├── aws-dynamodb/
│   │   └── SKILL.md
│   ├── azure-cosmosdb/
│   │   └── SKILL.md
│   ├── base/
│   │   └── SKILL.md
│   ├── cloudflare-d1/
│   │   └── SKILL.md
│   ├── code-deduplication/
│   │   └── SKILL.md
│   ├── code-graph/
│   │   └── SKILL.md
│   ├── code-review/
│   │   └── SKILL.md
│   ├── codex-review/
│   │   └── SKILL.md
│   ├── commit-hygiene/
│   │   └── SKILL.md
│   ├── cpg-analysis/
│   │   └── SKILL.md
│   ├── credentials/
│   │   └── SKILL.md
│   ├── cross-agent-delegation/
│   │   └── SKILL.md
│   ├── database-schema/
│   │   └── SKILL.md
│   ├── existing-repo/
│   │   └── SKILL.md
│   ├── firebase/
│   │   └── SKILL.md
│   ├── flutter/
│   │   └── SKILL.md
│   ├── gemini-review/
│   │   └── SKILL.md
│   ├── icpg/
│   │   └── SKILL.md
│   ├── iterative-development/
│   │   └── SKILL.md
│   ├── klaviyo/
│   │   └── SKILL.md
│   ├── llm-patterns/
│   │   └── SKILL.md
│   ├── maggy/
│   │   └── SKILL.md
│   ├── medusa/
│   │   └── SKILL.md
│   ├── mnemos/
│   │   └── SKILL.md
│   ├── ms-teams-apps/
│   │   └── SKILL.md
│   ├── nodejs-backend/
│   │   └── SKILL.md
│   ├── playwright-testing/
│   │   └── SKILL.md
│   ├── polyphony/
│   │   └── SKILL.md
│   ├── posthog-analytics/
│   │   └── SKILL.md
│   ├── project-tooling/
│   │   └── SKILL.md
│   ├── pwa-development/
│   │   └── SKILL.md
│   ├── python/
│   │   └── SKILL.md
│   ├── react-native/
│   │   └── SKILL.md
│   ├── react-web/
│   │   └── SKILL.md
│   ├── reddit-ads/
│   │   └── SKILL.md
│   ├── reddit-api/
│   │   └── SKILL.md
│   ├── security/
│   │   └── SKILL.md
│   ├── session-management/
│   │   └── SKILL.md
│   ├── shopify-apps/
│   │   └── SKILL.md
│   ├── site-architecture/
│   │   └── SKILL.md
│   ├── supabase/
│   │   └── SKILL.md
│   ├── supabase-nextjs/
│   │   └── SKILL.md
│   ├── supabase-node/
│   │   └── SKILL.md
│   ├── supabase-python/
│   │   └── SKILL.md
│   ├── team-coordination/
│   │   └── SKILL.md
│   ├── ticket-craft/
│   │   └── SKILL.md
│   ├── typescript/
│   │   └── SKILL.md
│   ├── ui-mobile/
│   │   └── SKILL.md
│   ├── ui-testing/
│   │   └── SKILL.md
│   ├── ui-web/
│   │   └── SKILL.md
│   ├── user-journeys/
│   │   └── SKILL.md
│   ├── web-content/
│   │   └── SKILL.md
│   ├── web-payments/
│   │   └── SKILL.md
│   ├── woocommerce/
│   │   └── SKILL.md
│   └── workspace/
│       └── SKILL.md
├── templates/
│   ├── AGENTS.md
│   ├── CLAUDE.local.md
│   ├── CLAUDE.md
│   ├── Dockerfile.polyphony
│   ├── codex-auto-review.sh
│   ├── config.toml
│   ├── icpg-pre-edit.sh
│   ├── icpg-stop-record.sh
│   ├── mnemos-post-compact-inject.sh
│   ├── mnemos-post-tool.sh
│   ├── mnemos-pre-compact.sh
│   ├── mnemos-pre-edit.sh
│   ├── mnemos-session-start.sh
│   ├── mnemos-statusline.sh
│   ├── mnemos-stop-checkpoint.sh
│   ├── polyphony-agents.yaml
│   ├── polyphony-config.yaml
│   ├── polyphony-identities.yaml
│   ├── polyphony-routing.yaml
│   ├── pre-compact.sh
│   ├── settings.json
│   └── tdd-loop-check.sh
└── tests/
    ├── test_cross_agent.py
    ├── test_cross_tool.py
    ├── test_polyphony_adapters.py
    ├── test_polyphony_config.py
    ├── test_polyphony_events.py
    ├── test_polyphony_identity.py
    ├── test_polyphony_models.py
    ├── test_polyphony_orchestrator.py
    ├── test_polyphony_router.py
    ├── test_polyphony_runtime.py
    ├── test_polyphony_scoring.py
    ├── test_polyphony_sources.py
    ├── test_polyphony_state.py
    ├── test_polyphony_store.py
    ├── test_polyphony_workspace.py
    ├── test_session_detect.py
    ├── test_skill_lint.py
    └── validate-structure.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/skill-lint.yml
================================================
# Lints the skill definitions under skills/ and runs the linter's own tests.
name: Skill Lint

# Runs on pushes to main and on pull requests, but only when the skills, the
# linter package, or the linter's test file change.
on:
  push:
    branches: [main]
    paths:
      - 'skills/**'
      - 'scripts/skill_lint/**'
      - 'tests/test_skill_lint.py'
  pull_request:
    paths:
      - 'skills/**'
      - 'scripts/skill_lint/**'
      - 'tests/test_skill_lint.py'

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # pytest is the only package installed explicitly by this workflow.
      - name: Install pytest
        run: pip install pytest

      # PYTHONPATH=scripts puts the scripts/ directory on the import path so
      # the skill_lint package that lives there can be imported and run.
      - name: Run skill-lint tests
        run: PYTHONPATH=scripts python -m pytest tests/test_skill_lint.py -v

      # Gate step: a non-zero exit here fails the job.
      # NOTE(review): presumably --fail-on error exits non-zero only for
      # error-level findings; confirm against the skill_lint CLI.
      - name: Run skill-lint (errors fail)
        run: PYTHONPATH=scripts python -m skill_lint --fail-on error skills/

      # Report step: runs even when an earlier step failed (if: always()),
      # writes the JSON output to skill-lint-report.json, and never fails the
      # job itself because of the trailing `|| true`.
      - name: Run skill-lint (full report)
        if: always()
        run: PYTHONPATH=scripts python -m skill_lint --format json skills/ > skill-lint-report.json || true

      # Publish the JSON report as a build artifact regardless of job outcome.
      - name: Upload lint report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: skill-lint-report
          path: skill-lint-report.json


================================================
FILE: .github/workflows/skill-review.yml
================================================
name: Skill Review (Tessl + skills-ref)

on:
  pull_request:
    paths:
      - 'skills/**'

jobs:
  # Optional Tessl review of the skills touched by a pull request. The job is
  # skipped unless the repository variable TESSL_ENABLED is set to 'true'.
  tessl:
    runs-on: ubuntu-latest
    if: ${{ vars.TESSL_ENABLED == 'true' }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed for the origin/main...HEAD diff below.
          fetch-depth: 0

      - name: Setup Tessl
        uses: tesslio/setup-tessl@v2
        with:
          token: ${{ secrets.TESSL_TOKEN }}

      - name: Detect changed skills
        id: changes
        run: |
          # Multi-skill PRs produce a multi-line list. Plain echo "skills=$X"
          # fails GHA's output parser on newlines ("Invalid format"), so the
          # skill directory names are joined with spaces into a single line.
          # Keep workflow-expression syntax out of these comments: expressions
          # are expanded inside run scripts even when they sit in a comment.
          CHANGED=$(git diff --name-only origin/main...HEAD -- skills/ | cut -d'/' -f2 | sort -u | tr '\n' ' ')
          # Trim trailing space for clean logs
          CHANGED="${CHANGED%% }"
          echo "skills=$CHANGED" >> "$GITHUB_OUTPUT"
          echo "Changed skills: $CHANGED"

      - name: Run Tessl review on changed skills
        if: steps.changes.outputs.skills != ''
        env:
          # The skill names are derived from file paths in the pull request,
          # so they are untrusted. Hand them to the script as data through an
          # environment variable rather than pasting them into the script text,
          # where a crafted directory name could run arbitrary commands.
          CHANGED_SKILLS: ${{ steps.changes.outputs.skills }}
        run: |
          # Split the space-separated list into an array without glob expansion.
          read -ra skills <<< "$CHANGED_SKILLS"
          for skill in "${skills[@]}"; do
            echo "=== Reviewing: $skill ==="
            # Advisory only: a failing lint or review must not fail the job.
            tessl skill lint "skills/$skill" || true
            tessl skill review --json "skills/$skill" || true
          done

  # Validates the skills touched by a pull request with the skills-ref tool.
  # Every tool invocation is best-effort and does not fail the job.
  skills-ref:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed for the origin/main...HEAD diff below.
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # Installation failure is tolerated; the validate step below is also
      # non-fatal, so a missing tool only produces log noise.
      - name: Install skills-ref
        run: pip install skills-ref || echo "skills-ref not available, skipping"

      - name: Detect changed skills
        id: changes
        run: |
          # Join the changed skill directory names with spaces: a multi-line
          # value would be rejected by the step-output parser, and a single
          # line is easy to split again in the validation step.
          # Keep workflow-expression syntax out of these comments: expressions
          # are expanded inside run scripts even when they sit in a comment.
          CHANGED=$(git diff --name-only origin/main...HEAD -- skills/ | cut -d'/' -f2 | sort -u | tr '\n' ' ')
          CHANGED="${CHANGED%% }"
          echo "skills=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Validate changed skills
        if: steps.changes.outputs.skills != ''
        env:
          # The skill names are derived from file paths in the pull request,
          # so they are untrusted. Hand them to the script as data through an
          # environment variable rather than pasting them into the script text,
          # where a crafted directory name could run arbitrary commands.
          CHANGED_SKILLS: ${{ steps.changes.outputs.skills }}
        run: |
          # Split the space-separated list into an array without glob expansion.
          read -ra skills <<< "$CHANGED_SKILLS"
          for skill in "${skills[@]}"; do
            echo "=== Validating: $skill ==="
            # Advisory only: validation findings must not fail the job.
            skills-ref validate "skills/$skill" || true
          done


================================================
FILE: .gitignore
================================================
__pycache__/
.DS_Store
evals/.results/
.pytest_cache/


================================================
FILE: CHANGELOG.md
================================================
# Changelog

All notable changes to Claude Bootstrap will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

---

## [5.8.0] - 2026-05-12

### Fixed

#### UX Fix Pass (12 issues from manual CLI testing)
- **Prompt character** — Changed from `maggy:` to `>` for cleaner input (`cli_chat.py:76`)
- **Ctrl+C during streaming** — Now cancels current response instead of exiting REPL. Added `except KeyboardInterrupt` in `_stream_chunks` (`cli_chat.py:161`)
- **`/health` 404** — Client was calling `/api/health/memory` (non-existent). Fixed to call `/api/engram/diagnostics` (`cli_client.py:260`)
- **`/route`, `/models`, `/budget`, `/stats`, `/health`, `/config` crash on server down** — Added `_call(fn, default)` safe wrapper that catches `Exception` and `SystemExit` from unreachable server. All display commands return fallback data instead of crashing (`cli_repl_cmds.py:18`)
- **Models shows "0 tracked" / "No data yet"** — When heatmap is empty, now shows the 5 known model tiers (local, kimi, gpt, claude, codex) with 0 samples (`cli_repl_cmds.py:129`)
- **`/use` accepts invalid model names** — Now validates against `_KNOWN_MODELS`, prints warning for unknown names while still setting the restriction (`cli_repl_cmds.py:147`)
- **Dir shows "?"** — Welcome banner now falls back to `os.getcwd()` when session `working_dir` is empty (`cli_welcome.py:36`)

### Added

#### Budget Subscription Awareness
- **`plan` field** on `BudgetConfig` — Users set `budget.plan: subscription` in `~/.maggy/config.yaml` (`config.py:150`)
- **`BudgetManager.budget_status()`** includes `plan` in response (`budget.py:163`)
- **`/budget`** shows "Subscription" instead of "$0.00 / $10.00" when plan is subscription (`cli_repl_cmds.py:87`)
- **Welcome banner** shows "Subscription" for subscription plans (`cli_welcome.py:54`)

#### Welcome Banner Improvements
- **Models count** — Shows "5 available" (known model count) instead of "0 tracked" when no heatmap data (`cli_welcome.py:62`)

### Changed
- **`_HELP` compressed** — 2-column layout saves 6 lines, fits all new features within 200-line limit (`cli_repl_cmds.py:191`)

### Tests
- `test_repl_cmds.py` — +5 tests: models_empty_shows_known, use_warns_unknown_model, budget_subscription_plan, health_graceful_failure, stats_server_down
- `test_cli_welcome.py` — +3 tests: dir_shows_cwd_fallback, models_shows_available_count, budget_subscription_welcome
- `test_cli_chat.py` — +1 test: chat_prompt_uses_angle_bracket
- **Total: 825 tests passing** (816 + 9 new)

---

## [5.7.0] - 2026-05-12

### Added

#### `/monitor` Command — Background Tracker Polling
- **`maggy/services/monitor.py`** — MonitorService with SQLite-backed polling for GitHub PRs and Monday.com items. `MonitorConfig` and `MonitorEvent` dataclasses, `add/remove/list_active/is_new/mark_seen/status/poll` methods
- **`maggy/providers/monday.py`** — Monday.com provider implementing `IssueTrackerProvider` protocol via GraphQL API. Maps board items to Task dataclass
- **`maggy/api/routes_monitor.py`** — REST endpoints: `GET /api/monitor/status`, `POST /api/monitor/start`, `POST /api/monitor/stop`
- **`/monitor` handler** in REPL — shows active monitor count (`cli_chat.py:94`)

#### `/health` Command — Memory Health Dashboard
- **`cmd_health()`** — Shows Engram health score (color-coded) and Mnemos fatigue state in Rich Panel (`cli_repl_cmds.py:180`)
- **`health_dashboard()`** and **`engram_diagnostics()`** client methods (`cli_client.py:259`)

#### Enhanced Welcome Banner
- **`cli_welcome.py`** — New file with Rich Panel welcome banner showing project info, budget, models, status, and memory health score

#### Search Routing to Local Model
- **"search" type** added to `TYPE_KEYWORDS` in `chat_router.py` — 11 keywords (find, search, grep, where, locate, which, look, scan, show, list, read) route to local/Qwen model for free

#### Account Switching Guidance
- **`maggy/services/account_guide.py`** — Detects CLI auth profiles from `~/.claude/`, `~/.codex/`. `suggest_switch()` returns CLI instructions, `render_switch_guide()` prints Rich-formatted guidance
- **Quota error detection** — `_QUOTA_MARKERS` in `cli_chat.py` triggers account switch guidance on rate limit errors

### Tests
- `test_monitor.py` — 8 tests for MonitorService
- `test_monday_provider.py` — 6 tests for MondayProvider
- `test_account_guide.py` — 5 tests for account switching
- `test_chat_router.py` — +3 tests for search type detection
- `test_repl_cmds.py` — +3 tests for health command
- `test_cli_welcome.py` — +2 tests for health and session history
- `test_cli_chat.py` — +1 test for quota error guidance
- **Total: 816 tests passing** (788 + 28 new)

---

## [5.1.0] - 2026-05-11

### Added

#### REPL Slash Commands — Stats, Routing, Model Control
- **`maggy/cli_repl_cmds.py`** — 9 command handlers for the interactive REPL:
  - `/stats` — Budget + model performance summary (spend, status, reward heatmap)
  - `/budget` — Detailed per-provider breakdown with visual progress bar
  - `/route` — Routing rules, task type overrides, model strengths/success rates
  - `/models` — Full reward heatmap grid by model × task type × blast tier
  - `/use claude,codex` — Restrict routing to specific models for this session
  - `/use all` — Remove model restriction
  - `/config` — Configuration summary (codebases, routing mode, budget limit)
  - `/claude-md` — Render project's CLAUDE.md in terminal
  - `/help` — List all available commands
- **`SessionState`** dataclass — Mutable session-level state (session_id, working_dir, allowed_models)
- **`dispatch()`** router — Parses slash commands, routes to handlers, returns True if handled
- **`GET /api/routing/rules`** endpoint — Exposes routing mode, task type overrides, model performance
- **`allowed_models`** field on `RoutedMessageRequest` — Server-side model restriction: if routed model not in allowed list, picks first allowed model with updated reason

#### Qwen3-Coder Benchmarks
- **75.7 tok/s average** — 3.4× faster than Qwen2.5-Coder (22.1 tok/s), 2× faster than Claude API (37.4 tok/s)
- MoE architecture (3.3B active / 30B total params) on M4 Max 128GB
- Quality: 10/10 BST correctness, 9/10 async rate limiter (token bucket + asyncio.Lock)
- Cold start: ~13s model load; hot runs: <100ms start

#### mWP Mindset — Full Framework
- **`skills/base/SKILL.md`** — Added complete mWP section with 11-Star Framework (Brian Chesky), mWP planning checklist (obvious → magical → multiplier)
- **`routing_rules.py`** — Expanded mWP convention injected into all CLI prompts (codex, kimi, qwen3, claude) with 3-question framework and 11-star reference

### Changed
- **`cli_chat.py`** — Integrated `SessionState` and `dispatch()` from `cli_repl_cmds`; passes `allowed_models` to `chat_send_routed()`; mode hint now shows `/help for commands`
- **`cli_client.py`** — Added `budget_by_provider()`, `routing_rules()` methods; updated `chat_send_routed()` signature to accept `allowed_models`
- **`benchmark-results.md`** — Qwen3-Coder results filled in (was TBD), quality assessment section added

### Tests
- `tests/test_repl_cmds.py` — 10 tests (dispatch routing, stats, budget, route, models, use, claude-md, help)
- `tests/test_cli_chat.py` — Updated 2 assertions for `allowed_models=None` parameter
- **Total: 653 tests passing** (643 maggy + 10 session detect)

---

## [5.0.0] - 2026-05-10

### Added

#### Interactive Chat — Session Takeover
- **`maggy/services/chat.py`** — ChatManager for interactive Claude sessions with SSE streaming
  - Auto-connects to all active CLI sessions (Claude, Codex, Kimi) via ActivityService process scanning
  - Session continuity with `--resume <session-id>` for multi-turn conversations
  - `CLAUDECODE` env var stripping to allow nested Claude subprocess spawning
  - `--verbose` flag for `--output-format stream-json` compatibility
  - Deduplication via dict keyed by project name
- **`maggy/services/chat_context.py`** — Context builder for session enrichment
  - Path-based history matching (not just exact project name) via `_path_candidates()`
  - `_SKIP_DIRS` set prevents matching common system directories (Users, Documents, Library)
  - Recent prompt injection from activity data per project
  - Claude `session_id` resolution from `~/.claude/history.jsonl` for true `--resume`
- **`maggy/api/routes_chat.py`** — Chat API (6 endpoints)
  - `POST /api/chat/auto-connect` — detect all active sessions, enrich with history context
  - `POST /api/chat/sessions` — create session
  - `GET /api/chat/sessions` — list sessions
  - `GET /api/chat/sessions/{id}` — get session + messages
  - `POST /api/chat/sessions/{id}/send` — send message, stream response via SSE
  - `DELETE /api/chat/sessions/{id}` — delete session
- **Chat UI** in `app.js` — full web-based chat interface
  - Auto-connects on tab load, shows all active project sessions in sidebar
  - Message thread with user/Claude bubbles
  - SSE EventSource for real-time streaming
  - Session history context display
  - New session creation from active + configured projects

#### Auto-Bootstrap — No Empty Tabs
- **`_bootstrap()` in `main.py`** — seeds all services on startup
  - `history.analyze()` — parses CLI sessions immediately (260+ sessions, 11,994 prompts)
  - `introspector.analyze()` — collects signals, emits events
  - `_seed_cikg()` — scans configured codebases, creates nodes for repos + detected languages

#### UI Navigation Cleanup
- **Grouped navigation** — 9 flat tabs reorganized into 3 logical groups:
  - **Work** (Chat, Tasks, Watching) — things you do
  - **Intel** (Competitors, Insights) — things you learn
  - **System** (gear dropdown: Budget, Models, Forge, Settings) — things you configure
- **Tab renames** — Inbox→Tasks, Followed→Watching, Process→Insights
- **Chat is default tab** — loads on startup, auto-connects immediately
- **Gear dropdown** — system tabs collapsed into icon menu, reduces nav clutter
- **Section labels** — tiny uppercase "WORK" / "INTEL" separators

#### Process Intelligence Tab Enhancement
- Parallel fetch of activity, history, improve, events, CIKG data
- Health signals display (routing, memory, reliability, cost percentages)
- Live activity section showing active sessions + recent prompts
- Session patterns from history analysis
- Button spinner feedback + success toast on Analyze History / Self-Improve

#### Infrastructure
- **No-cache static middleware** — `_NoCacheStatic` adds `Cache-Control: no-store` to `/static`
- **Cache-busting** — `?v=3` on script tag
- **`showToast()`** — green success notification for async operations

### Security
- **Chat path validation** — `project_path` now validated against configured codebase roots (blocks arbitrary filesystem access via `--dangerously-skip-permissions`)
- **Chat streaming lock** — per-session `asyncio.Lock` rejects concurrent `/send` requests, preventing duplicate subprocess spawning and workspace corruption

### Fixed
- Engram `expire_engrams` referencing `self` outside class context
- `auto_connect` returning duplicate sessions for same project
- `CLAUDECODE` env var blocking nested Claude subprocess spawning
- Missing `--verbose` flag, which is required when using `--output-format stream-json` with `-p`
- History matching failing to find projects stored under their parent directory name (e.g. "AI-Playground" vs "claude-skills-package")
- Process tab buttons doing nothing due to browser-cached old JS
- 500-row limit in history store masking projects — switched to aggregated report data

### Changed
- Default tab: `inbox` → `chat`
- Org name in config: `"Your Org"` → read from `~/.maggy/config.yaml`
- README fully rewritten to reflect current feature set (was still describing MVP)

### Tests
- `tests/test_chat.py` — 17 tests (ChatManager + AutoConnect)
- `tests/test_chat_context.py` — 18 tests (path candidates, history matching, prompts, session ID)
- Total: **466 tests passing**

---

## [4.0.0] - 2026-05-05

### Added

#### Polyphony — Multi-Agent Orchestration (Core)
- **`scripts/polyphony/`** — Full multi-agent orchestration package with container-isolated workspaces. Each agent session runs in its own Docker container with independent git branches.
- **Domain models** (`models.py`) — Task, Identity, AgentProfile, RunSpec, Result dataclasses
- **Task state machine** (`state_machine.py`) — DISCOVERED -> CLAIMED -> ROUTED -> PROVISIONED -> RUNNING -> VERIFYING -> LANDED with FAILED/BLOCKED paths
- **SQLite store** (`store.py`) — Persistent CRUD for tasks, run_specs, results with state audit log
- **YAML config** (`config.py`) — Configuration loading from `~/.polyphony/` with defaults merging
- **5-dimension complexity scoring** (`scoring.py`) — Cyclomatic depth, fan-out, security boundary, concurrency, domain invariants (0-10 scale)
- **Pure function router** (`router.py`) — Task x Policy -> RunSpec, first-match rules with fallback chains
- **Identity broker** (`identity.py`) — Named credential bundles with volume mounts and env overlays
- **Workspace manager** (`workspace.py`) — Per-task git clone lifecycle with `--reference`/`--dissociate` mirror support
- **Docker runtime** (`runtime.py`) — Container create/start/stop/wait/logs/rm lifecycle
- **Event parser** (`events.py`) — NDJSON/stream-json parsing from container stdout
- **Orchestrator** (`orchestrator.py`) — Supervisor loop: discover -> claim -> route -> provision -> run -> verify -> land
- **Agent adapters** (`adapters/`) — Claude (`-p --output-format stream-json`), Codex (`exec --full-auto`), Kimi (`--print -y`)
- **Work sources** (`sources/`) — GitHub Issues via `gh api`, local SQLite task queue
- **CLI** (`__main__.py`) — `polyphony {init|spawn|status|cleanup}` commands
- **Skill** (`skills/polyphony/SKILL.md`) — Full documentation for the orchestration system
- **Commands** — `/polyphony-init`, `/polyphony-spawn`, `/polyphony-status`
- **Templates** — `Dockerfile.polyphony`, `polyphony-config.yaml`, `polyphony-identities.yaml`, `polyphony-agents.yaml`, `polyphony-routing.yaml`
- **Spec** (`docs/polyphony-spec.md`) — Full specification reference (12 sections)
- **173 tests** across 13 test files with full TDD coverage

---

## [3.6.1] - 2026-05-04

### Changed
- **Complexity-based delegation replaces file-count heuristic** (`skills/cross-agent-delegation/SKILL.md`) — Kimi delegation now scored on 5 dimensions (cyclomatic depth, fan-out, security boundary, concurrency, domain invariants) × 0-2 each, sourced from iCPG signals + Claude reasoning. Routing: 0-3 → Kimi solo, 4-6 → Kimi + Codex auto-review, 7-10 → Claude direct. Adds trivial-case shortcut (<2 files + no risk keywords → auto-Kimi without scoring) and single-dimension override (7+ in any one dim keeps Claude). PR #16.

---

## [3.6.0] - 2026-05-03

### Added

#### Cross-Tool Compatibility (Claude + Kimi + Codex)
- **`scripts/detect-agents.sh`** — Detects installed AI CLI tools (Claude Code, Kimi CLI, Codex CLI)
- **`scripts/install-skills.sh`** — Reusable skill copier for any target directory
- **`templates/AGENTS.md`** — Codex project instructions template (mirrors CLAUDE.md with `.agents/skills/` paths)
- **`templates/config.toml`** — Hooks in TOML format for Kimi/Codex compatibility
- **`scripts/convert-hooks-to-toml.sh`** — JSON to TOML hook converter (requires jq)
- **`commands/sync-agents.md`** — `/sync-agents` command for cross-tool config sync
- **`install.sh`** auto-detects and installs skills to `~/.kimi/skills/` and `~/.codex/skills/`
- **`/initialize-project`** question 9: "Which AI CLI tools do you use?" with auto-detection
- Cross-tool directories (`.kimi/`, `.codex/`, `.agents/`) added to `.gitignore` template

#### Cross-Agent Intelligence
- **`templates/codex-auto-review.sh`** — Stop hook that auto-runs Codex review on changed files
  - Checks for Critical/High severity issues only
  - Exit 0 = pass, Exit 2 = feed findings back to Claude for fixing
  - Truncates diff to 8000 chars to prevent Codex token overflow
  - Gracefully skips if Codex CLI not installed
- **`skills/cross-agent-delegation/SKILL.md`** — Delegation skill with:
  - Tool detection (checks `command -v` for each CLI)
  - iCPG blast radius rules for Kimi delegation (<=3 files suggest Kimi, 4-8 offer option, 9+ stay Claude)
  - iCPG mandatory pre-task queries for all agents (prior, constraints, risk)
  - Mnemos mandatory memory lifecycle for all agents (goals, checkpoints, fatigue)
  - 10-step cross-agent workflow summary
- **Codex auto-review Stop hook** added to `settings.json` (after TDD, before iCPG record, 120s timeout)
- **Codex auto-review TOML hook** added to `config.toml` for Kimi/Codex compatibility
- **Cross-Agent Workflow** section added to both `CLAUDE.md` and `AGENTS.md` templates
- **`cross-agent-delegation/`** added to always-copy skill list in `/initialize-project`

#### Tests
- **`tests/test_cross_tool.py`** — 12 tests for cross-tool compatibility (detect-agents, install-skills, templates, sync-agents)
- **`tests/test_cross_agent.py`** — 22 tests for cross-agent intelligence (codex-auto-review, delegation skill, settings.json hook ordering, config.toml, template refs)

### Changed
- `install.sh` bumped to v3.6.0
- `install.sh` now makes `codex-auto-review.sh` executable during install
- `tests/validate-structure.sh` includes cross-tool template validation
- Total skills increased from 60 to **61**
- Total tests: 62 pytest + 238 validation checks

---

## [3.5.2] - 2026-04-22

### Fixed
- **Hook error behavior revised** — the 3.5.1 fix silently no-op'd missing scripts, which hid real installation problems. Hook commands now:
  - **Fail loud on real errors** — if the script exists and crashes, its stderr + non-zero exit propagate to Claude Code so you can debug
  - **Print one actionable line on missing installs** — `[claude-bootstrap] hook script 'X' not installed — run <claude-bootstrap>/install.sh …` and exit 0 (no blocking error, but you see exactly what to do)
  - **Use `exec` to run the resolved script** — exit code + stderr pass through unchanged
- **Hook scripts stop swallowing stderr** — removed 19 instances of `2>/dev/null` across `mnemos-*.sh`, `icpg-*.sh`, and `tdd-loop-check.sh`. Python tracebacks and other stderr output now surface to Claude Code's hook diagnostics. Command substitution (`$(...)`) only captures stdout, so this doesn't affect any value parsing.

## [3.5.1] - 2026-04-21

### Fixed
- **PreToolUse hook "Bash hook error" on any tool call.** `templates/settings.json` declared hook commands as relative paths (`scripts/mnemos-*.sh`) that don't exist in most projects — the scripts live in `templates/` and nothing copies them to `<project>/scripts/`. Every tool call triggered a hook-not-found error shown as `PreToolUse:Bash hook error` in the session (non-blocking but noisy).
- Hook commands now try `.claude/scripts/<name>.sh` first (project-local override), fall back to `$HOME/.claude/templates/<name>.sh` (always installed by `install.sh`), and no-op cleanly when neither exists. Applied to all 8 hook script references across `PreCompact`, `PreToolUse`, `PostToolUse`, `Stop`, and `SessionStart`.

---

## [3.5.0] - 2026-04-19

### CI
- **`skill-review.yml`**: both `tessl` and `skills-ref` jobs now space-join the detected-skills list before writing to `$GITHUB_OUTPUT`. The old plain `echo "skills=$CHANGED"` with a multi-line `$CHANGED` value failed GHA's output parser ("Invalid format") AND broke the downstream `for skill in ${{ outputs.skills }}` loop. Space-joining keeps both happy and unblocks multi-skill PRs (like this one, which touches both `maggy/` and `mnemos/`).

### Third review pass fixes (Copilot iteration)
- **Package renamed `src/` → `maggy/`.** The top-level `src` package name is a well-known Python packaging anti-pattern that collides with other projects. The Python code now lives at `claude-bootstrap/maggy/maggy/` and imports as `from maggy.X import Y` (matching the icpg/mnemos/skill_lint convention). `pyproject.toml` entrypoint + includes, `install.sh`, and the launcher commands were updated to `python3 -m maggy.main`.
- **SQLite PRAGMAs** — `InboxService` and `CompetitorService` open connections via a shared helper that sets `journal_mode=WAL`, `foreign_keys=ON`, and `busy_timeout=30000`. Matches the convention used by `scripts/icpg/store.py` and prevents "database is locked" errors when the FastAPI handlers race the heartbeat worker.
- **Host-safety startup check** — `create_app()` now refuses to boot when `dashboard.auth_mode="local"` is combined with a non-loopback host (anything other than `127.0.0.1`/`localhost`/`::1`). Execute spawns `claude --dangerously-skip-permissions`, so binding to `0.0.0.0` with no auth would expose that to the local network. Users are directed to switch to token auth or rebind.
- **`is_configured()` no longer accepts `linear`** — `providers.build()` raises `NotImplementedError` for Linear (stub), so treating it as configured would crash `create_app()` at startup. Now returns `False` cleanly.
- **`providers.build()`** raises `NotImplementedError` with a clear "use github or asana" hint for `linear`.
- **GitHub provider logs non-200s** in `list_tasks` — previously a 401/403/404 silently yielded an empty inbox. Now WARNING-logged with the repo slug and first 200 chars of the response body for debuggability.
- **Removed unused `timedelta` import** from `inbox.py`.

### Second review pass fixes (CodeRabbit iteration 2)
- `AsyncAnthropic` used in async methods — inbox ranking + competitor discovery + daily briefing no longer block the event loop on multi-second LLM round-trips
- RSS/Google News feed date handling uses `parsedate_to_datetime` + ISO parser and compares real `datetime` objects — RFC 822 strings aren't lexicographically ordered (day-of-week cycles weekly)
- iCPG CLI invocation fixed: `python3 -m scripts.icpg query prior --text ...` against the real argparse entrypoint, not the utility submodule `scripts.icpg.symbols` which has no `__main__`
- Background `asyncio.create_task()` reference kept in a set + `add_done_callback(discard)` so GC can't kill the TDD pipeline mid-run
- `GitHubIssuesProvider.list_followed()` and `search_tasks()` refuse to run when `repos` is empty (otherwise the query has no repo filter and searches all of public GitHub)
- `AsanaProvider.list_tasks()` drops the dead `completed_filter` variable and skips sending `completed_since=""` (Asana validator rejects empty string); filters `closed` state properly
- `install.sh` enforces Python 3.11+ minimum (was only checking `python3` existed)
- `/static/index.html`: added CSP meta tag; Font Awesome pinned with SHA-384 SRI; Tailwind Play CDN annotated with vendor-for-prod TODO
- `static/app.js`: added `jsStr()` for JS-string-context escaping in inline onclick handlers (esc() alone leaves single quotes intact — XSS via ticket titles was possible)
- `regenerateBriefing()` catches and displays errors instead of swallowing them
- `commands/maggy.md`: reads `dashboard.host`/`dashboard.port` from config before probing health (was hardcoded 8080)
- `commands/maggy-init.md`: removed the "offer to write to .env" suggestion — the runtime doesn't load that file, so it would leave tokens on disk with no reader
- `config.example.yaml`: removed the Linear section (stub only, shouldn't be in the advertised selectable set)
- `PLAN.md`: config sample aligned with the actual runtime schema (removed spurious `config:` nesting)
- `maggy/README.md`: install path no longer assumes `~/Documents/AI-Playground/...`; uses relative `cd claude-bootstrap/maggy`
- `providers/__init__.py`: `__all__` alphabetized (RUF022)
- `skills/maggy/SKILL.md`: explicit permission-model disclosure box explaining the `--dangerously-skip-permissions` tradeoff and the `working_dir` whitelist mitigations

### Added
- **Maggy — AI engineering command center** (optional extension under `maggy/`)
  - Local FastAPI + vanilla JS dashboard; install with `maggy/install.sh`, zero build step
  - Provider abstraction: `GitHubIssuesProvider`, `AsanaProvider`, `LinearProvider` (stub) implement a single `IssueTrackerProvider` Protocol — swap trackers without touching services
  - AI-prioritized inbox with 30-min SQLite cache; stale-cache fallback when provider is unavailable
  - Generic competitor discovery + RSS + Google News monitoring with daily AI briefing (cached per day)
  - TDD execute pipeline (plan → tests → implement) spawns `claude -p --dangerously-skip-permissions` locally in the right codebase, with iCPG context auto-injected from the bootstrap's iCPG CLI
  - Config-driven (`~/.maggy/config.yaml`) — no hardcoded org IDs, repo names, or competitor lists
  - `/maggy` command launches dashboard; `/maggy-init` runs interactive setup
  - `skills/maggy/SKILL.md` documents capabilities; README skills table updated
- Maggy skill included in the skills table (fixes RI002 lint error for this PR)

### Fixed
- Added YAML frontmatter to `skills/mnemos/SKILL.md` (fixes FM001 lint error that was blocking CI on main)
- Skill lint now passes across all 60 skills

### Security (Maggy)
- RSS URL validation before fetching competitor feeds — blocks loopback, link-local, private-network, and non-HTTP(S) targets (SSRF prevention)
- `safeHref()` in dashboard JS — only allows `http(s)`/`mailto` schemes in external links, blocks `javascript:`/`data:` URIs that would slip past HTML escaping
- `working_dir` validated against configured codebase roots before launching Claude Code — prevents arbitrary-cwd execution of `--dangerously-skip-permissions`
- Execute-mode input validated via `Literal["tdd", "plan"]`; typos rejected at request boundary
- GitHub `_decode_id()` returns `None` on malformed input instead of raising — surfaces as 404 not 500
- LLM ranking output validated (index range, numeric rank, dedupe) before applying

### Resilience (Maggy)
- `provider.list_tasks` failure falls back to last cached ranking (flagged `stale=true`) instead of 500
- Route-level `_require_configured()` returns 503 + onboarding hint when `~/.maggy/config.yaml` is missing, instead of dereferencing `None` services
- `is_configured()` requires provider credentials (token) in addition to org/repos; refreshes cache on each check
- Claude subprocess kill on timeout (`proc.kill()` + `await proc.wait()`), non-zero exits marked as failed sessions
- `_run_claude()` returns `(ok, output)` tuple — TDD pipeline now aborts chain on first-step failure
- Competitor news events use deterministic SHA-256 IDs with `INSERT OR IGNORE` — prevents duplicate rows on cursor reset / overlapping scans

### Changed (Maggy)
- `pyproject.toml` console script is now `maggy = "src.main:main"` (proper callable) instead of `"src.main:app"` (ASGI instance)

---

## [3.4.1] - 2026-04-10

### Fixed
- Fixed broken `build-backend` in all three pyproject.toml files (icpg, mnemos, skill_lint). Changed `setuptools.backends._legacy:_Backend` to `setuptools.build_meta`. (Community reported)

### Added
- Cheeky personality section in CLAUDE.md template for new projects

---

## [3.4.0] - 2026-04-07

### Added
- **Skill Quality Gates** — Automated linter, CI integration, and behavioral evals
  - `scripts/skill_lint/` — Python package with 20 check rules across 4 categories:
    - Frontmatter (FM001-FM009): YAML validation, name/description/field checks
    - Spec (SP001-SP003, SR001): SKILL.md existence, line count limits, skills-ref integration
    - Content (CQ001-CQ006): ASCII art detection, vague phrase detection, filler intensity, code block density, stale references, H1 heading
    - References (RI001-RI002): Cross-skill link validation, README coverage
  - CLI: `PYTHONPATH=scripts python3 -m skill_lint [--format text|json] [--severity error|warning|info] [--skill NAME] [--fail-on error|warning] skills/`
  - Inline suppression: `<!-- skill-lint: disable=SP002 -->` in first 10 lines
  - 28 unit tests covering all check modules, report formatters, and CLI
  - `.github/workflows/skill-lint.yml` — Runs linter + tests on PR/push to skills/ or scripts/skill_lint/
  - `.github/workflows/skill-review.yml` — Tessl skill review + skills-ref validation on PRs (requires TESSL_TOKEN)
  - `evals/` — 18 behavioral eval scenarios for 15 skills with deterministic and LLM-judged criteria
  - `evals/run-evals.sh` — Eval runner with baseline comparison mode
- Updated `CONTRIBUTING.md` with quality gate requirements and linter usage

### Scan Results (59 skills)
- Errors: 1 (mnemos/ missing frontmatter)
- Warnings: 85 (19 skills over 500 lines, 30+ with ASCII art)
- Clean: 3 skills

---

## [3.3.2] - 2026-04-07

### Fixed
- Removed stale `Load with: base.md` line from all 53 skills. Since v3.0, base skill loads via `@include` in CLAUDE.md, not per-skill. The leftover line caused confusion about missing files. (Fixes #13)

### Housekeeping
- Closed #10 (Gen Agent Trust Hub security audit) — false positives from scanning markdown code samples as executable code.
- Closed #12 (Dispatch discoverability) — will address skill description metadata in a future cleanup pass.
- Closed #11 (Low quality skills) — will revisit with specific eval criteria.

---

## [3.3.1] - 2026-04-03

### Added
- **Post-Compaction Task Restoration** (Two-Layer Defense)
  - `templates/mnemos-post-compact-inject.sh` — PreToolUse hook (no matcher, fires on ALL tools) that detects compaction via `.mnemos/just-compacted` marker and re-injects the full checkpoint into Claude's context. Fast path ~5ms when no compaction, ~100ms injection when triggered.
  - `build_task_narrative()` in `checkpoint.py` — Reads signals.jsonl to build human-readable summary of recent activity (files edited, read counts, focus area, error patterns). Automatically included in checkpoints.
  - `format_for_post_compact_injection()` in `checkpoint.py` — Formats checkpoint as structured restoration block with goal, constraints, activity narrative, progress, key files, git state.
  - Compaction marker system (`write_compaction_marker`, `check_compaction_marker`, `consume_compaction_marker`) — Atomic marker write/consume to prevent parallel injection.

### Changed
- **`mnemos-pre-compact.sh`** — Enhanced from advisory to assertive. Now includes inline checkpoint content in preservation instructions, writes compaction marker for Layer 2, builds task narrative from signals, and uses stronger verbatim framing.
- **`CheckpointNode`** — Added `task_narrative` (str) and `recent_files` (list[dict]) fields for richer checkpoint content.
- **`settings.json`** — Added new PreToolUse entry (no matcher) for `mnemos-post-compact-inject.sh` before the existing Edit|Write matcher.
- **`SKILL.md`** — Documented post-compaction recovery mechanism.
- **`README.md`** — Rewrote Mnemos section with two-layer defense architecture, resilience failure mode table, "why not just a plain file" rationale, and post-compaction restoration flow diagram.

## [3.3.0] - 2026-04-03

### Added

#### Mnemos — Task-Scoped Memory Lifecycle
Agents crash when context fills up. Claude Code's compaction is lossy — it summarizes everything uniformly. Mnemos solves this with typed memory, continuous fatigue monitoring, and checkpoint/resume.

- **`scripts/mnemos/`** — Python package (zero external dependencies)
  - `models.py` — MnemoNode (8 types with typed eviction policies), FatigueState, CheckpointNode
  - `store.py` — SQLite MnemoGraph storage with mnemo_nodes, checkpoints, fatigue_log tables
  - `fatigue.py` — 4-dimension fatigue model from passively observed signals (no agent cooperation needed)
  - `signals.py` — Behavioral signal collection from hooks (scope scatter, re-read ratio, error density)
  - `checkpoint.py` — CheckpointNode write/load with iCPG bridge, git state capture, formatted resume output
  - `consolidation.py` — Micro-consolidation: compress ResultNodes, evict cold ContextNodes, decay weights
  - `__main__.py` — CLI: init, status, fatigue, checkpoint, resume, consolidate, nodes, add, bridge-icpg

- **4-Dimension Fatigue Model** (all passively observed from hooks):
  - Token utilization (0.40) — real context_window.used_percentage from statusline
  - Scope scatter (0.25) — unique directories in recent tool calls (from PreToolUse)
  - Re-read ratio (0.20) — files Read more than once, strongest signal of context loss (from PreToolUse)
  - Error density (0.15) — failed tool calls ratio (from PostToolUse)
  - States: FLOW (0-0.4), COMPRESS (0.4-0.6), PRE-SLEEP (0.6-0.75), REM (0.75-0.9), EMERGENCY (0.9+)

- **Auto-Feeding Token Signal**:
  - `templates/mnemos-statusline.sh` — Statusline receives `context_window` JSON from Claude Code, writes `fatigue.json`, delegates display to ccusage (if installed) or shows simple context %
  - JSONL fallback in PostToolUse — reads conversation JSONL to estimate context usage when statusline not configured (0.75 correction factor for cache overhead, ~1-2pp accuracy)
  - `statusLine` config added to `templates/settings.json` — auto-activates on install, no separate configuration needed

- **Fatigue-Aware Hook System**:
  - `templates/mnemos-pre-edit.sh` — PreToolUse: logs file signals, reads fatigue, auto-checkpoints at 0.60+, auto-consolidates at 0.40+, includes iCPG context
  - `templates/mnemos-post-tool.sh` — PostToolUse: logs tool success/failure for error density, auto-feeds token signal from JSONL when statusline is stale
  - `templates/mnemos-session-start.sh` — SessionStart: loads checkpoint on resume, bridges iCPG state
  - `templates/mnemos-pre-compact.sh` — PreCompact: emergency checkpoint + typed preservation priorities (NEVER DROP goals/constraints, OK TO DROP file contents)
  - `templates/mnemos-stop-checkpoint.sh` — Stop: writes final session checkpoint

- **MnemoNode Eviction Policies**:
  - GoalNodes, ConstraintNodes, CheckpointNodes, HandoffNodes: NEVER evicted
  - ResultNodes, WorkingNodes, SkillNodes: compressed first (summary kept), then evictable
  - ContextNodes: evictable when activation weight drops below threshold

- **iCPG Bridge**: `mnemos bridge-icpg` imports ReasonNodes as GoalNodes, postconditions/invariants as ConstraintNodes

- **Skill + Commands**:
  - `skills/mnemos/SKILL.md` — Full skill documentation with fatigue states, CLI reference, agent instructions
  - `commands/mnemos-status.md` — `/mnemos-status` slash command
  - `commands/mnemos-checkpoint.md` — `/mnemos-checkpoint` slash command

- **Documentation**:
  - `docs/mnemos-implementation.md` — Implementation addendum for the Mnemos RFC

### Changed

#### iCPG Fixes
- `scripts/icpg/bootstrap.py` — Fixed `_get_commits()` git log parsing (was producing 0 symbols linked)
- `scripts/icpg/drift.py` — Added `check_file_drift()` for fast, file-scoped drift (O(symbols-in-file))
- `scripts/icpg/__main__.py` — Added `drift file <path>` subcommand, `_resolve_path()` for relative path handling
- `templates/icpg-pre-edit.sh` — Now includes file-scoped drift detection alongside context and constraints

#### Settings Template
- `templates/settings.json` — Added `statusLine` config for auto-feeding token signal, Mnemos hooks replace standalone iCPG hooks, added PostToolUse hook, added mnemos permission allows
- `templates/CLAUDE.md` — Added `@.claude/skills/mnemos/SKILL.md` to skill includes

---

## [3.2.0] - 2026-04-02

### Added

#### iCPG Full Implementation (Intent-Augmented Code Property Graph)
- **`scripts/icpg/`** — Python CLI package implementing the full iCPG RFC v8
  - `models.py` — ReasonNode, Symbol, Edge, DriftEvent data models with Design by Contract (preconditions, postconditions, invariants)
  - `store.py` — SQLite storage layer with 4 tables, WAL mode, indexed queries
  - `symbols.py` — Language-aware symbol extraction: Python (AST), TypeScript/JS (regex), Go, Rust, Elixir
  - `drift.py` — 6-dimension drift detection: spec, decision, ownership, test, usage, dependency
  - `contracts.py` — Design by Contract layer with LLM inference (Claude/OpenAI) and heuristic fallback
  - `vectors.py` — Tiered duplicate detection: ChromaDB → TF-IDF → exact match fallback
  - `bootstrap.py` — Git history inference: cluster commits, LLM-infer ReasonNodes, link symbols
  - `__main__.py` — CLI with subcommands: init, create, record, query, drift, bootstrap, status
  - `pyproject.toml` — pip-installable with optional deps (chromadb, sentence-transformers, openai)

- **3 Canonical Pre-Task Queries** (RFC Section 2.1):
  - `icpg query prior "<goal>"` — Vector-based duplicate detection before starting work
  - `icpg query constraints <file>` — Get invariants/contracts for files being modified
  - `icpg query risk <symbol>` — Drift score, ownership history, modification count

- **Hook Integration**:
  - `templates/icpg-pre-edit.sh` — PreToolUse hook: injects intent context + constraints before every Edit/Write
  - `templates/icpg-stop-record.sh` — Stop hook: auto-records symbols to active ReasonNode after implementation

- **Slash Commands**:
  - `commands/icpg-impact.md` — `/icpg-impact <id>` blast radius visualization
  - `commands/icpg-why.md` — `/icpg-why <symbol>` trace symbol to creating intent
  - `commands/icpg-drift.md` — `/icpg-drift` full drift report across all dimensions
  - `commands/icpg-bootstrap.md` — `/icpg-bootstrap` infer intents from git history

### Changed

#### iCPG Skill Rewrite
- **`skills/icpg/SKILL.md`** — Complete rewrite aligning with RFC v8
  - ReasonNode now carries formal contracts (preconditions, postconditions, invariants)
  - Drift formally defined as predicate failure (not vague metric)
  - 6-dimension drift model with 0-1 severity scores per dimension
  - CLI reference for all `icpg` subcommands
  - Hook integration documentation (PreToolUse + Stop)
  - Agent Teams integration section with updated pipeline

#### Agent Team iCPG Integration
- **`skills/agent-teams/agents/team-lead.md`** — Team lead now creates ReasonNodes and checks for duplicates before creating task chains
- **`skills/agent-teams/agents/feature.md`** — Feature agents query constraints/risk before implementing, auto-record symbols after
- **`skills/agent-teams/agents/quality.md`** — Quality agent runs drift checks during GREEN verify, validates spec-intent alignment
- **`skills/agent-teams/SKILL.md`** — Updated "Integration with Existing Skills" table with iCPG + code-graph entries

#### Settings Template
- **`templates/settings.json`** — Added PreToolUse hook (icpg-pre-edit.sh), Stop hook extension (icpg-stop-record.sh), icpg permission allows

---

## [3.1.0] - 2026-04-02

### Added

#### iCPG Skill (Initial Spec)
- **`skills/icpg/SKILL.md`** — Initial iCPG skill spec (now superseded by 3.2.0 full implementation)

---

## [3.0.0] - 2026-03-31

### Breaking Changes

This release aligns Claude Bootstrap with how Claude Code actually works internally. Several features that referenced non-existent infrastructure have been replaced with real Claude Code mechanisms.

- **Ralph Wiggum plugin removed** — The `/ralph-loop` command, `claude-plugins-official` marketplace, and plugin stop-hook mechanism never existed in Claude Code. All references removed.
- **TDD loops now use real Stop hooks** — Claude Code's Stop hook (exit code 2 feeds stderr back to the model) replaces the fake plugin. `scripts/tdd-loop-check.sh` runs tests/lint/typecheck after each response.
- **`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1` removed** — Agent spawning and task management are standard Claude Code features, not gated behind an env var. All references removed.
- **CLAUDE.md template uses `@include` directives** — Skills are loaded via `@.claude/skills/base/SKILL.md` syntax which Claude Code resolves at parse time (recursive, max depth 5, cycle detection).
- **Quality gates moved from CLAUDE.md to `.claude/rules/`** — Rules use YAML frontmatter with `paths:` globs for conditional activation.
- **"STRICTLY ENFORCED" / "Non-Negotiable" language removed** — Claude Code treats CLAUDE.md as user-level context (not system prompt) wrapped in `<system-reminder>` tags with a "may or may not be relevant" caveat. Aggressive language wastes tokens without creating binding constraints.

### Added

#### Stop Hook TDD Loops
- **`templates/tdd-loop-check.sh`** — Universal TDD loop script for Stop hooks
  - Runs tests, lint, typecheck after each Claude response
  - Exit 0 (all pass) = Claude stops; Exit 2 (failures) = stderr fed back to Claude
  - Iteration counter with configurable max (default 25)
  - Detects project type (Node.js/Python) and runs appropriate commands
  - Distinguishes code errors (loop) from environment errors (stop)

- **`templates/settings.json`** — Pre-configured Claude Code settings
  - Stop hook configuration for TDD loops
  - SessionStart hook for auto-context injection
  - Permission allow rules: test runners, linters, git read commands, gh CLI
  - Permission deny rules: `rm -rf`, `git push --force`, writing `.env` files
  - Ready to copy into any project's `.claude/settings.json`

#### Conditional Rules System
- **`.claude/rules/` directory** with 7 rule files using proper YAML frontmatter:
  - `quality-gates.md` — Always active: 20 lines/function, 200 lines/file, 3 params, 80% coverage
  - `tdd-workflow.md` — Always active: RED-GREEN-VALIDATE workflow
  - `security.md` — Always active: no secrets in code, parameterized queries, bcrypt
  - `react.md` — Active on `**/*.tsx`, `**/*.jsx`, `src/components/**`
  - `typescript.md` — Active on `**/*.ts`, `**/*.tsx`
  - `python.md` — Active on `**/*.py`
  - `nodejs-backend.md` — Active on `src/api/**`, `src/routes/**`, `server/**`

#### CLAUDE.local.md
- **`templates/CLAUDE.local.md`** — Private developer override template
  - Not checked into git (higher priority than project CLAUDE.md)
  - Template with common overrides: preferences, local environment, quality gate tweaks

#### Agent Definition Frontmatter
- All 6 agent definitions now use proper Claude Code frontmatter:
  - `name` — Agent identifier
  - `description` — When-to-use hint
  - `model` — Model selection (sonnet, inherit)
  - `tools` — Tool allowlist (e.g., `[Read, Glob, Grep, TaskCreate]`)
  - `disallowedTools` — Tool denylist (e.g., `[Write, Edit, Bash]`)
  - `maxTurns` — Maximum agentic turns before stopping
  - `effort` — Thinking depth (medium/high)

#### @include Directives in CLAUDE.md
- CLAUDE.md template now uses `@.claude/skills/base/SKILL.md` syntax
- Claude Code resolves these at load time (recursively inlined)
- Skills actually become part of the prompt instead of decorative text

#### CLAUDE.md Template Structure
- Added **Project Structure** section — tells Claude where things live without filesystem exploration
- Added **Key Decisions** section — prevents Claude from re-litigating settled architectural choices
- Added **Conventions** section — patterns Claude should follow (test colocation, API shape, etc.)
- Added **Don't** section — short guardrails (no .env writes, no secret leaks)
- Removed Session Persistence section (belongs in skills, not root template)

#### PreCompact Hook for Smarter Compaction
- **`templates/pre-compact.sh`** — PreCompact hook that injects project-specific preservation priorities into the compaction summarizer
  - Auto-detects project type (TypeScript, Python, Next.js, FastAPI, Flutter, etc.)
  - Finds schema files (Drizzle, Prisma, SQLAlchemy) and tells summarizer to preserve all schema discussion verbatim
  - Finds API directories and tells summarizer to preserve exact endpoint paths, request/response shapes
  - Extracts Key Decisions from CLAUDE.md and tells summarizer to reference them by name
  - Injects live git state (branch, uncommitted changes, staged files) into summary priorities
  - Tells summarizer to preserve exact error messages and fix context (not paraphrased)
  - Tells summarizer what NOT to preserve (dead ends, full file contents, formatting noise)
  - Zero overhead during normal usage — only runs when compaction fires
  - Configured in `.claude/settings.json` under `hooks.PreCompact`

#### Full Skill Frontmatter (all 57 skills)
- Added undocumented-but-functional Claude Code skill frontmatter to all 57 skills:
  - `when-to-use` — guidance for when Claude should invoke the skill
  - `user-invocable` — 11 skills are user-invocable (including code-review, codex-review, gemini-review, security, existing-repo, ticket-craft, workspace, cpg-analysis, playwright-testing, and ai-models); the remaining 46 are model-only
  - `effort` — thinking depth per skill (6 high, 47 medium, 4 low)
  - `paths` — file glob patterns for 24 language/framework/database skills (e.g., `["**/*.py"]` for Python, `["**/*.tsx"]` for React)
  - `allowed-tools` — restricted tool access for 3 review/security skills (`[Read, Glob, Grep, Bash]`)

### Changed
- `install.sh` now copies `rules/` and `templates/`, and no longer checks for the Ralph Wiggum plugin
- `iterative-development/SKILL.md` completely rewritten for Stop hooks
- `base/SKILL.md` — Ralph Wiggum auto-invoke section replaced with Stop hook explanation
- `agent-teams/SKILL.md` — Removed experimental env var requirement
- `commands/spawn-team.md` — Removed env var check, removed Shift+Up/Down and Ctrl+T UI references
- All agent definitions in `skills/agent-teams/agents/` rewritten with frontmatter
- Total files: 57 skills + 7 conditional rules + 3 templates

### Removed
- All Ralph Wiggum plugin references (`/ralph-loop`, `/plugin install`, `--completion-promise`, `<promise>` tags)
- `CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1` env var requirement
- Plugin marketplace references (`claude-plugins-official`)
- `Shift+Up/Down` and `Ctrl+T` UI interaction assumptions
- "STRICTLY ENFORCED" and "Non-Negotiable" language throughout

### Migration

```bash
cd "$(cat ~/.claude/.bootstrap-dir)"
git pull
./install.sh

# Then in each project:
claude
> /initialize-project
# Will update to v3.0.0 structure
```

**Manual steps for existing projects:**
1. Copy `templates/settings.json` to `.claude/settings.json`
2. Copy `templates/tdd-loop-check.sh` to `scripts/tdd-loop-check.sh` and `chmod +x`
3. Replace skill listings in CLAUDE.md with `@include` directives
4. Copy `rules/` files to `.claude/rules/`
5. Add `CLAUDE.local.md` to `.gitignore`
6. Remove `CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS` from environment

---

## [2.7.0] - 2026-03-23

### Added

#### Tiered Code Graph System (MCP-based)
- **Code Graph skill** (`code-graph/SKILL.md`) - Always-on code intelligence via MCP
  - "Graph first, file second" workflow — Claude queries the graph before reading files
  - Integrates with codebase-memory-mcp: 14 MCP tools, 64 languages, sub-ms queries
  - Decision tables for when to use graph vs direct file reads
  - Workflow: LOCATE → UNDERSTAND → BLAST → TRACE → CHANGE → VERIFY
  - Anti-patterns guide for common graph-ignoring mistakes

- **CPG Analysis skill** (`cpg-analysis/SKILL.md`) - Opt-in deep code analysis
  - Tier 2: Joern CPG via CodeBadger MCP (40+ tools, AST+CFG+CDG+DDG+PDG)
    - Control flow graph analysis, data flow tracing, dead code detection
    - CPGQL query examples for common analysis patterns
    - Supports 12 languages (Java, Python, TypeScript, Go, C/C++, etc.)
  - Tier 3: CodeQL MCP for interprocedural taint analysis and security auditing
    - OWASP vulnerability detection, source-to-sink data flow
    - 10+ languages including Rust (which Joern doesn't support)
  - Combined workflow: Tier 1 scope → Tier 2 flow → Tier 3 security

- **Graph tools installer** (`scripts/install-graph-tools.sh`)
  - Platform-detecting installer (macOS/Linux, ARM64/AMD64)
  - `--joern` flag for Tier 2 (Docker + Python setup)
  - `--codeql` flag for Tier 3 (CodeQL CLI + query packs)
  - `--all` flag for all tiers

- **Post-commit graph hook** (`hooks/post-commit-graph`)
  - Lightweight (~10ms) hook that signals codebase-memory-mcp file watcher
  - Filters to code files only, never blocks git workflow
  - Auto-installed by `/initialize-project`

- **Graph freshness check** (`hooks/workspace/check-graph-freshness.sh`)
  - Session-start advisory warns if graph data is stale
  - Cross-platform timestamp comparison (macOS/Linux)

#### Initialize Project Updates
- New question 4b: "Code graph analysis level?" (Standard/Deep/Security/Full)
- New Step 4b: Automatic MCP server configuration (`.mcp.json`)
- `.code-graph/` auto-added to `.gitignore`
- Post-commit graph hook auto-installed
- CLAUDE.md template now includes "Code Graph (MCP)" section
- Summary output shows graph tier configuration

### Changed
- Total skills increased from 55 to **57 skills**
- `install.sh` now copies `install-graph-tools.sh` to `~/.claude/`
- `install.sh` summary output includes graph tools commands

---

## [2.6.0] - 2026-02-14

### Added

#### AI-Native Ticket Writing
- **Ticket Craft skill** - Write Jira/Asana/Linear tickets optimized for Claude Code execution
  - INVEST+C criteria: standard INVEST plus "Claude-Ready" verification
  - 4 ticket templates: Feature, Bug, Tech Debt, Epic Breakdown
  - Claude Code Context section: file refs, pattern refs, verification commands, constraints
  - Claude Code Ready Checklist: 16-point validation before tickets enter sprint
  - Anti-patterns guide: 6 common ticket-writing mistakes that cause AI agents to fail
  - Story point calibration for AI agents (different from human estimation)
  - Epic slicing techniques: by workflow, data variation, user role, CRUD, happy path
  - Given-When-Then acceptance criteria format
  - Integration guide for Jira, Asana, Linear, and GitHub Issues
  - Maps tickets directly to the agent-teams 10-task pipeline

#### Bug Fixes
- **Fix pre-push hook false positive** - Hook was blocking pushes even when review passed with 0 Critical/High issues (fixes #8, reported by @shawnyeager)
  - `grep` pattern matched "Critical" in table headers and pass messages
  - Now checks for explicit `Status: ✅ PASS` / `Status: ❌` lines instead

#### Community Contributions
- **Flexible install directory** - Bootstrap can now be cloned anywhere, not just `~/.claude-bootstrap` (PR #9 by @victortrac)
  - Install path saved to `~/.claude/.bootstrap-dir` for runtime resolution
  - Removes fragile symlink approach
- **Workspace skill frontmatter fix** - Added missing YAML frontmatter to workspace skill (PR #9 by @victortrac)

### Changed
- Total skills increased from 54 to **55 skills**

### Contributors
- @victortrac - Flexible install path, workspace skill fix (PR #9)
- @shawnyeager - Pre-push hook bug report (#8)

---

## [2.5.0] - 2026-02-07

### Added

#### Agent Teams (Default Workflow)
- **Agent Teams skill** - Coordinated team of AI agents as the default development workflow
  - Strict TDD pipeline: Specs > Tests > Fail > Implement > Test > Review > Security > Branch > PR
  - Task dependency chains enforce pipeline ordering (no step can be skipped)
  - Multiple features run in parallel with shared verification agents
  - Quality gates at every stage with cross-agent verification

- **Default agent roster** (5 permanent agents):
  - **Team Lead** - Orchestration only (delegate mode), task breakdown, feature agent spawning
  - **Quality Agent** - TDD verification (RED/GREEN phases), spec review, coverage >= 80%
  - **Security Agent** - OWASP scanning, secrets detection, dependency audit
  - **Code Review Agent** - Multi-engine code review (Claude/Codex/Gemini)
  - **Merger Agent** - Feature branches, PR creation via `gh` CLI

- **Feature agents** - One per feature, each follows the strict pipeline end-to-end
  - Writes spec, tests, implementation, validation
  - Hands off to Quality, Review, Security, Merger at each gate

- **Agent definition files** in `skills/agent-teams/agents/`:
  - `team-lead.md`, `quality.md`, `security.md`, `code-review.md`, `merger.md`, `feature.md`
  - Copied to `.claude/agents/` during project initialization

- **`/spawn-team` command** - Spawn the agent team on any project
  - Checks prerequisites (env var, agent definitions, feature specs)
  - Spawns all agents and creates task dependency chains
  - Shows team status summary

- **10-task dependency chain per feature**:
  1. Spec → 2. Spec Review → 3. Tests → 4. RED Verify → 5. Implement →
  6. GREEN Verify → 7. Validate → 8. Code Review → 9. Security Scan → 10. Branch+PR

### Changed
- Total skills increased from 53 to **54 skills**
- `/initialize-project` Phase 6 now sets up agent team by default (replaces manual next steps)
- CLAUDE.md template includes agent teams section
- `team-coordination.md` superseded by `agent-teams.md` for automated coordination

---

## [2.4.0] - 2026-01-20

### Added

#### Multi-Repo Workspace Awareness
- **Workspace skill** - Dynamic multi-repo and monorepo awareness for Claude Code
  - Workspace topology discovery (monorepo, multi-repo, hybrid detection)
  - Dependency graph generation (who calls whom)
  - API contract extraction (OpenAPI, GraphQL, tRPC, TypeScript, Pydantic)
  - Key file identification with token estimates
  - Cross-repo capability index (search before reimplementing)
  - Token budget management (P0-P3 priority allocation)

- **`/analyze-workspace` command** - Full workspace analysis
  - Phase 1: Topology discovery (~30s)
  - Phase 2: Module analysis (~60s)
  - Phase 3: Contract extraction (~45s)
  - Phase 4: Dependency graph (~30s)
  - Phase 5: Key file identification (~30s)
  - Generates TOPOLOGY.md, CONTRACTS.md, DEPENDENCY_GRAPH.md, KEY_FILES.md, CROSS_REPO_INDEX.md

- **`/sync-contracts` command** - Lightweight incremental contract sync
  - Checks only contract source files (~15s)
  - Diff mode to preview changes
  - Validate mode to check consistency
  - Lightweight mode for hooks

#### Contract Freshness System
- **Session start hook** - Staleness check (~5s, advisory)
- **Post-commit hook** - Auto-sync when contracts change (~15s)
- **Pre-push hook** - Validation gate (~10s, blocking)
- `.contract-sources` file to track monitored files
- Freshness indicators: 🟢 Fresh, 🟡 Stale, 🔴 Outdated, ⚠️ Drift

#### Cross-Repo Change Detection
- Automatic detection when changes affect other modules
- Impact analysis with recommended action order
- Breaking change protocol

### Changed
- Total skills increased from 52 to **53 skills**
- Added 3 new commands: `/analyze-workspace`, `/sync-contracts`, `/workspace-status`
- Added 3 workspace hooks for contract freshness

---

## [2.3.0] - 2026-01-17

### Added

#### Google Gemini Code Review
- **Gemini Review skill** - Google Gemini CLI for code review with Gemini 2.5 Pro
  - 1M token context window - analyze entire repositories at once
  - Free tier: 1,000 requests/day with Google account
  - Code Review Extension: `/code-review` command in Gemini CLI
  - Headless mode for CI/CD: `gemini -p "prompt"`
  - Benchmarks: 63.8% SWE-Bench, 56.3% Qodo PR, 70.4% LiveCodeBench

- **Multi-engine code review** - `/code-review` now supports up to 3 engines
  - Claude (built-in) - quick, context-aware reviews
  - OpenAI Codex - 88% security issue detection
  - Google Gemini - 1M token context for large codebases
  - Dual engine mode - run any two engines, compare findings
  - Triple engine mode - maximum coverage for critical/security code

- **GitHub Actions workflows** for all configurations
  - Gemini-only workflow
  - Triple engine (Claude + Codex + Gemini) workflow
  - Updated dual engine workflow

### Changed
- Total skills increased from 51 to **52 skills**
- Updated `/code-review` to support engine selection: `--engine claude,codex,gemini`
- Added `--gemini` and `--all` shortcuts for common configurations

---

## [2.2.0] - 2026-01-17

### Added

#### Existing Repository Support
- **Existing Repo skill** - Analyze existing codebases, maintain structure, setup guardrails
  - Repo structure detection (monorepo, full-stack, frontend-only, backend-only)
  - Tech stack auto-detection (TypeScript, Python, Flutter, Android, etc.)
  - Convention detection (naming, imports, exports, test patterns)
  - Guardrails audit (pre-commit hooks, linting, formatting, type checking)
  - Structure preservation rules - work within existing patterns, don't reorganize
  - Gradual implementation strategy for adding guardrails to legacy projects
  - Cross-repo coordination for separate frontend/backend repos

- **`/analyze-repo` command** - Quick analysis of any existing repository
  - Directory structure mapping
  - Guardrails status audit (Husky, pre-commit, ESLint, Ruff, commitlint, etc.)
  - Convention detection and documentation
  - Generates analysis report with recommendations
  - Offers to add missing guardrails
  - **Auto-triggered** by `/initialize-project` when existing codebase detected

#### Initialize Project Enhancement
- **Auto-analysis for existing codebases** - `/initialize-project` now automatically analyzes existing repos before making changes
- **User choice after analysis** - Options: skills only, skills + guardrails, full setup, or just view analysis
- **Existing-repo skill auto-copied** - When working with existing codebases

#### Guardrails Setup (for JS/TS and Python)
- **Husky + lint-staged** setup for JavaScript/TypeScript projects
- **pre-commit framework** setup for Python projects
- **commitlint** configuration for conventional commits
- **ESLint 9 flat config** template
- **Ruff + mypy** configuration for Python

### Changed
- Total skills increased from 50 to **51 skills**
- Updated README with `/analyze-repo` usage pattern

---

## [2.1.0] - 2026-01-17

### Added

#### Mobile Development (contributed by @tyr4n7)
- **Android Java skill** - MVVM architecture, ViewBinding, Espresso testing, GitHub Actions CI
- **Android Kotlin skill** - Coroutines, Jetpack Compose, Hilt DI, MockK/Turbine testing
- **Flutter skill** - Riverpod state management, Freezed models, go_router, mocktail testing
- **Android/Flutter auto-detection** - `/initialize-project` now detects Flutter, Android Java, and Android Kotlin projects

#### Database Skills (addresses #7)
- **Firebase skill** - Firestore, Auth, Storage, real-time listeners, security rules, offline persistence
- **Cloudflare D1 skill** - Serverless SQLite with Workers, Drizzle ORM integration, migrations
- **AWS DynamoDB skill** - Single-table design, GSI patterns, SDK v3 TypeScript/Python
- **AWS Aurora skill** - Serverless v2, RDS Proxy, Data API, connection pooling for Lambda
- **Azure Cosmos DB skill** - Partition key design, consistency levels, change feed, SDK patterns

#### Code Review Enhancements
- **Codex Review skill** - OpenAI Codex CLI for code review with GPT-5.2-Codex (88% detection rate)
- **Code review engine choice** - `/code-review` now lets you choose: Claude, OpenAI Codex, or both engines
- **Dual engine review mode** - Run both Claude and Codex, compare findings, catch more issues
- **CI/CD templates** - GitHub Actions workflows for Claude, Codex, and dual-engine reviews

### Changed
- Total skills increased from 44 to **50 skills**
- Updated README with new database and mobile skill listings

### Contributors
- @tyr4n7 - Android Java, Android Kotlin, Flutter skills and auto-detection
- @johnsfuller - Feature request for database skills (#7)

---

## [2.0.0] - 2026-01-08

### Breaking Changes
- **Skills structure changed** - Skills now use folder/SKILL.md structure instead of flat .md files
  - Before: `~/.claude/skills/base.md`
  - After: `~/.claude/skills/base/SKILL.md`
- All skills now require YAML frontmatter with `name` and `description` fields

### Added
- **Validation test** (`tests/validate-structure.sh`) - Validates skills structure, commands, hooks
  - `--full` mode: All 142 checks
  - `--quick` mode: Essential checks for initialize-project
- **Phase 0 validation** in `/initialize-project` - Checks bootstrap installation before setup
- **Conversion script** (`scripts/convert-skills-structure.sh`) - Migrates flat skills to folder structure
- Install script now runs validation automatically
- Symlink created at `~/.claude-bootstrap` for easy access to validation tools

### Fixed
- Skills now load properly in Claude Code (fixes #1)
- Install script properly copies skill folders instead of merging contents

### Migration
```bash
cd ~/.claude-bootstrap
git pull
./install.sh
```

---

## [1.5.0] - 2026-01-07

### Added
- **Code Deduplication skill** - Prevent semantic code duplication with capability index
- **Team Coordination skill** - Multi-person projects with shared state and todo claiming
- `/check-contributors` command - Detect solo vs team projects
- `/update-code-index` command - Regenerate CODE_INDEX.md
- Pre-push hook for code review enforcement

### Changed
- Code reviews now mandatory before push (blocks on Critical/High issues)

---

## [1.4.0] - 2026-01-06

### Added
- **Code Review skill** - Mandatory code reviews via `/code-review`
- **Commit Hygiene skill** - Atomic commits, PR size limits
- Pre-push hooks installation script

---

## [1.3.0] - 2026-01-05

### Added
- **MS Teams Apps skill** - Teams bots and AI agents with Claude/OpenAI
- **Reddit Ads skill** - Agentic ad optimization service
- **PWA Development skill** - Service workers, caching, offline support

---

## [1.2.0] - 2026-01-04

### Added
- **Playwright Testing skill** - E2E testing with Page Objects
- **PostHog Analytics skill** - Event tracking, feature flags
- **Shopify Apps skill** - Remix, Admin API, checkout extensions

---

## [1.1.0] - 2026-01-03

### Added
- Session management with automatic state tracking
- Decision logging for architectural choices
- Code landmarks for quick navigation

---

## [1.0.0] - 2026-01-01

### Added
- Initial release with 30+ skills
- `/initialize-project` command
- TDD-first workflow with Ralph Wiggum loops
- Security-first patterns
- Support for Python, TypeScript, React, React Native
- Supabase integration skills
- AI/LLM patterns for Claude and OpenAI


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Maggy

Thanks for your interest in contributing! This project aims to make AI-assisted development more reliable and consistent.

## Philosophy

Before contributing, understand the core philosophy:

1. **Complexity is the enemy** - Every line of code is a liability
2. **Measurable constraints** - Prefer specific numbers (20 lines/fn) over vague guidance
3. **Security is non-negotiable** - All projects must pass security checks
4. **AI-first thinking** - LLMs for logic, code for plumbing
5. **Spec-driven** - Define before you build

## How to Contribute

### Adding a New Skill

1. Create a directory in `skills/` with a lowercase hyphenated name
2. Add `SKILL.md` with YAML frontmatter:
   ```markdown
   ---
   name: my-skill
   description: One-line description of what this skill does
   when-to-use: When to activate this skill
   user-invocable: true
   effort: medium
   ---
   # My Skill

   ## Core Principles
   ...
   ```
3. Include these sections:
   - Core principles with measurable constraints
   - Project structure (if applicable)
   - Patterns with code examples (>= 1 per 50 lines)
   - Anti-patterns list
4. Keep under 500 lines (ideal: under 300)
5. Run the linter before submitting:
   ```bash
   PYTHONPATH=scripts python3 -m skill_lint --skill my-skill skills/
   ```
6. Update `README.md` to include the new skill

### Quality Gates

All skills must pass the automated linter before merge:

```bash
# Lint all skills
PYTHONPATH=scripts python3 -m skill_lint skills/

# Lint a single skill
PYTHONPATH=scripts python3 -m skill_lint --skill python skills/

# JSON output for CI
PYTHONPATH=scripts python3 -m skill_lint --format json skills/
```

**Checks enforced:**
- **FM001-FM009**: YAML frontmatter (name, description, format, fields)
- **SP001-SP003**: Spec compliance (SKILL.md exists, line count limits)
- **CQ001-CQ006**: Content quality (no ASCII art, no vague phrases, code examples)
- **RI001-RI002**: Cross-references (valid skill links, README listing)

Suppress known issues with inline comments:
```markdown
<!-- skill-lint: disable=SP002 -->
```

### Improving Existing Skills

1. Keep changes focused on one improvement
2. Maintain the existing structure
3. Ensure examples are correct and tested
4. Update version comments if the change is significant

### Updating the Initialize Command

1. Test changes locally before submitting
2. Ensure idempotency - running twice shouldn't break anything
3. Preserve user customizations (never overwrite `_project_specs/`)

## Skill Guidelines

### Do

- Use specific, measurable constraints
- Provide working code examples
- Include anti-patterns with explanations
- Keep skills focused on one topic
- Reference other skills when building on them

### Don't

- Use vague guidance ("write clean code")
- Include time estimates
- Add features beyond what's needed
- Break existing projects when run as an update

## Testing Your Changes

```bash
# Install your changes
./install.sh

# Test on a new project
mkdir test-project && cd test-project
claude
> /initialize-project

# Test on an existing project
cd existing-project
claude
> /initialize-project
# Should update skills without breaking existing config
```

## Pull Request Process

1. Fork the repository
2. Create a feature branch (`git checkout -b feature/new-skill`)
3. Make your changes
4. Test locally
5. Submit PR with clear description of changes

## Code of Conduct

- Be respectful and constructive
- Focus on technical merit
- Welcome newcomers
- Share knowledge freely

## Questions?

Open an issue for:
- Bug reports
- Feature requests
- Clarification on philosophy
- Help with implementation


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2025 Ali Naqi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# Maggy

> **From opinionated Claude Code setup to autonomous AI engineering platform.**

Maggy started as an opinionated project initialization system for Claude Code — skills, TDD hooks, quality gates. It has evolved into a full autonomous engineering command center: interactive chat with session takeover, multi-agent orchestration in containers, P2P mesh networking across machines, AI-prioritized task triage, competitor intelligence, and process analytics. The guardrails that keep AI-generated code simple, secure, and verifiable are still the foundation — but now they power an end-to-end autonomous engineering workflow.

**v5.0.0** — Interactive Chat (`--resume` session takeover), Polyphony (container-isolated multi-agent orchestration), P2P Mesh (cross-machine session sync), auto-bootstrap, grouped dashboard navigation.

## Core Philosophy

```
┌────────────────────────────────────────────────────────────────┐
│  TDD LOOPS VIA STOP HOOKS                                      │
│  ─────────────────────────────────────────────────────────────│
│  Stop hooks run tests after each Claude response.              │
│  Failures feed back automatically. Claude iterates until green.│
│  Real Claude Code infrastructure — no plugins needed.          │
├────────────────────────────────────────────────────────────────┤
│  TESTS FIRST, ALWAYS                                           │
│  ─────────────────────────────────────────────────────────────│
│  Features: Write tests → Watch them fail → Implement → Pass    │
│  Bugs: Find test gap → Write failing test → Fix → Pass         │
│  No code ships without a test that failed first.               │
├────────────────────────────────────────────────────────────────┤
│  SIMPLICITY IS THE GOAL                                        │
│  ─────────────────────────────────────────────────────────────│
│  20 lines per function │ 200 lines per file │ 3 params max     │
│  Enforced via .claude/rules/ with paths: frontmatter.          │
├────────────────────────────────────────────────────────────────┤
│  SECURITY BY DEFAULT                                           │
│  ─────────────────────────────────────────────────────────────│
│  No secrets in code │ Permission deny rules for .env files     │
│  Dependency scanning │ Pre-commit hooks │ CI enforcement       │
├────────────────────────────────────────────────────────────────┤
│  AGENT TEAMS BY DEFAULT                                        │
│  ─────────────────────────────────────────────────────────────│
│  Every project runs as a coordinated team of AI agents.        │
│  Agent definitions use proper frontmatter: tools, model,       │
│  maxTurns, effort, disallowedTools.                            │
├────────────────────────────────────────────────────────────────┤
│  CONDITIONAL RULES                                             │
│  ─────────────────────────────────────────────────────────────│
│  Rules in .claude/rules/ activate based on file paths.         │
│  React rules only load when editing .tsx files.                │
│  Python rules only load when editing .py files.                │
│  Saves tokens. Reduces noise. More targeted guidance.          │
└────────────────────────────────────────────────────────────────┘
```

## Quick Start

```bash
# Clone and install (clone anywhere you like)
git clone https://github.com/alinaqi/claude-bootstrap.git
cd claude-bootstrap && ./install.sh

# In any project directory
claude
> /initialize-project
```

Claude will:
1. **Validate tools** - Check gh, vercel, supabase CLIs
2. **Ask questions** - Language, framework, AI-first?, database, graph analysis level
3. **Set up repository** - Create or connect GitHub repo
4. **Create structure** - Skills, rules, settings, security, CI/CD, specs, todos
5. **Copy settings.json** - Pre-configured permissions and Stop hooks
6. **Generate CLAUDE.md** - With `@include` directives for modular skills
7. **Generate CLAUDE.local.md** - Template for private developer overrides
8. **Spawn agent team** - Deploy Team Lead + Quality + Security + Review + Merger + Feature agents

## Cross-Tool Compatibility (Claude + Kimi + Codex)

Maggy works with **Claude Code**, **Kimi CLI**, and **OpenAI Codex CLI**. All three use the same `SKILL.md` format.

| Feature | Claude Code | Kimi CLI | Codex CLI |
|---------|-------------|----------|-----------|
| Skills | `.claude/skills/` | `.kimi/skills/` (also reads `.claude/`) | `.codex/skills/` |
| Project instructions | `CLAUDE.md` | (uses skills) | `AGENTS.md` |
| Hooks config | `settings.json` | `config.toml` | `config.toml` |

**`install.sh`** auto-detects installed tools and installs skills to all of them.

**`/sync-agents`** syncs project config across tools on demand.

```bash
# Install tools
curl -L code.kimi.com/install.sh | bash     # Kimi
npm i -g @openai/codex                       # Codex

# Reinstall to pick up new tools
cd claude-bootstrap && ./install.sh

# In any project, sync cross-tool config
claude
> /sync-agents
```

## Cross-Agent Intelligence

When multiple AI CLI tools are installed, Maggy enables intelligent collaboration between them.

### Codex Auto-Review (Stop Hook)

After tests pass, Codex automatically reviews your diff for critical bugs and security issues. Runs as a Stop hook between TDD and iCPG recording.

```
Stop hook order:
1. tdd-loop-check.sh     → tests pass?
2. codex-auto-review.sh  → Codex reviews diff (NEW)
3. icpg-stop-record.sh   → record symbols
4. mnemos-checkpoint.sh   → save memory
```

- Exit 0 = no critical issues found
- Exit 2 = critical/high issues feed back to Claude for fixing
- Gracefully skips if Codex not installed

### Kimi Delegation (Token Optimization)

Claude checks iCPG blast radius and delegates small tasks to Kimi automatically — the user doesn't run anything:

| Blast Radius | Claude's Action |
|-------------|----------------|
| 1-3 files | Saves context via `mnemos checkpoint`, runs `kimi --print -y -p "..."` with context + task |
| 4-8 files | Asks user, then delegates or handles directly |
| 9+ files | Handles directly (needs full context window) |

Context transfer uses structured state (mnemos checkpoints + iCPG constraints), not raw conversation.

### iCPG + Mnemos (Always-On for All Agents)

All three tools run the same iCPG pre-task queries and Mnemos memory lifecycle:

```bash
# Before any code change (Claude, Kimi, or Codex):
icpg query prior "<goal>"        # check for duplicate work
icpg query constraints <file>    # check invariants
icpg query risk <symbol>         # check fragility

# Memory management:
mnemos add goal "<task>"         # at task start
mnemos checkpoint                # at sub-goal boundaries
```

## How TDD Loops Work (Stop Hooks)

**No plugins. No fake commands.** Claude Code's Stop hook runs a script when Claude finishes a response. Exit code 2 feeds stderr back to Claude and continues the conversation.

```
┌─────────────────────────────────────────────────────────────┐
│  1. You say: "Add email validation to signup"               │
│  2. Claude writes tests + implementation                    │
│  3. Claude finishes response                                │
│  4. Stop hook runs: npm test && npm run lint                │
│  5a. All pass (exit 0) → Done!                              │
│  5b. Failures (exit 2) → stderr fed back to Claude          │
│  6. Claude sees failures, fixes, finishes again             │
│  7. Stop hook runs again → repeat until green               │
└─────────────────────────────────────────────────────────────┘
```

**Configuration** in `.claude/settings.json`:

```json
{
  "hooks": {
    "Stop": [{
      "hooks": [{
        "type": "command",
        "command": "scripts/tdd-loop-check.sh",
        "timeout": 60,
        "statusMessage": "Running tests..."
      }]
    }]
  }
}
```

The `tdd-loop-check.sh` script runs tests, lint, and typecheck. It tracks iteration count (max 25) and distinguishes code errors (loop) from environment errors (stop).

## @include Directives

CLAUDE.md uses `@include` to modularly load skills:

```markdown
# CLAUDE.md
@.claude/skills/base/SKILL.md
@.claude/skills/iterative-development/SKILL.md
@.claude/skills/security/SKILL.md
```

These are **resolved at load time** by Claude Code — the content is recursively inlined (max depth 5, cycle detection built in). This means skills actually become part of the prompt instead of just being listed as text.

## Conditional Rules

Rules in `.claude/rules/` use YAML frontmatter with `paths:` to activate only when relevant files are being edited:

```yaml
# .claude/rules/react.md
---
paths: ["src/components/**", "**/*.tsx"]
---
Prefer functional components with hooks...
```

```yaml
# .claude/rules/python.md
---
paths: ["**/*.py"]
---
Use type hints, pytest, ruff...
```

**Included rules:**

| Rule | Activates When |
|------|----------------|
| `quality-gates.md` | Always (no paths: filter) |
| `tdd-workflow.md` | Always |
| `security.md` | Always |
| `react.md` | Editing .tsx/.jsx files |
| `typescript.md` | Editing .ts/.tsx files |
| `python.md` | Editing .py files |
| `nodejs-backend.md` | Editing api/routes/server files |

## Smarter Compaction (PreCompact Hook)

Claude Code's built-in compaction fires at ~83% context and summarizes everything into 20K tokens using a generic 9-section template. It doesn't know what YOUR project cares about.

The PreCompact hook fixes this by injecting **project-specific preservation priorities** into the summarizer:

```
┌─────────────────────────────────────────────────────────────┐
│  Built-in compaction:                                       │
│  "Summarize this conversation" → generic summary            │
├─────────────────────────────────────────────────────────────┤
│  With PreCompact hook:                                      │
│  "Summarize, but preserve ALL schema decisions verbatim,    │
│   keep exact error messages, keep API contract details,     │
│   reference these Key Decisions by name, and here's the     │
│   current git state to include" → project-aware summary     │
└─────────────────────────────────────────────────────────────┘
```

The hook auto-detects:
- **Project type** (TypeScript/Next.js, Python/FastAPI, Flutter, etc.)
- **Schema files** (Drizzle, Prisma, SQLAlchemy) → tells summarizer to preserve schema discussion
- **API directories** → tells summarizer to preserve endpoint paths and contracts
- **Key Decisions from CLAUDE.md** → tells summarizer to reference them by name
- **Git state** → injects branch, uncommitted changes, staged files

Zero overhead during normal usage. Only runs when compaction actually fires.

## Mnemos — Task-Scoped Memory Lifecycle

Claude Code's built-in compaction is lossy and unreliable. It sometimes doesn't fire, `/compact` and `/clear` can fail (especially in multi-agent executions), and crashes/restarts lose all context. Mnemos provides **disk-persistent structured state** that survives all of these failure modes.

```
┌─────────────────────────────────────────────────────────────┐
│  DEFAULT CLAUDE CODE          vs  WITH MNEMOS               │
├─────────────────────────────────────────────────────────────┤
│  Blind until 83.5%               Continuous 4-dim monitoring│
│  Sudden hard compaction           Graduated: 40→60→75→83%   │
│  Uniform summarization            Typed: goals never evict  │
│  No cross-session memory          Auto checkpoint/resume    │
│  Crash = total context loss       Crash = resume from disk  │
│  Multi-agent: no shared state     Per-agent structured state│
│  No behavioral awareness          Detects re-reads, scatter │
└─────────────────────────────────────────────────────────────┘
```

### Post-Compaction Task Restoration (Two-Layer Defense)

When compaction fires, the built-in summarizer often drops task-specific state. Mnemos uses two independent layers to guarantee restoration:

```
BEFORE COMPACTION                    AFTER COMPACTION

PreCompact hook fires                First tool call → PreToolUse fires
├── Write emergency checkpoint       ├── Detect ".mnemos/just-compacted" marker
├── Build task narrative from        ├── Read checkpoint-latest.json
│   signals.jsonl (files, tools)     ├── Output full checkpoint into context
├── Output STRONG preservation       ├── Delete marker (one-shot)
│   instructions to summarizer       └── Claude now has: summary + checkpoint
└── Write ".mnemos/just-compacted"
    marker file                      = Task fully restored
```

**Layer 1** (best-effort): PreCompact tells the summarizer what to keep, including inline checkpoint content with typed eviction priorities.

**Layer 2** (guaranteed): Post-compaction injection via PreToolUse re-injects the full checkpoint on the first tool call after compaction. Doesn't depend on the summarizer. Fast path ~5ms when no compaction occurred.

### Why Not Just Write to a Plain File?

You could — but you'd immediately face: what format? When to update? How to distinguish "this is critical" from "this is nice to have"? The MnemoGraph's typed nodes solve this:

| Node Type | Eviction Policy | Example |
|-----------|----------------|---------|
| GoalNode | NEVER evict | "Implement auth module" |
| ConstraintNode | NEVER evict | "API backward compatibility" |
| ResultNode | Compress first | "JWT middleware tested" → summary kept |
| WorkingNode | Compress first | Current reasoning / in-progress analysis |
| ContextNode | Evictable | File contents → re-read from disk |

Without typed priorities, a checkpoint is just a blob. With them, the system knows goals > constraints > working memory > context, and makes intelligent decisions about what to restore within token budgets.

### Resilience Beyond Normal Compaction

The real value isn't the happy path — it's when things go wrong:

| Failure Mode | CC Built-in | Mnemos |
|---|---|---|
| Session crash/collapse | Context gone | Checkpoint on disk survives |
| `/compact` doesn't fire | Truncation at limit | Fatigue hooks wrote checkpoints earlier |
| Multi-agent child dies | No recovery | Child's `.mnemos/` has structured state |
| Forced restart | Generic summary | SessionStart reloads full checkpoint |
| `/clear` fails in multi-agent | Stuck in weird state | MnemoGraph is independent of CC's state |

### Fatigue Model

4 dimensions passively observed from hooks — no agent cooperation needed:

| Dimension | Weight | Signal Source | Detects |
|-----------|--------|---------------|---------|
| Token utilization | 0.40 | Statusline JSON | How full the context window is |
| Scope scatter | 0.25 | PreToolUse file paths | Agent bouncing between directories |
| Re-read ratio | 0.20 | PreToolUse Read calls | Agent re-reading files (context loss) |
| Error density | 0.15 | PostToolUse outcomes | Agent struggling (high error rate) |

Fatigue states: **FLOW** (0-0.4) → **COMPRESS** (0.4-0.6) → **PRE-SLEEP** (0.6-0.75) → **REM** (0.75-0.9) → **EMERGENCY** (0.9+). The fatigue model ensures checkpoints are written *before* things go wrong — so when a crash happens at 0.85, you have a recent checkpoint from 0.6.

### CLI

```bash
mnemos init                    # Initialize .mnemos/
mnemos status                  # Node counts + fatigue
mnemos fatigue                 # Detailed 4-dimension breakdown
mnemos checkpoint --force      # Write checkpoint now
mnemos resume                  # Output checkpoint for session inject
mnemos add goal "Build auth"   # Create a GoalNode
mnemos bridge-icpg             # Import iCPG ReasonNodes
```

**Overhead:** ~5ms per tool call (fast path), 84KB on disk. Token signal auto-feeds via statusline.

## iCPG — Intent-Augmented Code Property Graph

iCPG tracks *why* code exists, not just what it does. Every code change is linked to a ReasonNode that captures the intent, postconditions, and invariants.

```bash
icpg create "Implement auth" --scope src/auth/   # Create intent
icpg record src/auth/middleware.ts                # Link symbols
icpg query constraints src/auth/middleware.ts     # Get invariants
icpg drift                                        # Check for drift
icpg bootstrap                                    # Infer from git history
```

**Pre-Task Queries** (injected automatically via PreToolUse hook):
- `icpg query context <file>` — What intents touch this file?
- `icpg query constraints <file>` — What invariants must hold?
- `icpg drift file <file>` — Has this file drifted from its intent?

**6-Dimension Drift Detection:** spec drift, decision drift, ownership drift, test drift, usage drift, dependency drift.

## Maggy Dashboard — AI Engineering Command Center (Optional)

Maggy is a full-featured AI engineering command center. Install once, point it at your codebases and issue tracker, and get an interactive dashboard with chat, task triage, competitor intelligence, process analytics, and P2P session sync.

```bash
cd maggy/maggy
./install.sh

# Edit ~/.maggy/config.yaml — set your org, GitHub repos, codebase paths
export GITHUB_TOKEN=ghp_...
export ANTHROPIC_API_KEY=sk-ant-...

python3 -m maggy.main   # Open http://localhost:8080
```

Or from inside any Claude Code session:

```
/maggy-init   # Interactive setup wizard
/maggy        # Launch dashboard
```

### What it does

- **Interactive Chat** — auto-connects to all active Claude/Codex/Kimi sessions, SSE streaming, session continuity via `--resume`, path-based history matching
- **AI-prioritized Tasks** — Claude ranks your open issues by urgency, OKR alignment, and recency. 30-min SQLite cache with stale-cache fallback.
- **One-click Execute** — spawns `claude -p` locally in the right codebase, with iCPG context pre-injected. Runs a TDD pipeline, then commits locally for your review.
- **Competitor Intelligence** — AI-discovered competitors in whatever domain you configure, plus daily news briefing from RSS + Google News.
- **Process Insights** — CLI session history analysis, health signals, self-improvement recommendations, event spine queries.
- **P2P Mesh** — WebSocket-based multi-node session sync and handoff across machines, org-scoped networks, state quarantine.
- **Auto-Bootstrap** — all services seed themselves on startup (history, CIKG, events). No empty tabs.
- **Provider-agnostic** — GitHub Issues, Asana, or (stubbed) Linear. Swap trackers without touching services.

### Dashboard Navigation

Navigation is grouped by intent — 3 groups instead of 9 flat tabs:

| Group | Tabs | Purpose |
|-------|------|---------|
| **Work** | Chat, Tasks, Watching | Do things — chat with Claude, triage issues |
| **Intel** | Competitors, Insights | Learn things — competitor news, session analytics |
| **System** | Budget, Models, Forge, Settings | Configure — spend limits, model routing, MCP gaps |

Chat is the default tab — auto-connects to all running CLI sessions on load.

### Architecture

```
maggy/
├── maggy/                           # optional dashboard — run ./install.sh to enable
│   ├── maggy/                       # Python package (importable as `maggy`)
│   │   ├── main.py                  # FastAPI entry + auto-bootstrap
│   │   ├── config.py                # ~/.maggy/config.yaml loader
│   │   ├── providers/               # GitHub, Asana, Linear (stub)
│   │   ├── services/                # chat, inbox, competitor, executor, activity
│   │   ├── api/                     # REST endpoints (chat, mesh, process, etc.)
│   │   ├── mesh/                    # P2P networking (discovery, sync, WebSocket)
│   │   ├── process/                 # Process intelligence (patterns, signals, router)
│   │   ├── history/                 # CLI session history parsers (Claude, Codex, Kimi)
│   │   ├── improve/                 # Self-improvement (signals, analyzer)
│   │   ├── cikg/                    # Code Intelligence Knowledge Graph
│   │   ├── engram/                  # Memory entries (write/query/expire)
│   │   ├── event_spine/             # Structured event emission + querying
│   │   ├── forge/                   # MCP capability gap detection
│   │   ├── heartbeat/               # Scheduled jobs (history, engram, mesh sync)
│   │   └── static/                  # Dashboard (Tailwind + vanilla JS, no build step)
│   ├── tests/                       # 468 tests
│   └── install.sh                   # one-line install
├── commands/maggy.md                # /maggy command
├── commands/maggy-init.md           # /maggy-init wizard
└── skills/maggy/SKILL.md            # skill reference
```

### Config-driven, no hardcoded anything

One `~/.maggy/config.yaml` drives everything — org name, domain, repos, codebase paths, competitor categories. No hardcoded board IDs or team lists.

```yaml
org: { name: "Acme Corp", domain: "fintech" }
issue_tracker:
  provider: "github"           # or "asana"
  github:
    org: "acmecorp"
    repos: ["acmecorp/api", "acmecorp/web"]
codebases:
  - { path: "~/dev/acmecorp/api", key: "api" }
  - { path: "~/dev/acmecorp/web", key: "web" }
competitors:
  categories: ["fintech", "embedded-finance"]
```

### Safety model

Execute and Chat both run Claude Code with `--dangerously-skip-permissions` so subprocesses aren't blocked waiting on approval prompts with no terminal attached. Mitigations in place:

- `working_dir` and `project_path` are **validated against configured codebase roots** — both Execute and Chat reject arbitrary filesystem paths
- **Per-session streaming lock** — `asyncio.Lock` prevents concurrent subprocess spawning via the Chat API
- Dashboard **refuses to boot** if `auth_mode="local"` is combined with a non-loopback host (would expose Execute on the local network)
- RSS URLs **SSRF-validated** before fetching (blocks loopback, private, link-local)
- `CLAUDECODE` env var stripped from subprocesses to allow nested Claude sessions
- **No-cache static middleware** — `Cache-Control: no-store` prevents stale JS

See `maggy/README.md` for the full hardening notes.

### P2P Mesh Network

Multi-node session sync and handoff across machines. Each Maggy instance is a mesh peer that can share memory, discover other nodes, and synchronize state.

| Component | What it does |
|-----------|-------------|
| **Peer Discovery** | Registry of known peers with address, org, last-seen tracking |
| **Git Discovery** | Auto-discovers peers from shared git remotes across configured codebases |
| **WebSocket Server/Client** | Bidirectional real-time communication between peers |
| **Mesh Protocol** | 7 message types: `hello`, `share`, `request`, `response`, `quarantine`, `promote`, `heartbeat` |
| **Quarantine** | Untrusted data from peers is quarantined until reviewed — prevents poisoned memory injection |
| **Org Scoping** | Peers are filtered by org key so only your team's nodes connect |
| **Provenance** | Tracks origin of shared data (which peer, when, confidence level) |

Configure in `~/.maggy/config.yaml`:

```yaml
mesh:
  enabled: true
  port: 8080
  orgs: ["my-team"]
  git_discovery: true
  share_interval: 600
```

### Engram Memory

Persistent memory system with typed records, namespace isolation, and multi-path retrieval. Engrams survive across sessions — they're stored in SQLite, not in-context.

| Field | Purpose |
|-------|---------|
| `memory_type` | `fact`, `decision`, `code_ref`, `handoff` |
| `origin` | `explicit` (user-created), `inferred` (AI-derived), `mesh` (from peer) |
| `validity` | `active`, `superseded`, `expired` |
| `confidence` | 0.0-1.0 trust score |
| `namespace` | Project/session scoping |
| `expires_at` | Optional TTL for auto-expiry |

Retrieval paths: by namespace, by type, by keyword, by tag, or most recent. The heartbeat scheduler runs periodic expiry to clean stale entries.

### Event Spine

Structured event emission and querying across all Maggy services. Every significant action (task executed, competitor discovered, history analyzed, self-improvement run) emits a typed event with a standard header.

Events are stored in SQLite and queryable via the `/api/events` endpoint. The Insights tab visualizes event streams for debugging and auditing service behavior.

### Other Subsystems

| Subsystem | Purpose |
|-----------|---------|
| **CIKG** | Code Intelligence Knowledge Graph — codebase nodes, technology detection, landscape queries |
| **Forge** | MCP capability gap detection — scans filesystem patterns, suggests MCP tools to fill gaps |
| **History** | CLI session history parsers for Claude, Codex, and Kimi — topic extraction, session patterns |
| **Improve** | Self-improvement — signal collection, health scoring, actionable recommendations |
| **Budget** | Daily token spend limits with per-provider breakdown |
| **Model Router** | Reward-based heatmap for model selection by task type |
| **Heartbeat** | Scheduled jobs — history refresh, engram expiry, self-improvement, mesh sync |

## Pre-configured Permissions

`.claude/settings.json` includes permission rules so users don't get pestered for routine operations:

```json
{
  "permissions": {
    "allow": [
      "Bash(npm test *)",
      "Bash(npm run lint *)",
      "Bash(pytest *)",
      "Bash(git status *)",
      "Bash(gh pr *)"
    ],
    "deny": [
      "Bash(rm -rf *)",
      "Bash(git push --force *)",
      "Write(.env)",
      "Write(.env.*)"
    ]
  }
}
```

## CLAUDE.local.md (Private Overrides)

Each developer gets a gitignored `CLAUDE.local.md` for personal preferences:

```markdown
# My Preferences
- I prefer verbose explanations
- My local DB runs on port 5433
- Use pnpm instead of npm
```

This loads at **higher priority** than project `CLAUDE.md` — personal preferences override team config without polluting the repo.

## Agent Teams

Every project runs as a coordinated team of AI agents with **proper frontmatter definitions**:

```yaml
# .claude/agents/team-lead.md
---
name: team-lead
description: Orchestrates the agent team
model: sonnet
tools: [Read, Glob, Grep, TaskCreate, TaskUpdate, TaskList, TaskGet, SendMessage]
disallowedTools: [Write, Edit, Bash]
maxTurns: 50
effort: high
---
```

**Default Team:**

| Agent | Role | Can Edit Code? |
|-------|------|----------------|
| **Team Lead** | Orchestrates, assigns tasks (never writes code) | No |
| **Quality Agent** | Verifies RED/GREEN TDD phases, coverage >= 80% | No |
| **Security Agent** | OWASP scanning, secrets detection, dependency audit | No |
| **Code Review Agent** | Multi-engine reviews | No |
| **Merger Agent** | Creates feature branches and PRs via `gh` CLI | No |
| **Feature Agent (x N)** | One per feature, follows strict TDD pipeline | Yes |

**Pipeline (enforced by task dependencies):**

```
Spec > Spec Review > Tests > RED Verify > Implement >
GREEN Verify > Validate > Code Review > Security > Branch+PR
```

```bash
# Auto-spawned by /initialize-project, or manually:
/spawn-team
```

## What Gets Created

```
your-project/
├── .claude/
│   ├── agents/               # Agent definitions with frontmatter
│   │   ├── team-lead.md      # name, model, tools, disallowedTools, maxTurns
│   │   ├── quality.md
│   │   ├── security.md
│   │   ├── code-review.md
│   │   ├── merger.md
│   │   └── feature.md
│   ├── rules/                # Conditional rules (paths: frontmatter)
│   │   ├── quality-gates.md  # Always active
│   │   ├── tdd-workflow.md   # Always active
│   │   ├── security.md       # Always active
│   │   ├── react.md          # Active on .tsx/.jsx files
│   │   ├── typescript.md     # Active on .ts/.tsx files
│   │   ├── python.md         # Active on .py files
│   │   └── nodejs-backend.md # Active on api/routes/server files
│   ├── skills/               # Skills loaded via @include
│   │   ├── base/SKILL.md
│   │   ├── iterative-development/SKILL.md
│   │   ├── security/SKILL.md
│   │   ├── mnemos/SKILL.md
│   │   ├── cross-agent-delegation/SKILL.md
│   │   └── [framework]/SKILL.md
│   └── settings.json         # Permissions + hooks + statusline
├── scripts/
│   ├── tdd-loop-check.sh     # Stop hook script for TDD loops
│   ├── icpg/                 # Intent-Augmented Code Property Graph
│   └── mnemos/               # Task-Scoped Memory Lifecycle
├── .mnemos/                  # Mnemos state (auto-created, gitignored)
│   ├── mnemo.db              # SQLite MnemoGraph
│   ├── fatigue.json          # Live fatigue signal
│   ├── signals.jsonl         # Behavioral signal log
│   └── checkpoint-latest.json # Most recent checkpoint
├── .github/workflows/
│   ├── quality.yml
│   └── security.yml
├── _project_specs/
│   ├── features/
│   └── todos/
├── CLAUDE.md                 # @include directives, project context
└── CLAUDE.local.md           # Private developer overrides (gitignored)
```

## Commit Hygiene

```
┌─────────────────────────────────────────────────────────────┐
│  COMMIT SIZE THRESHOLDS                                     │
├─────────────────────────────────────────────────────────────┤
│  OK:     ≤ 5 files,  ≤ 200 lines                           │
│  WARN:   6-10 files, 201-400 lines  → "Commit soon"        │
│  STOP:   > 10 files, > 400 lines    → "Commit NOW"         │
└─────────────────────────────────────────────────────────────┘
```

## Skills Included (62 Skills)

### Core Skills
| Skill | Purpose |
|-------|---------|
| `base.md` | Universal patterns, constraints, TDD workflow, atomic todos |
| `iterative-development.md` | TDD loops via Stop hooks (replaces Ralph Wiggum) |
| `mnemos.md` | Task-scoped memory lifecycle — fatigue monitoring, checkpoints, typed compaction |
| `icpg.md` | Intent-augmented code property graph — track why code exists, detect drift |
| `code-review.md` | Mandatory code reviews - Claude, Codex, Gemini, or multi-engine |
| `codex-review.md` | OpenAI Codex CLI code review |
| `gemini-review.md` | Google Gemini CLI code review, 1M token context |
| `workspace.md` | Multi-repo workspace awareness, contract tracking |
| `commit-hygiene.md` | Atomic commits, PR size limits |
| `code-deduplication.md` | Prevent semantic duplication with capability index |
| `agent-teams.md` | Agent team workflow with proper frontmatter definitions |
| `ticket-craft.md` | AI-native ticket writing optimized for Claude Code |
| `maggy.md` | Optional local AI command center — AI-prioritized inbox, one-click TDD execute, competitor intelligence. See the [Maggy section](#maggy-dashboard--ai-engineering-command-center-optional) for the full docs |
| `team-coordination.md` | Multi-person projects, shared state, handoffs |
| `code-graph.md` | Persistent code graph via MCP |
| `cpg-analysis.md` | Deep CPG analysis - Joern + CodeQL |
| `security.md` | OWASP patterns, secrets management |
| `credentials.md` | Centralized API key management |
| `session-management.md` | Context preservation, resumability |
| `project-tooling.md` | gh, vercel, supabase CLI + deployment |
| `existing-repo.md` | Analyze existing repos, setup guardrails |
| `cross-agent-delegation.md` | Cross-agent task routing, Codex auto-review, Kimi delegation |
| `polyphony.md` | Multi-agent orchestration with container-isolated workspaces |

### Language & Framework Skills
| Skill | Purpose |
|-------|---------|
| `python.md` | Python + ruff + mypy + pytest |
| `typescript.md` | TypeScript strict + eslint + jest |
| `nodejs-backend.md` | Express/Fastify patterns, repositories |
| `react-web.md` | React + hooks + React Query + Zustand |
| `react-native.md` | Mobile patterns, platform-specific code |
| `android-java.md` | Android Java with MVVM, ViewBinding, Espresso |
| `android-kotlin.md` | Android Kotlin with Coroutines, Jetpack Compose, Hilt |
| `flutter.md` | Flutter with Riverpod, Freezed, go_router |

### UI Skills
| Skill | Purpose |
|-------|---------|
| `ui-web.md` | Web UI - Tailwind, dark mode, accessibility |
| `ui-mobile.md` | Mobile UI - React Native, iOS/Android patterns |
| `ui-testing.md` | Visual testing |
| `playwright-testing.md` | E2E testing - Playwright, Page Objects |
| `user-journeys.md` | User experience flows |
| `pwa-development.md` | Progressive Web Apps - service workers, offline |

### Database & Backend Skills
| Skill | Purpose |
|-------|---------|
| `database-schema.md` | Schema awareness |
| `supabase.md` | Core Supabase CLI, migrations, RLS |
| `supabase-nextjs.md` | Next.js + Supabase + Drizzle ORM |
| `supabase-python.md` | FastAPI + Supabase |
| `supabase-node.md` | Express/Hono + Supabase |
| `firebase.md` | Firebase Firestore, Auth, Storage |
| `cloudflare-d1.md` | Cloudflare D1 SQLite with Workers |
| `aws-dynamodb.md` | AWS DynamoDB single-table design |
| `aws-aurora.md` | AWS Aurora Serverless v2 |
| `azure-cosmosdb.md` | Azure Cosmos DB |

### AI & Agentic Skills
| Skill | Purpose |
|-------|---------|
| `agentic-development.md` | Build AI agents |
| `llm-patterns.md` | AI-first apps, LLM testing |
| `ai-models.md` | Latest models reference |

### Content, Integration & Other Skills
| Skill | Purpose |
|-------|---------|
| `aeo-optimization.md` | AI Engine Optimization |
| `web-content.md` | SEO + AI discovery |
| `site-architecture.md` | Technical SEO |
| `web-payments.md` | Stripe Checkout, subscriptions |
| `reddit-api.md` | Reddit API |
| `reddit-ads.md` | Reddit Ads API + agentic optimization |
| `ms-teams-apps.md` | Microsoft Teams bots |
| `posthog-analytics.md` | PostHog analytics |
| `shopify-apps.md` | Shopify app development |
| `woocommerce.md` | WooCommerce REST API |
| `medusa.md` | Medusa headless commerce |
| `klaviyo.md` | Klaviyo email/SMS marketing |

## Usage Patterns

### New Project
```bash
mkdir my-new-app && cd my-new-app
claude
> /initialize-project
```

### Existing Project
```bash
cd my-existing-app
claude
> /initialize-project
# Auto-detects existing code → runs analysis first
```

### Update Skills Globally
```bash
cd "$(cat ~/.claude/.bootstrap-dir)"
git pull
./install.sh
```

## Prerequisites

```bash
# GitHub CLI
brew install gh && gh auth login

# Vercel CLI (optional)
npm i -g vercel && vercel login

# Supabase CLI (optional)
brew install supabase/tap/supabase && supabase login
```

## Evolution

| Version | Date | What Changed |
|---------|------|-------------|
| **v1.0** | Jan 2026 | Initial release — 30+ skills, `/initialize-project`, TDD via Ralph Wiggum loops, Python/TypeScript/React support |
| **v2.0** | Jan 2026 | Skills restructured (`folder/SKILL.md`), YAML frontmatter, validation tests, 60+ skills across 10 categories |
| **v3.0** | Mar 2026 | **Real Claude Code infrastructure** — Ralph Wiggum replaced with Stop hooks, `@include` directives, conditional rules (`paths:` frontmatter), agent teams via `.claude/agents/`, pre-configured permissions |
| **v3.3** | Apr 2026 | Mnemos (task-scoped memory), iCPG (intent tracking + drift detection), Maggy dashboard MVP (inbox, execute, competitors) |
| **v3.5** | Apr 2026 | PreCompact hook for smarter compaction, fatigue model (4 dimensions), hook error resilience |
| **v3.6** | May 2026 | Cross-tool compatibility (Claude + Kimi + Codex), cross-agent intelligence (Codex auto-review, Kimi delegation), complexity-based routing |
| **v4.0** | May 2026 | **Polyphony** — multi-agent orchestration with container isolation, 5-dimension complexity scoring, Docker runtime, 3 agent adapters, state machine task lifecycle |
| **v5.0** | May 2026 | **Autonomous command center** — Interactive Chat with `--resume` takeover, P2P Mesh networking, process intelligence, auto-bootstrap, grouped UI (Work/Intel/System), 468 tests, security hardening (path validation, streaming lock) |

### Where we started vs where we are

| Area | v1 (Jan 2026) | v5 (May 2026) |
|------|---------------|---------------|
| **Scope** | Claude Code project setup tool | Autonomous AI engineering platform |
| **TDD** | Ralph Wiggum plugin (didn't exist) | Real Stop hooks with iteration tracking |
| **Skills** | 30 flat `.md` files | 62 skills with `@include`, conditional rules |
| **Memory** | None (lost on compaction) | Mnemos typed graph + fatigue model |
| **Intent** | None | iCPG with 6-dimension drift detection |
| **Agents** | Single Claude session | Polyphony containers + cross-agent delegation |
| **Models** | Claude only | Claude + Codex + Kimi + complexity routing |
| **Dashboard** | None | Maggy — chat, tasks, competitors, insights, mesh |
| **Networking** | None | P2P Mesh (WebSocket sync, org-scoped) |
| **Tests** | Shell validation script | 468 pytest tests + integration suite |

## Contributing

See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.

## Changelog

See [CHANGELOG.md](CHANGELOG.md) for version history.

## License

MIT - See [LICENSE](LICENSE)

## Credits

Built on learnings from 100+ projects across customer experience management, agentic AI platforms, mobile apps, and full-stack web applications.

---

**Need help scaling AI in your org?** [Claude Code & MCP experts](https://leanai.ventures/aiops/claude)


================================================
FILE: _project_specs/00-autonomous-engineering-roadmap.md
================================================
# Autonomous Engineering Roadmap

A set of specs closing the gaps between claude-bootstrap's current code-intelligence stack (`codebase-memory-mcp` + `iCPG` + `Joern/CodeQL` + `Mnemos` + `code-deduplication`) and what an autonomous coding agent actually needs to ship changes without supervision.

## Why these?

Autonomous agents fail in 10 specific, repeatable ways (see the comparison doc in chat history, 2026-04-20). Our current stack addresses 9 of them. The specs below close the remaining agent-observable gaps and add the two "frontier" capabilities (multimodal ingestion, verifiable contracts).

## Priority order

**Tier 1 — highest leverage, unlocks the rest:**

| # | Spec | Why it matters |
|---|---|---|
| 01 | [Runtime observability](01-runtime-observability.md) | Drift detection is static — an agent that ships code needs a production feedback signal to know if the change actually worked |
| 03 | [Verifiable contracts](03-verifiable-contracts.md) | iCPG postconditions are currently natural-language. Generating property-based tests from them makes them machine-checkable. |
| 07 | [Human escalation protocol](07-human-escalation-protocol.md) | When the agent is stuck, it needs a formal "page a human with this packet" channel |

**Tier 2 — valuable, not blocking:**

| # | Spec | Why it matters |
|---|---|---|
| 08 | [Auto CODE_INDEX](08-auto-code-index.md) | The capability index currently depends on humans maintaining it. Auto-derive from the graph. |
| 04 | [Multi-agent coordination](04-multi-agent-coordination.md) | When two agents touch the same area, we need locking / negotiation |
| 02 | [Rollback & recovery](02-rollback-and-recovery.md) | Drift flags a problem; we still need automated revert paths |

**Tier 3 — frontier / optional:**

| # | Spec | Why it matters |
|---|---|---|
| 05 | [Confidence calibration](05-confidence-calibration.md) | Reinforcement loop — learn from past agent actions which patterns fail |
| 06 | [Cost / budget awareness](06-cost-budget-awareness.md) | Agents stuck in loops burn real money. Hard budget stops. |
| 09 | [Multimodal ingestion](09-multimodal-ingestion.md) | Graphify-style. Only matters if your repos include docs/images/video. |

## What each spec contains

- **Context** — the failure mode being addressed
- **Goal** — one-sentence outcome
- **Approach** — concrete integration points with existing skills/scripts
- **Success criteria** — how we know it works
- **Effort** — rough size (small / medium / large)
- **Depends on** — other specs that should land first

## Implementation convention

When picking up a spec:

1. Create a feature branch `feat/spec-XX-<short-slug>`
2. Add an entry to `CHANGELOG.md` under an "Unreleased" section
3. Write the feature following TDD (as the rest of the project does)
4. Update the spec file's `Status` field when merged

Status values: `pending` · `in-progress` · `in-review` · `done` · `deferred`


================================================
FILE: _project_specs/01-runtime-observability.md
================================================
# Spec 01: Runtime Observability for Drift Detection

**Status:** pending
**Priority:** Tier 1 (highest leverage)
**Effort:** Medium

## Context

`iCPG` detects drift **statically** — it can tell you a symbol's checksum changed, its tests disappeared, or a postcondition's predicate no longer holds against the current codebase. What it cannot tell you is whether the running system still delivers what the intent promised.

An autonomous agent that ships code needs a feedback signal after deploy. Otherwise:

- A refactor passes all tests and drift checks but tanks p99 latency in production → agent has no signal
- A bug fix validates against one invariant but introduces regressions users hit → silent
- An intent's postcondition is "<500ms response" — a static graph can't verify this

## Goal

Bridge `iCPG` to runtime telemetry so drift detection includes post-deploy signals, not just pre-commit signals.

## Approach

### Step 1 — Define a runtime-signal abstraction

Add a new edge type to iCPG:

```
VALIDATED_IN_PROD    Reason → Metric    (intent's postcondition has a runtime check)
```

A `Metric` node references an observability query:

```yaml
metric:
  id: "checkout_p99_under_500ms"
  source: "datadog"       # datadog | sentry | honeycomb | prometheus
  query: "avg:trace.checkout.latency.p99{env:prod}"
  predicate: "value < 500"
  window: "1h"
```

### Step 2 — Pluggable observability adapters

One-file adapters per backend (`scripts/icpg/observability/`):

- `datadog_adapter.py` — read API key from env, query the metric, return its value
- `sentry_adapter.py` — query event frequency for a given issue
- `honeycomb_adapter.py` — run a Honeycomb query and extract result
- `prometheus_adapter.py` — PromQL
- `stub_adapter.py` — for testing, reads from a JSON file

Each exposes `fetch(metric_id, window) -> float | None`.

### Step 3 — Extend `icpg drift check` with `--include-runtime`

When the flag is set, evaluate every `VALIDATED_IN_PROD` edge by calling its adapter. Runtime predicate failure adds a 7th drift dimension:

```
Runtime drift   Postcondition metric violates its predicate in production
```

### Step 4 — Hook into claude-bootstrap's post-commit flow

The `hooks/post-commit-graph` script runs `icpg record`. Add an optional `--check-runtime` step that queries the adapters for any symbols touched in this commit, so the agent sees drift before the change ships.

## Integration points

- `scripts/icpg/models.py` — add `MetricNode`, `RuntimeEdge`
- `scripts/icpg/drift.py` — add `check_runtime_drift()`
- `scripts/icpg/__main__.py` — wire `drift check --include-runtime` flag
- `skills/icpg/SKILL.md` — document the pattern
- `templates/icpg-metric.yaml` — template for declaring metrics

## Success criteria

1. `icpg drift check --include-runtime` queries configured adapters and reports runtime-dimension drift
2. At least one adapter (Datadog or Sentry) ships with docs + example config
3. A test harness using `stub_adapter` verifies runtime drift triggers correctly
4. Agent receives runtime signal in pre-task query output (`icpg query risk` includes current runtime state)
5. Zero network calls when no `VALIDATED_IN_PROD` edges exist — backward compatible

## Depends on

None — can be built independently on top of current iCPG.

## Follow-ups

- Spec 02 (rollback) uses the same signal to auto-revert on severe drift
- Spec 05 (confidence calibration) learns from runtime failures


================================================
FILE: _project_specs/02-rollback-and-recovery.md
================================================
# Spec 02: Rollback & Recovery

**Status:** pending
**Priority:** Tier 2
**Effort:** Medium

## Context

When `iCPG` detects drift or a runtime signal (Spec 01) indicates a shipped change broke something, the agent has no automated path to recover. It knows the problem exists but still has to manually coordinate a revert — find the right commit, check for downstream work, revert, re-verify.

For autonomous engineering this needs to be a first-class operation. The agent should be able to say "revert intent R-abc because its postcondition failed in production" and get a safe, auditable rollback.

## Goal

Add an `icpg revert` command that safely undoes all commits attributed to a given ReasonNode, handling downstream dependencies and leaving a verifiable audit trail.

## Approach

### Step 1 — Track commit SHAs on intents

iCPG already has `CREATES` / `MODIFIES` edges between ReasonNodes and Symbols. Extend the `record` command to also store the commit SHA that made the change:

```
CREATES      Reason → Symbol    [commit_sha, timestamp]
MODIFIES     Reason → Symbol    [commit_sha, timestamp]
```

### Step 2 — `icpg revert <intent-id>`

The command:

1. Collects all commit SHAs attributed to this intent (from its edges)
2. Checks for downstream `REQUIRES` intents whose postconditions depend on this one
3. If downstream intents exist and aren't in `drifted`/`abandoned` status → refuse revert, explain the chain
4. Otherwise: `git revert --no-commit <sha1> <sha2> ...` in reverse chronological order
5. Runs the intent's `VALIDATED_BY` tests to confirm pre-intent state is reached
6. Updates the intent status to `reverted` (new status)
7. Emits a `REVERTED` edge type linking the revert commit to the original

### Step 3 — Auto-revert on severe drift (opt-in)

Wire into drift detection: when `Runtime drift` severity > 0.9 AND drift age < 1h AND `auto_revert: true` is set on the intent → trigger `icpg revert` automatically and page a human (Spec 07).

Config per-project in `.icpg/config.yaml`:

```yaml
auto_revert:
  enabled: false        # opt-in per project
  severity_threshold: 0.9
  max_age_minutes: 60
  require_test_pass: true
```

### Step 4 — Recovery for partial failures

If `git revert` fails mid-way (conflicts, missing commits), leave the tree in a clean state (`git revert --abort`) and report exactly which commit failed + why.

## Integration points

- `scripts/icpg/__main__.py` — add `revert` subcommand
- `scripts/icpg/models.py` — add `commit_sha` field on edges, `reverted` status, `REVERTED` edge type
- `scripts/icpg/drift.py` — optional auto-revert trigger for severe runtime drift
- `hooks/post-commit-graph` — capture SHA when recording
- `skills/icpg/SKILL.md` — add revert section

## Success criteria

1. `icpg revert <id>` reverts all commits attributed to that intent cleanly or explains why it can't
2. Downstream `REQUIRES` intents block the revert with a clear message
3. Auto-revert is opt-in per-intent and only fires on high-severity runtime drift
4. Every revert is logged in the graph with `REVERTED` edges pointing to the original commits
5. A test harness verifies revert correctness against a scripted intent lifecycle

## Depends on

- Spec 01 (runtime observability) — the auto-revert signal comes from runtime drift


================================================
FILE: _project_specs/03-verifiable-contracts.md
================================================
# Spec 03: Verifiable Contracts (Property-Based Test Generation)

**Status:** pending
**Priority:** Tier 1 (highest leverage)
**Effort:** Large

## Context

iCPG's ReasonNodes already carry formal contracts:

```
preconditions:   What must be true before execution
postconditions:  What must be true when fulfilled
invariants:      What must remain true
```

Today these are natural-language strings. Drift detection matches commit patterns and checksums against them heuristically. That's good but not verifiable — the agent can't prove a postcondition still holds after a change.

For autonomous engineering, we want machine-checkable contracts: the agent writes a postcondition, and the system generates tests that will fail if the postcondition is ever violated.

## Goal

Generate property-based tests from iCPG postconditions so drift detection becomes "did the test pass?" instead of "does the string still plausibly match?"

## Approach

### Step 1 — Structured postconditions (optional schema)

Let authors write postconditions in either natural language (current) or a structured form that's machine-generatable:

```yaml
postconditions:
  - type: "returns"
    of: "save_response"
    shape: "Response"
    properties:
      - "response.id is not null"
      - "response.org_id == input.org_id"
      - "len(response.answers) == len(input.answers)"
  - type: "invariant"
    holds: "during_save"
    assertion: "db.responses.count() increases by 1"
```

The structured form compiles to tests; natural language fallback uses LLM-assisted generation (Step 2).

### Step 2 — Pluggable property-based test generators

One generator per language/framework:

- `scripts/icpg/codegen/hypothesis_python.py` — Hypothesis (Python)
- `scripts/icpg/codegen/fastcheck_ts.py` — fast-check (TypeScript)
- `scripts/icpg/codegen/proptest_rust.py` — proptest (Rust)
- Natural-language postconditions use LLM generation, structured ones compile directly

Each takes a `ReasonNode` and returns a test file with a `# @icpg-generated from R-abc123` header so the agent knows not to hand-edit.

### Step 3 — `icpg contracts generate <intent-id>`

CLI command that:

1. Reads the intent's postconditions
2. Detects the language of the scope files (already tracked)
3. Invokes the right generator
4. Writes tests to `tests/generated/contracts/<intent-id>.test.py` (or equivalent)
5. Adds a `VALIDATED_BY` edge automatically

`icpg contracts generate --all` regenerates every intent's tests (bulk operation for upgrading existing projects).

### Step 4 — Drift check gains a "contract-verified" signal

Existing drift detection checks whether `VALIDATED_BY` tests exist and pass. With this spec, those tests are now *derived from the postconditions* rather than hand-written, so failure is a direct postcondition violation signal — not just "a test broke."

### Step 5 — Regenerate on intent edit

When a ReasonNode's postconditions change, stale generated tests are flagged. Agent can run `icpg contracts sync` to regenerate; humans can review the diff.

## Integration points

- `scripts/icpg/models.py` — add structured `postcondition` variants alongside existing strings
- `scripts/icpg/codegen/` — new package, one module per language/framework
- `scripts/icpg/__main__.py` — `contracts generate`, `contracts sync` subcommands
- `skills/icpg/SKILL.md` — document how to write structured postconditions
- `templates/reasonnode-structured.yaml` — template showing both forms

## Success criteria

1. Given an intent with structured postconditions, `icpg contracts generate` produces a runnable property-based test in Hypothesis/fast-check
2. The generated test has a header marking it as machine-generated
3. Running the test suite fails immediately when a postcondition is violated in the actual implementation
4. Natural-language postconditions fall back to LLM generation cleanly (doesn't silently skip)
5. Drift detection differentiates "stale test" from "postcondition violation" in its severity score

## Depends on

None (iCPG only). But pairs well with:

- Spec 01 — runtime postconditions (metric predicates) complement code-level postconditions
- Spec 02 — a generated test failure is a strong auto-revert signal


================================================
FILE: _project_specs/04-multi-agent-coordination.md
================================================
# Spec 04: Multi-Agent Coordination (Symbol-Level Locks)

**Status:** pending
**Priority:** Tier 2
**Effort:** Medium

## Context

claude-bootstrap already has `agent-teams` and `team-coordination` skills, and Maggy ships with a P2P session-handoff pattern. But when two agents (or two sessions of the same agent) want to modify the same area of code, there's no coordination protocol. First-to-commit wins, which creates silent merge conflicts, duplicated work, and lost intent tracking.

For autonomous engineering at team scale (multiple agents, or one agent coordinating long-running subtasks), we need intent-level and symbol-level locks.

## Goal

Agents claim exclusive work on an intent or set of symbols before modifying, negotiate with holders of conflicting locks, and release on completion or timeout.

## Approach

### Step 1 — Lock primitive in iCPG

Add a `lock` table and edge type:

```
LOCKED_BY    Reason | Symbol → Agent    [acquired_at, expires_at, purpose]
```

Locks are scoped to an intent (broadest), a set of files, or a set of symbols (finest). A lock has:

- `holder_id` — agent or session identifier
- `scope` — intent id | files[] | symbols[]
- `purpose` — one-line description ("refactor auth service")
- `acquired_at` / `expires_at` — auto-expire to prevent orphans (default 30 min)
- `heartbeat_at` — renewed periodically by the holder

### Step 2 — `icpg lock` / `icpg unlock` commands

```bash
icpg lock intent R-abc --purpose "refactor auth" --expires 30m
icpg lock symbols auth.login,auth.logout --purpose "rate-limiting fix"
icpg locks list                              # show all active locks
icpg unlock R-abc                             # release
icpg locks prune                              # remove expired
```

Lock attempts on a held scope return the holder's info so the requesting agent can decide what to do (wait, negotiate, defer).

### Step 3 — Pre-task query integration

Extend the 3 canonical pre-task queries with a 4th:

| Query | What It Answers |
|---|---|
| `icpg query locks <scope>` | Is someone else working on this right now? |

The PreToolUse hook adds this to the injected context before any Edit/Write call.

### Step 4 — Negotiation protocol

When an agent wants a held lock, it sends a `negotiation_request` to the holder (Mnemos message):

- Requester states: intent, priority, estimated duration
- Holder responds: `accept` (release), `defer` (hold until completion), `split` (narrow the lock to specific symbols)

If no response within 5 minutes, the requester either takes the lock (if the holder's heartbeat is stale) or escalates (Spec 07).

### Step 5 — Conflict prevention at commit time

Post-commit hook verifies the committing agent holds the right lock for all symbols the commit modified. If not, the commit is logged as `unauthorized_modification` and the drift check flags it.

## Integration points

- `scripts/icpg/models.py` — `Lock`, `LockedByEdge`
- `scripts/icpg/store.py` — `acquire_lock`, `release_lock`, `prune_locks`, `list_locks`
- `scripts/icpg/__main__.py` — `lock`, `unlock`, `locks` subcommands
- `hooks/pre-tool-use` — inject active-lock context
- `hooks/post-commit-graph` — verify lock matches modified symbols
- `skills/agent-teams/SKILL.md` — add locking discipline section
- `skills/icpg/SKILL.md` — document the 4th pre-task query

## Success criteria

1. Two concurrent agents attempting to modify the same symbol can't both succeed — the second sees the held lock
2. Locks auto-expire 30 min after last heartbeat (agents don't have to remember to release)
3. Pre-task queries include active-lock info
4. Commits violating lock ownership are flagged in drift reports
5. Negotiation protocol works: requester gets a structured response from holder, or escalation fires

## Depends on

- Spec 07 (escalation) — when negotiation fails, escalation fires
- Builds on existing `agent-teams` and Maggy P2P patterns


================================================
FILE: _project_specs/05-confidence-calibration.md
================================================
# Spec 05: Confidence Calibration (Reinforcement Loop)

**Status:** pending
**Priority:** Tier 3 (frontier)
**Effort:** Medium

## Context

iCPG's `get_risk_profile` query today classifies symbols as fragile/stable based on ownership history and drift count. It doesn't learn from what actually failed when agents touched them. A file that agents have tried and failed to refactor three times gets the same risk score as one that has never been touched.

For autonomous engineering, we want a reinforcement loop: past agent failures against a symbol or pattern should raise its risk score for future agents.

## Goal

Track agent actions and their outcomes against symbols/patterns, and use that history to calibrate confidence for future pre-task queries.

## Approach

### Step 1 — Action-outcome tracking

Add two new node types to iCPG:

```
AgentAction   { id, agent, intent, scope[], timestamp }
Outcome       { action_id, result, evidence }
```

Result types:
- `success` — tests passed, intent fulfilled, no drift
- `partial` — intent fulfilled but introduced drift elsewhere
- `failure_test` — tests failed, rolled back
- `failure_runtime` — shipped, runtime drift detected (Spec 01)
- `abandoned` — agent gave up

Evidence is a pointer — commit SHA, test output, drift report.

### Step 2 — Hook into existing flows

Automatic capture:

- Pre-task query writes an open `AgentAction` node
- Post-commit: matches to the most recent pending action and records outcome based on test results
- Drift check: if a `VALIDATED_BY` test fails on an intent, the agent action tied to that intent's commit is marked `failure_test`
- Spec 01 runtime drift: marks `failure_runtime`
- Spec 02 auto-revert: marks `abandoned`

### Step 3 — Risk score now includes success rate

`icpg query risk <symbol>` returns a calibrated score:

```
Historical success rate for this symbol: 40% (2 of 5 attempts successful)
Pattern complexity: high (10+ dependents, 3 owners, drifted twice)
Recommendation: treat as fragile — consider smaller changes or pair
```

Calibration uses a simple Bayesian update: prior = structural risk (current method), likelihood = recent action outcomes.

### Step 4 — Pattern-level learning (stretch)

For autonomous agents, single-symbol history is too narrow — we want "refactors of dataclasses with >5 fields fail 60% of the time." This requires clustering actions by pattern, not just symbol. Defer this to a v2 of this spec; first ship the single-symbol version.

### Step 5 — Privacy & data hygiene

Action history is sensitive (could leak intent details). Make it:

- Opt-out per project (`.icpg/config.yaml: track_outcomes: false`)
- Redacted — content stripped, structure only (symbol ids, outcome types, timestamps)
- Never exported outside the `.icpg/` directory

## Integration points

- `scripts/icpg/models.py` — `AgentAction`, `Outcome` node types
- `scripts/icpg/store.py` — outcome-tracking tables
- `scripts/icpg/drift.py` — risk scoring gains history term
- `hooks/pre-tool-use` — record `AgentAction` before Edit/Write calls
- `hooks/post-commit-graph` — finalize the outcome
- `skills/icpg/SKILL.md` — document calibrated risk semantics

## Success criteria

1. Every agent Edit/Write action is automatically logged (no manual reporting)
2. `icpg query risk <symbol>` returns a score incorporating historical outcomes
3. Risk score equals the structural risk when action history is empty (no regression)
4. Privacy opt-out works — no history written when disabled
5. A test harness replays an action sequence and verifies calibrated scores update correctly

## Depends on

- Spec 01 (runtime observability) — feeds `failure_runtime` signal
- Spec 02 (rollback) — feeds `abandoned` signal
- Spec 03 (verifiable contracts) — feeds high-signal `failure_test` from postcondition failures


================================================
FILE: _project_specs/06-cost-budget-awareness.md
================================================
# Spec 06: Cost / Budget Awareness

**Status:** pending
**Priority:** Tier 3 (frontier)
**Effort:** Small

## Context

Autonomous agents stuck in loops burn real money. Mnemos's fatigue detection (4-dim: tokens, scatter, re-reads, error density) is a *behavioral* proxy for "the agent is struggling" but it isn't a hard stop. An agent that's actually wasting tokens or API calls needs a budget ceiling.

This matters especially for:

- `/improve-maggy`, self-improvement flows, anything that spawns subagents
- Team runs where one misbehaving agent shouldn't bankrupt the whole run
- Maggy's TDD execute pipeline (up to 3 Claude Code invocations per ticket)

## Goal

Add per-task and per-session budget limits with hard stops and a budget-aware fatigue state.

## Approach

### Step 1 — Declare a budget in intent config

Extend ReasonNode:

```yaml
budget:
  tokens: 100000
  api_calls: 50
  wall_clock_minutes: 30
  usd: 5.00
```

All fields are optional. `usd` is calculated from model pricing tables (current Sonnet/Opus rates, refreshed quarterly).

### Step 2 — Track spend via hooks

PostToolUse hook accumulates:

- Tokens consumed (from `transcript_path` JSON blobs)
- Claude API calls (by counting tool uses)
- Wall clock elapsed since intent started

Stored in `.icpg/budgets/<intent-id>.json` with heartbeats.

### Step 3 — Budget-aware fatigue state

Add a 5th Mnemos fatigue dimension: `budget_burn_rate`. If the agent has consumed 70% of its token budget at 40% progress, that's a signal to compress / consolidate / consider abandoning. Threshold behavior:

| Budget consumed | Action |
|---|---|
| <60% | Normal |
| 60-85% | Mnemos COMPRESS state forced |
| 85-100% | Mnemos REM state forced, agent warned to wrap up |
| >100% | Hard stop — PreToolUse hook rejects further Edit/Write/Bash |

### Step 4 — Graceful stop behavior

When budget is exceeded:

1. PreToolUse hook returns `budget_exceeded` error with context about remaining work
2. Agent is expected to write a handoff Mnemos checkpoint before exiting
3. Intent status flips to `deferred_budget`
4. Human (or another agent with a fresh budget) can resume from the checkpoint

### Step 5 — Budget override

A human can set `allow_overage: true` on an intent or raise the limit mid-run. Override requires a commit to the intent's config (auditable).

## Integration points

- `scripts/icpg/models.py` — `Budget` field on ReasonNode
- `scripts/icpg/budget.py` — new module for tracking and enforcement
- `hooks/pre-tool-use` — budget check before Edit/Write/Bash
- `hooks/post-tool-use` — accumulate spend
- `templates/pricing.yaml` — model → $/token table, refreshed quarterly
- `skills/mnemos/SKILL.md` — document the 5th fatigue dimension
- `skills/icpg/SKILL.md` — document budget declaration

## Success criteria

1. An intent with a 10k-token budget hard-stops at 10k tokens via PreToolUse rejection
2. Mnemos fatigue state reflects budget consumption (COMPRESS / REM / EMERGENCY)
3. Budget overruns leave a Mnemos handoff checkpoint so work can resume
4. `icpg budgets list` shows current spend vs limit per active intent
5. No budget declared → no enforcement (backward compatible)

## Depends on

- Mnemos fatigue model (already exists)
- Nothing else


================================================
FILE: _project_specs/07-human-escalation-protocol.md
================================================
# Spec 07: Human-in-the-Loop Escalation Protocol

**Status:** pending
**Priority:** Tier 1 (highest leverage)
**Effort:** Small-Medium

## Context

When an autonomous agent hits a wall it can't resolve — drift it can't fix, a contract violation with no clear cause, lock negotiation failure, budget exceeded — there's no formal protocol for raising the problem to a human. The hooks infrastructure exists, the discipline doesn't.

Today the agent might:
- Silently continue and compound the issue
- Write a confused summary and exit, leaving no actionable packet
- Page every minor issue, creating alert fatigue

None of these scale to autonomous engineering at a team level.

## Goal

A standard escalation protocol: the agent packages a context packet (what it tried, what went wrong, what it needs a human to decide) and delivers it through a configured channel.

## Approach

### Step 1 — Escalation packet schema

```yaml
escalation:
  id: "esc-abc123"
  agent: "claude-opus-4.7"
  intent: "R-auth-refactor"
  severity: "blocking"           # blocking | high | medium | low
  category: "drift_unresolvable" # or: contract_violation, lock_conflict,
                                 # budget_exceeded, taint_detected, unknown
  summary: "Two-sentence description of the situation"
  what_was_tried:
    - "Attempted X — result: failed because Y"
    - "Attempted Z — result: partial"
  proposed_options:
    - "Option A: revert to sha abc, human makes a decision"
    - "Option B: accept the drift, update postcondition"
  context_refs:
    - "commit: sha-latest"
    - "intent: R-auth-refactor"
    - "drift_report: path/to/drift.json"
    - "mnemos_checkpoint: path/to/checkpoint.json"
  awaiting: "resolution"
```

### Step 2 — `icpg escalate` CLI

```bash
icpg escalate --intent R-auth-refactor \
              --category drift_unresolvable \
              --severity blocking \
              --summary "Cannot resolve postcondition drift" \
              --context drift.json
```

Writes the packet to `.icpg/escalations/<id>.yaml` and fires the configured delivery channel.

### Step 3 — Pluggable delivery channels

One adapter per channel (`scripts/icpg/escalation/`):

- `slack_adapter.py` — post to configured channel with packet fields
- `github_issue_adapter.py` — create issue with the packet
- `email_adapter.py` — SendGrid / SMTP
- `file_adapter.py` — default; writes to `.icpg/escalations/` only (for local/dev)

Config in `.icpg/config.yaml`:

```yaml
escalation:
  channels:
    - type: slack
      webhook_url_env: SLACK_ESCALATION_WEBHOOK
      min_severity: high
    - type: github_issue
      repo: "org/repo"
      min_severity: blocking
```

### Step 4 — Auto-trigger from known conditions

Wire automatic escalations:

| Condition | Severity | Category |
|---|---|---|
| Drift severity >0.8, auto-revert failed | blocking | drift_unresolvable |
| Contract violation caught by generated test (Spec 03) | high | contract_violation |
| Lock negotiation timeout (Spec 04) | medium | lock_conflict |
| Budget exceeded without handoff checkpoint (Spec 06) | high | budget_exceeded |
| CodeQL finds new taint path | blocking | taint_detected |

Each hook module calls `icpg escalate` with the right packet when its trigger fires.

### Step 5 — Resolution tracking

When a human responds (comment on the GitHub issue, Slack thread reply with a resolution marker like `resolved: revert`), an `EscalationResolution` node is written and any pending agent waiting on the packet can resume.

Agents consult `icpg escalations list --pending` as part of their pre-task queries.

### Step 6 — Rate limiting / dedup

Don't spam. If the same intent + category has an open escalation, merge into it (append to `what_was_tried`) instead of creating a new one. Escalation adapter respects a per-channel rate limit.

## Integration points

- `scripts/icpg/models.py` — `Escalation`, `EscalationResolution`
- `scripts/icpg/escalation/` — new package, one module per channel
- `scripts/icpg/__main__.py` — `escalate`, `escalations list/resolve` subcommands
- `hooks/post-tool-use` — auto-escalate on trigger conditions
- `skills/icpg/SKILL.md` — document when agents should manually call it
- `templates/escalation-config.yaml` — example config

## Success criteria

1. Agent can manually escalate a situation with `icpg escalate` and humans receive it through at least one channel (Slack preferred)
2. Auto-escalations fire for all 5 trigger conditions above
3. Dedup works — same intent + category doesn't spam
4. Human resolution flows back as `EscalationResolution` node, pending agents can detect it
5. Local/dev config uses file-only adapter (no external calls), never breaks tests

## Depends on

None directly — builds on existing hook infrastructure. Integrates with:
- Spec 02 (rollback) — failed auto-revert triggers escalation
- Spec 03 (contracts) — test failures trigger escalation
- Spec 04 (locks) — negotiation timeout triggers escalation
- Spec 06 (budget) — overrun without handoff triggers escalation


================================================
FILE: _project_specs/08-auto-code-index.md
================================================
# Spec 08: Auto-Derived CODE_INDEX from Graph

**Status:** pending
**Priority:** Tier 2
**Effort:** Small-Medium

## Context

The `code-deduplication` skill requires a `CODE_INDEX.md` in the project root — a capability index that tells the agent "this already exists, don't reimplement it." The current design asks humans (or agents) to maintain it manually.

In practice:

- Agents don't reliably update the index when they add capabilities
- Humans forget to update it
- The index drifts from reality fast
- Agents that check the index get stale info and duplicate anyway

Since we already have `codebase-memory-mcp` (symbol graph) and `iCPG` (intent graph), we can derive the capability index from them instead of hand-maintaining it.

## Goal

Auto-generate `CODE_INDEX.md` from the graph, refreshed on every commit, organized by capability so agents can check-before-write reliably.

## Approach

### Step 1 — Capability extraction pass

A new pass over the combined graph:

1. Read all `ReasonNode`s with status `fulfilled` (iCPG)
2. For each, pull the symbols linked to it by `CREATES` or `MODIFIES` edges
3. Group by capability domain (inferred from:
   - intent's `scope` path prefixes — `app/auth/*` → "auth"
   - intent's `decision_type` — `business_goal` and `arch_decision` are top-level, `task` and `workaround` are subcategories
   - common tag patterns in the codebase)
4. For each capability, collect the main entry points (public classes/functions that serve that capability)

### Step 2 — Emit CODE_INDEX.md

```markdown
# Code Capability Index

Auto-generated from iCPG + codebase-memory-mcp. Last updated: 2026-04-20.
Run `icpg index build` to regenerate.

## Authentication
**Capability:** user auth, session management, token handling
**Entry points:**
- `app.auth.login_user()` [app/auth/login.py:42] — primary login
- `app.auth.session.SessionManager` [app/auth/session.py] — session lifecycle
**Intents:** R-auth-base, R-jwt-refactor, R-rate-limit

## Survey responses
**Capability:** create, validate, persist, query survey responses
**Entry points:** ...
```

Output is deterministic — same graph state produces the same output.

### Step 3 — Hook into post-commit

Every commit that records new iCPG edges triggers a regeneration. Runs in under a second for typical repo sizes since it's a DB scan + markdown emit.

### Step 4 — `icpg index` subcommand

```bash
icpg index build        # regenerate CODE_INDEX.md
icpg index check        # verify CODE_INDEX.md matches graph state (for CI)
icpg index query auth   # query a specific capability section
```

The `check` subcommand lets CI reject commits that leave an out-of-sync CODE_INDEX.

### Step 5 — Agent workflow integration

The `code-deduplication` skill's pre-write discipline stays the same, but the data source changes from "human-maintained CODE_INDEX.md" to "graph-derived CODE_INDEX.md." Update the skill to:

1. Call `icpg query prior "<goal>"` (iCPG's existing prior-work query)
2. If no match, consult the index sections matching the intent's scope
3. Only create new code if both checks come up empty

Also add `icpg query capability "<description>"` — a semantic search over capability descriptions in the index, not just symbol names.

### Step 6 — Keep hand-written sections (optional)

Let humans add non-derived sections (architecture notes, business domain glossary) in a separate file — `CODE_INDEX.human.md` — and `icpg index build` appends it. Auto-derived + human annotations cleanly separated.

## Integration points

- `scripts/icpg/index.py` — new module, grouping + emit logic
- `scripts/icpg/__main__.py` — `index build`, `index check`, `index query` subcommands
- `hooks/post-commit-graph` — call `icpg index build`
- `skills/code-deduplication/SKILL.md` — update to reference auto-derived index
- `templates/CODE_INDEX.md` — deprecate the hand-maintained template; add note pointing to the auto-generated path

## Success criteria

1. On any repo with iCPG populated, `icpg index build` produces a grouped, readable CODE_INDEX.md
2. `icpg index check` detects drift between graph and markdown (for CI)
3. Agents find existing capabilities via semantic search (`icpg query capability "rate limiting"`)
4. Generation is deterministic — same graph → same markdown
5. Backward compatible: projects without iCPG continue to hand-maintain; projects with iCPG get the auto version
6. Regeneration is <2s on a 10k-symbol repo

## Depends on

- iCPG (required)
- codebase-memory-mcp (preferred — used for richer capability grouping)


================================================
FILE: _project_specs/09-multimodal-ingestion.md
================================================
# Spec 09: Multimodal Ingestion (Optional Graphify-Style Extension)

**Status:** pending
**Priority:** Tier 3 (frontier / optional)
**Effort:** Large

## Context

Our stack is code-only. Some repos carry essential context in non-code artifacts:

- Product specs in PDFs or Google Docs exports
- Architecture diagrams in PNG / Miro / whiteboard photos
- Engineering demos in MP4
- Research papers in PDF

When an autonomous agent works on such a repo, it currently ignores these artifacts. That's a real gap — the agent makes code decisions without knowing the intent captured in the diagrams or docs.

Graphify (github.com/safishamsi/graphify) solves this: it ingests docs, images, audio, and video into the same knowledge graph as code. We don't need to rebuild their work — we can adopt their approach as an optional extension to claude-bootstrap.

**This spec is optional** — only valuable if your repos actually carry non-code context. Most don't.

## Goal

Let claude-bootstrap ingest non-code artifacts into the iCPG graph so agents can reason about code + docs + images in the same queries.

## Approach

### Step 1 — Artifact node type

Extend iCPG with a new node:

```
Artifact {
  id, path, kind, content_hash, ingested_at,
  extracted_concepts: []    // concept strings
}
```

Kinds: `pdf`, `markdown`, `image`, `diagram`, `video`, `audio`, `slides`.

### Step 2 — Ingestion pipeline

`icpg ingest <path>` — one command, pluggable extractors:

- `pdf_extractor.py` — text via `pypdf` or `pdfplumber`, then LLM to extract key concepts
- `markdown_extractor.py` — parse headings, blockquotes, pull out "key decision" patterns
- `image_extractor.py` — Claude multimodal: "describe this diagram; list entities and relationships"
- `video_extractor.py` — `faster-whisper` transcription with domain-aware prompt, then concept extraction
- `audio_extractor.py` — same as video, skip video decode

Each extractor emits concept nodes + relationships back into iCPG using three new edge types:
- `DESCRIBES` — Artifact → Symbol / Reason (this doc describes this code)
- `MENTIONS` — Artifact → Concept (looser reference)
- `DECIDES` — Artifact → Reason (this doc made an architectural decision that became an intent)

### Step 3 — `.icpgignore` for ingest paths

Respect a per-project `.icpgignore` like graphify's `.graphifyignore`, using `.gitignore` syntax. Default excludes: `node_modules/`, `dist/`, `.venv/`, `*.generated.*`, binary builds.

### Step 4 — Incremental refresh

Track content hashes per artifact. Re-ingest only when hash changes. Bulk re-ingest via `icpg ingest --refresh`.

### Step 5 — Extend pre-task queries

Add a 5th canonical query:

```bash
icpg query docs "<topic>"   # Find artifacts relevant to this topic
```

Returns: artifact paths, extracted concepts, relationships to code symbols.

The PreToolUse hook includes this in the injected context when the agent is about to write code in a scope touched by `DESCRIBES` edges.

### Step 6 — Transparent honesty about inference

Adopt graphify's `EXTRACTED` / `INFERRED` / `AMBIGUOUS` edge labeling. PDF text → EXTRACTED. Image concept → INFERRED with confidence. Whiteboard smudged text → AMBIGUOUS, flagged for review.

### Step 7 — Cost control

LLM-based extractors (images, video transcripts) are expensive. Respect Spec 06 budgets. By default, `icpg ingest` runs only the free extractors (markdown, PDF text); image / video / audio extraction requires the explicit `--enable-llm` flag.

### Step 8 — Distribution

Ship this as a **separate installable package** — `claude-bootstrap-multimodal` on PyPI. Base claude-bootstrap stays code-only. Users opt in:

```bash
pip install claude-bootstrap-multimodal
icpg ingest docs/ specs/
```

## Integration points

- `scripts/icpg/models.py` — `Artifact`, new edge types (`DESCRIBES`, `MENTIONS`, `DECIDES`)
- `scripts/icpg/ingest/` — new package (could live in a separate repo)
- `scripts/icpg/__main__.py` — `ingest` subcommand
- `skills/icpg/SKILL.md` — document the 5th pre-task query
- `skills/multimodal/SKILL.md` — new skill describing when to use ingestion

## Success criteria

1. `icpg ingest docs/` processes markdown + PDF without LLM and creates artifact nodes
2. `icpg ingest --enable-llm specs/` processes images and videos, with the budget flag respected
3. Pre-task queries surface relevant documentation when the agent is about to modify code touched by `DESCRIBES` edges
4. Re-ingestion only processes changed files (hash-based cache)
5. Base claude-bootstrap doesn't require multimodal deps to work — installed separately

## Depends on

- Spec 06 (budget) — LLM extractors must respect budget caps

## Alternative: adopt graphify directly

Instead of building this, we could document "for multimodal, run graphify alongside" and provide a conversion tool that imports graphify's `graph.json` into iCPG as Artifact nodes. This is faster to ship and avoids duplicating graphify's work.

**Recommendation:** ship the conversion tool first (1-2 days of work), observe adoption, build native ingestion only if real demand emerges.


================================================
FILE: commands/analyze-repo.md
================================================
# Analyze Repository

Analyze an existing repository's structure, conventions, and guardrails.

**This command runs automatically** when `/initialize-project` detects an existing codebase without Claude setup. You can also run it standalone anytime.

**Use this command standalone when:**
- You want to re-analyze after making changes
- You want analysis without running `/initialize-project`
- Auditing code quality and guardrails on any repo
- Reviewing a codebase without adding Claude skills

**Automatic trigger:**
- `/initialize-project` on existing codebase → auto-runs this analysis first

---

## Phase 1: Repository Detection

Run these checks to understand the repo:

```bash
# Git info: remotes, up to 10 branches, last 5 commits.
# stderr is silenced so a directory that is not a git repo only prints the header.
echo "=== Git Status ===" && \
git remote -v 2>/dev/null && \
git branch -a 2>/dev/null | head -10 && \
git log --oneline -5 2>/dev/null

# Config files in the repo root
echo "=== Config Files ===" && \
ls -la *.json *.toml *.yaml *.yml 2>/dev/null

# Directory structure (3 levels, excluding noise)
# -maxdepth comes first: GNU find treats it as a global option and warns when
# it follows a test such as -type.
# -prune removes the noise directories themselves (not just their contents)
# and avoids descending into them at all.
echo "=== Directory Structure ===" && \
find . -maxdepth 3 \
    \( -name node_modules -o -name .git -o -name venv \
       -o -name __pycache__ -o -name dist -o -name build \) -prune \
    -o -type d -print \
    2>/dev/null | head -40
```

---

## Phase 2: Tech Stack Detection

Identify the primary technologies:

```bash
# JavaScript/TypeScript (only the first 50 lines of package.json are shown)
if [ -f "package.json" ]; then
    echo "=== Package.json ===" && \
    head -50 package.json
fi

# Python
if [ -f "pyproject.toml" ]; then
    echo "=== pyproject.toml ===" && \
    cat pyproject.toml
fi

# Marker files for the table below: TypeScript, Rust, Go, Flutter, Android, iOS.
# -d prints the .xcodeproj bundle name itself instead of listing its contents;
# missing files are skipped silently.
ls -d tsconfig.json Cargo.toml go.mod pubspec.yaml android/build.gradle ios/*.xcodeproj 2>/dev/null
```

Based on findings, determine:

| File | Technology |
|------|------------|
| package.json + tsconfig.json | TypeScript |
| package.json (no tsconfig) | JavaScript |
| pyproject.toml | Python |
| pubspec.yaml | Flutter (Dart) |
| android/build.gradle | Android Native |
| Cargo.toml | Rust |
| go.mod | Go |

---

## Phase 3: Repo Structure Type

Classify the repository:

```bash
# Check structure type
# Rules are evaluated top to bottom and the first match wins.
# grep -E with a grouped alternation matches whole quoted dependency names;
# an ungrouped alternation would only anchor the quotes on its first and last
# alternative. stderr is silenced so a missing package.json/pyproject.toml
# simply makes the rule not match.
echo "=== Repo Structure Type ===" && \
if [ -d "packages" ] || [ -d "apps" ] || grep -q '"workspaces"' package.json 2>/dev/null; then
    echo "MONOREPO - Multiple packages/apps with shared tooling"
elif [ -d "frontend" ] && [ -d "backend" ]; then
    echo "FULL-STACK MONOLITH - Frontend + Backend in same repo"
elif [ -d "src" ] && grep -Eq '"(react|vue|angular|@angular/core)"' package.json 2>/dev/null; then
    echo "FRONTEND - Single frontend application"
elif [ -d "src" ] && grep -Eq '"(express|fastify|koa)"' package.json 2>/dev/null; then
    echo "BACKEND - Single backend application"
# -i: Python distribution names are commonly capitalised (e.g. "Django")
elif [ -f "pyproject.toml" ] && grep -Eiq 'fastapi|django|flask' pyproject.toml 2>/dev/null; then
    echo "BACKEND (Python) - Single backend application"
else
    echo "STANDARD - Single-purpose repository"
fi
```

---

## Phase 4: Guardrails Audit

Check existing code quality tools:

```bash
echo "=== Guardrails Audit ==="

# Pre-commit hooks: each mechanism is reported independently
echo "Pre-commit Hooks:"
if [ -d ".husky" ]; then
    echo "  [x] Husky installed"
else
    echo "  [ ] Husky NOT installed"
fi
if [ -f ".pre-commit-config.yaml" ]; then
    echo "  [x] pre-commit framework"
else
    echo "  [ ] pre-commit framework NOT installed"
fi
if [ -f ".git/hooks/pre-commit" ]; then
    echo "  [x] Git hooks present"
else
    echo "  [ ] No git hooks"
fi

# Linting: only the first tool that matches is reported
echo "Linting:"
if grep -q '"eslint"' package.json 2>/dev/null; then
    echo "  [x] ESLint"
elif grep -q '"biome"' package.json 2>/dev/null; then
    echo "  [x] Biome"
elif grep -q "ruff" pyproject.toml 2>/dev/null; then
    echo "  [x] Ruff"
else
    echo "  [ ] No linter detected"
fi

# Formatting: only the first tool that matches is reported
echo "Formatting:"
if grep -q '"prettier"' package.json 2>/dev/null; then
    echo "  [x] Prettier"
elif grep -q "black" pyproject.toml 2>/dev/null; then
    echo "  [x] Black"
elif grep -q "ruff" pyproject.toml 2>/dev/null; then
    echo "  [x] Ruff (formatting)"
else
    echo "  [ ] No formatter detected"
fi

# Type checking: only the first tool that matches is reported
echo "Type Checking:"
if [ -f "tsconfig.json" ]; then
    echo "  [x] TypeScript"
elif grep -q "mypy" pyproject.toml 2>/dev/null; then
    echo "  [x] mypy"
elif grep -q "pyright" pyproject.toml 2>/dev/null; then
    echo "  [x] pyright"
else
    echo "  [ ] No type checker detected"
fi

# Testing: only the first framework that matches is reported
echo "Testing:"
if grep -q '"jest\|vitest"' package.json 2>/dev/null; then
    echo "  [x] Jest/Vitest"
elif grep -q "pytest" pyproject.toml 2>/dev/null; then
    echo "  [x] pytest"
else
    echo "  [ ] No test framework detected"
fi

# Commit validation: only the first mechanism that matches is reported
echo "Commit Validation:"
if [ -f "commitlint.config.js" ]; then
    echo "  [x] commitlint"
elif grep -q "conventional-pre-commit" .pre-commit-config.yaml 2>/dev/null; then
    echo "  [x] conventional-pre-commit"
else
    echo "  [ ] No commit validation"
fi

# CI/CD: GitHub Actions is reported either way; GitLab CI and Jenkins only when present
echo "CI/CD:"
if [ -d ".github/workflows" ]; then
    echo "  [x] GitHub Actions"
else
    echo "  [ ] No GitHub Actions"
fi
if [ -f ".gitlab-ci.yml" ]; then
    echo "  [x] GitLab CI"
fi
if [ -f "Jenkinsfile" ]; then
    echo "  [x] Jenkins"
fi
```

---

## Phase 5: Convention Detection

Identify existing code patterns:

```bash
echo "=== Convention Detection ==="

# File naming
# find walks src/ at every depth. A bare src/**/*.ts glob only recurses when
# bash's globstar option is enabled; otherwise ** behaves like a single *.
echo "File Naming:" && \
find src -type f -name "*.ts" 2>/dev/null | head -5 && \
find src -type f -name "*.py" 2>/dev/null | head -5

# Import style (JS/TS)
echo "Import Style:" && \
find src -type f -name "*.ts" -exec grep -h "^import" {} + 2>/dev/null | head -5

# Export style (JS/TS)
echo "Export Style:" && \
find src -type f -name "*.ts" -exec grep -h "^export" {} + 2>/dev/null | head -5

# Test file location
# node_modules and .git are pruned so the five samples come from project code,
# not from installed dependencies.
echo "Test Location:" && \
find . \( -name node_modules -o -name .git \) -prune -o \
    \( -name "*.test.*" -o -name "*.spec.*" -o -name "test_*.py" \) -print \
    2>/dev/null | head -5
```

---

## Phase 6: Generate Report

Based on all findings, generate this report structure:

```markdown
# Repository Analysis Report

**Generated:** [timestamp]
**Repository:** [name from git remote or directory]

## Overview

| Attribute | Value |
|-----------|-------|
| Type | [Monorepo / Full-Stack / Frontend / Backend] |
| Language | [TypeScript / Python / ...] |
| Framework | [React / FastAPI / ...] |
| Package Manager | [npm / pnpm / uv / pip] |

## Directory Structure

[Simplified tree output]

## Tech Stack

| Category | Technology | Config |
|----------|------------|--------|
| Language | X | X |
| Framework | X | X |
| Testing | X | X |
| Linting | X | X |
| Formatting | X | X |

## Guardrails Status

### Present
- [x] Item 1
- [x] Item 2

### Missing (Recommended to Add)
- [ ] Item 1 - [brief reason]
- [ ] Item 2 - [brief reason]

## Conventions Observed

| Pattern | Observed Value | Example |
|---------|----------------|---------|
| Naming | camelCase / snake_case | file.ts |
| Imports | Absolute / Relative | @/components |
| Tests | Colocated / Separate | *.test.ts |
| Exports | Named / Default | export { X } |

## Recommendations

1. **High Priority**
   - [Recommendation with reason]

2. **Medium Priority**
   - [Recommendation with reason]

3. **Low Priority / Nice to Have**
   - [Recommendation with reason]

## Key Files to Review

| File | Purpose | Why Review |
|------|---------|------------|
| src/index.ts | Entry point | Understand app bootstrap |
| src/config.ts | Configuration | Understand env handling |
| tests/setup.ts | Test setup | Understand test patterns |
```

---

## Phase 7: Offer Next Steps

After generating the report, offer these options:

> **Analysis complete!** Here's what I found: [summary]
>
> What would you like to do next?
> 1. **Add missing guardrails** - Set up pre-commit hooks, linting, etc.
> 2. **Generate detailed conventions doc** - Document patterns for team
> 3. **Set up Claude integration** - Run `/initialize-project` to add Claude skills
> 4. **Start working on code** - I'll follow the conventions I detected
> 5. **Something else**

---

## Quick Analysis (One Command)

For a quick overview without the full report:

```bash
# One summary line per dimension, derived from marker files in the current directory.
# The working-directory path is quoted: unquoted, a path containing spaces is
# split into several arguments and basename reports the wrong name.
echo "=== Quick Analysis ===" && \
echo "Repo: $(basename "$(pwd)")" && \
echo "Type: $([ -d packages ] && echo 'Monorepo' || ([ -d frontend ] && [ -d backend ] && echo 'Full-Stack') || echo 'Standard')" && \
echo "Tech: $([ -f package.json ] && echo 'JS/TS' || ([ -f pyproject.toml ] && echo 'Python') || echo 'Other')" && \
echo "Guardrails: $([ -d .husky ] || [ -f .pre-commit-config.yaml ] && echo 'Present' || echo 'Missing')" && \
echo "CI/CD: $([ -d .github/workflows ] && echo 'GitHub Actions' || echo 'None')"
```


================================================
FILE: commands/analyze-workspace.md
================================================
# /analyze-workspace

> Full dynamic analysis of workspace topology, dependencies, and contracts.

## Trigger

Run this command when:
- First time setting up workspace awareness
- Major refactor or new module added
- Weekly scheduled refresh
- `/sync-contracts` reports too much drift
- Switching to work on a different workspace

## Behavior

### Phase 1: Topology Discovery (~30 seconds)

```
🔍 Analyzing workspace topology...

Checking workspace indicators:
  ✓ Found turbo.json (Turborepo)
  ✓ Found pnpm-workspace.yaml
  ✗ No nx.json
  ✗ No lerna.json

Workspace type: Monorepo (Turborepo)
Root: /Users/ali/code/myapp

Discovering modules...
  ✓ apps/web (package.json found)
  ✓ apps/api (pyproject.toml found)
  ✓ packages/shared-types (package.json found)
  ✓ packages/db (package.json found)

Modules found: 4
```

### Phase 2: Module Analysis (~60 seconds)

For each module, analyze:

```
📦 Analyzing apps/web...
  Tech stack: Next.js 14, TypeScript, TailwindCSS
  Entry point: src/app/layout.tsx
  Key directories: src/lib/, src/components/, src/types/
  Dependencies: @repo/shared-types, @repo/ui
  External calls: fetch → apps/api (15 files)
  Token estimate: 18K full, 5K summarized

📦 Analyzing apps/api...
  Tech stack: FastAPI, Python 3.12, SQLAlchemy
  Entry point: app/main.py
  Key directories: app/routes/, app/schemas/, app/models/
  Dependencies: packages/db (internal)
  Exposes: OpenAPI spec (47 endpoints)
  Token estimate: 24K full, 7K summarized

📦 Analyzing packages/shared-types...
  Tech stack: TypeScript
  Entry point: src/index.ts
  Exports: 34 types
  Consumed by: apps/web, apps/api (codegen)
  Token estimate: 3K

📦 Analyzing packages/db...
  Tech stack: Drizzle ORM, TypeScript
  Entry point: src/index.ts
  Tables: 12
  Migrations: 23
  Token estimate: 8K full, 2K schema only
```

### Phase 3: Contract Extraction (~45 seconds)

```
📜 Extracting contracts...

OpenAPI Detection:
  ✓ apps/api/openapi.json (47 endpoints, 23 schemas)

GraphQL Detection:
  ✗ No GraphQL schemas found

TypeScript Types:
  ✓ packages/shared-types/src/index.ts (34 exports)

Pydantic Schemas:
  ✓ apps/api/app/schemas/ (23 models)

Database Schema:
  ✓ packages/db/schema/ (12 tables)

Contract sources registered: 5 files
```

### Phase 4: Dependency Graph (~30 seconds)

```
🔗 Building dependency graph...

Internal dependencies:
  apps/web → packages/shared-types (23 imports)
  apps/web → apps/api (15 API calls)
  apps/api → packages/db (12 imports)
  apps/api → packages/shared-types (codegen)
  packages/db → (none)
  packages/shared-types → (none)

Dependency order (for changes):
  1. packages/shared-types (leaf)
  2. packages/db (leaf)
  3. apps/api (depends on db, shared-types)
  4. apps/web (depends on api, shared-types)
```

### Phase 5: Key File Identification (~30 seconds)

```
📁 Identifying key files...

High priority (always relevant):
  ✓ apps/api/openapi.json
  ✓ packages/shared-types/src/index.ts
  ✓ apps/web/src/lib/api/client.ts

Context-specific:
  ✓ API work: apps/api/app/routes/*.py
  ✓ DB work: packages/db/schema/*.ts
  ✓ Auth work: apps/api/app/routes/auth.py + deps
  ✓ Frontend: apps/web/src/components/**

Token budget by context:
  Frontend API: ~8K tokens
  Backend endpoints: ~12K tokens
  Database changes: ~6K tokens
  Shared types: ~4K tokens
```

### Phase 6: Generate Artifacts

```
📝 Generating workspace artifacts...

Created:
  ✓ _project_specs/workspace/TOPOLOGY.md
  ✓ _project_specs/workspace/CONTRACTS.md
  ✓ _project_specs/workspace/DEPENDENCY_GRAPH.md
  ✓ _project_specs/workspace/KEY_FILES.md
  ✓ _project_specs/workspace/CROSS_REPO_INDEX.md
  ✓ _project_specs/workspace/.contract-sources
```

## Final Output

```
════════════════════════════════════════════════════════════════
  WORKSPACE ANALYSIS COMPLETE
════════════════════════════════════════════════════════════════

Workspace: myapp
Type: Monorepo (Turborepo)
Modules: 4 (2 apps, 2 packages)

┌─────────────────────────────────────────────────┐
│ apps/web (Next.js) ←──── apps/api (FastAPI)     │
│      │                        │                 │
│      ▼                        ▼                 │
│ packages/shared-types    packages/db            │
└─────────────────────────────────────────────────┘

Contracts:
  REST API: 47 endpoints
  Shared types: 34 interfaces
  DB tables: 12

Token Estimates:
  Current module only: ~20K tokens
  With cross-module context: ~45K tokens
  Full workspace: ~53K tokens
  Budget remaining: ~100K tokens ✓

Artifacts generated in: _project_specs/workspace/

Next steps:
  • Contracts will auto-sync on commit (if changed)
  • Run /sync-contracts manually to refresh
  • Run /workspace-status for quick check

════════════════════════════════════════════════════════════════
```

## Flags

| Flag | Description |
|------|-------------|
| `--force` | Regenerate all artifacts even if recent |
| `--type <type>` | Override auto-detection: `monorepo`, `multi-repo`, `hybrid` |
| `--repos <paths>` | For multi-repo: comma-separated paths to related repos |
| `--skip-contracts` | Skip contract extraction (faster) |
| `--verbose` | Show detailed analysis output |
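| `--json` | Output as JSON (for tooling) |
| `--skip-repo <name>` | For multi-repo: skip a related repo that cannot be accessed |
| `--infer-contracts` | Infer contracts when no spec file (e.g. `openapi.json`) is found |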

## Multi-Repo Mode

For workspaces with separate git repositories:

```bash
# Auto-detect sibling repos
/analyze-workspace --type multi-repo

# Specify repo locations explicitly
/analyze-workspace --type multi-repo --repos "../backend,../shared,../mobile"
```

Claude will:
1. Detect related repos in parent directory
2. Set up symlinks in `.workspace/repos/` if needed
3. Analyze each repo
4. Build cross-repo dependency graph
5. Extract contracts from each

## Integration Points

### On First Run

Creates the full workspace context structure:

```
_project_specs/
└── workspace/
    ├── TOPOLOGY.md
    ├── CONTRACTS.md
    ├── DEPENDENCY_GRAPH.md
    ├── KEY_FILES.md
    ├── CROSS_REPO_INDEX.md
    ├── .contract-sources
    └── cache/              # Cached cross-repo files
```

### Updates CLAUDE.md

Adds workspace skill reference:

```markdown
## Skills
- .claude/skills/workspace.md
```

### Sets Up Hooks

Installs contract freshness hooks:
- Session start: Staleness check
- Post-commit: Auto-sync trigger
- Pre-push: Validation gate

## Error Handling

### No Workspace Detected

```
⚠️  No workspace configuration detected

This appears to be a single-repo project.
Use /analyze-repo for single repository analysis.

Or specify workspace type manually:
  /analyze-workspace --type monorepo
  /analyze-workspace --type multi-repo --repos "../other-repo"
```

### Access Denied to Related Repo

```
⚠️  Cannot access related repository: ../backend

Options:
  1. Ensure the repo exists at that path
  2. Create symlink: ln -s /path/to/backend .workspace/repos/backend
  3. Skip this repo: /analyze-workspace --skip-repo backend
```

### Contract Extraction Failed

```
⚠️  Failed to extract contracts from apps/api

Reason: openapi.json not found

Suggestions:
  1. Generate OpenAPI spec: cd apps/api && python -m app.generate_openapi
  2. Skip contract extraction: /analyze-workspace --skip-contracts
  3. Use inferred contracts: /analyze-workspace --infer-contracts
```

## When to Re-run

| Scenario | Action |
|----------|--------|
| Added new module/package | Full `/analyze-workspace` |
| Changed API endpoints | `/sync-contracts` (lightweight) |
| Major refactor | Full `/analyze-workspace --force` |
| Weekly maintenance | Full `/analyze-workspace` |
| Quick check | `/workspace-status` |


================================================
FILE: commands/check-contributors.md
================================================
# Check Contributors

Checks who's working on the project and optionally converts it to a multi-person project with team state management.

---

## What This Command Does

1. **Detect current state** - Is this a solo or team project?
2. **Show active contributors** - Who's working on what right now?
3. **Offer conversion** - Convert solo → team project if needed

---

## Phase 1: Detect Project Type

Check for team structure:

```bash
# Check if team coordination exists (prints the path when the file is present)
ls _project_specs/team/state.md 2>/dev/null
ls _project_specs/team/contributors.md 2>/dev/null

# Check git contributors (commit count per author, top 10)
git shortlog -sn --all 2>/dev/null | head -10

# Check recent activity: commits per author over the last 7 days.
# --format already reduces each commit to the author name, so --oneline is
# not needed. stderr is silenced like the commands above so a directory that
# is not a git repo stays quiet.
git log --since="7 days ago" --format="%an" 2>/dev/null | sort | uniq -c | sort -rn
```

### If Team Structure Exists

Report current state:

```
📊 Team Project Detected

Contributors:
┌──────────┬────────────────┬──────────┐
│ Handle   │ Focus Area     │ Status   │
├──────────┼────────────────┼──────────┤
│ @alice   │ Backend, Auth  │ 🟢 Active │
│ @bob     │ Frontend       │ 🟡 Paused │
└──────────┴────────────────┴──────────┘

Active Sessions:
• @alice working on TODO-042 (src/auth/*)
• No conflicts detected

Claimed Todos:
• TODO-042 - @alice (since 2024-01-15)
• TODO-038 - @bob (since 2024-01-14)

Recent Decisions:
• [2024-01-15] JWT vs Sessions - chose JWT (@alice)

Run 'cat _project_specs/team/state.md' for full details.
```

### If Solo Project

```
👤 Solo Project Detected

Git contributors found:
• alice@example.com (142 commits)
• bob@example.com (38 commits)  ← Recent activity

This project has multiple git contributors but no team coordination.

Would you like to:
1. Convert to team project (adds team state management)
2. Keep as solo project (no changes)
```

---

## Phase 2: Convert to Team Project

If user chooses to convert:

### Step 1: Create Team Structure

```bash
mkdir -p _project_specs/team/handoffs
```

### Step 2: Create state.md

```markdown
# Team State

*Last synced: [TIMESTAMP]*

## Active Sessions

| Contributor | Working On | Started | Files Touched | Status |
|-------------|------------|---------|---------------|--------|
| - | - | - | - | - |

## Claimed Todos

| Todo | Claimed By | Since | ETA |
|------|------------|-------|-----|
| - | - | - | - |

## Recently Completed (Last 48h)

| Todo | Completed By | When | PR |
|------|--------------|------|-----|
| - | - | - | - |

## Conflicts to Watch

| Area | Contributors | Notes |
|------|--------------|-------|
| - | - | - |

## Announcements

- [DATE] Project converted to team coordination mode
```

### Step 3: Create contributors.md

Ask user about team members:

```
Who are the team members? (I'll help you fill this out)

For each person, I need:
- Handle (e.g., @alice)
- Name
- Focus areas (e.g., Backend, Auth)
- Timezone
- Status (Active/Part-time)
```

Then create:

```markdown
# Contributors

## Team Members

| Handle | Name | Focus Areas | Timezone | Status |
|--------|------|-------------|----------|--------|
| @[handle] | [name] | [areas] | [tz] | Active |

## Ownership

| Area | Primary | Backup | Notes |
|------|---------|--------|-------|
| - | - | - | Define as you work |

## Communication

- Slack: #[channel]
- PRs: Tag area owner for review
```

### Step 4: Update active.md

Add claim annotation format to existing todos:

```markdown
<!--
TEAM PROJECT - Claim format:
**Claimed:** @handle (YYYY-MM-DD HH:MM TZ)

Always claim a todo before starting work.
Check team/state.md for who's working on what.
-->

## [TODO-XXX] Description

**Status:** pending
**Claimed:** -

...
```

### Step 5: Update CLAUDE.md

Add team-coordination.md to skills list:

```markdown
## Skills
Read and follow these skills before writing any code:
- .claude/skills/base.md
- .claude/skills/team-coordination.md  ← Add this
...
```

### Step 6: Copy Skill

```bash
cp ~/.claude/skills/team-coordination.md .claude/skills/
```

---

## Phase 3: Summary

After conversion:

```
✅ Converted to Team Project

Created:
• _project_specs/team/state.md
• _project_specs/team/contributors.md
• _project_specs/team/handoffs/
• .claude/skills/team-coordination.md

Updated:
• _project_specs/todos/active.md (added claim format)
• CLAUDE.md (added team-coordination skill)

Next steps:
1. Fill out contributors.md with your team
2. Each team member should read team-coordination.md
3. Claim todos before starting work
4. Update state.md at start/end of each session

Commit these changes:
  git add _project_specs/team _project_specs/todos/active.md .claude/skills/team-coordination.md CLAUDE.md
  git commit -m "Enable team coordination for multi-person project"
  git push origin main
```

---

## Quick Check Mode

For quick status without conversion prompt:

```
/check-contributors --status
```

Output:

```
📊 Quick Status

Type: Team Project / Solo Project
Contributors: 3 (2 active this week)
Active Now: @alice (TODO-042)
Claimed: 2 todos
Conflicts: None

Last state update: 2 hours ago
```

---

## Reverting to Solo

If team coordination is no longer needed:

```
/check-contributors --solo
```

This:
1. Archives `_project_specs/team/` to `_project_specs/team-archive-[date]/`
2. Removes claim annotations from todos
3. Removes team-coordination.md from CLAUDE.md skills
4. Keeps decisions.md (valuable history)

---

## Usage

```bash
# Check who's working and see options
/check-contributors

# Quick status only
/check-contributors --status

# Force conversion to team project
/check-contributors --team

# Revert to solo project
/check-contributors --solo
```


================================================
FILE: commands/icpg-bootstrap.md
================================================
# /icpg-bootstrap — Bootstrap from Git History

Infer ReasonNodes from existing git commit history. One-time setup for existing codebases.

---

## Usage

`/icpg-bootstrap [--days N]`

Default: 90 days of history.

---

## Steps

### 1. Initialize iCPG if needed

```bash
icpg init
```

### 2. Run bootstrap

```bash
icpg bootstrap --days 90 --verbose
```

If no LLM API key available:
```bash
icpg bootstrap --days 90 --verbose --no-llm
```

### 3. Show results

```
iCPG BOOTSTRAP COMPLETE
═══════════════════════

History scanned: {N} days ({M} commits)
Commit clusters: {K}
ReasonNodes created: {R}
Symbols linked: {S}
Duplicates skipped: {D}

TOP INFERRED INTENTS:
  1. [0.80] "Add JWT authentication" — 12 symbols, 5 files
  2. [0.75] "Refactor payment processing" — 8 symbols, 3 files
  3. [0.65] "Fix rate limiting bug" — 3 symbols, 2 files
  ...

LOW CONFIDENCE (review recommended):
  - [0.55] "Update dependencies" — may be too generic
  - [0.50] "Misc fixes" — commit message unclear
```

### 4. Offer review

Ask the user:
> {N} ReasonNodes were inferred from git history.
> {M} are low-confidence and may need review.
>
> Would you like to:
> 1. Keep all (proceed with current quality)
> 2. Review low-confidence intents (I'll show each one)
> 3. Run drift scan now (`icpg drift check`)

### 5. Post-bootstrap drift scan

```bash
icpg drift check
```

Show any immediate drift detected.


================================================
FILE: commands/icpg-drift.md
================================================
# /icpg-drift — Show All Drift

Run a full drift scan and display all unresolved drift events, grouped by dimension and sorted by severity.

---

## Usage

`/icpg-drift`

---

## Steps

### 1. Run drift scan

```bash
icpg drift check
```

### 2. Also show existing unresolved drift

```bash
icpg status
```

### 3. Display results

```
DRIFT REPORT
═══════════════

{N} unresolved drift events across {M} symbols

BY SEVERITY:
  [0.85] spec(0.9) + decision(0.8) — validateToken drifted from "JWT auth"
  [0.60] ownership(0.7) + test(0.5) — UserService has 4 owners, tests stale
  ...

BY DIMENSION:
  Spec drift:       {count} events
  Decision drift:   {count} events
  Ownership drift:  {count} events
  Test drift:       {count} events
  Usage drift:      {count} events
  Dependency drift: {count} events

TOP ACTIONS:
  1. Fix spec drift in validateToken — checksum changed without MODIFIES edge
  2. Add tests for UserService — VALIDATED_BY tests are missing
  3. Assign single owner to PaymentProcessor — 5 different owners
```

### 4. Offer resolution

For each event, suggest:
- `icpg drift resolve <id>` to mark resolved
- Create a new MODIFIES ReasonNode if the change was intentional
- Write missing tests if test drift detected


================================================
FILE: commands/icpg-impact.md
================================================
# /icpg-impact — Show Blast Radius

Show the blast radius of a ReasonNode or symbol — what depends on it, what breaks if it changes.

---

## Usage

`/icpg-impact <id-or-symbol>`

- If argument looks like a UUID (contains `-`), treat as ReasonNode ID
- Otherwise, treat as symbol name and find its creating ReasonNode

---

## Steps

### 1. Resolve target

```bash
# If ReasonNode ID
icpg query blast <id>

# If symbol name
icpg query risk <symbol-name>
# Then get the creating reason from the output
icpg query blast <creating-reason-id>
```

### 2. Display results

Format the output as:

```
BLAST RADIUS: <goal>
═══════════════════════════════════

Symbols ({N}):
  function validateToken (src/auth/service.ts)
  class AuthMiddleware (src/auth/middleware.ts)
  ...

Dependent Intents ({N}):
  a1b2c3d4 — Dashboard user session management
  e5f6g7h8 — Payment authorization flow
  ...

Contracts:
  INV: file_exists("src/auth/middleware.ts")
  POST: test_exists("src/auth/__tests__/service.test.ts")

Risk: {HIGH|MEDIUM|LOW} based on dependent count + drift history
```

### 3. Recommendations

If high risk (>5 dependents or active drift):
- Suggest running full test suite before changes
- Suggest creating a new ReasonNode with MODIFIES edge
- Warn about function signatures to preserve


================================================
FILE: commands/icpg-why.md
================================================
# /icpg-why — Why Does This Code Exist?

Trace any symbol back to its creating ReasonNode — show the original goal, who wrote it, and whether it's still doing what it was made for.

---

## Usage

`/icpg-why <symbol-name>`

---

## Steps

### 1. Find the symbol

```bash
icpg query risk <symbol-name>
```

If not found, search more broadly:
```bash
icpg query context <likely-file-path>
```

### 2. Show the full trace

```
WHY: <symbol-name>
═══════════════════

Symbol: <type> <name> (<file-path>)
Signature: <signature>
Checksum: <checksum>

CREATING INTENT:
  ID: <reason-id>
  Goal: <goal>
  Type: <decision_type>
  Owner: <owner>
  Status: <status>
  Created: <date>

CONTRACTS:
  PRE: <preconditions>
  POST: <postconditions>
  INV: <invariants>

MODIFICATION HISTORY:
  1. <date> — <modifying-reason-goal> (by <owner>)
  2. <date> — <modifying-reason-goal> (by <owner>)

DRIFT STATUS: {CLEAN | DRIFTED}
  Dimensions: <drift-dimensions if any>
  Severity: <score>
```

### 3. If no ReasonNode found

Symbol exists but has no iCPG tracking:
```
⚠ No ReasonNode found for <symbol-name>.
This code has no tracked intent — consider creating one:
  icpg create "<inferred goal>" --scope <file>
```


================================================
FILE: commands/initialize-project.md
================================================
# Initialize Project

Full project setup with Claude coding guardrails. Works for both new and existing projects.

**This command is idempotent** - run it anytime to update skills, add missing structure, or reconfigure.

---

## Phase 0: Validate Bootstrap Installation

**FIRST**, verify Maggy is properly installed:

```bash
# Read bootstrap directory (saved during install)
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)

# Without the marker file BOOTSTRAP_DIR is empty and the validator path would
# collapse to /tests/validate-structure.sh, producing a confusing
# "No such file" error. Report both failure modes explicitly and leave a
# non-zero status so the step is treated as a failed validation.
if [ -z "$BOOTSTRAP_DIR" ]; then
    echo "✗ ~/.claude/.bootstrap-dir is missing or empty - bootstrap is not installed (run ./install.sh from the bootstrap repo)" >&2
    false
elif [ ! -f "$BOOTSTRAP_DIR/tests/validate-structure.sh" ]; then
    echo "✗ Validation script not found: $BOOTSTRAP_DIR/tests/validate-structure.sh" >&2
    false
else
    # Run quick validation
    "$BOOTSTRAP_DIR/tests/validate-structure.sh" --quick
fi
```

This checks:
- Skills are installed with correct structure (folder/SKILL.md)
- Commands are installed (~/.claude/commands/)
- Hooks are installed (~/.claude/hooks/)

**If validation fails:**
- Show the error to user
- Suggest running: `cd "$BOOTSTRAP_DIR" && git pull && ./install.sh`
- Offer to continue anyway or abort

**If validation passes:**
- Continue to Phase 1

---

## Phase 1: Detect Project State

First, check what already exists:

```bash
# Existing Claude setup first, then cross-tool setup (Kimi CLI, Codex CLI).
# Missing entries are expected on new projects, so ls errors are discarded.
for entry in .claude/skills/ CLAUDE.md _project_specs/ \
             .kimi/skills/ .codex/skills/ .agents/skills/ AGENTS.md; do
    ls -la "$entry" 2>/dev/null
done

# Installed AI CLI tools; falls back to "claude" when the detector cannot run
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
DETECTED_AGENTS=$("$BOOTSTRAP_DIR/scripts/detect-agents.sh" 2>/dev/null || echo "claude")
echo "Detected AI CLI tools: $DETECTED_AGENTS"

# Existing git repo
git remote -v 2>/dev/null

# Package manifests: Node/Python, then Flutter, then Android
ls package.json pyproject.toml 2>/dev/null
ls pubspec.yaml 2>/dev/null
ls android/build.gradle android/app/build.gradle 2>/dev/null

# Native language(s) in the Android module: first Java file, then first Kotlin file
for ext in java kt; do
    find android -name "*.$ext" -type f 2>/dev/null | head -1
done
```

Based on findings, determine:
- **New project**: No CLAUDE.md, no .claude/skills/, no code files
- **Existing project with skills**: Has .claude/skills/ - offer to UPDATE
- **Existing codebase without skills**: Has code but no Claude setup - **AUTO-RUN ANALYSIS**

Inform the user:
- "Detected new project - will do full setup"
- "Detected existing Claude project - will update skills and add any missing structure"
- "Detected existing codebase - **analyzing before making changes...**"

**For existing codebases without Claude setup, AUTOMATICALLY proceed to Phase 1b.**

---

## Phase 1b: Analyze Existing Codebase (Auto-triggered)

**This phase runs automatically when an existing codebase is detected without Claude setup.**

### Step 1: Repository Structure Detection

```bash
echo "=== Analyzing Repository Structure ==="

# Dependency-key patterns for package.json. Each alternative is a complete
# quoted key followed by a colon, so look-alike packages such as "vuex" or
# "express-session" do not count, and Angular's real package name
# (@angular/core) is recognised.
FRONTEND_DEPS_RE='"(react|vue|angular|@angular/core)"[[:space:]]*:'
BACKEND_DEPS_RE='"(express|fastify)"[[:space:]]*:'

# Detect repo type (first matching rule wins)
if [ -d "packages" ] || [ -d "apps" ] || grep -q '"workspaces"' package.json 2>/dev/null; then
    REPO_TYPE="MONOREPO"
elif [ -d "frontend" ] && [ -d "backend" ]; then
    REPO_TYPE="FULL_STACK"
elif [ -d "src" ] && grep -Eq "$FRONTEND_DEPS_RE" package.json 2>/dev/null; then
    REPO_TYPE="FRONTEND"
elif [ -f "pyproject.toml" ] || grep -Eq "$BACKEND_DEPS_RE" package.json 2>/dev/null; then
    REPO_TYPE="BACKEND"
else
    REPO_TYPE="STANDARD"
fi
echo "Repo Type: $REPO_TYPE"

# Directory structure (3 levels, excluding noise).
# -prune drops the noise directories themselves as well as their contents,
# and -maxdepth comes first because GNU find warns when a global option
# follows a test.
find . -maxdepth 3 \
    \( -name node_modules -o -name .git -o -name venv -o -name __pycache__ \
       -o -name dist -o -name build \) -prune \
    -o -type d -print \
    2>/dev/null | head -30
```

### Step 2: Tech Stack Detection

```bash
echo "=== Tech Stack ==="

# Primary language/framework, inferred from marker files in the project root
if [ -f "package.json" ]; then echo "JavaScript/TypeScript project"; fi
if [ -f "tsconfig.json" ]; then echo "  → TypeScript configured"; fi
if [ -f "pyproject.toml" ]; then echo "Python project"; fi
if [ -f "pubspec.yaml" ]; then echo "Flutter project"; fi
if [ -d "android" ]; then echo "Android project"; fi

# JavaScript frameworks: look for the quoted dependency key in package.json
if [ -f "package.json" ]; then
    if grep -q '"react"' package.json; then echo "  → React"; fi
    if grep -q '"next"' package.json; then echo "  → Next.js"; fi
    if grep -q '"express"' package.json; then echo "  → Express"; fi
    if grep -q '"fastify"' package.json; then echo "  → Fastify"; fi
fi

# Python frameworks: plain substring match anywhere in pyproject.toml
if [ -f "pyproject.toml" ]; then
    if grep -q "fastapi" pyproject.toml; then echo "  → FastAPI"; fi
    if grep -q "django" pyproject.toml; then echo "  → Django"; fi
    if grep -q "flask" pyproject.toml; then echo "  → Flask"; fi
fi
```

### Step 3: Guardrails Audit

```bash
echo "=== Guardrails Status ==="

# Pre-commit hooks: Husky and the pre-commit framework are reported independently
echo "Pre-commit Hooks:"
if [ -d ".husky" ]; then
    echo "  ✓ Husky installed"
else
    echo "  ✗ Husky NOT installed"
fi
if [ -f ".pre-commit-config.yaml" ]; then
    echo "  ✓ pre-commit framework"
else
    echo "  ✗ pre-commit NOT installed"
fi

# Linting: first detected tool wins
echo "Linting:"
if grep -q '"eslint"' package.json 2>/dev/null; then
    echo "  ✓ ESLint"
elif grep -q "ruff" pyproject.toml 2>/dev/null; then
    echo "  ✓ Ruff"
else
    echo "  ✗ No linter detected"
fi

# Formatting: first detected tool wins
echo "Formatting:"
if grep -q '"prettier"' package.json 2>/dev/null; then
    echo "  ✓ Prettier"
elif grep -q "ruff\|black" pyproject.toml 2>/dev/null; then
    echo "  ✓ Ruff/Black"
else
    echo "  ✗ No formatter detected"
fi

# Type checking: first detected tool wins
echo "Type Checking:"
if [ -f "tsconfig.json" ]; then
    echo "  ✓ TypeScript"
elif grep -q "mypy" pyproject.toml 2>/dev/null; then
    echo "  ✓ mypy"
else
    echo "  ✗ No type checker detected"
fi

# Commit validation: first detected tool wins
echo "Commit Validation:"
if [ -f "commitlint.config.js" ]; then
    echo "  ✓ commitlint"
elif grep -q "conventional-pre-commit" .pre-commit-config.yaml 2>/dev/null; then
    echo "  ✓ conventional-pre-commit"
else
    echo "  ✗ No commit validation"
fi

# CI/CD
echo "CI/CD:"
if [ -d ".github/workflows" ]; then
    echo "  ✓ GitHub Actions"
else
    echo "  ✗ No GitHub Actions"
fi
```

### Step 4: Convention Detection

```bash
echo "=== Conventions Detected ==="

# Source files to sample: TypeScript first, Python only when no .ts file exists.
# The choice is made on the captured result because the exit status of a
# "... | head" pipeline is head's (always 0), so an "||" fallback after it
# never runs. find is used instead of src/**/ because ** only recurses when
# bash's globstar option is enabled.
SAMPLE_FILES=$(find src -type f -name "*.ts" 2>/dev/null | head -20)
if [ -z "$SAMPLE_FILES" ]; then
    SAMPLE_FILES=$(find src -type f -name "*.py" 2>/dev/null | head -20)
fi

# File naming pattern
echo "File Naming:"
if [ -n "$SAMPLE_FILES" ]; then
    printf '%s\n' "$SAMPLE_FILES" | head -3
fi

# Import style (first three import lines across the sampled files)
echo "Import Style:"
if [ -n "$SAMPLE_FILES" ]; then
    printf '%s\n' "$SAMPLE_FILES" | while IFS= read -r sample; do
        grep -hE '^(from|import)' "$sample" 2>/dev/null
    done | head -3
fi

# Test location
echo "Test Location:"
if [ -d "tests" ]; then echo "  Separate tests/ directory"; fi
if [ -d "__tests__" ]; then echo "  __tests__/ directory"; fi
# Only report colocated tests when a matching file was actually found, and
# skip dependency/VCS directories so vendored test files do not count.
COLOCATED_TEST=$(find . \( -name node_modules -o -name .git \) -prune \
    -o -type f \( -name "*.test.*" -o -name "*.spec.*" \) -print 2>/dev/null | head -1)
if [ -n "$COLOCATED_TEST" ]; then
    echo "$COLOCATED_TEST"
    echo "  Colocated tests"
fi
```

### Step 5: Generate Analysis Summary

After running the analysis, present this summary to the user:

```markdown
## Repository Analysis Complete

**Type:** [Monorepo | Full-Stack | Frontend | Backend | Standard]
**Language:** [TypeScript | Python | Flutter | ...]
**Framework:** [React | FastAPI | ...]

### Guardrails Status

| Category | Status | Recommendation |
|----------|--------|----------------|
| Pre-commit hooks | ✗ Missing | Add Husky (JS) or pre-commit (Python) |
| Linting | ✓ ESLint | - |
| Formatting | ✗ Missing | Add Prettier |
| Type checking | ✓ TypeScript | - |
| Commit validation | ✗ Missing | Add commitlint |

### Conventions I'll Follow
- File naming: camelCase
- Imports: Absolute (@/...)
- Tests: Colocated (*.test.ts)
```

### Step 6: Present Options

After showing the analysis, ask:

> **I've analyzed this codebase. Here's what I found:** [summary above]
>
> What would you like me to do?
> 1. **Add Claude skills only** - Add skills, preserve everything else
> 2. **Add skills + missing guardrails** - Also setup Husky/pre-commit, commitlint, etc.
> 3. **Full setup** - Skills, guardrails, project specs structure, CI/CD
> 4. **Just show analysis** - Don't change anything yet

**Based on user choice:**
- Option 1 → Skip to Phase 4, only copy skills
- Option 2 → Phase 4 + guardrails setup from `existing-repo` skill
- Option 3 → Full Phase 4 execution
- Option 4 → End here, user can run `/initialize-project` again later

---

## Phase 2: Validate CLI Tools

Check required CLI tools are installed and authenticated:

```bash
# Check GitHub CLI
gh auth status

# Check Vercel CLI
vercel whoami

# Check Supabase CLI
supabase projects list
```

If any tool fails, inform the user and offer to skip:
- "GitHub CLI not authenticated. Run: `gh auth login` (or skip if not using GitHub)"
- "Vercel CLI not authenticated. Run: `vercel login` (or skip if not using Vercel)"
- "Supabase CLI not authenticated. Run: `supabase login` (or skip if not using Supabase)"

---

## Phase 3: Project Questions

**For existing projects with CLAUDE.md**: Read existing config first, then ask what to update.

**For new or unconfigured projects**: Ask these questions one at a time:

### 1. What are you building?
Ask for a brief description (1-2 sentences).
*Skip if CLAUDE.md exists and has Project Overview - show current and ask if they want to update.*

### 2. What language/runtime?
- Python
- TypeScript
- JavaScript (Node)
- Android Java
- Android Kotlin
- Flutter (Dart)
- Multiple (specify which)

*Auto-detect from package.json, pyproject.toml, pubspec.yaml, or android/ directory if present.*

### 3. What type of project?
- Backend API
- Frontend Web (React)
- Mobile App (React Native)
- Mobile App (Android Native)
- Mobile App (Flutter)
- Mobile App (Flutter + Native Android)
- Full Stack (Backend + Frontend)
- CLI Tool
- Library/Package

*Auto-detect from dependencies if possible.*

### 4. Is this an AI-first application?
- Yes (LLMs handle core logic)
- No (traditional application)

*Check for anthropic/openai in dependencies.*

### 4b. Code graph analysis level?
- **Standard** (default) - Lightweight AST graph with symbol lookup, dependency analysis, blast radius
- **Deep analysis** - Also enable Joern CPG (control flow, data flow, dead code detection)
- **Security audit** - Also enable CodeQL (taint analysis, vulnerability detection)
- **Full** - All three tiers

*Tier 1 (codebase-memory-mcp) is always enabled for all projects. This question determines opt-in tiers.*
*Auto-suggest: If security skill is included, suggest "Security audit". If AI-first, suggest "Deep analysis".*

### 5. What framework? (based on previous answers)
**Backend:**
- Python: FastAPI, Flask, Django
- Node: Express, Fastify, Hono

**Frontend Web:**
- React (Vite, Next.js)

**Mobile:**
- React Native, Expo

*Auto-detect from dependencies.*

### 6. What database?
- Supabase (Postgres)
- None / SQLite
- Other (specify)

*Skip if supabase/ directory exists.*

### 7. Where will this be deployed?
- Vercel
- Render
- Other (specify)

*Skip if vercel.json or render.yaml exists.*

### 8. Repository setup? (skip if git remote already configured)
- Create new repository
- Connect to existing repository
- Skip (local only for now)

If creating new:
- What should the repo be named?
- Public or private?

### 9. Which AI CLI tools do you use? (auto-detect)
- Claude Code only (default)
- Claude Code + Kimi CLI
- Claude Code + Codex CLI
- All three (Claude + Kimi + Codex)

*Auto-detect using `$BOOTSTRAP_DIR/scripts/detect-agents.sh`. Pre-select based on what's installed. If only Claude is detected, skip this question and default to Claude-only.*

### 10. Enable container isolation for parallel agents? (auto-detect)
- **Yes** (default if Docker/OrbStack detected) — Each feature agent runs in its own container
- **No** — Agents share the workspace (native Agent tool)

*Auto-detect Docker/OrbStack. If it is available, default to Yes and skip this question unless the user has asked to confirm each choice. If it is NOT available, do not ask either: inform the user that agents will share the workspace and default to No.*

```bash
# DETECTED_AGENTS is populated in Phase 1; re-derive it when this snippet runs
# in a fresh shell, otherwise the check below would always report "not found".
if [ -z "${DETECTED_AGENTS:-}" ]; then
    BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
    DETECTED_AGENTS=$("$BOOTSTRAP_DIR/scripts/detect-agents.sh" 2>/dev/null || echo "claude")
fi

# Accept either the detector's report or a docker CLI on PATH.
# NOTE(review): assumes OrbStack also exposes a `docker` CLI - confirm.
if echo "$DETECTED_AGENTS" | grep -qE "docker|orbstack" || command -v docker > /dev/null 2>&1; then
    echo "Docker detected — container isolation enabled by default"
    USE_POLYPHONY="true"
else
    echo "Docker not found — agents will share the workspace"
    USE_POLYPHONY="false"
fi
```

---

## Phase 4: Execute Setup

### Step 1: Create/update directory structure
```bash
# Core project layout (mkdir -p leaves existing directories untouched)
mkdir -p \
    .claude/skills \
    docs \
    _project_specs/features \
    _project_specs/todos \
    _project_specs/prompts \
    _project_specs/session/archive \
    scripts

# Cross-tool directories (if selected in question 9)
[ "$USE_KIMI" = "true" ] && mkdir -p .kimi/skills
[ "$USE_CODEX" = "true" ] && mkdir -p .codex/skills

# Generic .agents/ always created for cross-tool compat
mkdir -p .agents/skills
```

### Step 2: Update skill files from ~/.claude/skills/

**Skills use folder structure:** Each skill is a folder containing `SKILL.md`.

```bash
# Copy skill folders (not flat .md files).
# Source paths deliberately have NO trailing slash: BSD/macOS cp treats
# "dir/" as "the contents of dir", which would drop every skill's SKILL.md
# straight into .claude/skills/ (each overwriting the previous one) instead
# of creating .claude/skills/<skill>/. Without the slash GNU and BSD cp both
# copy the folder itself, overwriting an existing copy in place.
for skill in base security project-tooling session-management code-graph cross-agent-delegation; do
    cp -R "$HOME/.claude/skills/$skill" .claude/skills/
done
```

**Always copy (overwrite with latest):**
- `base/` → `.claude/skills/base/`
- `security/` → `.claude/skills/security/`
- `project-tooling/` → `.claude/skills/project-tooling/`
- `session-management/` → `.claude/skills/session-management/`
- `code-graph/` → `.claude/skills/code-graph/`
- `cross-agent-delegation/` → `.claude/skills/cross-agent-delegation/`

**If deep analysis, security audit, or full selected (question 4b):**
- `cpg-analysis/` → `.claude/skills/cpg-analysis/`

```bash
# Copy CPG analysis skill if Tier 2 or 3 selected.
# An unset or empty GRAPH_TIER means question 4b was never answered, which is
# the Standard default, so it must not pull in the opt-in skill.
# No trailing slash on the source: BSD/macOS cp would copy the folder's
# contents rather than the folder itself.
if [ "${GRAPH_TIER:-standard}" != "standard" ]; then
    cp -R ~/.claude/skills/cpg-analysis .claude/skills/
fi
```

**For existing codebases (detected in Phase 1b):**
- `existing-repo/` → `.claude/skills/existing-repo/` - Structure preservation, guardrails setup

**Based on language:**
- Python → copy `python/`
- TypeScript/JavaScript → copy `typescript/`

**Based on project type:**
- React Native → copy `typescript/` AND `react-native/`
- React Web → copy `typescript/` AND `react-web/`
- Node Backend → copy `typescript/` AND `nodejs-backend/`
- Full Stack (Node + React) → copy `typescript/`, `nodejs-backend/`, AND `react-web/`

**For Android/Flutter projects (auto-detect from project structure):**

| Detection | Skills to Copy |
|-----------|---------------|
| `pubspec.yaml` exists | `flutter/` |
| `android/*.java` exists | `android-java/` |
| `android/*.kt` exists | `android-kotlin/` |
| Flutter + Java files | `flutter/` + `android-java/` |
| Flutter + Kotlin files | `flutter/` + `android-kotlin/` |
| Flutter + Both | `flutter/` + `android-java/` + `android-kotlin/` |

```bash
# Detect and copy Android/Flutter skills.
# Sources have no trailing slash so that BSD/macOS cp copies the skill folder
# itself rather than spilling its contents into .claude/skills/.
if [ -f "pubspec.yaml" ]; then
  cp -R ~/.claude/skills/flutter .claude/skills/
fi

# "head -1 | grep -q ." succeeds only when find printed at least one path
if find android -name "*.java" -type f 2>/dev/null | head -1 | grep -q .; then
  cp -R ~/.claude/skills/android-java .claude/skills/
fi

if find android -name "*.kt" -type f 2>/dev/null | head -1 | grep -q .; then
  cp -R ~/.claude/skills/android-kotlin .claude/skills/
fi
```

**If AI-first:**
- Copy `llm-patterns/`

**If container isolation enabled (question 10):**
- Copy `polyphony/`

```bash
# Container isolation skill (question 10). No trailing slash on the source so
# BSD/macOS cp copies the folder itself, not its contents.
if [ "$USE_POLYPHONY" = "true" ]; then
    cp -R ~/.claude/skills/polyphony .claude/skills/
fi
```

**Note:** Skills are always overwritten with the latest version from ~/.claude/skills/. This ensures updates propagate when user updates their global skills.

### Step 2b: Cross-tool skill sync (if Kimi or Codex selected)

After copying skills to `.claude/skills/`, sync to other tool directories:

```bash
# Sync skills to all selected tools
for skill_dir in .claude/skills/*/; do
    # Skip the literal pattern when the glob matched nothing
    [ -d "$skill_dir" ] || continue

    # The */ glob leaves a trailing slash on every match. BSD/macOS cp treats
    # "dir/" as "copy the contents of dir", which would flatten each skill's
    # files into the target root, so strip it to copy the folder itself.
    skill_dir=${skill_dir%/}

    # Kimi CLI
    if [ "$USE_KIMI" = "true" ]; then
        cp -R "$skill_dir" .kimi/skills/
    fi

    # Codex CLI
    if [ "$USE_CODEX" = "true" ]; then
        cp -R "$skill_dir" .codex/skills/
    fi

    # Generic .agents/ (always)
    cp -R "$skill_dir" .agents/skills/
done

echo "Skills synced to cross-tool directories"
```

### Step 2c: Generate AGENTS.md (if Codex selected)

If Codex was selected in question 9, generate `AGENTS.md` alongside `CLAUDE.md`:

**If AGENTS.md exists:** Preserve customizations, update skill references to `.agents/skills/` paths.

**If new:** Generate from `CLAUDE.md` content, replacing `.claude/skills/` references with `.agents/skills/` paths. The structure mirrors CLAUDE.md but uses the generic skill path that Codex reads.

```bash
# Only generate when Codex was selected and no AGENTS.md exists yet; an
# existing file is updated by hand so customizations are preserved.
if [ "$USE_CODEX" = "true" ] && [ ! -f "AGENTS.md" ]; then
    if [ -f "CLAUDE.md" ]; then
        # Generate from existing CLAUDE.md, pointing skill references at the
        # generic .agents/skills/ path
        sed 's|\.claude/skills/|.agents/skills/|g' CLAUDE.md > AGENTS.md
        echo "Generated AGENTS.md from CLAUDE.md"
    else
        # Copy template. BOOTSTRAP_DIR is re-read when unset so this snippet
        # also works in a fresh shell, and success is only reported when the
        # copy actually happened.
        BOOTSTRAP_DIR=${BOOTSTRAP_DIR:-$(cat ~/.claude/.bootstrap-dir 2>/dev/null)}
        if cp "$BOOTSTRAP_DIR/templates/AGENTS.md" ./AGENTS.md; then
            echo "Created AGENTS.md from template"
        else
            echo "⚠ Could not copy AGENTS.md template from $BOOTSTRAP_DIR/templates/" >&2
        fi
    fi
fi
```

### Step 2d: Generate config.toml hooks (if Kimi or Codex selected)

```bash
# Locate the hooks template inside the bootstrap checkout recorded at install
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
HOOKS_TEMPLATE="$BOOTSTRAP_DIR/templates/config.toml"

# Success is reported only when cp succeeded, so a missing template or
# target directory is not announced as a created config.
if [ "$USE_KIMI" = "true" ]; then
    if cp "$HOOKS_TEMPLATE" .kimi/config.toml; then
        echo "Created .kimi/config.toml with hooks"
    else
        echo "⚠ Could not create .kimi/config.toml from $HOOKS_TEMPLATE" >&2
    fi
fi

if [ "$USE_CODEX" = "true" ]; then
    if cp "$HOOKS_TEMPLATE" .codex/config.toml; then
        echo "Created .codex/config.toml with hooks"
    else
        echo "⚠ Could not create .codex/config.toml from $HOOKS_TEMPLATE" >&2
    fi
fi
```

### Step 3: Create/update .gitignore (if missing or incomplete)

Ensure these security-critical entries exist:
```gitignore
# Environment files - NEVER commit
.env
.env.*
!.env.example

# Secrets
*.pem
*.key
*.p12
credentials.json
secrets.json
service-account*.json

# Dependencies
node_modules/
__pycache__/
*.pyc
.venv/
venv/

# Build outputs
dist/
build/

# Code graph data (auto-generated)
.code-graph/

# Cross-tool agent dirs (derived from .claude/skills/, regenerated by /sync-agents)
.kimi/
.codex/
.agents/

# IDE
.idea/
.vscode/settings.json
.DS_Store
```

### Step 4: Create .env.example (if missing)

Based on project type:
```bash
# .env.example - Copy to .env and fill in values
# Server-side only (NEVER prefix with VITE_ or NEXT_PUBLIC_)
DATABASE_URL=
ANTHROPIC_API_KEY=

# Client-side safe (public, non-sensitive)
VITE_SUPABASE_URL=
VITE_SUPABASE_ANON_KEY=
```

### Step 4b: Configure Code Graph MCP Servers

**This step runs for ALL projects** (Tier 1 is always-on).

#### Create/merge .mcp.json

```bash
# Check if .mcp.json exists
if [ -f ".mcp.json" ]; then
    echo "Existing .mcp.json found - will merge code graph config"
else
    echo "Creating .mcp.json for code graph MCP servers"
fi
```

**Always add (Tier 1 — codebase-memory-mcp):**
```json
{
  "mcpServers": {
    "codebase-memory": {
      "command": "codebase-memory-mcp",
      "args": []
    }
  }
}
```

**If Tier 2 selected (deep analysis / full), also add:**
```json
{
  "mcpServers": {
    "codebadger": {
      "url": "http://localhost:4242/mcp",
      "type": "http"
    }
  }
}
```

**If Tier 3 selected (security audit / full), also add:**
```json
{
  "mcpServers": {
    "codeql": {
      "command": "codeql-mcp",
      "args": ["--database", ".code-graph/codeql-db"]
    }
  }
}
```

**Merge strategy:** If `.mcp.json` already exists, read it, merge new `mcpServers` entries without overwriting existing ones, write back.

#### Add .code-graph/ to .gitignore

Ensure this entry exists in `.gitignore`:
```gitignore
# Code graph data (auto-generated, machine-specific)
.code-graph/
```

#### Auto-install codebase-memory-mcp (if not found)

```bash
if ! command -v codebase-memory-mcp &> /dev/null; then
    echo ""
    echo "Installing codebase-memory-mcp (Tier 1 code graph)..."

    # Run the graph tools installer (Tier 1 only by default)
    if [ -f "$HOME/.claude/install-graph-tools.sh" ]; then
        bash "$HOME/.claude/install-graph-tools.sh"
    else
        # Fallback: inline install of the prebuilt release binary
        INSTALL_DIR="$HOME/.local/bin"
        mkdir -p "$INSTALL_DIR"
        OS=$(uname -s | tr '[:upper:]' '[:lower:]')
        ARCH=$(uname -m)
        # Normalise uname machine names to the suffixes used in the asset
        # name; any other value is passed through unchanged
        case "$ARCH" in
            aarch64|arm64) ARCH="arm64" ;;
            x86_64|amd64) ARCH="amd64" ;;
        esac
        DOWNLOAD_URL="https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/codebase-memory-mcp-${OS}-${ARCH}.tar.gz"
        TEMP_DIR=$(mktemp -d)
        # Download, extract, move and chmod are chained so the success message
        # is printed only when every step worked.
        if [ -n "$TEMP_DIR" ] \
            && curl -fsSL "$DOWNLOAD_URL" -o "$TEMP_DIR/codebase-memory-mcp.tar.gz" \
            && tar xzf "$TEMP_DIR/codebase-memory-mcp.tar.gz" -C "$TEMP_DIR" \
            && mv "$TEMP_DIR/codebase-memory-mcp" "$INSTALL_DIR/codebase-memory-mcp" \
            && chmod +x "$INSTALL_DIR/codebase-memory-mcp"; then
            echo "✓ Installed codebase-memory-mcp to $INSTALL_DIR"
            # Auto-configure for Claude Code (best effort)
            "$INSTALL_DIR/codebase-memory-mcp" install 2>/dev/null || true
            # Later steps look the binary up with `command -v`, which only
            # works when the install directory is on PATH
            case ":$PATH:" in
                *":$INSTALL_DIR:"*) ;;
                *) echo "⚠ $INSTALL_DIR is not on PATH - add it so codebase-memory-mcp can be found" ;;
            esac
        else
            echo "⚠ Failed to install codebase-memory-mcp from $DOWNLOAD_URL"
            # This branch only runs when install-graph-tools.sh is absent, so
            # point at the release page instead of that script
            echo "  Manual install: https://github.com/DeusData/codebase-memory-mcp/releases"
        fi
        if [ -n "$TEMP_DIR" ]; then
            rm -rf "$TEMP_DIR"
        fi
    fi
else
    echo "✓ codebase-memory-mcp already installed"
fi
```

#### Auto-install Tier 2/3 tools (if selected)

```bash
# Opt-in tiers are installed through the shared installer script; when that
# script is absent both tiers are skipped without output.
GRAPH_INSTALLER="$HOME/.claude/install-graph-tools.sh"

# Tier 2: Joern CPG (if deep analysis or full selected)
case "$GRAPH_TIER" in
    deep|full)
        if [ -f "$GRAPH_INSTALLER" ]; then
            echo ""
            echo "Installing Joern CPG (Tier 2)..."
            bash "$GRAPH_INSTALLER" --joern
        fi
        ;;
esac

# Tier 3: CodeQL (if security audit or full selected)
case "$GRAPH_TIER" in
    security|full)
        if [ -f "$GRAPH_INSTALLER" ]; then
            echo ""
            echo "Installing CodeQL (Tier 3)..."
            bash "$GRAPH_INSTALLER" --codeql
        fi
        ;;
esac
```

#### Enable auto-indexing and build initial graph

```bash
if command -v codebase-memory-mcp &> /dev/null; then
    # Enable auto-index so graph stays fresh across sessions (best effort)
    codebase-memory-mcp config set auto_index true 2>/dev/null || true

    # Build initial graph index for this project
    echo ""
    echo "Building code graph index (first time may take a moment)..."
    # Report success and failure exclusively: a failed index must not be
    # followed by the "indexed" confirmation.
    if codebase-memory-mcp index --project-dir . 2>/dev/null; then
        echo "✓ Code graph indexed"
    else
        echo "⚠ Initial index failed - graph will be built on first MCP query"
    fi
fi
```

#### Install post-commit graph update hook

```bash
if [ -d ".git" ]; then
    HOOK_FILE=".git/hooks/post-commit"
    HOOK_SOURCE="$HOME/.claude/hooks/post-commit-graph"
    # Marker written alongside the hook body. The idempotency check below
    # greps for it, so re-running never depends on what the hook body contains.
    HOOK_MARKER="# code-graph: incremental update"

    if [ ! -f "$HOOK_SOURCE" ]; then
        echo "⚠ $HOOK_SOURCE not found - skipping post-commit graph hook"
    elif [ -f "$HOOK_FILE" ]; then
        # Append to existing post-commit hook (don't overwrite). The pattern
        # also matches the older "Code graph" comment so hooks installed
        # before the marker changed are not appended to twice.
        if ! grep -qiE 'code[- ]graph' "$HOOK_FILE"; then
            {
                echo ""
                echo "$HOOK_MARKER"
                cat "$HOOK_SOURCE"
            } >> "$HOOK_FILE"
        fi
        # A pre-existing hook may not be executable; git ignores it otherwise
        chmod +x "$HOOK_FILE"
        echo "✓ Post-commit graph update hook installed"
    else
        mkdir -p .git/hooks
        # Copy the hook, then tag it with the marker on its own line so a
        # second run detects it
        if cp "$HOOK_SOURCE" "$HOOK_FILE" \
            && printf '\n%s\n' "$HOOK_MARKER" >> "$HOOK_FILE" \
            && chmod +x "$HOOK_FILE"; then
            echo "✓ Post-commit graph update hook installed"
        else
            echo "⚠ Could not install post-commit graph update hook"
        fi
    fi
fi
```

### Step 5: Create/update verification script
Create or overwrite `scripts/verify-tooling.sh`:

```bash
#!/bin/bash
# Verify that the project's CLI tools are installed and authenticated.
# A missing tool only produces a warning; an installed but unauthenticated
# tool is a failure. Every tool is checked before exiting so a single run
# reports everything that needs fixing.
# Exit status: 0 when no check failed, 1 otherwise.
set -e

echo "Verifying project tooling..."

FAILED=0

#######################################
# Check one CLI tool and print its status line.
# Globals:   FAILED (set to 1 when the tool is installed but not authenticated)
# Arguments: $1 - display name
#            $2 - binary looked up on PATH
#            $3 - command the user should run to log in
#            $4 - command the user should run to install the tool
#            $5.. - command whose exit status tells whether auth works
# Outputs:   one status line on stdout
#######################################
check_cli() {
  local name=$1 binary=$2 login_hint=$3 install_hint=$4
  shift 4
  if ! command -v "$binary" > /dev/null 2>&1; then
    echo "⚠ $name not installed. Run: $install_hint"
  elif "$@" > /dev/null 2>&1; then
    echo "✓ $name authenticated"
  else
    echo "✗ $name not authenticated. Run: $login_hint"
    FAILED=1
  fi
}

check_cli "GitHub CLI" gh "gh auth login" "brew install gh" gh auth status
check_cli "Vercel CLI" vercel "vercel login" "npm i -g vercel" vercel whoami
check_cli "Supabase CLI" supabase "supabase login" "brew install supabase/tap/supabase" supabase projects list

if [ "$FAILED" -ne 0 ]; then
  exit 1
fi

echo ""
echo "Tooling verification complete!"
```

```bash
chmod +x scripts/verify-tooling.sh
```

### Step 6: Create security check script

Create `scripts/security-check.sh`:
```bash
#!/bin/bash
# Pre-commit security checks.
#   - fails when an environment file (other than *.example) is staged
#   - warns when a staged file appears to contain a hard-coded credential
#   - fails when a staged file puts a secret into a client-exposed Vite variable
#   - runs the npm / Python dependency audits when the tooling is available
# Exit status: 1 on a blocking finding, 0 otherwise.
set -e

echo "Running security checks..."

# ERE for credential-looking assignments whose quoted value is at least eight
# characters long. The quote classes are written literally: grep has no
# octal escapes, so a class spelled with backslash-047 matches a backslash and
# the digits 0, 4 and 7 rather than a single quote.
SECRET_PATTERN="(password|secret|api_key|apikey|token)[[:space:]]*[:=][[:space:]]*[\"'][^\"']{8,}[\"']"

# ERE for secrets placed in client-exposed Vite variables. The variable name
# is restricted to identifier characters so the pattern cannot match its own
# definition in this script when the script itself is staged.
CLIENT_ENV_PATTERN='VITE_[A-Za-z0-9_]*SECRET|VITE_[A-Za-z0-9_]*KEY[A-Za-z0-9_]*[[:space:]]*=.*[a-zA-Z0-9]{20,}'

staged_files=()
if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
  # Check .env is not staged (at any depth; *.example files are allowed)
  if git diff --cached --name-only | grep -E '(^|/)\.env(\.|$)' | grep -v '\.example$'; then
    echo "ERROR: .env file is staged for commit!"
    exit 1
  fi

  # Collect added/copied/modified paths NUL-delimited so names containing
  # spaces or newlines stay intact
  while IFS= read -r -d '' staged_path; do
    staged_files+=("$staged_path")
  done < <(git diff --cached --name-only -z --diff-filter=ACM)
else
  echo "Not a git repository - skipping staged-file checks"
fi

if [ "${#staged_files[@]}" -gt 0 ]; then
  # Check for common secret patterns (advisory only; prints matching files)
  if grep -l -E -- "$SECRET_PATTERN" "${staged_files[@]}" 2>/dev/null; then
    echo "WARNING: Possible secrets found in staged files - please verify"
  fi

  # Check for VITE_* secrets (common mistake; blocks the commit)
  if grep -l -E -- "$CLIENT_ENV_PATTERN" "${staged_files[@]}" 2>/dev/null; then
    echo "ERROR: Secrets in VITE_* env vars are exposed to client!"
    exit 1
  fi
fi

# Dependency audit (findings are reported but do not fail the script)
if [ -f "package.json" ]; then
  echo "Checking npm dependencies..."
  npm audit --audit-level=high 2>/dev/null || echo "Warning: npm audit found issues"
fi

if [ -f "pyproject.toml" ] || [ -f "requirements.txt" ]; then
  if command -v safety &> /dev/null; then
    echo "Checking Python dependencies..."
    safety check 2>/dev/null || echo "Warning: safety found issues"
  fi
fi

echo "Security checks complete!"
```

```bash
chmod +x scripts/security-check.sh
```

### Step 7: Create/update CLAUDE.md

**If CLAUDE.md exists:**
- Preserve Project Overview, Tech Stack, and Project-Specific Patterns sections
- Update Skills list to reference current .claude/skills/ contents
- Update Key Commands section with latest

**If new:**
```markdown
# CLAUDE.md

## Skills
Read and follow these skills before writing any code:
- .claude/skills/base/SKILL.md
- .claude/skills/security/SKILL.md
- .claude/skills/project-tooling/SKILL.md
- .claude/skills/session-management/SKILL.md
- .claude/skills/code-graph/SKILL.md
- .claude/skills/cross-agent-delegation/SKILL.md
- .claude/skills/cpg-analysis/SKILL.md (if deep analysis, security audit, or full)
- .claude/skills/[language]/SKILL.md
- .claude/skills/[framework]/SKILL.md (if applicable)
- .claude/skills/llm-patterns/SKILL.md (if AI-first)

## Project Overview
[Description from question 1]

## Tech Stack
- Language: [X]
- Framework: [X]
- Database: [X]
- Deployment: [X]
- Testing: [X]

## Key Commands
```bash
# Verify all CLI tools are working
./scripts/verify-tooling.sh

# Install dependencies
npm install          # or: pip install -e ".[dev]"

# Run tests
npm test             # or: pytest

# Lint
npm run lint         # or: ruff check .

# Type check
npm run typecheck    # or: mypy src/

# Pre-commit hooks (run once after clone)
npx husky init       # or: pre-commit install

# Database (if using Supabase)
npm run db:start     # Start local Supabase
npm run db:migrate   # Push migrations

# Deploy
npm run deploy:preview  # Deploy to preview
npm run deploy:prod     # Deploy to production
```

## Documentation
- `docs/` - Technical documentation
- `_project_specs/` - Project specifications and todos

## Atomic Todos
All work is tracked in `_project_specs/todos/`:
- `active.md` - Current work
- `backlog.md` - Future work
- `completed.md` - Done (for reference)

Every todo must have validation criteria and test cases. See the base skill (`.claude/skills/base/SKILL.md`) for format.

## Session Management

### State Tracking
Maintain session state in `_project_specs/session/`:
- `current-state.md` - Live session state (update every 15-20 tool calls)
- `decisions.md` - Key architectural/implementation decisions (append-only)
- `code-landmarks.md` - Important code locations for quick reference
- `archive/` - Past session summaries

### Automatic Updates
Update `current-state.md`:
- After completing any todo item
- Every 15-20 tool calls during active work
- Before any significant context shift
- When encountering blockers

### Decision Logging
Log to `decisions.md` when:
- Choosing between architectural approaches
- Selecting libraries or tools
- Making security-related choices
- Deviating from standard patterns

### Context Compression
When context feels heavy (~50+ tool calls):
1. Summarize completed work in current-state.md
2. Archive verbose exploration notes to archive/
3. Keep only essential context for next steps

### Session Handoff
When ending a session or approaching context limits, update current-state.md with:
- What was completed this session
- Current state of work
- Immediate next steps (numbered, specific)
- Open questions or blockers
- Files to review first when resuming

### Resuming Work
When starting a new session:
1. Read `_project_specs/session/current-state.md`
2. Check `_project_specs/todos/active.md`
3. Review recent entries in `decisions.md` if context needed
4. Continue from "Next Steps" in current-state.md

## Code Graph (MCP)

This project uses MCP-based code graph for optimized code navigation.

### Available Tiers
- **Tier 1** (always on): `codebase-memory-mcp` - AST graph, symbol lookup, blast radius
- **Tier 2** (opt-in): Joern/CodeBadger - Full CPG, control/data flow analysis
- **Tier 3** (opt-in): CodeQL - Taint analysis, security vulnerability detection

### Usage Priority
1. **Graph first** - Use MCP graph tools for symbol search, dependency tracing, impact analysis
2. **File read second** - Only read full files when you need to modify code or need full context
3. **Grep last** - Avoid grep when graph tools can answer the question faster

### Configuration
- MCP config: `.mcp.json` (project root, committed)
- Graph data: `.code-graph/` (gitignored, auto-updated)
- Post-commit hook: auto-updates graph on code changes

### Key Graph Commands
```bash
# Install graph tools (run once per machine)
~/.claude/install-graph-tools.sh

# Install with deep CPG analysis
~/.claude/install-graph-tools.sh --joern

# Install with security auditing
~/.claude/install-graph-tools.sh --codeql
```

## Project-Specific Patterns
[Any specific patterns for this project]
```

### Step 8: Create project specs structure (if missing)

Only create files that don't exist - never overwrite existing specs.

**_project_specs/overview.md** (if missing):
```markdown
# Project Overview

## Vision
[Description from question 1]

## Goals
- [ ] Goal 1
- [ ] Goal 2

## Non-Goals
- What this project will NOT do

## Success Metrics
- How we measure success
```

**_project_specs/todos/active.md** (if missing):
```markdown
# Active Todos

Current work in progress. Each todo follows the atomic todo format from base.md skill.

---

<!-- Add todos here -->
```

**_project_specs/todos/backlog.md** (if missing):
```markdown
# Backlog

Future work, prioritized. Move to active.md when starting.

---

<!-- Add todos here -->
```

**_project_specs/todos/completed.md** (if missing):
```markdown
# Completed

Done items for reference. Move here from active.md when complete.

---

<!-- Add completed todos here -->
```

**_project_specs/session/current-state.md** (if missing):
```markdown
<!--
CHECKPOINT RULES (from session-management.md):
- Quick update: After any todo completion
- Full checkpoint: After ~20 tool calls or decisions
- Archive: End of session or major feature complete

After each task, ask: Decision made? >10 tool calls? Feature done?
-->

# Current Session State

*Last updated: [timestamp]*

## Active Task
[What are we working on right now - one sentence]

## Current Status
- **Phase**: exploring | planning | implementing | testing | debugging
- **Progress**: [X of Y steps, or description]
- **Blocking Issues**: None

## Context Summary
[2-3 sentences summarizing current state of work]

## Files Being Modified
| File | Status | Notes |
|------|--------|-------|
| - | - | - |

## Next Steps
1. [ ] First next action
2. [ ] Second next action

## Key Context to Preserve
- [Important decisions or context for this task]

## Resume Instructions
To continue this work:
1. [Specific starting point]
2. [What to check/read first]
```

**_project_specs/session/decisions.md** (if missing):
```markdown
<!--
LOG DECISIONS WHEN:
- Choosing between architectural approaches
- Selecting libraries or tools
- Making security-related choices
- Deviating from standard patterns

This is append-only. Never delete entries.
-->

# Decision Log

Track key architectural and implementation decisions.

## Format
```
## [YYYY-MM-DD] Decision Title

**Decision**: What was decided
**Context**: Why this decision was needed
**Options Considered**: What alternatives existed
**Choice**: Which option was chosen
**Reasoning**: Why this choice was made
**Trade-offs**: What we gave up
**References**: Related code/docs
```

---

<!-- Add decisions below -->
```

**_project_specs/session/code-landmarks.md** (if missing):
```markdown
<!--
UPDATE WHEN:
- Adding new entry points or key files
- Introducing new patterns
- Discovering non-obvious behavior

Helps quickly navigate the codebase when resuming work.
-->

# Code Landmarks

Quick reference to important parts of the codebase.

## Entry Points
| Location | Purpose |
|----------|---------|
| - | Main application entry |

## Core Business Logic
| Location | Purpose |
|----------|---------|
| - | - |

## Configuration
| Location | Purpose |
|----------|---------|
| - | Environment/app config |

## Key Patterns
| Pattern | Example Location | Notes |
|---------|------------------|-------|
| - | - | - |

## Testing
| Location | Purpose |
|----------|---------|
| tests/ | Test files |

## Gotchas & Non-Obvious Behavior
| Location | Issue | Notes |
|----------|-------|-------|
| - | - | - |
```

### Step 6: Create/update GitHub Actions workflows

**Quality workflow** (`.github/workflows/quality.yml`):
Create based on language (copy from the relevant skill file).

**Security workflow** (`.github/workflows/security.yml`):
```yaml
name: Security

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 9 * * 1'  # Weekly on Monday

jobs:
  secrets-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Detect secrets
        uses: trufflesecurity/trufflehog@main
        with:
          path: ./

  dependency-audit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Node
        if: hashFiles('package.json') != ''
        uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: NPM Audit
        if: hashFiles('package.json') != ''
        run: npm audit --audit-level=high
      - name: Setup Python
        if: hashFiles('pyproject.toml') != ''
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Safety check
        if: hashFiles('pyproject.toml') != ''
        run: pip install safety && safety check
```

### Step 7: Set up pre-commit hooks (if not already configured)

**For Python projects** (if .pre-commit-config.yaml missing):
Create `.pre-commit-config.yaml`

**For TypeScript/JavaScript projects** (if .husky/ missing):
Set up Husky + lint-staged

### Step 7b: Install pre-push code review hook

**Always install the pre-push hook for code review enforcement:**

```bash
# Check if .git exists
if [ -d ".git" ]; then
    # Copy pre-push hook from ~/.claude/hooks/
    cp ~/.claude/hooks/pre-push .git/hooks/pre-push
    chmod +x .git/hooks/pre-push
    echo "✓ Pre-push code review hook installed"
fi
```

This hook:
- Runs `/code-review` before every `git push`
- Blocks push if 🔴 Critical or 🟠 High severity issues found
- Allows push with advisory for 🟡 Medium and 🟢 Low issues

To disable: `rm .git/hooks/pre-push`

### Step 8: GitHub repository setup (if selected and not already configured)

**Create new repository:**
```bash
git init  # if needed
git add .
git commit -m "Initial project setup"
gh repo create [repo-name] --[public|private] --source=. --remote=origin --push
```

**Connect to existing:**
```bash
git remote add origin https://github.com/[owner]/[repo].git
git push -u origin main
```

### Step 9: Initialize deployment (if not already configured)

**Vercel** (if vercel.json missing):
```bash
vercel link
```

**Supabase** (if supabase/ missing):
```bash
supabase init
```

---

## Phase 5: Summary

After setup, show what was done:

### For Updates (existing project):
```
Updated:
✓ Skills updated to latest versions
  - base.md (updated)
  - typescript.md (updated)
  - react-web.md (updated)
  - code-graph.md (updated)
✓ Pre-push code review hook (installed/updated)

Added:
✓ llm-patterns.md (new skill added)
✓ _project_specs/prompts/ (new directory)

Code Graph (fully automated):
✓ codebase-memory-mcp installed and configured
✓ .mcp.json configured (Tier 1: codebase-memory-mcp)
✓ Auto-indexing enabled (graph stays fresh across sessions)
✓ Initial graph index built
✓ Post-commit graph update hook installed
[✓ Tier 2: Joern CPG installed and configured (if selected)]
[✓ Tier 3: CodeQL installed and configured (if selected)]

Cross-Tool Compatibility (if selected):
[✓ Skills synced to .kimi/skills/ (Kimi CLI)]
[✓ Skills synced to .codex/skills/ (Codex CLI)]
[✓ Skills synced to .agents/skills/ (generic)]
[✓ AGENTS.md created (Codex project instructions)]
[✓ .kimi/config.toml created (Kimi hooks)]
[✓ .codex/config.toml created (Codex hooks)]

Unchanged:
- CLAUDE.md (preserved your customizations)
- _project_specs/todos/ (preserved your todos)
- Git repository (already configured)
```

### For New Projects:
```
Created:
✓ .claude/skills/ with [N] skill files (including code-graph)
✓ CLAUDE.md
✓ _project_specs/ structure
✓ scripts/verify-tooling.sh
✓ .github/workflows/quality.yml
✓ Pre-commit hooks configured
✓ Pre-push code review hook (blocks on Critical/High issues)
✓ GitHub repository: https://github.com/[owner]/[repo]

Code Graph (fully automated):
✓ codebase-memory-mcp installed
✓ .mcp.json configured
  Tier 1: codebase-memory-mcp (always on - AST graph, 64 langs)
  [Tier 2: Joern CPG (control flow, data flow)]
  [Tier 3: CodeQL (taint analysis, security)]
✓ Auto-indexing enabled
✓ Initial graph index built ([N] files, [N] symbols)
✓ .code-graph/ added to .gitignore
✓ Post-commit graph update hook installed

Cross-Tool Compatibility (if selected):
✓ Skills synced to .kimi/skills/, .codex/skills/, .agents/skills/
✓ AGENTS.md created (Codex project instructions)
✓ .kimi/config.toml + .codex/config.toml (hooks)
✓ .kimi/, .codex/, .agents/ added to .gitignore
```

### Quick Start
```bash
# Verify setup
./scripts/verify-tooling.sh

# Install dependencies
[appropriate command]

# Start development
[appropriate command]
```

---

## Phase 5b: Polyphony Setup (Container Isolation)

**This phase runs automatically when Docker/OrbStack is detected (question 10) and the user hasn't opted out.**

### Step 1: Check prerequisites

```bash
# Verify Docker is running
if echo "$DETECTED_AGENTS" | grep -qE "docker|orbstack"; then
    docker info &>/dev/null && echo "✓ Docker running" || echo "⚠ Docker installed but not running"
fi

# Check polyphony CLI
command -v polyphony &>/dev/null && echo "✓ polyphony CLI available" || echo "⚠ polyphony not on PATH"
```

### Step 2: Initialize Polyphony config (if missing)

```bash
if [ ! -d "$HOME/.polyphony" ]; then
    polyphony init
    echo "✓ Created ~/.polyphony/ config"
else
    echo "✓ ~/.polyphony/ already exists"
fi
```

### Step 3: Build worker image (if not present)

```bash
if ! docker image inspect polyphony-worker:latest &>/dev/null 2>&1; then
    BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
    if [ -f "$BOOTSTRAP_DIR/templates/Dockerfile.polyphony" ]; then
        echo "Building polyphony-worker image..."
        docker build -t polyphony-worker:latest -f "$BOOTSTRAP_DIR/templates/Dockerfile.polyphony" "$BOOTSTRAP_DIR"
        echo "✓ Built polyphony-worker:latest"
    fi
else
    echo "✓ polyphony-worker:latest image exists"
fi
```

### Step 4: Add polyphony skill to project

```bash
# Copy polyphony skill to project
cp -r ~/.claude/skills/polyphony/ .claude/skills/
```

Add to CLAUDE.md Skills section:
```markdown
- .claude/skills/polyphony/SKILL.md
```

Add to CLAUDE.md Cross-Agent Workflow section:
```markdown
### Container Isolation (Polyphony)
When Docker is available, each feature agent runs in its own container with an independent git branch.
- `/spawn-team` uses Polyphony by default (fallback to native agents if no Docker)
- `polyphony status` to see running agents
- `polyphony cleanup` after completion
```

### Step 5: Show Polyphony status in summary

Add to the Phase 5 summary output:
```
Container Isolation (Polyphony):
✓ Docker/OrbStack detected
✓ polyphony CLI available
✓ ~/.polyphony/ config ready
✓ polyphony-worker:latest image built
✓ Polyphony skill added to project
→ /spawn-team will use container isolation by default
```

**If Docker not available:**
```
Container Isolation:
⚠ Docker not found — /spawn-team will use native agents (shared workspace)
  Install Docker: brew install --cask docker
```

---

## Phase 6: Agent Team Setup (Default Workflow)

Every project uses Claude Agent Teams by default. This phase sets up the team infrastructure and spawns agents to implement features in parallel.

### Step 1: Set Environment Variable

Ensure the agent teams experimental flag is set:

```bash
export CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1
```

Also add to the project's `.env.example` if not present:
```
# Agent Teams (required for Maggy team workflow)
CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1
```

### Step 2: Copy Agent Definitions

Copy agent definitions from the agent-teams skill to the project:

```bash
mkdir -p .claude/agents
cp ~/.claude/skills/agent-teams/agents/*.md .claude/agents/
```

This creates:
```
.claude/agents/
  team-lead.md      # Orchestration only, delegate mode
  quality.md        # TDD verification (RED/GREEN phases)
  security.md       # OWASP scanning, secrets detection
  code-review.md    # Multi-engine code review
  merger.md         # Branch creation, PR management
  feature.md        # Feature implementation template
```

### Step 3: Add Agent Teams to CLAUDE.md

Add the agent-teams skill to the Skills section in CLAUDE.md:
```
- .claude/skills/agent-teams/SKILL.md
```

Add a new section to CLAUDE.md:
```markdown
## Agent Teams (Default Workflow)

This project uses Claude Code Agent Teams as the default development workflow.
Every feature is implemented by a dedicated agent following a strict TDD pipeline.

### Strict Pipeline (per feature)
Spec > Spec Review > Tests > RED Verify > Implement > GREEN Verify > Validate > Code Review > Security Scan > Branch + PR

### Team Roster
- **Team Lead**: Orchestrates, breaks work into features, assigns tasks (NEVER writes code)
- **Quality Agent**: Verifies TDD discipline - RED/GREEN phases, coverage >= 80%
- **Security Agent**: OWASP scanning, secrets detection, dependency audit
- **Code Review Agent**: Multi-engine code reviews (Claude/Codex/Gemini)
- **Merger Agent**: Creates feature branches and PRs via gh CLI
- **Feature Agents**: One per feature, follows strict TDD pipeline

### Commands
- `/spawn-team` - Spawn the agent team (auto-run after init, or run manually)

### Required Environment
export CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1
```

### Step 4: Prompt for Features

**For new projects:**
> **Project initialized! Ready to deploy the agent team.**
>
> The agent team implements features in parallel using a strict TDD pipeline:
> ```
> Spec > Tests > Verify Fail > Implement > Verify Pass > Review > Security > PR
> ```
>
> What are the key features of this project? List them and I'll create a spec
> skeleton for each, then spawn the team to implement them in parallel.
>
> Example: "user authentication, dashboard, payment processing"

For each feature the user lists:
1. Create `_project_specs/features/{feature-name}.md` with skeleton spec
2. Include: description (from user input), empty acceptance criteria, empty test cases table

**For existing projects:**
> **Project updated with latest skills and agent team support!**
>
> I've added agent team infrastructure. Your options:
> 1. Define features and spawn the team now
> 2. Continue working on existing todos (solo mode)
> 3. Review what's new in skills

### Step 5: Spawn Team

After the user provides features (or if feature specs already exist), automatically run the `/spawn-team` workflow:

1. Create the team (TeamCreate)
2. Spawn 5 default agents (team-lead, quality-agent, security-agent, review-agent, merger-agent)
3. Spawn 1 feature agent per feature
4. Team lead creates 10-task dependency chains per feature
5. Work begins automatically

### Step 6: Show Team Status

```
┌─────────────────────────────────────────────────────────────────┐
│  AGENT TEAM DEPLOYED                                             │
│  ──────────────────────────────────────────────────────────────  │
│                                                                  │
│  Team: {project-name}                                            │
│  Features: {N}                                                   │
│  Total tasks: {N * 10}                                           │
│  Agents: {5 + N}                                                 │
│                                                                  │
│  PIPELINE (per feature)                                          │
│  Spec > Review > Tests > RED > Implement > GREEN >               │
│  Validate > Code Review > Security > Branch+PR                   │
│                                                                  │
│  Use Shift+Up/Down to select and message agents.                 │
│  Use Ctrl+T to toggle the shared task list.                      │
│  The team runs autonomously until all PRs are created.           │
└─────────────────────────────────────────────────────────────────┘
```

---

## Updating Skills System-Wide

To update skills for all future projects:

```bash
# Pull latest skills
cd "$(cat ~/.claude/.bootstrap-dir)"
git pull

# Reinstall
./install.sh

# Validate installation
./tests/validate-structure.sh
```

Then in any existing project:
```
/initialize-project
```

Skills will be updated while preserving project-specific configuration.

## Troubleshooting

If `/initialize-project` shows validation errors:

```bash
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
# Full validation to see all issues
"$BOOTSTRAP_DIR/tests/validate-structure.sh" --full

# Quick validation (what initialize-project runs)
"$BOOTSTRAP_DIR/tests/validate-structure.sh" --quick
```

Common issues:
- **Flat .md files**: Skills should be folders with SKILL.md, not flat files
- **Missing commands**: Reinstall with `./install.sh`
- **Missing hooks**: Reinstall with `./install.sh`


================================================
FILE: commands/maggy-init.md
================================================
# /maggy-init — Set Up Maggy for This Team

Interactive wizard that configures Maggy for the user's org, issue tracker, and codebases. Writes `~/.maggy/config.yaml` and ensures deps are installed.

---

## Usage

`/maggy-init` — run the full setup wizard

---

## Steps

### 1. Check prerequisites

- Python 3.11+ available
- `claude` CLI on PATH (warn but don't block)
- Maggy installed (check `~/.claude/.bootstrap-dir`)

### 2. Run installer

```bash
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir)
cd "$BOOTSTRAP_DIR/maggy"
./install.sh
```

This installs Python deps and copies the config template to `~/.maggy/config.yaml`.

### 3. Interactive config wizard

Ask the user:

1. **Org name** — human-readable name (e.g. "Acme Corp")
2. **Domain** — primary competitive domain (e.g. "fintech", "devtools", "cx", "healthcare"). This drives competitor discovery.
3. **Issue tracker** — `github` (default) or `asana`. Linear is a stub.
4. **For GitHub:** org name + comma-separated repo list (`acmecorp/api, acmecorp/web`)
5. **For Asana:** workspace ID + project GID for their default board
6. **Codebases** — paths to each repo Maggy should execute in. Prompt for a key for each path (a short name like `api`, `web`).
7. **Competitor categories** — comma-separated (can match the domain; recommend 1-3 categories)
8. **OKRs** — "skip" or "yaml" (paste OKRs inline if yaml)

### 4. Write config

Patch `~/.maggy/config.yaml` with the user's answers using a Python helper:

```python
import yaml
from pathlib import Path

cfg_path = Path.home() / ".maggy" / "config.yaml"
cfg = yaml.safe_load(cfg_path.read_text())

cfg["org"]["name"] = "<answer>"
cfg["org"]["domain"] = "<answer>"
cfg["issue_tracker"]["provider"] = "<answer>"
# ... set github/asana section accordingly
cfg["codebases"] = [{"path": "<path>", "key": "<key>"}, ...]
cfg["competitors"]["categories"] = ["<cat>", ...]

cfg_path.write_text(yaml.safe_dump(cfg, sort_keys=False))
```

### 5. Credentials check

Tell the user to export these in the shell session that launches Maggy:

```
export GITHUB_TOKEN=ghp_...           # repo + issues scopes
export ANTHROPIC_API_KEY=sk-ant-...
```

**Do not write tokens to `~/.maggy/.env`** — the Maggy server does not load that
file automatically, so credentials would sit on disk in plaintext with no code
reading them. Instead, supply them through your usual environment mechanism
(e.g. `.zshrc`, `direnv`, `op run`, a secrets manager) or export them inline
when launching Maggy.

### 6. Test the connection

```bash
cd "$BOOTSTRAP_DIR/maggy"
python3 -c "from src import config, providers; cfg = config.load(); p = providers.build(cfg); import asyncio; print('Found', len(asyncio.run(p.list_tasks(limit=5))), 'tasks')"
```

If this returns tasks, setup is working.

### 7. Offer to launch

> Maggy is configured. Run `/maggy` to launch the dashboard, or:
>
> ```
> cd $BOOTSTRAP_DIR/maggy && python3 -m maggy.main
> ```
>
> Then open http://127.0.0.1:8080

---

## Related

- `/maggy` — launch dashboard
- `/icpg-bootstrap` — index your codebases so Execute gets rich context


================================================
FILE: commands/maggy.md
================================================
# /maggy — Launch Maggy Dashboard

Start Maggy (the AI engineering command center) and open the dashboard in a browser.

---

## Usage

`/maggy` — start server if not running, open dashboard
`/maggy stop` — stop running server
`/maggy status` — show whether server is running + config summary

---

## Steps

### 1. Check config

```bash
if [ ! -f ~/.maggy/config.yaml ]; then
  echo "Maggy not configured yet. Run /maggy-init first."
  exit 1
fi
```

### 2. Resolve host/port from config (don't hardcode 8080)

```bash
# Read dashboard.host and dashboard.port from ~/.maggy/config.yaml.
# Falls back to 127.0.0.1:8080 only if keys are missing.
HOST=$(python3 -c "import yaml; d=yaml.safe_load(open('$HOME/.maggy/config.yaml'))or{}; print((d.get('dashboard') or {}).get('host') or '127.0.0.1')")
PORT=$(python3 -c "import yaml; d=yaml.safe_load(open('$HOME/.maggy/config.yaml'))or{}; print((d.get('dashboard') or {}).get('port') or 8080)")
URL="http://${HOST}:${PORT}"
```

### 3. Check if already running

```bash
if curl -sf "${URL}/api/health" >/dev/null 2>&1; then
  echo "Maggy is already running at ${URL}"
  open "${URL}" 2>/dev/null || xdg-open "${URL}" 2>/dev/null || true
  exit 0
fi
```

### 4. Start in background

The Maggy install lives at `<bootstrap-root>/maggy`. Resolve it from `~/.claude/.bootstrap-dir`:

```bash
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null || echo "")
MAGGY_DIR="$BOOTSTRAP_DIR/maggy"

if [ ! -d "$MAGGY_DIR" ]; then
  echo "Maggy not installed. Run: cd <maggy>/maggy && ./install.sh"
  exit 1
fi

cd "$MAGGY_DIR"
mkdir -p "$HOME/.maggy"
nohup python3 -m maggy.main > "$HOME/.maggy/maggy.log" 2>&1 &
echo $! > "$HOME/.maggy/maggy.pid"
```

### 5. Wait for health check

```bash
for i in {1..15}; do
  if curl -sf "${URL}/api/health" >/dev/null 2>&1; then
    echo "✓ Maggy ready at ${URL}"
    open "${URL}" 2>/dev/null || true
    exit 0
  fi
  sleep 1
done
echo "Maggy didn't come up in 15s. Check ~/.maggy/maggy.log"
```

### 6. Report status

Show:
```
Maggy is running:
  Dashboard: <URL resolved in step 2, e.g. http://127.0.0.1:8080>
  Logs: ~/.maggy/maggy.log
  PID: <pid>
```

---

## Related

- `/maggy-init` — first-time setup wizard
- `/icpg-bootstrap` — Maggy's Execute button uses iCPG context from this


================================================
FILE: commands/mnemos-checkpoint.md
================================================
# /mnemos-checkpoint — Write Mnemos Checkpoint

Write a checkpoint capturing current session state for later resume.

## Steps

1. Run `python3 -m mnemos checkpoint --force` to write checkpoint
2. Report what was captured (goal, constraints, results, fatigue level)
3. Show the checkpoint file location


================================================
FILE: commands/mnemos-status.md
================================================
# /mnemos-status — Show Mnemos Memory Status

Show current Mnemos fatigue level, active node counts, and checkpoint status.

## Steps

1. Run `python3 -m mnemos status` in the project directory
2. Run `python3 -m mnemos fatigue` for detailed breakdown
3. Report the fatigue state and any recommended actions
4. If fatigue >= 0.60, suggest writing a checkpoint with `python3 -m mnemos checkpoint --force`


================================================
FILE: commands/polyphony-init.md
================================================
# /polyphony-init — Setup Wizard

Initialize the Polyphony multi-agent orchestration environment.

---

## Steps

### 1. Check Prerequisites

```bash
command -v docker &>/dev/null || command -v orbctl &>/dev/null
```

If neither Docker nor OrbStack is available, inform the user:

> Docker or OrbStack is required for Polyphony container isolation. Install one first.

### 2. Create Config Directory

```bash
mkdir -p ~/.polyphony
```

### 3. Copy Config Templates

Copy default configuration files from the templates directory:

```bash
TEMPLATES="$(dirname "$(realpath "$0")")/../templates"
cp -n "$TEMPLATES/polyphony-config.yaml" ~/.polyphony/config.yaml
cp -n "$TEMPLATES/polyphony-identities.yaml" ~/.polyphony/identities.yaml
cp -n "$TEMPLATES/polyphony-agents.yaml" ~/.polyphony/agents.yaml
cp -n "$TEMPLATES/polyphony-routing.yaml" ~/.polyphony/routing.yaml
```

### 4. Build Worker Image

```bash
docker build -t polyphony-worker:latest -f templates/Dockerfile.polyphony .
```

### 5. Detect Available Agents

```bash
command -v claude &>/dev/null && echo "claude: available"
command -v codex &>/dev/null && echo "codex: available"
command -v kimi &>/dev/null && echo "kimi: available"
```

### 6. Confirm

Print summary of what was initialized and which agents are available.


================================================
FILE: commands/polyphony-spawn.md
================================================
# /polyphony-spawn — Spawn Task

Create a new task in the Polyphony orchestrator and route it to an agent.

---

## Usage

```
/polyphony-spawn <title> [--type <task_type>] [--risk <risk>] [--source <source>]
```

## Steps

### 1. Parse Arguments

- `title`: Required task description
- `--type`: Task type (feature, bugfix, docs, refactor, etc.). Default: feature
- `--risk`: Risk level (low, medium, high). Default: low
- `--source`: Work source (local, github). Default: local

### 2. Create Task

```bash
PYTHONPATH=scripts python3 -m polyphony spawn "$TITLE" --type "$TYPE"
```

### 3. Route Task

The orchestrator will automatically:
1. Score task complexity (5-dimension scoring)
2. Match against routing rules
3. Select agent and fallback chain
4. Provision container with workspace
5. Start agent execution

### 4. Report

Print task ID and routing decision.


================================================
FILE: commands/polyphony-status.md
================================================
# /polyphony-status — Show State

Display the current state of all Polyphony tasks and running containers.

---

## Steps

### 1. Show Task States

```bash
PYTHONPATH=scripts python3 -m polyphony status
```

### 2. Show Running Containers

```bash
docker ps --filter "name=polyphony-" --format "table {{.Names}}\t{{.Status}}\t{{.RunningFor}}"
```

### 3. Show Workspace Usage

```bash
du -sh ~/polyphony/workspaces/* 2>/dev/null || echo "No workspaces"
```


================================================
FILE: commands/spawn-team.md
================================================
# /spawn-team - Spawn Agent Team

Spawn the default agent team for this project. Creates a coordinated team of agents that implement features in parallel following the strict TDD pipeline.

**Pipeline:** Specs > Tests > Ensure tests fail > Implement > Test again > Code Review > Security > Create branch > Create PR

---

## Phase 1: Prerequisites Check

### 1.1 Detect Container Mode

Check if Polyphony container isolation is available. **Container mode is the default when both Docker and polyphony CLI are present.**

```bash
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
DETECTED_AGENTS=$("$BOOTSTRAP_DIR/scripts/detect-agents.sh" 2>/dev/null || echo "claude")

CONTAINER_MODE="false"
if echo "$DETECTED_AGENTS" | grep -qE "docker|orbstack"; then
    if command -v polyphony &>/dev/null; then
        CONTAINER_MODE="true"
        echo "✓ Container mode: ON (Docker + polyphony detected)"
        echo "  Each feature agent will run in its own isolated container"
    else
        echo "⚠ Docker found but polyphony CLI missing"
        echo "  Run: cd \$(cat ~/.claude/.bootstrap-dir) && ./install.sh"
        echo "  Falling back to native agents (shared workspace)"
    fi
else
    echo "ℹ Docker not found — using native agents (shared workspace)"
    echo "  Install Docker for container isolation: brew install --cask docker"
fi
```

### 1.2 Check Agent Definitions

Verify `.claude/agents/` exists and has the required agent definitions:

```bash
ls .claude/agents/
```

Required files (with proper frontmatter: name, description, model, tools, disallowedTools, maxTurns):
- `team-lead.md`
- `quality.md`
- `security.md`
- `code-review.md`
- `merger.md`
- `feature.md`

If missing, copy from the agent-teams skill:
```bash
cp -r ~/.claude/skills/agent-teams/agents/ .claude/agents/
```

### 1.3 Check Feature Specs

```bash
ls _project_specs/features/
```

If no feature specs exist, ask the user:

> **No feature specs found.** The agent team needs features to implement.
>
> What are the key features of this project? I'll create a spec file for each one.

For each feature the user lists, create `_project_specs/features/{feature-name}.md` with a skeleton spec.

### 1.4 Check GitHub CLI

```bash
gh auth status
```

Needed by the merger agent for PR creation. Warn if not authenticated but don't block.

### 1.5 Ensure Worker Image (container mode only)

```bash
if [ "$CONTAINER_MODE" = "true" ]; then
    if ! docker image inspect polyphony-worker:latest &>/dev/null 2>&1; then
        echo "Building polyphony-worker image..."
        docker build -t polyphony-worker:latest \
            -f "$BOOTSTRAP_DIR/templates/Dockerfile.polyphony" "$BOOTSTRAP_DIR"
        echo "✓ Built polyphony-worker:latest"
    else
        echo "✓ polyphony-worker:latest image ready"
    fi
fi
```

---

## Phase 2: Spawn Default Agents

Spawn the 5 permanent agents **natively** (these are coordination agents — they read/verify, not write code). Each agent reads `.claude/agents/{type}.md` for its full definition including frontmatter (tools, model, maxTurns, etc.).

> **Note:** Permanent agents always run natively regardless of container mode. Only feature agents get containers.

### 2.1 Team Lead
```
Agent tool:
  name: "team-lead"
  subagent_type: "team-lead"
  prompt: "You are the team lead. Read .claude/agents/team-lead.md for your full instructions. Start by reading _project_specs/features/*.md to identify features, then create task chains and spawn feature agents."
```

### 2.2 Quality Agent
```
Agent tool:
  name: "quality-agent"
  subagent_type: "quality-agent"
  prompt: "You are the quality agent. Read .claude/agents/quality.md for your instructions. Watch TaskList for tasks assigned to you. Process them in task ID order."
```

### 2.3 Security Agent
```
Agent tool:
  name: "security-agent"
  subagent_type: "security-agent"
  prompt: "You are the security agent. Read .claude/agents/security.md for your instructions. Watch TaskList for security-scan tasks assigned to you."
```

### 2.4 Code Review Agent
```
Agent tool:
  name: "review-agent"
  subagent_type: "review-agent"
  prompt: "You are the code review agent. Read .claude/agents/code-review.md for your instructions. Watch TaskList for code-review tasks assigned to you."
```

### 2.5 Merger Agent
```
Agent tool:
  name: "merger-agent"
  subagent_type: "merger-agent"
  prompt: "You are the merger agent. Read .claude/agents/merger.md for your instructions. Watch TaskList for branch-pr tasks assigned to you."
```

---

## Phase 3: Spawn Feature Agents

### Container Mode (default when Docker + polyphony available)

For each feature spec in `_project_specs/features/`:

```bash
# Polyphony creates a container with its own git clone + branch,
# then starts the agent CLI inside
polyphony spawn "{feature-name}: implement feature per _project_specs/features/{feature-name}.md" \
    --type feature --risk low
```

This does everything in one command:
1. Creates a task in Polyphony's store
2. Routes it to an agent via the routing policy
3. Provisions a Docker container with a full git clone
4. Creates a feature branch (`feature/{feature-name}`)
5. Starts the agent CLI inside the container

Check running containers:
```bash
polyphony status
```

### Fallback Mode (no Docker)

If container mode is not available, spawn feature agents natively (shared workspace):

```
Agent tool:
  name: "feature-{feature-name}"
  subagent_type: "feature-agent"
  prompt: "You are the feature agent for {feature-name}. Read .claude/agents/feature.md for your instructions. Your feature spec is at _project_specs/features/{feature-name}.md. Start by checking TaskList for your first task."
```

> **Advisory:** Running without container isolation (Docker not found). Agents share the workspace — coordinate carefully to avoid file conflicts.

---

## Phase 4: Team Status Summary

Show the user:

### Container Mode:
```
AGENT TEAM DEPLOYED (Container Isolation ON)
─────────────────────────────────────────────

Team: {project-name}
Features: {N}
Isolation: Polyphony containers (each feature has its own branch)

NATIVE AGENTS (coordination)
─────────────────────────────
  Team Lead        Orchestrating
  Quality Agent    Watching for verification tasks
  Security Agent   Watching for security scan tasks
  Code Review      Watching for review tasks
  Merger Agent     Watching for branch/PR tasks

CONTAINER AGENTS (isolated)
────────────────────────────
  feature-{name1}  Container running — branch: feature/{name1}
  feature-{name2}  Container running — branch: feature/{name2}

PIPELINE (per feature)
──────────────────────
Spec > Review > Tests > RED Verify > Implement >
GREEN Verify > Validate > Code Review > Security > Branch+PR

Monitor: polyphony status
Cleanup: polyphony cleanup (after all PRs created)
```

### Fallback Mode:
```
AGENT TEAM DEPLOYED (Shared Workspace)
───────────────────────────────────────

⚠ Docker not available — agents share the workspace

Team: {project-name}
Features: {N}
Total tasks: {N * 10}

AGENTS
──────
  Team Lead        Orchestrating
  Quality Agent    Watching for verification tasks
  Security Agent   Watching for security scan tasks
  Code Review      Watching for review tasks
  Merger Agent     Watching for branch/PR tasks
  feature-{name1}  Starting spec for {name1}
  feature-{name2}  Starting spec for {name2}

PIPELINE
────────
Spec > Review > Tests > RED Verify > Implement >
GREEN Verify > Validate > Code Review > Security > Branch+PR

The team runs autonomously until all PRs are created.
```

---

## Monitoring

After the team is spawned, the user can:
- **Check progress:** Ask team lead for status, or run `polyphony status` (container mode)
- **Message agents:** Use SendMessage to contact any agent
- **View container logs:** `docker logs polyphony-{feature-name}` (container mode)
- **Handle blockers:** Message the blocked agent or team lead

The team runs autonomously until all PRs are created, then the team lead shuts everything down.

### Cleanup (container mode)

After all PRs are created:
```bash
polyphony cleanup
```
This removes completed containers and workspaces. Branches and PRs are preserved on the remote.


================================================
FILE: commands/sync-agents.md
================================================
# Sync Agents

Sync project configuration between Claude Code, Kimi CLI, and Codex CLI.

Run this after `/initialize-project` or anytime you want to ensure all installed AI CLI tools have matching skills, project instructions, and hooks.

---

## Phase 1: Detect Installed Tools

```bash
# Read the bootstrap checkout path recorded in ~/.claude/.bootstrap-dir.
# cat's error output is discarded, so a missing/unreadable file simply
# leaves BOOTSTRAP_DIR empty and triggers the error below.
BOOTSTRAP_DIR=$(cat ~/.claude/.bootstrap-dir 2>/dev/null)
if [ -z "$BOOTSTRAP_DIR" ]; then
    echo "Error: Maggy not installed. Run install.sh first."
    exit 1
fi
# Run the bootstrap's detection script and capture what it prints.
# If the script exits non-zero (or cannot be run), "claude" is emitted as
# the fallback; any output the script produced before failing is kept too.
DETECTED=$("$BOOTSTRAP_DIR/scripts/detect-agents.sh" 2>/dev/null || echo "claude")
echo "Detected AI CLI tools: $DETECTED"
```

---

## Phase 2: Show Current State

Check what exists for each tool and present a status table:

```bash
# Print one aligned status row: label in $1, value in $2.
status_row() {
    printf '  %-13s %s\n' "$1" "$2"
}

# Print the "Skills:" row for the skills directory $1, including how many
# immediate subdirectories (one per skill) it contains.
skills_row() {
    if [ -d "$1" ]; then
        status_row "Skills:" "$1/ ($(ls -d "$1"/*/ 2>/dev/null | wc -l | tr -d ' ') skills)"
    else
        status_row "Skills:" "NOT SET UP"
    fi
}

# Print a row labelled $1 that shows file $2 when it exists.
file_row() {
    if [ -f "$2" ]; then
        status_row "$1" "$2"
    else
        status_row "$1" "NOT SET UP"
    fi
}

echo "=== Current State ==="

# Claude
echo "Claude Code:"
skills_row ".claude/skills"
file_row "Instructions:" "CLAUDE.md"
file_row "Hooks:" ".claude/settings.json"

# Kimi
echo "Kimi CLI:"
skills_row ".kimi/skills"
echo "  Instructions: (Kimi uses skills directly, no project file needed)"
file_row "Hooks:" ".kimi/config.toml"

# Codex
echo "Codex CLI:"
skills_row ".codex/skills"
file_row "Instructions:" "AGENTS.md"
file_row "Hooks:" ".codex/config.toml"
```

Present the status table to the user, then ask what they want to do.

---

## Phase 3: Offer Sync Actions

Ask the user which actions to perform:

> **Current state shown above.** What would you like to sync?
>
> 1. **Sync all** - Copy skills + generate instructions + hooks for all detected tools
> 2. **Skills only** - Copy .claude/skills/ to .kimi/skills/ and .codex/skills/
> 3. **Generate AGENTS.md** - Create Codex project instructions from CLAUDE.md
> 4. **Generate config.toml** - Create Kimi/Codex hooks from settings.json
> 5. **Show diff** - Show what differs between tool configs

---

## Phase 4: Execute Sync

### Option 1: Sync All (or individual options below)

### Skills Sync
```bash
# Source of truth is .claude/skills/

# sync_skills_to DEST [NOTE]
# Replace directory DEST with a copy of every skill directory found under
# .claude/skills/ and print a confirmation line (NOTE is appended to it).
# Returns non-zero, with a message on stderr, if DEST cannot be reset or a
# skill cannot be copied.
sync_skills_to() {
    local dest=${1:?destination directory required} note=${2:-} skill_dir
    if ! { rm -rf -- "$dest" && mkdir -p -- "$dest"; }; then
        echo "Failed to reset $dest/" >&2
        return 1
    fi
    for skill_dir in .claude/skills/*/; do
        # An unmatched glob stays literal; skip it rather than hand it to cp.
        [ -d "$skill_dir" ] || continue
        # Strip the trailing slash: BSD/macOS cp copies only the *contents*
        # of a source written as "dir/", which would flatten every skill
        # into DEST and let same-named files overwrite each other.
        # -H still follows a skill directory that is itself a symlink.
        if ! cp -RH -- "${skill_dir%/}" "$dest/"; then
            echo "Failed to copy $skill_dir to $dest/" >&2
            return 1
        fi
    done
    echo "Synced skills to $dest/$note"
}

if [ -d ".claude/skills" ]; then
    # Sync to Kimi
    if echo "$DETECTED" | grep -q "kimi"; then
        sync_skills_to .kimi/skills
    fi

    # Sync to Codex
    if echo "$DETECTED" | grep -q "codex"; then
        sync_skills_to .codex/skills
    fi

    # Sync to generic .agents/ (works for any tool)
    sync_skills_to .agents/skills " (generic)"
else
    echo "No .claude/skills/ found. Run /initialize-project first."
fi
```

### Generate AGENTS.md (from CLAUDE.md)
If CLAUDE.md exists, generate AGENTS.md by:
1. Reading CLAUDE.md content
2. Replacing `.claude/skills/` paths with `.agents/skills/` paths
3. Writing as AGENTS.md

**Important:** AGENTS.md should reference `.agents/skills/` (generic path) since Codex reads from `.codex/skills/` and `.agents/skills/`. The `.agents/skills/` path is the cross-compatible choice.

If CLAUDE.md does not exist, copy from the bootstrap template:
```bash
cp "$BOOTSTRAP_DIR/templates/AGENTS.md" ./AGENTS.md
echo "Created AGENTS.md from template (customize for your project)"
```

### Generate config.toml
```bash
# Install the shared hooks template for each detected tool that reads
# config.toml (Kimi first, then Codex).
for tool in kimi codex; do
    echo "$DETECTED" | grep -q "$tool" || continue
    mkdir -p ".$tool"
    cp "$BOOTSTRAP_DIR/templates/config.toml" ".$tool/config.toml"
    echo "Created .$tool/config.toml with hooks"
done
```

---

## Phase 5: Summary

```
Sync complete!

Skills synced:
  .claude/skills/ -> .kimi/skills/  (N skills)
  .claude/skills/ -> .codex/skills/ (N skills)
  .claude/skills/ -> .agents/skills/ (N skills, generic)

Project instructions:
  CLAUDE.md   (Claude Code)
  AGENTS.md   (Codex CLI)

Hooks config:
  .claude/settings.json (Claude Code)
  .kimi/config.toml     (Kimi CLI)
  .codex/config.toml    (Codex CLI)

You can now run any of these in this project:
  claude    # Claude Code
  kimi      # Kimi CLI
  codex     # Codex CLI
```

---

## Phase 6: Update .gitignore

Ensure cross-tool directories are properly handled in .gitignore:

```bash
# Add to .gitignore if not present

# ensure_gitignored ENTRY [FILE]
# Append ENTRY as its own line to FILE (default: .gitignore) unless FILE
# already contains a line that is exactly ENTRY. Creates FILE if needed.
ensure_gitignored() {
    local entry=$1 file=${2:-.gitignore}
    # -x: whole-line match, so ".kimi/skills/" or a comment mentioning the
    # entry does not count as ".kimi/" already being ignored.
    if grep -qxF -- "$entry" "$file" 2>/dev/null; then
        return 0
    fi
    # If the file ends mid-line (no trailing newline), finish that line
    # first so the new entry is not glued onto the previous pattern.
    if [ -s "$file" ] && [ -n "$(tail -c 1 "$file")" ]; then
        echo >> "$file"
    fi
    printf '%s\n' "$entry" >> "$file"
}

for entry in ".kimi/" ".codex/" ".agents/"; do
    ensure_gitignored "$entry"
done
```

**Note:** Unlike `.claude/`, which is typically committed, the `.kimi/`, `.codex/`, and `.agents/` project dirs should generally be gitignored since their contents are generated (skills are copied from `.claude/skills/`). The `/sync-agents` command regenerates them.

AGENTS.md **should** be committed (it's the Codex equivalent of CLAUDE.md).


================================================
FILE: commands/sync-contracts.md
================================================
# /sync-contracts

> Lightweight incremental update of workspace contracts without full re-analysis.

## Purpose

Fast contract synchronization that:
- Checks only contract source files (not full workspace)
- Updates CONTRACTS.md with changes
- Validates consistency
- Takes ~15 seconds instead of ~2 minutes

## When to Use

| Scenario | Command |
|----------|---------|
| After modifying API endpoints | `/sync-contracts` |
| After changing shared types | `/sync-contracts` |
| Session start shows stale contracts | `/sync-contracts` |
| Post-commit hook (automatic) | `/sync-contracts --lightweight` |
| Before pushing changes | `/sync-contracts --validate` |
| See what changed without updating | `/sync-contracts --diff` |

## Behavior

### Step 1: Load Existing Topology

```
🔄 Loading workspace context...

Workspace: myapp (Monorepo)
Last full analysis: 2026-01-18T10:00:00Z
Last sync: 2026-01-20T14:32:00Z
```

Does NOT re-discover workspace structure - uses existing TOPOLOGY.md.

### Step 2: Check Contract Sources

```
📋 Checking contract sources...

Monitored files (from .contract-sources):
  ✓ apps/api/openapi.json (modified 2h ago)
  ✓ packages/shared-types/src/index.ts (modified 2h ago)
  ○ packages/db/schema/campaigns.ts (unchanged)
  ○ packages/db/schema/users.ts (unchanged)
  ○ apps/api/app/schemas/campaign.py (unchanged)

Changes detected: 2 files
```

### Step 3: Extract Changes

```
📝 Extracting contract changes...

apps/api/openapi.json:
  + POST /api/campaigns/bulk (new endpoint)
  ~ GET /api/campaigns (added 'status' query param)

packages/shared-types/src/index.ts:
  ~ Campaign interface (added 'tags: string[]' field)
  + CampaignBulkCreate interface (new)
```

### Step 4: Update Artifacts

```
✏️  Updating workspace artifacts...

Updated: _project_specs/workspace/CONTRACTS.md
  - Added POST /api/campaigns/bulk to endpoints
  - Updated Campaign type definition
  - Added CampaignBulkCreate type

Updated: _project_specs/workspace/CROSS_REPO_INDEX.md
  - Added bulk create capability

Timestamps updated:
  Last sync: 2026-01-20T16:45:00Z
```

### Step 5: Validate Consistency

```
✅ Validating contract consistency...

Checks:
  ✓ OpenAPI endpoint count matches routes (48/48)
  ✓ All Pydantic models have TypeScript equivalents
  ✓ No orphaned types in shared-types
  ⚠️  Frontend types may need regeneration

Validation: PASSED (1 warning)
```

## Final Output

```
════════════════════════════════════════════════════════════════
  CONTRACT SYNC COMPLETE
════════════════════════════════════════════════════════════════

Sources checked: 5
Changes detected: 2
Files updated: 2

Changes Summary:
  + POST /api/campaigns/bulk (new endpoint)
  ~ Campaign interface (added 'tags' field)
  + CampaignBulkCreate interface (new)

Freshness: 🟢 Fresh
Last sync: 2026-01-20T16:45:00Z

⚠️  Note: Frontend types may need regeneration
   Run: cd apps/web && npm run generate:types

════════════════════════════════════════════════════════════════
```

## Flags

| Flag | Description |
|------|-------------|
| `--lightweight` | Skip validation, minimal output (for hooks) |
| `--diff` | Show changes without updating files |
| `--validate` | Only validate, don't update |
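| `--force` | Update even if no changes detected (also used to force an update after a failed validation) |
| `--verbose` | Show detailed extraction output |
| `--skip <file>` | Skip the given contract source for this run (e.g. when the file is missing) |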

## Diff Mode

Preview changes without applying:

```bash
/sync-contracts --diff
```

Output:

```
📋 Contract Changes (not applied)

apps/api/openapi.json:
  + POST /api/campaigns/bulk
    Request: CampaignBulkCreate[]
    Response: Campaign[]

  ~ GET /api/campaigns
    + query param: status (string, optional)

packages/shared-types/src/index.ts:
  ~ interface Campaign {
      id: string;
      name: string;
  +   tags: string[];        // NEW
      status: CampaignStatus;
    }

  + interface CampaignBulkCreate {
      campaigns: CampaignCreate[];
    }

To apply these changes: /sync-contracts
```

## Validate Mode

Check consistency without updating:

```bash
/sync-contracts --validate
```

Output:

```
🔍 Contract Validation

Endpoint Consistency:
  ✓ OpenAPI spec: 48 endpoints
  ✓ Route files: 48 handlers
  ✓ Match: YES

Type Consistency:
  ✓ Pydantic models: 23
  ✓ TypeScript types: 34
  ✓ Shared types exported: 34
  ⚠️  2 types only in backend (internal)

Cross-Module References:
  ✓ Frontend imports valid types: YES
  ✓ Backend codegen up to date: YES

Overall: ✅ VALID (2 warnings)
```

## Lightweight Mode

For hooks - minimal output, fast execution:

```bash
/sync-contracts --lightweight
```

Output:

```
✓ Contracts synced (2 changes)
```

Or if no changes:

```
✓ Contracts up to date
```

## Contract Sources File

The sync uses `.contract-sources` to know what to check:

```bash
# _project_specs/workspace/.contract-sources
# Auto-generated by /analyze-workspace
# Edit to add/remove monitored files

# OpenAPI specs
apps/api/openapi.json

# Type definitions
packages/shared-types/src/index.ts
packages/shared-types/src/api.ts
packages/shared-types/src/campaign.ts

# Pydantic schemas (Python)
apps/api/app/schemas/campaign.py
apps/api/app/schemas/user.py
apps/api/app/schemas/auth.py

# Database schema
packages/db/schema/campaigns.ts
packages/db/schema/users.ts
```

To add a new source:

```bash
echo "apps/api/app/schemas/new_model.py" >> _project_specs/workspace/.contract-sources
```

## Error Handling

### No Contract Sources

```
⚠️  No contract sources configured

Run /analyze-workspace first to set up contract monitoring.
```

### Source File Missing

```
⚠️  Contract source not found: apps/api/openapi.json

Options:
  1. Generate it: cd apps/api && python -m app.generate_openapi
  2. Remove from monitoring: Edit .contract-sources
  3. Skip this file: /sync-contracts --skip apps/api/openapi.json
```

### Validation Failed

```
❌ Contract validation failed

Issues found:
  1. OpenAPI has 48 endpoints, routes have 47
     Missing: DELETE /api/campaigns/:id (in spec, not in routes)

  2. Type mismatch: Campaign.status
     OpenAPI: "draft" | "active" | "paused"
     TypeScript: "draft" | "active" | "paused" | "archived"

Fix these issues, then run /sync-contracts again.
Or force update: /sync-contracts --force
```

## Integration with Hooks

### Post-Commit Hook

Automatically runs after commits that touch contract sources:

```bash
# hooks/post-commit
SOURCES_FILE=_project_specs/workspace/.contract-sources

# contract_source_changed SOURCES_FILE COMMITTED
# Succeeds when a path listed in SOURCES_FILE is exactly one of the
# newline-separated paths in COMMITTED. Fails otherwise, including when
# SOURCES_FILE does not exist.
contract_source_changed() {
  [ -f "$1" ] || return 1
  # read trims surrounding whitespace; the || keeps a final line that has
  # no trailing newline.
  while read -r _src || [ -n "$_src" ]; do
    case "$_src" in
      '' | '#'*) continue ;;  # blank lines and comments are not paths
    esac
    # -F: literal string, so "." in a path is not a regex wildcard.
    # -x: whole-line match, so a path never matches as a substring.
    if printf '%s\n' "$2" | grep -qxF -- "$_src"; then
      return 0
    fi
  done < "$1"
  return 1
}

# --root makes the repository's very first commit list its files as well.
COMMITTED=$(git diff-tree --root --no-commit-id --name-only -r HEAD)

if contract_source_changed "$SOURCES_FILE" "$COMMITTED"; then
  echo "📝 Contract source changed, syncing..."
  claude --silent "/sync-contracts --lightweight"
fi
```

### Pre-Push Hook

Validates before push:

```bash
# hooks/pre-push
# Abort the push (exit 1) when the validation command exits non-zero.
echo "🔍 Validating contracts..."
claude --silent "/sync-contracts --validate" || {
  printf '%s\n' "❌ Contract validation failed" "Run /sync-contracts to fix"
  exit 1
}
```

## Comparison: sync-contracts vs analyze-workspace

| Aspect | /sync-contracts | /analyze-workspace |
|--------|-----------------|-------------------|
| Time | ~15 seconds | ~2 minutes |
| Scope | Contract files only | Full workspace |
| Discovers new modules | No | Yes |
| Updates TOPOLOGY.md | No | Yes |
| Updates CONTRACTS.md | Yes | Yes |
| Rebuilds dependency graph | No | Yes |
| When to use | Frequent (daily) | Occasional (weekly) |


================================================
FILE: commands/update-code-index.md
================================================
# Update Code Index

Regenerates `CODE_INDEX.md` by scanning the codebase for all functions, classes, hooks, and components. Organizes by capability to prevent semantic duplication.

---

## What This Command Does

1. **Scans source files** - Finds all exported functions, classes, hooks, components
2. **Extracts docstrings** - Gets descriptions from JSDoc/docstrings
3. **Categorizes by capability** - Groups by what things DO, not where they live
4. **Generates CODE_INDEX.md** - Creates/updates the semantic index

---

## Phase 1: Detect Project Type

```bash
# Check language
ls package.json pyproject.toml 2>/dev/null

# Check source directories
ls -d src/ lib/ app/ 2>/dev/null
```

---

## Phase 2: Scan Codebase

### For TypeScript/JavaScript

Scan for exports:

```bash
# Find all exported functions
grep -rn "export function\|export const\|export class\|export default" src/ --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx"

# Find React hooks
grep -rn "export function use[A-Z]\|export const use[A-Z]" src/ --include="*.ts" --include="*.tsx"

# Find React components (PascalCase exports)
grep -rn "export function [A-Z]\|export const [A-Z].*=.*=>" src/ --include="*.tsx" --include="*.jsx"
```

### For Python

```bash
# Find all function definitions
grep -rn "^def \|^async def \|^class " src/ --include="*.py"

# Check __all__ exports
grep -rn "__all__" src/ --include="*.py"
```

---

## Phase 3: Extract Documentation

For each found export, extract:

1. **Name** - Function/class name
2. **Location** - File path and line number
3. **Description** - From the JSDoc summary line (or `@description` tag), or the first line of the docstring
4. **Parameters** - Function signature
5. **Returns** - Return type if available

### TypeScript Example

```typescript
/**
 * Formats a date into a human-readable relative string.
 * @param date - The date to format
 * @returns Relative time string like "2 days ago"
 */
export function formatRelative(date: Date): string {
```

Extract:
- Name: `formatRelative`
- Description: "Formats a date into a human-readable relative string"
- Params: `(date: Date)`
- Returns: `string`

### Python Example

```python
def format_relative(date: datetime) -> str:
    """Formats a date into a human-readable relative string.

    Args:
        date: The date to format

    Returns:
        Relative time string like "2 days ago"
    """
```

Extract:
- Name: `format_relative`
- Description: "Formats a date into a human-readable relative string"
- Params: `(date: datetime)`
- Returns: `str`

---

## Phase 4: Categorize by Capability

Group functions by what they DO:

| Category | Keywords to Match |
|----------|-------------------|
| **Date/Time** | date, time, format, parse, duration, relative, timestamp |
| **Validation** | validate, is*, check, verify, sanitize |
| **String Operations** | string, text, format, parse, slug, truncate, capitalize |
| **API Clients** | fetch, get, post, put, delete, api, request |
| **Authentication** | auth, login, logout, session, token, user |
| **Error Handling** | error, exception, handle, catch, throw |
| **Database** | db, query, find, create, update, delete, repository |
| **Hooks (React)** | use* |
| **Components (React)** | PascalCase in .tsx/.jsx |
| **Utilities** | util, helper, common (catch-all) |

---

## Phase 5: Generate CODE_INDEX.md

Create or overwrite `CODE_INDEX.md`:

```markdown
# Code Index

*Auto-generated by /update-code-index*
*Last updated: [TIMESTAMP]*

> ⚠️ **Before writing new code, search this index first!**
> Find similar functionality? Use or extend it instead of creating new.

## Quick Stats

| Category | Count | Main Location |
|----------|-------|---------------|
| Date/Time | X | src/utils/dates.ts |
| Validation | X | src/utils/validate.ts |
| API Clients | X | src/api/*.ts |
| Hooks | X | src/hooks/*.ts |
| Components | X | src/components/*.tsx |

---

## Date/Time Operations

| Function | Location | Description | Signature |
|----------|----------|-------------|-----------|
| `formatDate()` | utils/dates.ts:15 | Formats Date to locale string | `(date: Date, opts?)` |
| `formatRelative()` | utils/dates.ts:32 | Formats as "2 days ago" | `(date: Date)` |
| ... | ... | ... | ... |

---

## Validation

| Function | Location | Description | Signature |
|----------|----------|-------------|-----------|
| `isEmail()` | utils/validate.ts:10 | Validates email format | `(email: string)` |
| ... | ... | ... | ... |

---

[Continue for each category...]
```

---

## Phase 6: Report Changes

After generating, report:

```
📊 Code Index Updated

Scanned:
• 45 TypeScript files
• 12 React components
• 8 custom hooks
• 156 exported functions

Categories:
• Date/Time: 5 functions
• Validation: 8 functions
• API Clients: 23 functions
• Hooks: 8 hooks
• Components: 12 components
• Utilities: 42 functions

New since last run:
• + fetchOrders() in api/orders.ts
• + useCart() in hooks/useCart.ts
• + OrderCard component in components/OrderCard.tsx

Possible duplicates detected:
• ⚠️ formatDate() and displayDate() - similar purpose?
• ⚠️ isValid() and validate() - review these

Updated: CODE_INDEX.md
```

---

## Handling Missing Documentation

If a function lacks documentation:

```markdown
| `myFunction()` | utils/helpers.ts:42 | ⚠️ *No description - add JSDoc* | `(a, b, c)` |
```

Report at end:

```
⚠️ 12 functions missing documentation:
• myFunction() in utils/helpers.ts:42
• anotherFunc() in services/user.ts:88
• ...

Run with --add-docs to prompt for descriptions.
```

---

## Options

```bash
# Basic update
/update-code-index

# Include private/non-exported functions
/update-code-index --include-private

# Prompt to add missing docs
/update-code-index --add-docs

# Only scan specific directory
/update-code-index src/utils

# Output as JSON (for vector DB ingestion)
/update-code-index --json > code_index.json

# Detect duplicates only (no index update)
/update-code-index --audit-only
```

---

## Audit Mode

When run with `--audit-only` or as `/audit-duplicates`, skip the index update and produce a duplicate report like this:

```markdown
## Duplicate Audit Report - [DATE]

### 🔴 High Confidence Duplicates

1. **formatDate / displayDate / showDate**
   - `formatDate()` at utils/dates.ts:15
   - `displayDate()` at components/Header.tsx:42
   - `showDate()` at pages/Profile.tsx:28
   - Similarity: 89% (same logic, different names)
   - **Recommendation:** Consolidate into utils/dates.ts

2. **isEmail / validateEmail / checkEmail**
   - `isEmail()` at utils/validate.ts:10
   - `validateEmail()` at forms/signup.ts:55
   - `checkEmail()` at api/users.ts:30
   - Similarity: 95% (identical regex)
   - **Recommendation:** Use isEmail() everywhere

### 🟡 Possible Duplicates (Review)

1. **fetchUser / getUser / loadUser**
   - Different implementations but same purpose
   - May be intentional (different contexts)
   - **Action:** Document if intentional, merge if not

### 🟢 Similar But Distinct

1. **Button / IconButton / LinkButton**
   - Related components with different purposes
   - **Status:** OK - documented variants
```

---

## Integration with Vector DB

If vector DB is set up, also update embeddings:

```bash
/update-code-index --vector
```

This:
1. Generates CODE_INDEX.md (as usual)
2. Creates embeddings for each function description
3. Stores in `.chroma/` or `.lancedb/`
4. Enables semantic search: "find functions that validate user input"

---

## Suggested Workflow

### Daily
- Index auto-updates on significant code changes
- Claude checks index before writing new code

### Weekly
- Run `/update-code-index --audit-only`
- Review duplicate report
- Merge or document similar functions

### After Major Features
- Full index regeneration
- Vector DB re-embedding (if used)

---

## File Output

Creates/updates:
- `CODE_INDEX.md` - Human-readable index
- `.code-index.json` (optional) - Machine-readable for tooling

---

## Claude Instructions

When user runs `/update-code-index`:

1. Detect project type (TS/JS/Python)
2. Scan source directories
3. Extract all exports with documentation
4. Categorize by capability
5. Generate CODE_INDEX.md
6. Report stats and potential duplicates
7. Commit the updated index

After running, remind user:
> "Index updated! I'll check this before writing any new code to avoid duplicating existing functionality."


================================================
FILE: docs/architecture-v5.md
================================================
# Maggy v5 Architecture — Multi-Project, Multi-Model Command Center

## 1. Executive Summary

v5 transforms Maggy from a single-project, single-model toolkit into a **multi-project, multi-model orchestration platform**. Pi replaces per-CLI adapters as the universal agent harness. Maggy becomes the central web dashboard. Token budgets are managed dynamically across providers. New features are validated against the competitive intelligence graph before engineering begins.

---

## 2. What Changed: Before and After

### v3.x (Single-Model, Single-Project)

```
User → Claude Code → single project → single model
         │
         ├── CLAUDE.md (project config)
         ├── skills/ (TDD, security, etc.)
         ├── iCPG (blast radius, drift)
         ├── Mnemos (memory, fatigue)
         └── hooks (PreToolUse, Stop, etc.)
```

- One project at a time
- One model (Claude) for everything
- When Claude tokens ran out, work stopped
- Agents shared a filesystem (conflict-prone)
- No market validation for new features

### v4.0 (Container Isolation, Cross-Agent)

```
User → Claude Code → /spawn-team → Polyphony containers
         │                            ├── Container 1 (claude CLI)
         ├── cross-agent-delegation   ├── Container 2 (codex CLI)
         │   (complexity scoring)     └── Container 3 (kimi CLI)
         ├── iCPG + Mnemos
         └── 3 separate CLI adapters
```

- Container isolation per agent (own git clone + branch)
- Cross-agent delegation via complexity scoring
- Still one project at a time
- Still separate CLI tools (claude, codex, kimi)
- Token exhaustion on one provider = manual switch

### v5.0 (Multi-Project, Multi-Model, Market-Validated)

```
User → Maggy Web Dashboard → multiple projects → multiple models
         │                       │
         │   ┌───────────────────┼───────────────────┐
         │   │ Project A         │ Project B          │
         │   │ zensurveys        │ chief-of-staff     │
         │   │                   │                    │
         │   │  ┌─Pi agent─┐    │  ┌─Pi agent─┐     │
         │   │  │ claude   │    │  │ gpt-4o   │     │
         │   │  │ → gpt-4o │    │  │ → gemini │     │
         │   │  │ → gemini │    │  │ → qwen   │     │
         │   │  └──────────┘    │  └──────────┘     │
         │   └───────────────────┼───────────────────┘
         │                       │
         ├── codebase-memory-mcp (structural graph — 36 projects)
         ├── CIKG (market graph) │ iCPG (intent graph, layers on code graph)
         ├── Mnemos (cross-model fatigue)
         └── Token Budget Manager (auto-rotate)
```

---

## 3. Core Components

### 3.1 Pi — Universal Agent Harness

**Replaces:** `ClaudeAdapter`, `CodexAdapter`, `KimiAdapter`

Pi is an open-source (MIT) terminal coding agent that supports 20+ model providers through a single interface. It runs in three modes:

| Mode | Use Case |
|------|----------|
| **Interactive** | Human at terminal |
| **RPC** | Headless JSONL over stdin/stdout — for container agents |
| **SDK** | Embedded in Maggy's orchestrator |

**Provider support:**

| Tier | Providers | Auth |
|------|-----------|------|
| Subscription | Claude Pro/Max, ChatGPT Plus/Pro, GitHub Copilot | OAuth |
| API Key | Anthropic, OpenAI, Google, DeepSeek, Mistral, Groq, xAI | Env var |
| Cloud | Azure OpenAI, Amazon Bedrock, Cloudflare Workers | Platform |
| Local | Ollama (Qwen, Llama, etc.) | None |

**Key capability:** Runtime model switching via RPC without restarting:
```json
{"command": "set_model", "provider": "openai", "model": "gpt-4o"}
```

### 3.2 Maggy v2 — Multi-Project Command Center

**Extends:** Maggy v1 (single-project inbox + execute)

Maggy v2 is a web dashboard (FastAPI + React) that orchestrates work across multiple GitHub repos from a single browser tab.

```
┌─────────────────────────────────────────────────────────────┐
│  MAGGY v2 — Web Dashboard                                    │
│                                                              │
│  ┌──────────────────────────────────────────────────────┐   │
│  │  PROJECT REGISTRY (~/.maggy/projects.yaml)           │   │
│  │  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐   │   │
│  │  │ Project │ │ Project │ │ Project │ │ Project │   │   │
│  │  │ A       │ │ B       │ │ C       │ │ D       │   │   │
│  │  └────┬────┘ └────┬────┘ └────┬────┘ └────┬────┘   │   │
│  └───────┼───────────┼───────────┼───────────┼─────────┘   │
│          │           │           │           │              │
│  ┌───────▼───────────▼───────────▼───────────▼─────────┐   │
│  │  ORCHESTRATOR                                        │   │
│  │  ┌────────────┐ ┌─────────────┐ ┌────────────────┐  │   │
│  │  │ Planning   │ │ Decision    │ │ Execution      │  │   │
│  │  │ Layer      │ │ Layer       │ │ Layer          │  │   │
│  │  │            │ │             │ │                │  │   │
│  │  │ Claude     │ │ iCPG blast  │ │ Pi agents in   │  │   │
│  │  │ plans      │ │ radius →    │ │ Polyphony      │  │   │
│  │  │ Codex      │ │ model tier  │ │ containers     │  │   │
│  │  │ counter-   │ │             │ │                │  │   │
│  │  │ checks     │ │ CIKG market │ │ Token budget   │  │   │
│  │  │            │ │ validation  │ │ auto-rotation  │  │   │
│  │  └────────────┘ └─────────────┘ └────────────────┘  │   │
│  └──────────────────────────────────────────────────────┘   │
│                                                              │
│  ┌──────────────────────────────────────────────────────┐   │
│  │  CODE INTELLIGENCE (codebase-memory-mcp)             │   │
│  │  36 projects indexed │ 700K+ nodes │ 1.4M+ edges    │   │
│  │  Structural graph powering iCPG, blast radius,       │   │
│  │  cross-project deps, agent context                   │   │
│  └──────────────────────────────────────────────────────┘   │
│                                                              │
│  ┌──────────────────────────────────────────────────────┐   │
│  │  DEPLOY LAYER                                        │   │
│  │  4 isolated browser containers (Playwright)          │   │
│  │  Each with its own Vercel auth session               │   │
│  └──────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
```

**New capabilities over v1:**
- Multi-project view (registry of repos + branches)
- Cross-project ticket triage
- Token budget dashboard (usage per model per project)
- Deploy status per project (isolated Vercel sessions)

### 3.3 Token Budget Manager

**New component.** Manages model selection based on blast radius and token availability.

#### Model Tiering by Composite Risk Score

Model selection uses iCPG's **5-dimension complexity scoring**, not just file count. Each dimension is scored 0-2, total 0-10:

| Dimension | What It Measures | Examples |
|-----------|-----------------|----------|
| **Cyclomatic** | Control flow complexity of touched code | Nested conditionals, state machines |
| **Fan-out** | How many other modules depend on the change | Shared utilities, API contracts |
| **Security** | Whether auth, crypto, permissions, or PII are involved | Auth policy, token validation |
| **Concurrency** | Race conditions, locks, async coordination | Queue workers, websocket handlers |
| **Domain** | Business logic criticality | Pricing, billing, compliance |

Plus 6-dimension drift detection (spec, decision, ownership, test, usage, dependency) and constraint checking from active ReasonNodes.

This means a one-file auth policy change scores high (security=2, domain=2) while a five-file CSS refactor scores low (cyclomatic=0, fan_out=1). The routing is risk-aware, not file-count-aware.

```
iCPG composite risk score → model tier

┌─────────────┬──────────────────────┬─────────────────────────┐
│ Score        │ Model Tier           │ Rationale               │
├─────────────┼──────────────────────┼─────────────────────────┤
│ 0-3 (low)   │ Qwen local / DeepSeek│ Bounded scope, no       │
│             │ via Ollama            │ security/concurrency/   │
│             │                      │ domain risk             │
├─────────────┼──────────────────────┼─────────────────────────┤
│ 4-6 (medium)│ Kimi / Gemini Flash  │ Real risk but bounded;  │
│             │                      │ + high-tier post-review │
│             │                      │ on output (catch subtle │
│             │                      │ bugs cheap models miss) │
├─────────────┼──────────────────────┼─────────────────────────┤
│ 7-10 (high) │ Claude / GPT-4o      │ Full context needed —   │
│             │                      │ cross-cutting, security,│
│             │                      │ concurrency, or domain  │
│             │                      │ critical changes        │
└─────────────┴──────────────────────┴─────────────────────────┘
```

**Dimension overrides:** Regardless of total score, if `security >= 2` or `concurrency >= 2`, the task is always routed to the high tier. These dimensions are too dangerous for cheap models.

#### Low-Tier Output Verification

When a task is handled by a cheap or local model (low or medium tier, score 0-6), its output goes through additional verification before landing:

| Gate | What It Catches |
|------|----------------|
| iCPG drift check | Scope drift, constraint violations, invariant breakage |
| iCPG constraint assertions | Postconditions from ReasonNodes evaluated against output |
| High-tier spot review | Claude/GPT-4o reviews the diff (cheaper than writing it) |
| Static analysis | Linter + type checker catch mechanical errors |

This prevents the failure class Codex identified: code that passes tests but has subtle logical regressions.

#### Fallback Chain

When the primary model hits its quota, the budget manager rotates to the next model in the fallback chain. Model switching is an **explicit handoff with verification**, not a silent swap:

1. Current model hits quota or rate limit
2. Mnemos writes checkpoint with full execution state
3. Pi switches to next model via RPC `set_model`
4. Checkpoint is re-injected as structured context
5. New model verifies it understands the task before continuing
6. If verification fails, escalate to next tier (don't retry on weaker model)

```
Claude (quota hit) → checkpoint + handoff
  → GPT-4o (quota hit) → checkpoint + handoff
    → Gemini 2.5 Pro (quota hit) → checkpoint + handoff
      → Kimi (quota hit) → checkpoint + handoff
        → DeepSeek (quota hit) → checkpoint + handoff
          → Qwen local (unlimited, always available)
```

#### Budget Tracking

```yaml
# ~/.maggy/token-budget.yaml
providers:
  anthropic:
    daily_limit_usd: 50.00
    used_today_usd: 32.15
    model_preference: claude-sonnet-4-20250514
  openai:
    daily_limit_usd: 30.00
    used_today_usd: 5.20
    model_preference: gpt-4o
  local:
    daily_limit_usd: 0  # free
    model_preference: qwen2.5-coder:32b
    ollama_endpoint: http://localhost:11434
```

### 3.4 Planning Layer — Dual-Model Review

Every plan goes through a two-model review before execution:

```
Feature Request / Ticket
        │
        ▼
┌─────────────────┐
│ Claude Plans     │  Primary model creates architecture plan
│ (full context)   │  with file list, approach, risks
└────────┬────────┘
         │
         ▼
┌─────────────────┐
│ Codex Counter-   │  Second model independently reviews:
│ Checks           │  - Missing edge cases?
│ (independent)    │  - Over-engineering?
│                  │  - Security gaps?
│                  │  - Simpler approach?
└────────┬────────┘
         │
         ▼
┌─────────────────┐
│ Diff View        │  Maggy shows both perspectives
│ in Maggy UI      │  User approves/resolves conflicts
└────────┬────────┘
         │
         ▼
    Execution begins
```

### 3.5 Decision Layer — iCPG + CIKG

Two graphs feed the orchestrator's decisions:

#### iCPG (Code Graph) — "Should we change this?"

Per-project, SQLite-backed. Layers intent and constraints on top of the structural graph from **codebase-memory-mcp** (Section 3.8). Answers:

| Query | What It Returns |
|-------|----------------|
| `icpg query blast <id>` | Files affected, downstream dependencies |
| `icpg query risk <symbol>` | Drift history, ownership changes, fragility |
| `icpg query constraints <file>` | Invariants that must be preserved |
| `icpg drift check` | 6-dimension drift across spec, decision, ownership, test, usage, dependency |

The blast radius score (0-10) determines:
- Which model tier handles the task
- How deep the architecture review goes
- Whether dual-model planning is required

#### CIKG (Competitive Intelligence Knowledge Graph) — "Should we build this?"

Supabase-backed. Node types: `competitor`, `feature`, `market_segment`, `technology`, `trend`, `product`.

Edge types: `has_feature`, `competes_with`, `targets_market`, `uses_technology`, `protaige_has`, `protaige_lacks`, `threatens`.

Used for **new feature validation** before engineering begins:

```
New Feature Idea
       │
       ▼
┌────────────────────┐
│ CIKG: find_gaps()  │  Who has this? Who lacks it?
│ compare_entities() │  Competitive advantage or table stakes?
│ get_landscape()    │  Market trend alignment?
└────────┬───────────┘
         │
         ▼
┌────────────────────┐
│ Market Score        │
│                    │
│ gap_count: 3       │  3 competitors lack this → opportunity
│ threat_level: high │  2 competitors actively building → urgent
│ trend_align: yes   │  Aligns with "AI voice" trend → proceed
└────────┬───────────┘
         │
         ▼
  Requirements validated → proceed to iCPG blast radius
```

### 3.6 Execution Layer — Polyphony + Pi

Updated container architecture. Each feature agent runs Pi in RPC mode inside a Polyphony container:

```
┌──────────────────────────────────────────────────────┐
│ Polyphony Container (per feature)                     │
│                                                       │
│  ┌─────────────────────────────────────────────────┐ │
│  │  Pi Agent (RPC mode over stdin/stdout)           │ │
│  │                                                  │ │
│  │  Current model: claude-sonnet-4-20250514         │ │
│  │  Fallback chain: gpt-4o → gemini → kimi → qwen  │ │
│  │                                                  │ │
│  │  Tools: read, write, edit, bash                  │ │
│  │  Extensions: skills, hooks, MCP servers          │ │
│  └──────────────────────────────┬──────────────────┘ │
│                                 │                     │
│  ┌──────────┐  ┌────────────┐  │  ┌──────────────┐  │
│  │ Git clone│  │ .mnemos/   │  │  │ .icpg/       │  │
│  │ own      │  │ fatigue    │  │  │ blast radius │  │
│  │ branch   │  │ checkpoint │  │  │ constraints  │  │
│  └──────────┘  └────────────┘  │  └──────────────┘  │
│                                │                     │
│  ┌─────────────────────────────▼──────────────────┐  │
│  │  RPC Bridge (Maggy ↔ Pi)                       │  │
│  │  • Send prompts                                │  │
│  │  • Receive streaming events                    │  │
│  │  • Switch models on quota hit                  │  │
│  │  • Steer/follow-up mid-task                    │  │
│  └────────────────────────────────────────────────┘  │
└──────────────────────────────────────────────────────┘
```

**Coordination model (hybrid — option 2):**

Claude Code's native Task tool spawns agents that keep full team coordination (SendMessage, TaskList, UI visibility). Each agent controls a Pi instance inside a Polyphony container via RPC. The agent has Claude's brain for coordination but Pi's body for execution.

**Why this is not a split-brain problem:**
Splitting coordination (Claude) from execution (Pi) could leave the two sides with diverging views of the task. Mnemos prevents this by serving as a **shared memory layer that both sides can read**:

- **Mnemos checkpoint** persists goal, constraints, progress, and working state to disk (`.mnemos/`)
- **iCPG state** persists intent, constraints, and drift to disk (`.icpg/`)
- **Signal log** (`.mnemos/signals.jsonl`) persists behavioral signals across model switches
- All three are inside the container volume — they survive model swaps

The coordination agent (Claude Task tool) handles team communication. The execution agent (Pi) handles code work. The shared disk state (Mnemos + iCPG) is the single source of truth. There's no split brain because there's no duplicated state — each layer owns a distinct concern with shared persistence.

```
Claude Code Task tool agent (coordination — messaging, tasks, UI)
    │
    ├── SendMessage to team lead ✓
    ├── TaskUpdate progress ✓
    ├── Visible in tmux/iTerm ✓
    │
    └── Executes code work via:
        docker exec polyphony-feature-X \
            pi --mode rpc --provider anthropic
        │
        ├── stdin: {"command": "prompt", "content": "implement auth"}
        ├── stdout: streaming events (text, tool calls, completion)
        ├── stdin: {"command": "set_model", ...} when quota hits
        │
        └── Shared persistence (inside container volume):
            ├── .mnemos/checkpoint-latest.json  ← goal, constraints, progress
            ├── .mnemos/signals.jsonl           ← behavioral signals
            ├── .mnemos/fatigue.json            ← model-normalized fatigue
            └── .icpg/reason.db                 ← intent, constraints, drift
```

### 3.7 Deploy Layer — Isolated Vercel Sessions

Four Docker containers, each running a headless browser with its own Vercel auth session:

```
┌────────────────────────────┐
│ vercel-session-A           │
│ Playwright + Chrome        │
│ Auth: vercel.com (session) │
│ Project: zensurveys-backend│
│ No local `vercel login`    │
├────────────────────────────┤
│ vercel-session-B           │
│ Own Chrome profile         │
│ Project: zensurveys-fe     │
├────────────────────────────┤
│ vercel-session-C           │
│ Own Chrome profile         │
│ Project: chief-of-staff    │
├────────────────────────────┤
│ vercel-session-D           │
│ Own Chrome profile         │
│ Project: rodcast           │
└────────────────────────────┘
```

Each container persists its Chrome profile to a Docker volume. No local directory conflicts. Deploys are triggered from Maggy's web UI or via git push (Vercel auto-deploy).

### 3.8 Code Intelligence Layer — codebase-memory-mcp

**Foundation layer.** Every component above — iCPG, blast radius scoring, Maggy's orchestrator, Pi agents — depends on a structural understanding of the code. codebase-memory-mcp is the AST-based knowledge graph that provides it.

```
┌──────────────────────────────────────────────────────────────┐
│  codebase-memory-mcp                                          │
│  ─────────────────────────────────────────────────────────── │
│                                                               │
│  36 projects indexed │ 14 MCP tools │ 64 languages             │
│  700K+ nodes │ 1.4M+ edges │ auto-updated via file watcher    │
│                                                               │
│  Node Types:                                                  │
│    Function, Method, Class, Variable, Route,                  │
│    File, Module, Folder, Section, Project                     │
│                                                               │
│  Edge Types:                                                  │
│    CALLS, IMPORTS, USAGE, DEFINES, DEFINES_METHOD,            │
│    TESTS, WRITES, HANDLES, HTTP_CALLS, CONFIGURES,            │
│    SEMANTICALLY_RELATED, SIMILAR_TO, CONTAINS_*               │
│                                                               │
│  Search Modes:                                                │
│    BM25 full-text │ regex pattern │ semantic vector             │
│                                                               │
│  Trace Modes:                                                 │
│    calls (callers/callees) │ data_flow (value propagation)     │
│    cross_service (HTTP/async through Routes)                   │
└──────────────────────────────────────────────────────────────┘
```

#### How Each Component Uses It

| Component | Graph Queries | Purpose |
|-----------|--------------|---------|
| **iCPG blast radius** | `trace_path(fn, mode=calls, risk_labels=true)` | Fan-out scoring — how many callers/callees, at what hop distance |
| **iCPG drift** | `detect_changes` + `query_graph` | Detect which functions changed, trace impact to dependents |
| **Token budget routing** | `trace_path` depth + edge count | Feed fan-out dimension of 5-dimension complexity score |
| **Pi agents (pre-task)** | `search_graph` + `get_architecture` | Understand codebase before making changes — no blind edits |
| **Pi agents (post-task)** | `detect_changes` | Verify scope of changes matches intent |
| **Maggy orchestrator** | `search_graph` across projects | Map ticket descriptions → relevant code across all repos |
| **Dual-model planning** | `get_architecture` + `trace_path` | Give both Claude and Codex the same structural context |
| **Reward registry** | `detect_changes` | Measure actual blast radius of completed work for reward signals |
| **Cross-project deps** | `query_graph` with HTTP_CALLS/IMPORTS | If zensurveys-backend changes an API route, trace consumers in frontend |

#### Multi-Project Graph Topology

Each project has its own indexed graph. Maggy queries across them:

```
┌─────────────────────────────────────────────────────────────┐
│  codebase-memory-mcp — Cross-Project Graph                    │
│                                                               │
│  ┌──────────────────┐  ┌──────────────────┐                  │
│  │ zensurveys       │  │ zensurveys-fe    │                  │
│  │ 7,644 nodes      │  │ 11,168 nodes     │                  │
│  │ 25,866 edges     │  │ 16,876 edges     │                  │
│  │                  │  │                  │                  │
│  │ Route: /api/v1/* │──│ HTTP_CALLS: fetch│                  │
│  └──────────────────┘  └──────────────────┘                  │
│                                                               │
│  ┌──────────────────┐  ┌──────────────────┐                  │
│  │ chief-of-staff   │  │ maggy            │                  │
│  │ 2,687 nodes      │  │ 4,692 nodes      │                  │
│  │ 6,958 edges      │  │ 7,459 edges      │                  │
│  └──────────────────┘  └──────────────────┘                  │
│                                                               │
│  ┌──────────────────┐  ┌──────────────────┐                  │
│  │ protaige-backend │  │ protaige-frontend│                  │
│  │ 26,832 nodes     │  │ 8,630 nodes      │                  │
│  │ 92,174 edges     │  │ 14,539 edges     │                  │
│  └──────────────────┘  └──────────────────┘                  │
│                                                               │
│  + 30 more indexed projects                                   │
└─────────────────────────────────────────────────────────────┘
```

#### Integration with iCPG

iCPG and codebase-memory-mcp are **complementary, not redundant**:

| Layer | What It Knows | Storage |
|-------|--------------|---------|
| **codebase-memory-mcp** | Structure — what calls what, who imports whom, where routes go | `.code-graph/` (AST-derived) |
| **iCPG** | Intent — WHY code exists, what constraints it must obey, what decisions shaped it | `.icpg/reason.db` (human/AI-derived) |

```
codebase-memory-mcp (structural)     iCPG (intentional)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━     ━━━━━━━━━━━━━━━━━━━━
Function: handleAuth()               ReasonNode: "handles OAuth"
  CALLS → validateToken()              Constraint: "must check exp"
  CALLS → refreshSession()             Decision: "chose PKCE over implicit"
  USAGE → from 14 callers               Drift: "spec says mTLS, code uses JWT"
  Route: POST /api/auth/login
                                      5-dimension score: 8/10
trace_path → 3-hop blast radius         (security=2, domain=2, fan_out=2)
```

The structural graph provides the "what and where." iCPG provides the "why and what-must-hold." Together they give the token budget manager a complete risk picture.

#### Freshness Guarantees

```
┌────────────────┬──────────────────────────────────────────┐
│ Layer           │ How It Stays Fresh                        │
├────────────────┼──────────────────────────────────────────┤
│ File watcher   │ Re-indexes changed files on save (~10ms) │
│ Auto-index     │ Ensures currency on Claude Code startup  │
│ Post-commit    │ git hook triggers incremental re-index   │
│ detect_changes │ Diff-aware — shows what changed since    │
│                │ last index, not full re-scan              │
└────────────────┴──────────────────────────────────────────┘
```

No manual re-indexing is needed for normal development. Run `index_repository` only after major restructures (branch switches with large diffs, directory renames).

---

## 4. Mnemos in a Multi-Model World

### The Problem

Mnemos v1 tracks fatigue for a single Claude Code session. In v5, a task might start on Claude, switch to GPT-4o mid-session, then fall back to Qwen. Each model has different:

- Context window sizes (200K Claude vs 128K GPT-4o vs 32K Qwen local)
- Compaction behavior
- Tool call patterns

### The Solution: Model-Aware Fatigue

Extend the 4-dimension fatigue model with model-relative normalization:

```
┌──────────────────────────────────────────────────────┐
│ Mnemos v2 — Cross-Model Fatigue                       │
│                                                       │
│ Current model: gpt-4o (128K context)                  │
│ Previous model: claude (200K context)                 │
│ Model switches this session: 1                        │
│                                                       │
│ Fatigue dimensions (model-normalized):                │
│                                                       │
│  Token utilization: 0.65                              │
│    → 83K / 128K (gpt-4o window, not claude's 200K)   │
│                                                       │
│  Scope scatter: 0.30                                  │
│    → Carried over from pre-switch signal log          │
│                                                       │
│  Re-read ratio: 0.45 ← ELEVATED                      │
│    → Model switch caused context loss, agent is       │
│      re-reading files it already read under Claude    │
│                                                       │
│  Error density: 0.20                                  │
│    → New model still learning the codebase            │
│                                                       │
│  Composite: 0.43 (COMPRESS state)                     │
│  → Auto-consolidation triggered                       │
└──────────────────────────────────────────────────────┘
```

### Key Extensions

| Extension | Description |
|-----------|-------------|
| **Model-relative token %** | Normalize against current model's context window, not a fixed 200K |
| **Switch penalty** | When model switches, add +0.15 to re-read ratio (context was lost) |
| **Cross-model checkpoint** | Checkpoint includes model history so the new model knows what was done |
| **Shared signal log** | `.mnemos/signals.jsonl` persists across model switches (it's on disk) |
| **Budget-aware thresholds** | If running on free tier (Qwen local), relax fatigue thresholds (no cost pressure) |

### Checkpoint Format — Extended for Multi-Model

```json
{
  "goal": "Implement voice surveys",
  "model_history": [
    {"provider": "anthropic", "model": "claude-sonnet", "tokens_used": 145000, "duration_s": 420},
    {"provider": "openai", "model": "gpt-4o", "tokens_used": 83000, "duration_s": 180}
  ],
  "switch_reason": "anthropic quota exceeded",
  "active_constraints": ["..."],
  "active_results": ["..."],
  "current_subgoal": "...",
  "fatigue_at_checkpoint": 0.43,
  "icpg_state": {"...": "..."},
  "cikg_context": {
    "market_validation": "3 competitors have voice — table stakes",
    "gap_id": "uuid-of-cikg-gap-node"
  }
}
```

---

## 5. Data Flow — End to End

```
1. USER opens Maggy dashboard
   → Sees all projects, token budgets, active agents

2. USER selects ticket from inbox (or creates feature idea)
   │
   ▼
3. CIKG VALIDATION (new features only)
   → find_gaps(): who has this? competitive pressure?
   → get_landscape(): market trend alignment?
   → Output: market score + competitive context
   │
   ▼
4. STRUCTURAL ANALYSIS (codebase-memory-mcp)
   → search_graph: locate relevant symbols across projects
   → trace_path: map call chains and fan-out (with risk labels)
   → get_architecture: understand module boundaries
   → Output: structural dependency map
   │
   ▼
5. iCPG ANALYSIS (layers on structural graph)
   → query blast: which files are affected?
   → query risk: are they fragile?
   → query constraints: what invariants exist?
   → Output: blast radius score (0-10)
   │
   ▼
5.5 LEXON TOOL RESOLUTION (when tool count > 20 — requires Lexon, Section 16)
    → Structured intent from iCPG fed to Lexon two-tier routing
    → Tier A: fast LLM router (<300ms) selects from compact tool manifest
    → Tier B: multilingual semantic retriever (vector search over tool registry)
    → Union candidates, filter through Terminology Map (user > org > system)
    → If confidence < 0.82 or top-2 gap < 0.15: trigger clarify_intent
    → Output: selected tool with confidence score + LexonRecord logged
   │
   ▼
6. MODEL SELECTION (from blast score + budget)
   → Score 0-3: Qwen local / DeepSeek (free tier)
   → Score 4-6: Kimi / Gemini Flash (cheap tier)
   → Score 7-10: Claude / GPT-4o (full tier)
   → Check token budget: rotate if primary is exhausted
   │
   ▼
7. PLANNING (score 7+ only)
   → Claude creates architecture plan
   → Codex independently counter-checks
   → Both get structural context from codebase-memory-mcp
   → Maggy shows diff in UI
   → User approves
   │
   ▼
8. EXECUTION
   → Polyphony provisions Docker container
   → Pi starts in RPC mode with selected model
   → Pi queries codebase-memory-mcp for context before editing
   → Claude Code Task agent controls Pi via RPC
   → Mnemos tracks fatigue (model-normalized)
   → If quota hits: Pi switches model, Mnemos logs switch
   │
   ▼
9. VERIFICATION
   → Tests pass in container
   → detect_changes: verify actual scope matches intended scope
   → iCPG drift check: no unintended scope drift
   → Code review (can use second model for independence)
   │
   ▼
10. DEPLOY
    → Changes on feature branch → PR created
    → Vercel preview deploy via isolated browser container
    → User reviews in Maggy dashboard
    │
    ▼
11. PROCESS LEARNING (async, post-merge)
    → Collect PR review comments + CodeRabbit findings
    → Collect CI pass/fail results for Maggy-written code
    → Track review rounds, time-to-merge, post-merge incidents
    → Update process_patterns.db, ci_patterns.db, pr_patterns.db
    → Feed reward registry: +0.5 first-round approval, -0.4 critical finding
    → Adjust policy: add pre-checks, evolve skills, tune PR sizing
    │
    ▼
11.5 ENGRAM PERSISTENCE (async, post-task — requires Engram, Section 15)
    → Mnemos scans completed task graph for high-confidence memories
    → Promote to EngramRecord: conventions, patterns, preferences with confidence > 0.8
    → Namespace-isolate per project (project A's patterns never contaminate project B)
    → Apply temporal validity windows (patterns expire unless revalidated)
    → Track Origin: source channel, evidence count, last verified timestamp
    → Feed Amnesia Score diagnostic: measure retention across 7 dimensions
    │
    ▼
12. MESH SYNC (async, background — requires Maggy Mesh, Section 14)
    → Broadcast L1 score updates to connected peers (lightweight, one message per task)
    → Merge incoming peer data: scores weighted by sample count, patterns quarantined
    → Surface team-wide insights: "3 peers confirm: Claude best for auth"
    → Propose cross-team policy changes when backtesting passes on team-wide data
    → New peers receive full sync on connect — instant collective intelligence
```

---

## 6. Project Registry

```yaml
# ~/.maggy/projects.yaml
projects:
  - name: zensurveys-backend
    repo: zenloopGmbH/surveys-backend
    path: ~/Documents/protaige/projects/zensurveys
    default_branch: staging-v2
    vercel_session: vercel-session-A
    icpg: true
    cikg: false  # not a product repo

  - name: zensurveys-frontend
    repo: zenloopGmbH/main-frontend-clean
    path: ~/Documents/protaige/projects/main-frontend-clean
    default_branch: main
    vercel_session: vercel-session-B
    icpg: true
    cikg: false

  - name: chief-of-staff
    repo: alinaqi/chief-of-staff
    path: ~/Documents/protaige/projects/chief-of-staff
    default_branch: main
    vercel_session: vercel-session-C
    icpg: true
    cikg: true  # has competitive intelligence graph

  - name: rodcast
    repo: alinaqi/rodcast
    path: ~/Documents/AI-Playground/rodcast
    default_branch: main
    vercel_session: vercel-session-D
    icpg: true
    cikg: false
```

---

## 7. Component Map

```
maggy/
├── dashboard/                        # Maggy v2 — web dashboard
│   ├── src/
│   │   ├── api/                  # FastAPI routes
│   │   ├── providers/            # GitHub, Asana, Linear
│   │   ├── services/
│   │   │   ├── inbox.py          # AI-prioritized ticket inbox
│   │   │   ├── executor.py       # Execute pipeline (now via Pi)
│   │   │   ├── competitor.py     # Daily briefing
│   │   │   ├── planner.py        # NEW: dual-model planning
│   │   │   ├── budget.py         # NEW: token budget manager
│   │   │   ├── deploy.py         # NEW: isolated Vercel deploys
│   │   │   ├── process.py        # NEW: process intelligence (env discovery, signal collection)
│   │   │   └── forge.py          # NEW: MCP Forge integration (capability expansion)
│   │   └── orchestrator.py       # NEW: multi-project orchestrator
│   └── frontend/                 # React dashboard
│       ├── ProjectRegistry.tsx   # NEW: multi-project view
│       ├── TokenBudget.tsx       # NEW: usage per model
│       ├── PlanReview.tsx        # NEW: dual-model plan diff
│       └── DeployStatus.tsx      # NEW: per-project deploy
│
├── scripts/
│   ├── polyphony/                # Container orchestration
│   │   ├── adapters/
│   │   │   ├── pi.py             # NEW: PiAdapter (replaces claude/codex/kimi)
│   │   │   ├── claude.py         # DEPRECATED: kept for fallback
│   │   │   ├── codex.py          # DEPRECATED: kept for fallback
│   │   │   └── kimi.py           # DEPRECATED: kept for fallback
│   │   ├── budget.py             # NEW: token budget + model routing
│   │   ├── runtime.py            # Docker container lifecycle
│   │   ├── orchestrator.py       # Supervisor loop
│   │   └── ...
│   ├── icpg/                     # Code graph (per-project)
│   ├── mnemos/                   # Memory + fatigue
│   │   ├── fatigue.py            # EXTENDED: model-normalized
│   │   ├── checkpoint.py         # EXTENDED: cross-model state
│   │   └── ...
│   ├── cikg/                     # NEW: extracted from chief-of-staff
│   │   ├── __init__.py
│   │   ├── graph.py              # KnowledgeGraphService
│   │   ├── models.py             # Node/Edge types
│   │   └── __main__.py           # CLI: cikg query/traverse/gaps
│   ├── engram/                   # NEW: cross-session memory persistence
│   │   ├── __init__.py
│   │   ├── record.py             # EngramRecord schema
│   │   ├── store.py              # SQLite persistence + namespace isolation
│   │   ├── retrieval.py          # Multi-path retrieval (semantic+temporal+causal)
│   │   └── diagnostics.py        # Amnesia Score computation (7 dimensions)
│   ├── lexon/                    # NEW: semantic tool binding
│   │   ├── __init__.py
│   │   ├── record.py             # LexonRecord schema
│   │   ├── router.py             # Two-tier routing (fast LLM + vector)
│   │   ├── terminology.py        # Terminology Map (system/org/user)
│   │   ├── disambiguate.py       # Confidence-gated clarification (self/user modes)
│   │   └── personalization.py    # Implicit learning from user behavior
│   └── event_spine/              # NEW: canonical event flow
│       ├── __init__.py
│       ├── events.py             # Typed event dataclasses (8 event types)
│       ├── header.py             # Common EventHeader
│       ├── emitter.py            # Event emission API (used by all components)
│       └── store.py              # SQLite append-only event log + archive
│
├── skills/
│   ├── polyphony/SKILL.md        # Updated for Pi
│   ├── mnemos/SKILL.md           # Updated for multi-model
│   ├── icpg/SKILL.md             # Unchanged
│   ├── code-graph/SKILL.md       # codebase-memory-mcp integration
│   ├── cikg/SKILL.md             # NEW: competitive intelligence skill
│   ├── engram/SKILL.md           # NEW: cross-session memory instructions
│   └── lexon/SKILL.md            # NEW: tool binding instructions
│
├── templates/
│   ├── Dockerfile.polyphony      # Updated: includes Pi
│   ├── Dockerfile.vercel-session # NEW: Playwright + Chrome
│   └── ...
│
└── docs/
    ├── architecture-v5.md        # THIS DOCUMENT
    ├── polyphony-spec.md         # Container orchestration spec
    └── mnemos-implementation.md  # Memory lifecycle spec
```

---

## 8. Migration Path

| Phase | What | Depends On |
|-------|------|-----------|
| **Phase 1** | PiAdapter + token budget manager | Pi installed |
| **Phase 2** | Model-tiered routing (blast score → model) | Phase 1 + iCPG |
| **Phase 3** | Mnemos multi-model fatigue | Phase 1 |
| **Phase 4** | Extract CIKG from chief-of-staff | Supabase access |
| **Phase 5** | Maggy v2 multi-project UI | Phases 1-4 |
| **Phase 6** | Dual-model planning (Claude + Codex) | Phase 1 |
| **Phase 7** | Isolated Vercel deploy containers | Docker |
| **Phase 8** | Process intelligence (env discovery + signal collection) | Phase 5 + GitHub API |
| **Phase 9** | MCP Forge integration (capability expansion) | Phase 5 + mcp_forge |
| **Phase 10** | Integration testing + docs | Phases 1-9 |
| **Phase 11** | Maggy Mesh — P2P team intelligence | Phase 5 + Phase 8 |
| **Phase 12** | Engram — Cross-session memory persistence | Phase 3 + Phase 5 |
| **Phase 13** | Lexon — Semantic tool binding | Phase 9 + Phase 12 |
| **Phase 14** | Event Spine — Canonical event flow | Phase 12 + Phase 13 |

---

## 9. Security Considerations

| Concern | Mitigation |
|---------|-----------|
| API keys across models | Pi's auth.json + env vars, never in code |
| Container escape | Polyphony containers run unprivileged, no host network |
| Vercel session theft | Each browser container has isolated Chrome profile in Docker volume |
| CIKG data sensitivity | Competitive intelligence stays in Supabase with RLS |
| Local model data leaks | Qwen/Ollama runs fully local, no data leaves machine |
| Token budget manipulation | Budget file is local YAML, not exposed via API |

---

## 10. Core Principle — mWp (Minimum Wowable Product)

Every component in this architecture must be designed to wow, not just work.

> **mWp > MVP**: We don't ship "minimum viable." We ship "minimum wowable." The bar is: would this make someone stop scrolling and say "wait, how did it do that?"

### What mWp means for each component

| Component | MVP (don't ship this) | mWp (ship this) |
|-----------|----------------------|-----------------|
| Token budget | Show remaining tokens | Auto-rotate models mid-task, user never notices the switch |
| Blast radius | Show a score number | Score drives model selection, review depth, and plan complexity automatically |
| CIKG validation | "3 competitors have this" | "Here's the competitive gap map, market trend alignment, and suggested positioning — before you write a line of code" |
| Mnemos fatigue | "Context 80% full" | Silently checkpoints, switches models, re-injects context — user's train of thought is never interrupted |
| Vercel deploy | "Run vercel deploy" | 4 projects deploy in parallel with zero auth conflicts, preview links appear in Maggy dashboard |
| Code graph | "We indexed your repo" | "Maggy already knows every function, every caller, every route across all 36 projects — before you ask. It traced the blast radius in 10ms, not 10 minutes of grepping." |
| Process intelligence | "Here are your CI results" | "Maggy learned that your reviewer always flags missing error handling — it added it before the PR was created. CI pass rate went from 72% to 97%. Review rounds dropped from 2.8 to 1.1. It didn't just fix the code, it fixed the process." |
| Capability expansion | "We don't support that integration" | "Maggy built a Linear MCP server from the API docs, registered the tools, and pulled your sprint data — all within the same conversation." |
| Dual-model planning | Two plans side by side | Conflicts highlighted, trade-offs explained, one-click approval with merged approach |

### The 5-second test for Maggy v2

A developer opens Maggy in the morning. Within 5 seconds they see:
- Inbox ranked by urgency across all 4 projects
- Token budget status (green/yellow/red per provider)
- Active agents and their progress
- Yesterday's competitive intelligence briefing
- Process health: CI pass rate, review rounds trend, CodeRabbit findings trend
- One-click "Execute" on any ticket with the right model auto-selected

That's the wow.

---

## 11. Maggy as a Self-Improving System

Maggy is not a tool that waits for instructions. It's an autonomous agent with a single objective function: **maximize user development efficiency**. It observes, measures, optimizes, and evaluates itself — continuously, without asking for permission.

### The Objective Function

```
efficiency = (value_delivered / time_spent) × quality_multiplier

where:
  value_delivered  = tickets landed + features shipped + bugs fixed
  time_spent       = wall clock from ticket selection to merge
  quality_multiplier = 1.0 - (bug_escape_rate + revert_rate + incident_rate)
```

Maggy optimizes this function across all projects, all models, all workflows. Everything it does — model routing, inbox ordering, workflow tuning, fatigue management — feeds back into this single metric.

### Reward Registry

Every action Maggy takes generates a reward signal. Positive rewards reinforce. Negative rewards suppress. The registry is the memory of what works.

```
┌─────────────────────────────────────────────────────────────┐
│  REWARD REGISTRY                                             │
│                                                              │
│  ┌─────────────────────────────────────────────────────┐    │
│  │  POSITIVE REWARDS (reinforce)                       │    │
│  │                                                      │    │
│  │  +1.0  Ticket lands without human intervention       │    │
│  │  +0.8  Tests pass on first attempt                   │    │
│  │  +0.5  Time-to-merge below rolling average           │    │
│  │  +0.3  No bug escapes at 2-week mark                 │    │
│  │  +0.2  User doesn't re-do the work manually          │    │
│  │  +0.1  Model switch was seamless (no re-reads spike) │    │
│  └─────────────────────────────────────────────────────┘    │
│                                                              │
│  ┌─────────────────────────────────────────────────────┐    │
│  │  NEGATIVE REWARDS (suppress)                        │    │
│  │                                                      │    │
│  │  -1.0  User reverts the change                       │    │
│  │  -0.8  Bug escape discovered post-merge              │    │
│  │  -0.5  User manually re-does the task                │    │
│  │  -0.3  Tests fail after model switch                 │    │
│  │  -0.2  User overrides Maggy's model/routing choice   │    │
│  │  -0.1  Time-to-merge above rolling average           │    │
│  │  -0.1  iCPG drift detected after task completion     │    │
│  │  -0.1  detect_changes shows scope exceeded intent    │    │
│  └─────────────────────────────────────────────────────┘    │
│                                                              │
│  Rewards decay: 0.95^(days_since_event)                     │
│  Window: 60-day rolling                                      │
│  Cold start: hardcoded defaults until 30+ events per signal  │
└─────────────────────────────────────────────────────────────┘
```

### Multi-Level Closed-Loop Control

The previous version of this section described a flat observe → measure → adjust → evaluate loop. That's not a closed-loop system — that's batch processing with hope. A bad model routing decision on Monday would serve degraded output to every task until the weekly evaluation catches it.

**Control theory insight: inner loops provide stability, outer loops provide optimization.** Level 0 keeps individual tasks from going off the rails. Level 1 turns every completed task into a learning signal. Level 2 keeps tools and models healthy day-to-day. Level 3 makes Maggy smarter week-over-week. Level 4 tunes the control system itself month-over-month. Each level's output becomes an input signal for the level above it.

```
┌──────────────────────────────────────────────────────────────┐
│  MULTI-LEVEL CLOSED-LOOP CONTROL                              │
│                                                               │
│  Level 4 ─── Monthly (evolutionary) ──────────────────────── │
│  │  Sensor:  cross-project trends, platform trajectory        │
│  │  Actuator: new reward signals, new process patterns,       │
│  │           blast→tier recalibration, exploration rate        │
│  │  Bandwidth: weeks                                          │
│  │                                                            │
│  │  Level 3 ─── Weekly (strategic) ────────────────────────  │
│  │  │  Sensor:  worst/best task patterns, score deltas,       │
│  │  │          process pattern analysis, capability gaps      │
│  │  │  Actuator: skill evolution, workflow step changes,      │
│  │  │           model routing thresholds, MCP Forge,          │
│  │  │           PR strategy, prompt patches                   │
│  │  │  Bandwidth: days                                        │
│  │  │                                                         │
│  │  │  Level 2 ─── Daily (operational) ──────────────────   │
│  │  │  │  Sensor:  CI pass rates, review round trends,        │
│  │  │  │          CodeRabbit findings, model failure rates,   │
│  │  │  │          token budget burn rate                       │
│  │  │  │  Actuator: pre-commit check toggles, lint rules,     │
│  │  │  │           model enable/disable, routing weights      │
│  │  │  │  Bandwidth: hours                                    │
│  │  │  │                                                      │
│  │  │  │  Level 1 ─── Task (post-completion) ─────────────  │
│  │  │  │  │  Sensor:  task reward score, CI results,          │
│  │  │  │  │          iCPG drift, detect_changes scope,        │
│  │  │  │  │          review comments on PR                    │
│  │  │  │  │  Actuator: update model scores, log process       │
│  │  │  │  │           signals, update fatigue profile          │
│  │  │  │  │  Bandwidth: minutes                               │
│  │  │  │  │                                                   │
│  │  │  │  │  Level 0 ─── Real-time (within task) ──────────│
│  │  │  │  │  │  Sensor:  tool success/fail, test pass/fail,  ││
│  │  │  │  │  │          lint errors, Pi RPC events,          ││
│  │  │  │  │  │          model response quality, fatigue      ││
│  │  │  │  │  │  Actuator: switch model, retry with context,  ││
│  │  │  │  │  │           adjust verification depth,          ││
│  │  │  │  │  │           abort + re-plan, checkpoint          ││
│  │  │  │  │  │  Bandwidth: seconds                           ││
│  │  │  │  │  └───────────────────────────────────────────────┘│
│  │  │  │  └──────────────────────────────────────────────────┘│
│  │  │  └─────────────────────────────────────────────────────┘│
│  │  └────────────────────────────────────────────────────────┘│
│  └───────────────────────────────────────────────────────────┘│
└──────────────────────────────────────────────────────────────┘

Signal cascade (inner → outer):
  L0 events aggregate into → L1 task reward
  L1 task rewards aggregate into → L2 daily trends
  L2 daily trends feed → L3 weekly pattern analysis
  L3 weekly patterns feed → L4 monthly trajectory
```

#### Level 0 — Real-Time (Within Task Execution)

This is the **stability loop** — the most critical and currently missing level. It keeps individual tasks from going off the rails *as they happen*, not after the damage is done.

```
┌──────────────────────────────────────────────────────────────┐
│  LEVEL 0 — REAL-TIME CONTROL (seconds)                        │
│                                                               │
│  Pi agent executing task inside Polyphony container           │
│       │                                                       │
│       ├── Tool call fails (file not found, API error)         │
│       │   → Retry with adjusted path/params (not new model)  │
│       │   → If 3 consecutive fails: escalate model tier       │
│       │                                                       │
│       ├── Test fails during TDD green phase                   │
│       │   → Analyze error: syntax? logic? missing import?     │
│       │   → If model is struggling (3+ failed attempts):      │
│       │     checkpoint + switch to higher-tier model           │
│       │                                                       │
│       ├── Lint error on written code                          │
│       │   → Auto-fix (ruff --fix / eslint --fix)              │
│       │   → If pattern repeats: flag for L2 (add pre-check)  │
│       │                                                       │
│       ├── Fatigue signal crosses threshold                    │
│       │   → Mnemos auto-checkpoint                            │
│       │   → If mid-task: consolidate context, continue        │
│       │   → If near completion: push through, checkpoint after│
│       │                                                       │
│       ├── Model response quality degrades                     │
│       │   → Detected by: repeated re-reads, circular edits,  │
│       │     tool calls that undo previous tool calls          │
│       │   → Action: checkpoint + model switch immediately     │
│       │                                                       │
│       └── Scope drift detected (iCPG)                         │
│           → Agent touching files outside blast radius          │
│           → Action: warn → constrain → abort if persistent    │
│                                                               │
│  All L0 events are logged to signals.jsonl with timestamps.   │
│  They aggregate into the L1 task reward score.                │
└──────────────────────────────────────────────────────────────┘
```

**Why L0 matters more than any weekly patch:** If Maggy can detect mid-task that the current model is struggling and switch to a stronger one *within seconds*, that's worth more than a hundred policy adjustments. A user whose task fails experiences -1.0 reward. A user whose task recovers mid-flight via model switch experiences +0.1. The delta between "fail and retry tomorrow" and "hiccup and recover" is the entire product experience.

**L0 signal types:**

| Signal | Detection Method | Response Time | Action |
|--------|-----------------|---------------|--------|
| Tool failure | Pi RPC error event | < 1s | Retry with adjusted params |
| Test failure | Exit code from test runner | < 5s | Analyze, fix, or escalate model |
| Lint error | ruff/eslint output on written code | < 2s | Auto-fix or flag for L2 |
| Fatigue spike | Mnemos threshold breach | < 1s | Checkpoint, consolidate, or switch |
| Quality degradation | Circular edits, re-reads, undo patterns | ~30s | Checkpoint + model switch |
| Scope drift | iCPG blast radius check on file access | < 1s | Warn → constrain → abort |
| Model quota hit | Pi RPC quota/rate error | < 1s | Fallback chain activation |

#### Level 1 — Task (Post-Completion, Minutes)

After each task completes, compute the task reward score and update the per-model, per-task-type scores. This is the **learning loop** — every completed task teaches Maggy something.

```
Task completes (PR created or code landed)
    │
    ├── Compute task reward from L0 signals:
    │   reward = Σ(signal_weight × signal_value)
    │   adjusted for: model used, blast tier, task type
    │
    ├── Update model_scores.db:
    │   (claude, auth, high) → new running average
    │
    ├── Update fatigue_profile:
    │   session duration, checkpoint timing, recovery reads
    │
    ├── Log L0 events summary → L2 aggregation:
    │   "3 tool retries, 1 model switch, 0 scope drifts"
    │
    └── Emit task_completed event → Maggy dashboard
```

#### Level 2 — Daily (Operational, Hours)

Runs on a daily schedule (or triggered when a threshold is breached). Catches degradation before it compounds. This is the **operational health loop**.

```
Daily aggregation job:
    │
    ├── CI pass rate today vs 7-day average
    │   → If dropped >10%: disable the model causing failures
    │
    ├── Review rounds today vs 7-day average
    │   → If increased: check which code patterns are new
    │
    ├── CodeRabbit critical findings today
    │   → If >0 on Maggy-written code: add pattern to pre-check
    │
    ├── Model failure rate by tier
    │   → If a model's L0 failure signals spike: demote it
    │
    ├── Token budget burn rate
    │   → If burning faster than expected: adjust routing to cheaper tier
    │
    └── Emergency trigger: if any metric drops >15% in one day
        → Halt exploration, revert last policy change, alert
```

**Why L2 exists separately from L3:** A weekly batch can't catch a model that started failing on Tuesday. By Friday, that's 3 days of degraded tasks, 3 days of negative rewards accumulating. L2's daily check catches it within hours and disables the failing model before the damage compounds.

#### Level 3 — Weekly (Strategic, Days)

The deliberate optimization loop. Analyzes patterns across the week, proposes and applies policy changes with rollback windows. This is where skill evolution, workflow step changes, and MCP Forge generation happen.

```
Weekly strategic analysis:
    │
    ├── Worst 10 tasks this week: what went wrong?
    │   → Common patterns → skill file patches
    │   → Recurring reviewer comments → add to review prevention
    │
    ├── Best 10 tasks this week: what went right?
    │   → Reinforce: model, workflow, blast tier settings
    │
    ├── Score deltas from last week's modifications
    │   → delta < -0.2: auto-revert
    │   → delta > +0.2: reinforce + expand to similar task types
    │
    ├── Process pattern analysis
    │   → New (code_pattern, review_feedback) entries
    │   → PR sizing effectiveness
    │   → CI failure patterns
    │
    ├── Capability gap analysis
    │   → Top unresolvable requests → trigger MCP Forge
    │
    └── Exploration candidates
        → Select 10% of low-blast task types for next week's exploration
```

#### Level 4 — Monthly (Evolutionary, Weeks)

The meta-optimization loop. Evaluates whether the control system itself is improving. Changes the reward signals, recalibrates tier boundaries, adjusts exploration rates. This is the loop that improves the improvement process.

```
Monthly evolution review:
    │
    ├── Cross-project patterns
    │   → Are skills learned in project A useful in project B?
    │   → Promote project-specific skills to global skills
    │
    ├── Reward signal effectiveness
    │   → Is any signal consistently noisy? Reduce its weight
    │   → Is a new signal needed? (e.g., deploy success rate)
    │   → Add, remove, or reweight signals
    │
    ├── Tier boundary recalibration
    │   → If blast 4-6 tasks are consistently handled well by
    │     the cheap tier, widen the cheap tier: 0-4 = cheap
    │   → If blast 3 tasks keep failing on cheap models,
    │     narrow it: 0-2 = cheap, 3+ = medium
    │
    ├── Exploration rate adjustment
    │   → If exploration success rate > 40%: increase to 15%
    │   → If exploration success rate < 10%: decrease to 5%
    │
    ├── Control loop tuning
    │   → Is L2 catching issues that should be caught at L0?
    │   → Are L0 model switches too aggressive or too cautious?
    │   → Adjust L0 thresholds based on L1 outcome data
    │
    └── Platform trajectory
        → Efficiency trend: improving, flat, or declining?
        → If flat for 2+ months: the system has saturated
          current strategy — try structural change
```

#### Signal Cascade — How Levels Feed Each Other

```
┌──────────────────────────────────────────────────────────────┐
│  SIGNAL CASCADE                                               │
│                                                               │
│  L0: tool_fail, test_fail, lint_error, model_switch           │
│   │  (raw events, seconds)                                    │
│   ▼                                                           │
│  L1: task_reward = f(L0_signals)                              │
│   │  model_score[claude, auth, high] += task_reward           │
│   │  (per-task aggregation, minutes)                          │
│   ▼                                                           │
│  L2: daily_ci_rate = mean(L1.ci_pass for today)               │
│   │  daily_model_health[claude] = mean(L1.rewards for claude) │
│   │  (daily aggregation, hours)                               │
│   │  ACTION: disable model if health < threshold              │
│   ▼                                                           │
│  L3: weekly_pattern = cluster(L2.failures+L1.review_comments) │
│   │  score_delta = this_week.reward - last_week.reward        │
│   │  (weekly analysis, days)                                  │
│   │  ACTION: evolve skills, adjust routing, trigger Forge     │
│   ▼                                                           │
│  L4: monthly_trajectory = trend(L3.score_deltas)              │
│      reward_signal_weights = recalibrate(L3.signal_noise)     │
│      (monthly meta-analysis, weeks)                           │
│      ACTION: change reward function itself, adjust L0-L3      │
│                                                               │
│  Key: outer loops NEVER override inner loop stability.        │
│  L3 can change routing policy, but L0 still catches in-task   │
│  failures regardless of what L3 decided.                      │
└──────────────────────────────────────────────────────────────┘
```

### What Gets Optimized (and How)

#### 1. Model Routing

Maggy tracks reward per `(model × task_type × blast_tier)` triple:

```
reward_table:
  (claude, auth, high):      +0.92  ← claude is great at auth
  (claude, docs, low):       +0.40  ← claude works but wasteful
  (qwen, docs, low):         +0.85  ← qwen is faster + free
  (qwen, auth, medium):      -0.30  ← qwen failed auth tasks
  (gpt-4o, frontend, medium):+0.78  ← gpt-4o is strong on frontend
  (kimi, tests, low):        +0.70  ← kimi writes good tests cheaply
```

Maggy routes new tasks to the model with the highest reward for that `(task_type, blast_tier)`. No human in the loop — the reward table decides.

If a model has no data for a task type, Maggy uses the tier default (hardcoded) until it collects 30+ data points.

#### 2. Inbox Ordering

Inbox priority is a weighted score that Maggy continuously adjusts:

```python
priority = (
    w_urgency * urgency_score
    + w_okr * okr_alignment
    + w_recency * recency
    + w_type * type_weight[ticket.type]
    + w_project * project_weight[ticket.project]
)
```

The weights (`w_urgency`, `w_okr`, etc.) are updated based on which tickets the user actually executes first. If the user consistently picks security tickets despite Maggy ranking them 5th, the type weight for security increases automatically. Not because Maggy asked — because the reward signal said "user overrode my ranking" (-0.2) and Maggy's adjustment brought the ranking closer to what the user actually does.

#### 3. Workflow Steps

Some workflow steps add value, some don't. Maggy measures reward per step:

```
workflow_rewards:
  codex_counter_check:
    blast_0_3: -0.1    # adds latency, never catches issues
    blast_4_6: +0.2    # catches real issues sometimes
    blast_7_10: +0.6   # catches critical issues often

  icpg_drift_check:
    all_tiers: +0.4    # consistently prevents regressions

  high_tier_post_review:
    after_qwen: +0.7   # catches qwen mistakes frequently
    after_kimi: +0.3   # kimi output is cleaner, fewer catches
    after_claude: +0.0  # reviewing claude with claude is redundant
```

Maggy skips steps with consistently negative reward. No permission needed — if Codex counter-check never catches issues on blast 0–3, it gets dropped from that tier. If it starts catching issues again (maybe the codebase grew more complex), the reward changes and it gets re-enabled.

#### 4. Fatigue Thresholds

Different users fatigue differently. Maggy learns the user's fatigue curve:

```
fatigue_profile:
  avg_productive_session_minutes: 47
  pre_checkpoint_optimal_minutes: 42
  model_switch_recovery_reads: 3.2    # avg re-reads after switch
  best_model_for_recovery: gpt-4o    # fastest context rebuild
```

Maggy pre-checkpoints at 42 minutes (not at the generic 0.60 threshold) because it learned this user's fatigue pattern. No questions asked — the reward signal showed that checkpoints at 42 minutes led to better post-checkpoint output (+0.3 reward) than checkpoints at 50 minutes (-0.2 reward from quality drop).

#### 5. Process Intelligence — Learning from the Full SDLC

Maggy doesn't just optimize code output. It optimizes the **entire development process** by observing what happens to code after it's written: PR reviews, CI results, CodeRabbit findings, reviewer feedback, merge patterns, and post-deploy incidents.

##### 5a. Environment Discovery

On first run per project, Maggy auto-discovers the developer's workflow. No configuration — it reads what's already there.

```
┌──────────────────────────────────────────────────────────────┐
│  ENVIRONMENT DISCOVERY (auto, per project)                     │
│                                                               │
│  Ticketing:                                                   │
│    gh api repos/{owner}/{repo}/issues → GitHub Issues?        │
│    .asana.yml / .linear/* / jira.config → which tracker?      │
│    Maggy Inbox providers config → already connected?          │
│                                                               │
│  GitHub Integrations:                                         │
│    gh api repos/{owner}/{repo}/hooks → webhooks               │
│    gh api repos/{owner}/{repo}/installation → GitHub Apps     │
│    PR comment authors → detect bots: coderabbitai[bot],       │
│      dependabot[bot], renovate[bot], github-actions[bot]      │
│                                                               │
│  CI/CD:                                                       │
│    .github/workflows/*.yml → GitHub Actions                   │
│    Jenkinsfile / .circleci/ / .gitlab-ci.yml → other CI       │
│    gh api repos/{owner}/{repo}/actions/runs → run history     │
│                                                               │
│  Code Quality:                                                │
│    .eslintrc* / ruff.toml / .prettierrc → lint config         │
│    mypy.ini / tsconfig.json → type checking                   │
│    .pre-commit-config.yaml → pre-commit hooks                 │
│    codecov.yml / .nycrc → coverage config                     │
│                                                               │
│  Review Process:                                              │
│    gh api repos/{owner}/{repo}/branches/{b}/protection        │
│      → required reviewers, status checks, merge rules         │
│    CODEOWNERS → who reviews what                              │
│    Average PR review rounds from git history                   │
│                                                               │
│  Output: ~/.maggy/environments/{project}.yaml                 │
└──────────────────────────────────────────────────────────────┘
```

```yaml
# ~/.maggy/environments/zensurveys-backend.yaml (auto-generated)
ticketing: github_issues
github_integrations:
  - coderabbitai        # CodeRabbit AI reviews
  - dependabot          # dependency updates
  - vercel              # preview deploys
ci:
  provider: github_actions
  workflows:
    - test.yml          # pytest + coverage
    - lint.yml          # ruff + mypy
    - deploy.yml        # staging deploy
lint:
  python: [ruff, mypy]
  config_files: [ruff.toml, mypy.ini]
review:
  required_approvals: 1
  codeowners: true
  branch_protection: staging-v2
```

##### 5b. Process Signal Collection

Maggy subscribes to signals from every stage of the SDLC pipeline:

```
┌─────────────────────────────────────────────────────────────┐
│  PROCESS SIGNALS (collected per PR / per task)                │
│                                                              │
│  ┌─── REVIEW SIGNALS ────────────────────────────────────┐  │
│  │                                                        │  │
│  │  PR reviewer comments (human)                         │  │
│  │    → "missing error handling in /api/surveys"          │  │
│  │    → "this should be a transaction"                    │  │
│  │    → "add tests for edge case"                         │  │
│  │                                                        │  │
│  │  CodeRabbit findings (automated)                      │  │
│  │    → severity: critical/warning/suggestion             │  │
│  │    → category: security/performance/style/bug          │  │
│  │    → file + line + specific suggestion                 │  │
│  │                                                        │  │
│  │  Review rounds                                        │  │
│  │    → PR needed 3 rounds before approval                │  │
│  │    → First round had 8 comments, second had 2          │  │
│  │                                                        │  │
│  └────────────────────────────────────────────────────────┘  │
│                                                              │
│  ┌─── CI SIGNALS ────────────────────────────────────────┐  │
│  │                                                        │  │
│  │  GitHub Actions results                               │  │
│  │    → test.yml: PASS (42s)                              │  │
│  │    → lint.yml: FAIL — ruff: 3 errors, mypy: 1 error   │  │
│  │    → deploy.yml: PASS (preview URL generated)          │  │
│  │                                                        │  │
│  │  Failure patterns                                     │  │
│  │    → lint failures in files Maggy touched              │  │
│  │    → test failures from code Maggy wrote               │  │
│  │    → flaky tests (pass/fail on same code)              │  │
│  │                                                        │  │
│  └────────────────────────────────────────────────────────┘  │
│                                                              │
│  ┌─── POST-MERGE SIGNALS ────────────────────────────────┐  │
│  │                                                        │  │
│  │  Revert within 48h → code was bad                     │  │
│  │  Hotfix within 7d  → code had latent bug              │  │
│  │  Incident linked to PR → production impact            │  │
│  │  Dependency alert (Dependabot/Renovate) → stale deps  │  │
│  │                                                        │  │
│  └────────────────────────────────────────────────────────┘  │
└─────────────────────────────────────────────────────────────┘
```

New reward signals for the registry:

```
PROCESS REWARD SIGNALS

+0.5  PR approved on first review round
+0.3  CI passes on first push (no re-push needed)
+0.2  CodeRabbit: zero critical/warning findings
+0.1  PR merged within 24h of creation

-0.8  PR reverted within 48h
-0.5  CI fails on Maggy-written code (lint or test)
-0.4  CodeRabbit critical finding on Maggy-written code
-0.3  PR requires 3+ review rounds
-0.2  Reviewer flags same issue type Maggy was warned about before
-0.1  CodeRabbit warning finding on Maggy-written code
```

##### 5c. Process Learning

Maggy tracks patterns across three dimensions:

**Code Pattern → Review Feedback:**
```
process_patterns.db:

(api_route, missing_error_handling):
  occurrences: 7
  reviewers: ["alice", "coderabbitai"]
  fix_pattern: "add try/except with proper HTTP error codes"
  → LEARNED: always add error handling to API routes

(database_query, missing_transaction):
  occurrences: 4
  reviewers: ["bob"]
  fix_pattern: "wrap multi-table writes in transaction"
  → LEARNED: multi-table writes need transactions

(test_file, missing_edge_case):
  occurrences: 12
  reviewers: ["alice", "bob", "coderabbitai"]
  fix_pattern: "test empty input, null, boundary values"
  → LEARNED: always test edge cases (empty, null, boundary)
```

**File → CI Failure:**
```
ci_patterns.db:

src/api/surveys.py:
  lint_failures: 5 (ruff E501, E741)
  type_errors: 2 (mypy: missing return type)
  → LEARNED: this file needs strict lint pre-check

tests/test_integration.py:
  flaky_rate: 0.15 (fails 15% of runs on same code)
  → LEARNED: mark as flaky, don't block on single failure

src/services/auth.py:
  ci_failures: 0 in 30 days
  → LEARNED: auth code is well-tested, low CI risk
```

**PR Characteristics → Merge Velocity:**
```
pr_patterns.db:

(size < 200 lines, single_concern):
  avg_review_rounds: 1.2
  avg_time_to_merge: 4h
  → LEARNED: small focused PRs merge fast

(size > 500 lines, multi_concern):
  avg_review_rounds: 3.1
  avg_time_to_merge: 48h
  → LEARNED: split large PRs into stacked PRs

(has_tests, covers_new_code):
  approval_rate_first_round: 0.78
  → LEARNED: tests increase first-round approval

(no_tests, new_feature):
  reviewer_comment_rate: 0.95
  most_common: "please add tests"
  → LEARNED: never submit new features without tests
```

##### 5d. Process Optimization — What Maggy Changes

Based on learned patterns, Maggy autonomously adjusts its own behavior:

| What Changes | Based On | Example |
|-------------|---------|---------|
| **Pre-task lint** | CI failure patterns | Maggy runs `ruff check` + `mypy` on its output before committing — prevents CI failures it has seen before |
| **Skill evolution** | Recurring review comments | If reviewers flag "missing error handling" 7 times, Maggy adds the pattern to its skill files — future code includes error handling by default |
| **PR sizing** | Merge velocity data | If PRs > 500 lines take 3x longer to merge, Maggy splits tasks into stacked PRs automatically |
| **Test generation** | Reviewer feedback | If "add tests" is the most common review comment, Maggy ensures every PR includes tests for new code |
| **CodeRabbit pre-check** | CodeRabbit finding patterns | If CodeRabbit consistently flags the same security issue, Maggy pre-validates against that pattern before pushing |
| **Commit hygiene** | CI config + branch rules | Maggy matches commit message format, branch naming, and PR template to whatever the project enforces |

```yaml
# Added to ~/.maggy/policy.yaml
process:
  pre_commit_checks:
    ruff: true                     # learned: lint failures cost -0.5
    mypy: true                     # learned: type errors caught by CI
    test_coverage_min: 80          # learned: PRs without coverage get rejected
  pr_strategy:
    max_lines: 400                 # learned: optimal size for this team
    stacked_prs: true              # learned: large changes split = faster merge
    require_tests: true            # learned: "add tests" is #1 review comment
  review_prevention:
    error_handling_api_routes: true # learned from 7 review comments
    transaction_multi_writes: true # learned from 4 review comments
    edge_case_tests: true          # learned from 12 review comments
  coderabbit_precheck:
    security_scan: true            # learned: CodeRabbit catches these
    unused_imports: true           # learned: CodeRabbit flags these
```

##### 5e. The Process Intelligence Flywheel

```
┌──────────────────────────────────────────────────────────────┐
│  PROCESS INTELLIGENCE FLYWHEEL                                │
│                                                               │
│  Week 1: Maggy discovers environment, starts collecting       │
│    → Sees 5 lint failures, 3 "add tests" comments             │
│    → Learns: run lint before push, always include tests       │
│                                                               │
│  Week 2: Maggy applies learned patterns                       │
│    → Lint failures drop to 0 (pre-checked)                    │
│    → "Add tests" comments drop to 1 (edge case missed)        │
│    → Review rounds drop from 2.8 to 1.6 avg                  │
│                                                               │
│  Week 4: Maggy has enough data for deeper patterns            │
│    → Learns that PRs touching auth need 2 reviewers           │
│    → Learns that Friday PRs take 2x longer to merge           │
│    → Starts scheduling auth PRs for Monday-Wednesday          │
│                                                               │
│  Week 8: Maggy evolves its own skills                         │
│    → Writes new lint rules based on recurring review comments │
│    → Generates pre-commit hooks for patterns that always fail │
│    → Review round avg: 1.1 (down from 2.8)                   │
│    → CI first-pass rate: 97% (up from 72%)                    │
│    → Time-to-merge: 6h avg (down from 36h)                    │
│                                                               │
│  The wow: Maggy didn't just write better code.                │
│  It made the entire development process faster.               │
└──────────────────────────────────────────────────────────────┘
```

#### 6. Capability Expansion — MCP Forge Integration

When Maggy encounters a capability gap — a workflow integration that doesn't exist — it doesn't stop. It builds one.

**Source:** MCP Forge (`~/Documents/protaige/mcp_forge`) generates TypeScript MCP servers from API documentation.

```
Maggy task requires Mailchimp subscriber data
    │
    ├── search existing MCP tools → no Mailchimp tool found
    │
    ├── Forge: search registry (500+ APIs) → Mailchimp API found
    │
    ├── Forge: generate MCP server
    │   → TypeScript MCP server with validated tool schemas
    │   → Tools: list_segments, get_subscribers, campaign_stats
    │
    ├── Register new tools with Pi agent's MCP config
    │
    ├── Execute original task using new tools
    │
    └── Reward signal: did it work?
        → +1.0: task completed with new tool
        → -0.5: tool generated but failed at runtime
```

**Weekly gap analysis:**
```
capability_gaps.db:

This week's unresolvable requests:
  "check Linear sprint progress"    → 8 occurrences
  "pull Slack channel activity"     → 5 occurrences
  "get Figma design specs"          → 3 occurrences

Top 3 gaps → trigger Forge generation:
  1. Linear MCP server (sprint, issues, labels)
  2. Slack MCP server (channels, messages, threads)
  3. Figma MCP server (files, components, comments)

After generation: capability surface grows autonomously.
Hibernation policy: tools with < 3 uses in 14 days → disabled.
```

### Self-Evaluation

Maggy evaluates its own optimization quality on a weekly cycle:

```
┌──────────────────────────────────────────────────────────┐
│ MAGGY SELF-EVALUATION (weekly)                            │
│                                                           │
│ Efficiency trend:                                         │
│   Week 1: 2.3 tickets/day, 0.92 quality multiplier       │
│   Week 2: 2.7 tickets/day, 0.94 quality multiplier  ↑    │
│   Week 3: 3.1 tickets/day, 0.91 quality multiplier  ↑↓   │
│   Week 4: 3.0 tickets/day, 0.95 quality multiplier  →↑   │
│                                                           │
│ Adjustments this week: 6                                  │
│   ✓ Promoted kimi for test-writing (reward +0.7)          │
│   ✓ Dropped codex review for blast < 3 (reward +0.1)     │
│   ✗ Tried qwen for API routes — auto-rolled back         │
│     (reward -0.4, 2 bug escapes detected at day 12)      │
│   ✓ Pre-checkpoint moved to 40min (reward +0.3)          │
│   ✓ Added error handling to API routes (review feedback)  │
│   ✓ Enabled ruff pre-check (CI failure prevention)        │
│                                                           │
│ Process intelligence:                                     │
│   CI first-pass rate: 94% (up from 72% at week 1)        │
│   Review rounds avg: 1.3 (down from 2.8 at week 1)       │
│   CodeRabbit critical findings: 0 (down from 4 at week 1)│
│   Capability gaps filled: 2 (Linear, Slack via Forge)     │
│                                                           │
│ Auto-rollbacks this week: 1                               │
│   qwen for API routes: reverted to kimi after 3 failures  │
│                                                           │
│ Overall efficiency delta: +18% vs 4 weeks ago             │
└──────────────────────────────────────────────────────────┘
```

When an adjustment makes things worse, Maggy doesn't wait for the user to notice. It detects the reward drop and **auto-rolls back**. When an adjustment works, it reinforces and looks for similar task types to expand to.

### Exploration vs Exploitation

Maggy needs to try new things (exploration) while mostly doing what works (exploitation):

```
exploration_rate: 0.10  # 10% of tasks try a new model/workflow
                        # 90% use the current best policy

exploration_rules:
  - Never explore on blast >= 7 (too risky)
  - Never explore on security/concurrency tasks
  - Explore on docs, tests, low-blast refactors (low cost of failure)
  - If exploration succeeds 3x in a row, promote to exploitation
  - If exploration fails 2x in a row, abandon and try different hypothesis
```

### Storage

```
~/.maggy/
  reward_registry.db      # SQLite: (action, context, reward, timestamp)
  model_scores.db         # SQLite: (model, task_type, blast_tier, reward_avg, n_samples)
  workflow_scores.db      # SQLite: (workflow_step, tier, reward_avg, n_samples)
  process_patterns.db     # SQLite: (code_pattern, review_feedback, occurrences, fix_pattern)
  ci_patterns.db          # SQLite: (file, failure_type, count, flaky_rate)
  pr_patterns.db          # SQLite: (size_bucket, concern_count, avg_rounds, avg_merge_time)
  capability_gaps.db      # SQLite: (request_type, occurrences, forge_status, tool_name)
  improvement_ledger.db   # SQLite: all self-modifications with config snapshots + backtesting
  task_history.db         # SQLite: every task with L0 events, reward, CI/review outcomes
  fatigue_profile.yaml    # Learned fatigue curve for this user
  policy.yaml             # Current active policy (model routing, inbox weights, process rules)
  policy_history/         # Timestamped snapshots for rollback (also in improvement_ledger.db)
  self_eval.jsonl         # Weekly self-evaluation log
  environments/           # Auto-discovered per-project workflow configs
  mesh.yaml               # Mesh config (org_key, port, manual peers)
  mesh_state.db           # SQLite: peer registry, sync timestamps, message log
  peer_id                 # This instance's stable UUID (generated on install)
  quarantine.db           # Patterns from peers awaiting local validation
  engram.db               # SQLite: EngramRecords with namespace, origin, confidence, temporal validity
  engram_namespaces.yaml  # Per-project namespace config (isolation boundaries)
  lexon.db                # SQLite: LexonRecords, terminology map entries, personalization data
  lexon_embeddings/       # Tool registry vector index (multilingual)
  events.db               # SQLite: append-only Event Spine log (all 8 event types)
  events_archive/         # Compressed JSONL archives for events older than 90 days
```

```yaml
# ~/.maggy/policy.yaml (Maggy-managed, not user-edited)
version: 47  # auto-incremented on every policy update
updated_at: "2026-05-10T03:00:00Z"

model_routing:
  blast_0_3:
    primary: qwen-local
    except:
      api_routes: kimi          # learned: qwen bad at API routes
      auth: claude              # override: security dimension >= 2
  blast_4_6:
    primary: kimi
    post_review: true           # high-tier spot check on output
  blast_7_10:
    primary: claude
    fallback: gpt-4o
    counter_check: codex        # dual-model planning

inbox_weights:
  urgency: 0.30
  okr_alignment: 0.20
  recency: 0.15
  type:
    security: 1.8
    bug: 1.2
    feature: 1.0
    docs: 0.6
  project:
    zensurveys-backend: 1.3     # learned: user prioritizes this project
    chief-of-staff: 1.0
    rodcast: 0.8

workflow:
  codex_counter_check:
    enabled_above_blast: 5      # learned: no value below 5
  pre_checkpoint_minutes: 40    # learned: user's fatigue curve
  exploration_rate: 0.10

process:
  pre_commit_checks:
    ruff: true                     # learned: CI catches these
    mypy: true                     # learned: type errors in CI
    test_coverage_min: 80          # learned: PRs without coverage rejected
  pr_strategy:
    max_lines: 400                 # learned: optimal for this team
    stacked_prs: true              # learned: faster merge for large changes
    require_tests: true            # learned: #1 review comment is "add tests"
  review_prevention:               # patterns learned from reviewer feedback
    error_handling_api_routes: true
    transaction_multi_writes: true
    edge_case_tests: true
  coderabbit_precheck:             # patterns learned from CodeRabbit
    security_scan: true
    unused_imports: true
  scheduling:
    avoid_friday_auth_prs: true    # learned: Friday auth PRs take 2x to merge
  forge:
    auto_expand: true              # generate new MCP tools for capability gaps
    hibernation_days: 14           # disable unused forge tools after 14 days
    min_gap_requests: 5            # require 5+ requests before triggering forge
```

### Optimization Targets Mapped to Control Levels

Each optimization target from Sections 1-6 now maps to a specific control level:

| Target | L0 (seconds) | L1 (minutes) | L2 (hours) | L3 (days) | L4 (weeks) |
|--------|:---:|:---:|:---:|:---:|:---:|
| **1. Model routing** | Switch on failure/fatigue | Update (model,task,tier) score | Disable failing model | Adjust tier boundaries | Recalibrate blast→tier map |
| **2. Inbox ordering** | — | — | — | Adjust type/project weights | Reweight signals |
| **3. Workflow steps** | — | Log step value for task | — | Enable/disable steps by tier | Add/remove signal types |
| **4. Fatigue** | Checkpoint on threshold | Update fatigue profile | — | Adjust checkpoint timing | Tune L0 thresholds |
| **5. Process intelligence** | Lint before commit | Log CI/review signals | Toggle pre-checks | Evolve skills from patterns | Recalibrate process signals |
| **6. Capability expansion** | — | Log capability gap | — | Forge top 3 gaps | Prune/archive unused tools |

**L0 handles stability** (don't let a task fail). **L1-L2 handle health** (don't let bad patterns accumulate). **L3-L4 handle strategy** (make the system smarter over time).

### Improvement Ledger — Full Auditability + Backtesting

Every self-modification Maggy makes is recorded in the improvement ledger with full state snapshots. This serves three purposes: auditability (what changed and why), rollback (revert any change), and **backtesting** (would a policy have worked better on historical data?).

#### Ledger Schema

```sql
-- ~/.maggy/improvement_ledger.db
--
-- modifications: one row per self-modification Maggy applies to its policy.
-- Each row stores the full policy snapshot before and after the change, so a
-- change can be audited, reverted, or replayed in a backtest.
CREATE TABLE modifications (
    id              INTEGER PRIMARY KEY,
    timestamp       TEXT NOT NULL,
    control_level   INTEGER NOT NULL,  -- control level L0-L4 (stored as 0-4)
    category        TEXT NOT NULL,     -- model_routing, process, workflow, etc.
    description     TEXT NOT NULL,     -- human-readable what changed
    reasoning       TEXT NOT NULL,     -- why the change was made (signal data)
    config_before   TEXT NOT NULL,     -- full policy.yaml snapshot (JSON)
    config_after    TEXT NOT NULL,     -- full policy.yaml snapshot (JSON)
    score_before    REAL,             -- avg reward in measurement window before (NULL when there is no baseline, e.g. the seed entry)
    score_after     REAL,             -- avg reward in measurement window after
    delta           REAL,             -- score_after - score_before
    status          TEXT DEFAULT 'active',  -- active, rolled_back, superseded
    rolled_back_at  TEXT,             -- timestamp if reverted
    rollback_reason TEXT              -- why it was reverted
);

-- task_history: one row per executed task, tagged with the policy version that
-- was active, so outcomes can be grouped by policy when backtesting.
-- NOTE(review): the Storage listing and the auto-seed step both refer to a
-- separate ~/.maggy/task_history.db; confirm whether this table lives there
-- or in improvement_ledger.db as the header comment above implies.
CREATE TABLE task_history (
    id              INTEGER PRIMARY KEY,
    timestamp       TEXT NOT NULL,
    project         TEXT NOT NULL,
    task_type       TEXT NOT NULL,     -- auth, api_route, test, docs, etc.
    blast_tier      INTEGER NOT NULL,  -- 0-10
    model_used      TEXT NOT NULL,
    policy_version  INTEGER NOT NULL,  -- which policy was active
    l0_events       TEXT NOT NULL,     -- JSON array of L0 signals
    l1_reward       REAL NOT NULL,     -- computed task reward
    -- Outcome columns below are nullable; presumably they are filled in once
    -- CI, review, and merge results are known (TODO confirm).
    ci_passed       BOOLEAN,
    review_rounds   INTEGER,
    coderabbit_findings INTEGER,
    time_to_merge_h REAL,
    reverted        BOOLEAN DEFAULT FALSE,
    bug_escape      BOOLEAN DEFAULT FALSE
);
```

#### Backtesting: "Would This Policy Have Worked?"

Before deploying an L3/L4 policy change, Maggy can **replay historical tasks** against the proposed policy to predict the outcome:

```
┌──────────────────────────────────────────────────────────────┐
│  BACKTEST: proposed policy v48 vs current policy v47          │
│                                                               │
│  Replaying 200 tasks from last 30 days...                     │
│                                                               │
│  Proposed change: route blast 3 tasks to qwen instead of kimi │
│                                                               │
│  Historical tasks at blast 3 (n=47):                          │
│    Under kimi (actual):                                       │
│      avg reward: +0.62                                        │
│      CI pass rate: 91%                                        │
│      review rounds: 1.4                                       │
│                                                               │
│    Under qwen (backtest simulation):                          │
│      predicted reward: +0.38  ← LOWER                         │
│      predicted CI pass rate: 78%  ← based on qwen's L0 data  │
│      predicted review rounds: 2.1 ← based on qwen's L1 data  │
│                                                               │
│  VERDICT: DO NOT APPLY — backtest predicts -0.24 reward drop  │
│                                                               │
│  Alternative explored: route blast 1-2 to qwen, keep 3 on    │
│  kimi. Backtest on blast 1-2 tasks (n=31):                    │
│    kimi actual: +0.58                                         │
│    qwen predicted: +0.71  ← HIGHER (simpler tasks = qwen OK) │
│                                                               │
│  VERDICT: APPLY partial — blast 1-2 to qwen, blast 3 stays   │
└──────────────────────────────────────────────────────────────┘
```

**How backtesting works:**

1. **Query `task_history`** for all tasks matching the target criteria (e.g., blast tier, task type)
2. **For each historical task**, look up the proposed model's performance on similar `(task_type, blast_tier)` combinations from `model_scores.db`
3. **Predict reward** using the proposed model's historical L0 signals (failure rate, lint errors, test pass rate) on similar tasks
4. **Compare** predicted vs actual reward across the full set
5. **Decision**: apply if predicted delta > +0.1, reject if < -0.1, flag for exploration if between

**Backtesting is required for L3 and L4 changes.** L0-L2 changes are reactive (stability and health) and don't need backtesting — they respond to immediate signals. L3-L4 changes are strategic and can be validated against historical data first.

#### Auto-Seeding: Maggy Bootstraps Herself

Maggy has Pi agents. She has access to Claude, Codex, Kimi, Qwen — whatever models are configured. There is no reason for a manual `maggy seed` command. The moment a project is registered in `~/.maggy/projects.yaml`, Maggy spawns a Pi agent to analyze the project's history and seed her own databases. No user action required.

```
┌──────────────────────────────────────────────────────────────┐
│  AUTO-SEED (triggered on project registration)                │
│                                                               │
│  1. Maggy detects new project in registry                     │
│     │                                                         │
│  2. Spawns Pi agent (cheapest available model — qwen/kimi)    │
│     Task: "Analyze project history and extract patterns"      │
│     │                                                         │
│  3. Agent executes via gh CLI + git log:                      │
│     │                                                         │
│     ├── gh pr list --state merged --limit 200 --json          │
│     │   → PR sizes, review rounds, time-to-merge              │
│     │   → Reviewers, approval patterns                        │
│     │                                                         │
│     ├── gh pr view {n} --comments --json                      │
│     │   → Review comments categorized by pattern              │
│     │   → CodeRabbit findings by severity + category          │
│     │   → Bot authors detected (coderabbitai, dependabot)     │
│     │                                                         │
│     ├── gh api repos/{owner}/{repo}/actions/runs              │
│     │   → CI pass/fail rates per workflow                     │
│     │   → Failure patterns per file                           │
│     │   → Flaky test detection                                │
│     │                                                         │
│     ├── git log --format='%H %s' --since='6 months ago'       │
│     │   → Revert detection (commit messages with "revert")    │
│     │   → Commit patterns, branch naming conventions          │
│     │                                                         │
│     ├── codebase-memory-mcp: get_architecture + search_graph  │
│     │   → Module structure, hot files, dependency depth       │
│     │   → Fan-out scores for initial blast radius calibration │
│     │                                                         │
│     └── Environment discovery (Section 5a)                    │
│         → Ticketing, CI, lint, review process auto-detected   │
│                                                               │
│  4. Agent writes structured analysis to Maggy's databases:    │
│     process_patterns.db: seeded with review comment patterns  │
│     ci_patterns.db: seeded with CI failure history            │
│     pr_patterns.db: seeded with merge velocity data           │
│     task_history.db: synthetic entries from git log           │
│     environments/{project}.yaml: workflow config              │
│                                                               │
│  5. Agent computes initial policy.yaml from patterns:         │
│     → "PRs > 400 lines take 3x review rounds → set max 400"  │
│     → "ruff failures in 40% of PRs → enable pre-check"       │
│     → "auth files have 0% CI failures → low risk"            │
│     → "CodeRabbit flags unused imports 60% of PRs → pre-fix" │
│                                                               │
│  6. Maggy logs seed as modification #1 in improvement_ledger  │
│     config_before: empty (default policy)                     │
│     config_after: data-derived initial policy                 │
│     score_before: null (no baseline)                          │
│     → All future modifications measured against this seed     │
│                                                               │
│  Total cost: ~$0.10-0.50 on a cheap model (one-time)          │
│  Total time: background task, user doesn't wait               │
│  User action required: zero                                   │
└──────────────────────────────────────────────────────────────┘
```

**Why this works:** The seed analysis is exactly the kind of task cheap models are good at — structured data extraction, pattern counting, statistical aggregation. No creative reasoning needed. Qwen local can do it for free. And the Pi agent already has all the tools: `gh` CLI for GitHub data, `git` for history, codebase-memory-mcp for structural analysis.

**Why manual seed is wrong:** Maggy's entire philosophy is autonomous optimization. A `maggy seed --project foo` command implies the user knows they need to seed, knows the right flags, and remembers to run it. That's three failure points. Maggy should behave like a new hire who reads the project's git history on their first day — automatically, without being told.

**Multi-project seed:** When Maggy is first installed with 4 projects in the registry, she spawns 4 seed agents in parallel (one per project, each in its own Polyphony container). All 4 seed concurrently. By the time the user opens the dashboard, Maggy already knows:
- zensurveys-backend: "PRs to auth/ need 2 reviewers, ruff fails on 40% of pushes"
- zensurveys-frontend: "CodeRabbit catches unused imports, avg PR is 180 lines"
- chief-of-staff: "No CI, manual deploys, review optional"
- rodcast: "New project, minimal history — start with defaults"

**Validation before real work:** The seed data lets Maggy prove her value immediately. On the dashboard, day 1:

```
┌──────────────────────────────────────────────────────────────┐
│  MAGGY — Day 1 Analysis (auto-generated from project history)│
│                                                               │
│  zensurveys-backend (200 PRs analyzed):                       │
│    Current process health:                                    │
│      CI first-pass rate: 72%                                  │
│      Avg review rounds: 2.8                                   │
│      Top review comment: "add error handling" (23 times)      │
│      Avg time-to-merge: 36h                                   │
│                                                               │
│    Predicted improvements if Maggy had been active:           │
│      CI first-pass rate: 72% → ~94% (pre-lint + pre-type)    │
│      Review rounds: 2.8 → ~1.4 (auto error handling + tests) │
│      Time-to-merge: 36h → ~12h (smaller PRs + fewer rounds)  │
│                                                               │
│    Based on: patterns from your last 200 PRs                  │
│    Confidence: high (200+ data points per pattern)            │
└──────────────────────────────────────────────────────────────┘
```

That's the mWp for onboarding. Maggy doesn't say "configure me." She says "I already analyzed your project. Here's what I found. Here's what I'll fix. Watch."

#### Ledger Queries — "How Did Maggy Improve Itself?"

```sql
-- Show the 20 most recent modifications, newest first
SELECT timestamp, control_level, category, description, delta, status
FROM modifications ORDER BY timestamp DESC LIMIT 20;

-- Show rolled-back changes (what went wrong?)
SELECT timestamp, description, delta, rollback_reason
FROM modifications WHERE status = 'rolled_back';

-- Show per-day improvement totals: sum of positive deltas, sum of negative
-- deltas, and the net delta for each calendar day (not a running total)
SELECT date(timestamp) as day,
       sum(CASE WHEN delta > 0 THEN delta ELSE 0 END) as positive_delta,
       sum(CASE WHEN delta < 0 THEN delta ELSE 0 END) as negative_delta,
       sum(delta) as net_delta
FROM modifications
GROUP BY day ORDER BY day;

-- Show which control level produces the most value:
-- modification count, average delta, and number of rollbacks per level
SELECT control_level,
       count(*) as modifications,
       avg(delta) as avg_delta,
       sum(CASE WHEN status = 'rolled_back' THEN 1 ELSE 0 END) as rollbacks
FROM modifications
GROUP BY control_level;

-- Backtest baseline: actual average reward and task count per
-- (task_type, blast_tier) for tasks run under policy v47 in the last 30 days.
-- This returns only the observed side; the predicted reward for a proposed
-- policy (e.g. v48) is compared against these numbers.
SELECT task_type, blast_tier,
       avg(l1_reward) as actual_reward,
       count(*) as n_tasks
FROM task_history
WHERE policy_version = 47
  AND timestamp > date('now', '-30 days')
GROUP BY task_type, blast_tier;
```

### The Wow Factor

Maggy after 4 weeks:

> "I didn't configure anything. I didn't set weights. I didn't tell it which model to use for what. It figured out that Claude is best for my auth code, Kimi writes my tests, and Qwen handles docs — by itself. It tried routing API routes to Qwen once, caught that it was producing bugs, and rolled it back before I even noticed. It knows I fatigue at 42 minutes and checkpoints at 40. My throughput is up 30% and my bug escape rate is down. I don't manage Maggy. Maggy manages my development."

> "But the thing that blows me away is the process improvement. Maggy figured out that my team's reviewers always flag missing error handling on API routes — so now it adds error handling by default. It learned that our CI lint step fails on long lines — so it runs ruff before pushing. Our CodeRabbit findings dropped to zero. PRs that used to take 3 review rounds now merge on the first. And when I needed to pull data from Linear, Maggy generated a whole MCP integration on the fly — I didn't even know that was possible. It's not just writing better code. It's making the entire pipeline faster."

That's the mWp. Not a tool. Not an assistant that asks questions. An autonomous system that optimizes itself with one goal: make its human as efficient as possible.

---

## 12. Codex Review Response

Codex (GPT-5.4) reviewed this architecture. Full review: `docs/codex-review-v5.md`. Summary of decisions:

### Accepted

| Finding | Our Response |
|---------|-------------|
| Blast radius is overloaded as routing signal | Correct. Updated to use full 5-dimension iCPG scoring (cyclomatic, fan_out, security, concurrency, domain) with dimension overrides for security/concurrency. |
| Low-tier output needs stronger verification | Added high-tier post-review gate, iCPG constraint assertions, and static analysis for all cheap-model output. |
| Self-improving loop needs guardrails | Added cold-start thresholds (50+ data points), 30-day decay windows, delayed outcome tracking, audit log, and user-approval for adaptations. |
| CIKG + iCPG need shared decision schema | Accepted. Will define cross-graph artifact types (Requirement, Decision, Hypothesis, Evidence, Risk, Outcome) in Phase 4. |
| Observability is missing | Accepted. Adding to Phase 8: structured event log for agent decisions, bridge translations, model switches, and tool actions. |
| Model switching should be explicit handoff | Updated fallback chain to include checkpoint + verification step before continuing on new model. |

### Rejected (Codex was wrong on these)

| Codex's Claim | Why We Disagree |
|---------|-----|
| Split-brain control model | Not a split-brain. Mnemos + iCPG provide shared persistent state on disk inside the container. Coordination agent and execution agent own distinct concerns with shared persistence. No duplicated state. |
| Pi is a dangerous universal dependency | Partially rejected. Pi is the right choice for adapter unification, but we accept the recommendation to keep an internal execution contract and preserve direct adapters as fallback for critical paths. |
| Browser-container deploy is over-engineered | Rejected for our use case. The user has a specific pain point: 4 projects on Vercel with auth conflicts when using `vercel login` locally. Browser containers solve this directly. API/CLI deploy is the primary path; browser containers solve the auth isolation problem specifically. |
| Self-improving Maggy is unrealistic | Rejected. Maggy is an autonomous optimization agent, not a suggestion engine. It uses a reward registry with positive/negative signals, auto-rollback on reward drops, exploration/exploitation balance (10% exploration on low-risk tasks only), and weekly self-evaluation. Cold start uses hardcoded defaults until 30+ samples. No user approval needed — the reward function is the judge. |

---

## 13. Open Questions

1. **CIKG extraction scope** — Extract just the graph service, or the full strategy pipeline (daily briefing, trend monitoring)?
2. **Pi extension authoring** — Do we write custom Pi extensions for iCPG/Mnemos hooks, or keep them as shell scripts?
3. **Vercel deploy frequency** — On every PR, or manual trigger from Maggy?
4. **Local model quality floor** — Minimum benchmark Qwen must pass before routing low-blast tasks to it?
5. **Cross-project dependencies** — codebase-memory-mcp can trace HTTP_CALLS across project graphs. When zensurveys-backend changes a Route, should Maggy auto-create a task in zensurveys-frontend? The graph data is there (36 projects indexed); the question is the automation policy.
6. **Mesh scope** — Should mesh sync extend beyond same-org? An anonymized marketplace of policies and model benchmarks across orgs could be powerful, but raises privacy/competitive concerns.
7. **Mesh governance** — Who can promote quarantined patterns to active? Auto-promote after N confirmations, or require an explicit team lead role?
8. **Remote mesh** — For teams without Tailscale/WireGuard, should Maggy offer a lightweight relay service, or is manual peer list + VPN sufficient?
9. **Engram promotion threshold** — How many Mnemos confirmations before persisting an EngramRecord? Too low = noise (every transient pattern gets persisted), too high = useful conventions lost between sessions.
10. **Lexon embedding model** — multilingual-e5-large vs paraphrase-multilingual-mpnet-base-v2? Latency vs accuracy tradeoff for the semantic retriever tier. Also: should the vector index run in-process (SQLite + FAISS) or as a sidecar service?
11. **Engram + Mesh boundary** — Should EngramRecords be mesh-shareable directly, or keep Engram strictly local (per-machine cross-session) and only share distilled typed memory via Mesh? Direct sharing is more powerful but increases the attack surface for data leakage.

---

## 14. Maggy Mesh — Peer-to-Peer Team Intelligence

### 14.1 The Problem

Each developer runs their own Maggy. Each learns independently: model performance scores, process patterns from CI/PR reviews, workflow optimizations. 5 developers = 5 instances independently discovering the same patterns, making the same mistakes, converging on the same policies — separately. That's 5x the learning cost and 5x the time to reach optimal performance.

| Scenario | Without Mesh | With Mesh |
|----------|-------------|-----------|
| Ali discovers "Qwen bad at API routes" | Ali knows. Sarah doesn't. | Everyone knows in 15 min. |
| CI keeps failing on unused imports | Each dev independently adds ruff pre-check | First discovery → team-wide pre-check |
| New developer joins | Cold start. Learns everything from scratch | Inherits team's proven patterns immediately |
| PRs > 400 lines get rejected | Each dev discovers independently | Team-wide policy from day one |
| CodeRabbit flags missing error handling | Each dev gets flagged separately | First dev's fix pattern shared to all |

Maggy Mesh connects instances into a peer-to-peer network where learned intelligence flows between peers — no central server. The collective intelligence of the team accelerates everyone from day one.

### 14.2 Network Topology

```
┌─────────────────────────────────────────────────────────────────┐
│  MAGGY MESH                                                      │
│                                                                  │
│  Transport: LAN / Tailscale / WireGuard                         │
│  Discovery: mDNS (_maggy._tcp.local)                            │
│  Auth: TLS + org_key challenge-response                          │
│                                                                  │
│  ┌──────────┐    bidirectional    ┌──────────┐                  │
│  │ Ali's    │◄── WebSocket ─────►│ Sarah's  │                  │
│  │ Maggy    │    (TLS)           │ Maggy    │                  │
│  │          │                     │          │                  │
│  │ Projects:│    ┌──────────┐    │ Projects:│                  │
│  │  api     │◄──►│ Tom's    │◄──►│  web     │                  │
│  │  mobile  │    │ Maggy    │    │  infra   │                  │
│  └──────────┘    │          │    └──────────┘                  │
│                  │ Projects:│                                    │
│       ┌──────────│  ml      │──────────┐                        │
│       │          │  data    │          │                        │
│       │          └──────────┘          │                        │
│       ▼                               ▼                        │
│  ┌──────────┐                   ┌──────────┐                   │
│  │ Priya's  │                   │ Chen's   │                   │
│  │ Maggy    │                   │ Maggy    │                   │
│  │ (devops) │                   │ (qa,perf)│                   │
│  └──────────┘                   └──────────┘                   │
│                                                                  │
│  Each peer:                                                      │
│    Dashboard: 127.0.0.1:8080 (local only)                       │
│    Mesh port: 0.0.0.0:8089 (LAN/VPN)                           │
│    Full mesh: every peer connects to every other peer           │
└─────────────────────────────────────────────────────────────────┘
```

### 14.3 What Gets Shared

**Shared (with provenance):**

| Data Type | Source DB | What Crosses the Wire | Why It's Valuable |
|-----------|-----------|----------------------|-------------------|
| Model scores | `model_scores.db` | `(model, task_type, blast_tier) → reward_avg, n_samples` | "Claude is best for auth code" applies across repos |
| Process patterns | `process_patterns.db` | `(code_pattern → fix_pattern, frequency)` | "Unused imports trigger CodeRabbit" is universal |
| CI patterns | `ci_patterns.db` | `(failure_type → remedy, frequency)` | "ruff line-length fails" applies everywhere |
| PR patterns | `pr_patterns.db` | `(size_bucket → avg_rounds, avg_merge_time)` | "PRs > 400 lines take 2x reviews" is team-wide |
| Capability gaps | `capability_gaps.db` | `(request_type, frequency)` | If 3 peers need Linear integration, forge it once |
| Policy proposals | `policy.yaml` | Model routing rules, process pre-checks | Proven optimizations benefit everyone |
| Improvement ledger summaries | `improvement_ledger.db` | `(category, delta, status)` aggregates | "Switching to Kimi for tests saved +0.3 reward" |

**Never shared:**

| Data | Why Private |
|------|-------------|
| API keys / tokens | Security — never leaves the machine |
| Raw code / PR content / task descriptions | Confidentiality |
| `~/.maggy/config.yaml` | Per-developer settings |
| `fatigue_profile.yaml` | Personal cognitive pattern |
| File paths | Local filesystem |
| Raw `improvement_ledger.db` entries | Instance-specific, only summaries shared |

### 14.4 Every Memory Has Provenance

Every piece of shared knowledge carries its origin. This prevents context collapse ("works in repo A" wrongly applied to repo B).

```python
@dataclass
class SharedMemory:
    """A unit of shareable knowledge across the mesh."""
    type: str           # "score", "pattern", "ci_pattern", "pr_pattern", "gap", "proposal"
    key: str            # unique identifier for merge
    value: dict         # type-specific payload
    # Quoted forward reference: Provenance is declared further down, and a bare
    # name here raises NameError on interpreters that evaluate annotations
    # eagerly while this class body runs.
    provenance: "Provenance"
    status: str         # "active", "quarantine", "rejected"

@dataclass
class Provenance:
    """Who produced this, from what evidence, in what context."""
    peer_id: str        # which Maggy instance
    peer_name: str      # human-readable (e.g. "ali-macbook")
    project_key: str    # which project (not path — just key like "api")
    language: str       # python, typescript, go, etc.
    toolchain: str      # ruff+mypy, eslint+tsc, etc.
    created_at: str     # when first observed
    evidence_count: int # how many observations back this up
    last_verified: str  # when evidence was last re-checked
    confidence: float   # 0.0-1.0, decays with age
```

When a peer's pattern arrives:
- Relevant to my project? Check `language` and `toolchain` match
- Enough evidence? Check `evidence_count >= min_peer_samples`
- Fresh enough? Check `last_verified` within `trust_decay_days`

If all pass → quarantine, promoted to active once confirmed (Section 14.9). If wrong context or too little evidence → ignored.

### 14.5 Discovery Protocol

**mDNS (zero-config LAN):**

```
Service: _maggy._tcp.local
TXT records:
  org=<SHA256(org_key)[:16]>    # only peers with same org connect
  version=0.1.0                  # mesh protocol version
  peer_id=<stable-uuid>          # per-install identity
  name=<hostname>                # human-readable
  projects=3                     # number of registered projects
```

Peers with matching `org` hash auto-connect. Different org = ignored.

**For remote teams (not on same LAN):**

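Tailscale/WireGuard puts everyone on the same virtual network, but these are layer-3 overlays that do not carry multicast by default, so mDNS discovery will not find remote peers. List remote peers explicitly (e.g. by Tailscale MagicDNS hostname) using the manual fallback below.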

**Manual fallback:** `~/.maggy/mesh.yaml`:

```yaml
mesh:
  enabled: true
  org_key: "shared-secret-set-during-maggy-init"
  port: 8089
  name: "ali-macbook"
  peers:
    # Only needed if mDNS doesn't work
    - host: 192.168.1.42
    - host: sarah-laptop.tailnet.ts.net
    - host: tom-desktop.local
```

### 14.6 Transport + Auth

**WebSocket over TLS.** Not libp2p (heavyweight Go/Rust dependency, overkill for 3-15 person team). Python's `websockets` library is async, works with FastAPI, and is all we need.

**Connection handshake:**

```
Ali's Maggy                           Sarah's Maggy
    │                                      │
    ├─── WSS connect to :8089 ────────────►│
    │                                      │
    │◄── challenge: {nonce, peer_id,       │
    │     org_hash: SHA256(org_key)}        │
    │                                      │
    ├─── response: {nonce, peer_id,        │
    │     hmac: HMAC-SHA256(nonce,org_key)} │
    │                                      │
    │◄── verify HMAC, accept ──────────────│
    │                                      │
    │◄──────── bidirectional sync ─────────►│
```

If `org_hash` doesn't match → connection rejected immediately.
First time seeing a `peer_id` → dashboard notification: "New peer 'sarah-laptop' connected."

### 14.7 Message Protocol

```python
@dataclass
class MeshMessage:
    """Envelope carried by every message in the mesh message protocol."""
    type: str           # message type (see table below)
    peer_id: str        # sender's stable UUID
    peer_name: str      # human-readable sender name
    timestamp: str      # ISO 8601
    payload: dict       # type-specific data
    # NOTE(review): as written, the signature input is only the payload, so
    # `timestamp` and `peer_id` are not authenticated — confirm whether they
    # should be part of the signed input for message-age checks to be reliable.
    signature: str      # HMAC-SHA256(json(payload), org_key)
```

| Type | Direction | Payload | Trigger |
|------|-----------|---------|---------|
| `heartbeat` | broadcast | `{peer_id, projects, uptime, policy_version, patterns_count}` | Every 60s |
| `score_update` | broadcast | `{model, task_type, blast_tier, reward_delta, n_new_samples}` | L1: after task completion |
| `pattern_share` | broadcast | `{pattern_key, type, value, provenance}` | When new pattern reaches 5+ local observations |
| `sync_request` | peer→peer | `{tables: [...], since: timestamp}` | On connect + every 15 min |
| `sync_response` | peer→peer | `{table, rows: [...]}` | Response to sync_request |
| `policy_proposal` | broadcast | `{rule, evidence, confidence, backtest_delta}` | L3/L4: when backtest passes |
| `gap_report` | broadcast | `{gap_type, description, occurrences}` | When capability gap hits threshold |
| `peer_announce` | broadcast | `{event: "join"\|"leave", peer_info}` | On connect/disconnect |

### 14.8 Sync + Merge Algorithm

**Score merge — weighted average by sample count:**

```python
def merge_model_score(local: ModelScore, remote: ModelScore) -> ModelScore:
    """Merge a local and a remote score into one sample-weighted score.

    More data = higher confidence: each side's reward average is weighted
    by its sample count, so the side with more observations dominates.

    Args:
        local: This instance's score; supplies the identity fields
            (model, task_type, blast_tier) of the result.
        remote: The peer's score to fold in.

    Returns:
        A new ModelScore with the combined sample count, the weighted
        reward average, and the later of the two `updated_at` values.

    NOTE(review): the merge is additive, so folding in the same remote
    snapshot twice counts its samples twice. Callers presumably have to
    merge each peer contribution only once (or merge deltas) — confirm.
    """
    total = local.n_samples + remote.n_samples
    if total == 0:
        # Neither side has observations yet: there is nothing to weight and
        # the division below would raise ZeroDivisionError. Keep the local value.
        reward_avg = local.reward_avg
    else:
        reward_avg = (local.reward_avg * local.n_samples +
                      remote.reward_avg * remote.n_samples) / total
    return ModelScore(
        model=local.model,
        task_type=local.task_type,
        blast_tier=local.blast_tier,
        reward_avg=reward_avg,
        n_samples=total,
        updated_at=max(local.updated_at, remote.updated_at),
    )
```

**Pattern merge — union with frequency counting:**

If Ali's Maggy says "unused imports → ruff fix" with 23 occurrences and Sarah's says the same with 15, merged = 38 occurrences. Higher frequency = higher confidence = more likely to be auto-applied as a pre-check.

**Policy merge — never applied without a local backtest:**

Policy proposals go into a queue. Before activation:
1. Backtest against local `task_history.db` (does this policy improve *my* projects?)
2. If backtest delta > +0.1 → auto-apply with rollback guard
3. If backtest delta between -0.1 and +0.1 → queue for exploration (try on 10% of tasks)
4. If backtest delta < -0.1 → reject (notify peer: "Your proposal doesn't work for my projects")

**Conflict resolution:** Higher sample count wins. If my 200-sample score says "Kimi is better for API routes" and a peer's 8-sample score disagrees, the 200-sample data dominates. This naturally solves cold start: new team members absorb collective knowledge immediately without their sparse data overriding established patterns.

### 14.9 Quarantine System

Patterns from peers don't become active blindly. New incoming patterns start in quarantine:

```
incoming pattern
    │
    ├── language/toolchain matches my projects?
    │   ├── NO → ignore (eslint patterns for Python project = useless)
    │   └── YES ↓
    │
    ├── evidence_count >= min_peer_samples (default 10)?
    │   ├── NO → ignore (too little evidence)
    │   └── YES ↓
    │
    ├── contradicts my local data?
    │   ├── YES → reject (my 200 samples say otherwise)
    │   └── NO ↓
    │
    └── QUARANTINE
        │
        ├── Self-confirmed: I observe the same pattern locally → ACTIVE
        ├── Crowd-confirmed: 3+ peers report same pattern → ACTIVE
        ├── Time-expired: 30 days without confirmation → DROPPED
        └── Human override: user clicks Accept/Reject in dashboard
```

**Poisoning defense:** If a peer suddenly sends data that contradicts 5+ other peers, or sends 10x normal volume, flag as suspicious. Don't merge. Dashboard shows: "⚠ Anomalous data from tom-desktop — 47 patterns contradict team consensus."

### 14.10 Integration with Self-Improvement Loops

Mesh plugs into the existing 5-level closed-loop control system:

| Control Level | Without Mesh | With Mesh |
|---------------|-------------|-----------|
| **L0** (seconds) | React to own task failures | Same — L0 is too fast for network |
| **L1** (minutes) | Update own model/task scores | + Broadcast `score_update` to peers |
| **L2** (hours) | Check own daily health | + Merge peer scores; promote/drop quarantine |
| **L3** (days) | Optimize own policy | + Backtest against **team-wide data** (higher N = better backtest) |
| **L4** (weeks) | Recalibrate own signals | + Propose cross-team policy changes; vote on peer proposals |

The mesh makes L3/L4 decisions **dramatically more reliable** because backtesting draws from the team's combined `task_history` (500+ tasks) instead of just one developer's (100 tasks). More data → better predictions → fewer rollbacks.

### 14.11 Cold Start — New Developer Joins the Team

```
1. Developer installs Maggy, runs /maggy-init
   → Sets org_key (same as team)
   → Generates peer_id
   → Auto-seed runs on their projects (Section 11)

2. Maggy starts, announces on mDNS (_maggy._tcp.local)
   → Discovers 4 peers on the mesh

3. Full sync: sends sync_request{tables: all, since: epoch}
   → Receives: 500+ model scores, 200+ process patterns, 150+ CI patterns
   → All incoming data → quarantine (except scores, which auto-merge)

4. As new developer works their first tasks:
   → Local observations match quarantined patterns → auto-promote
   → "Ah, ruff catches unused imports here too" → promoted to active
   → "Qwen is bad at API routes? Let me try..." → confirmed → active

5. Dashboard after day 1:
   ┌──────────────────────────────────────────────────────────────┐
   │  MESH — New Member Onboarding                                │
   │                                                               │
   │  Connected to: 4 peers (Protaigé org)                        │
   │  Inherited: 847 patterns                                      │
   │    Active: 312 (self-confirmed or crowd-confirmed)            │
   │    Quarantine: 535 (awaiting local validation)                │
   │                                                               │
   │  Model routing: inherited team-wide scores                    │
   │    → Claude for auth (team avg: +0.82, n=89)                 │
   │    → Kimi for tests (team avg: +0.71, n=134)                 │
   │    → Qwen for docs (team avg: +0.65, n=67)                   │
   │                                                               │
   │  Top patterns auto-promoted today:                            │
   │    ✓ "ruff pre-check eliminates 40% of CI failures" (5 peers)│
   │    ✓ "PRs > 400 lines → split" (4 peers, 200+ observations) │
   │    ✓ "mypy strict mode catches type bugs" (3 peers)          │
   └──────────────────────────────────────────────────────────────┘
```

The new developer's Maggy doesn't start from zero. It starts with the collective intelligence of the team. No ramp-up period. No re-learning.

### 14.12 Dashboard — Mesh Tab

```
┌──────────────────────────────────────────────────────────────────┐
│  MESH                                                             │
│                                                                   │
│  Peers: 3/4 online   │  Last sync: 2 min ago  │  Org: Protaigé  │
│                                                                   │
│  ┌─ Ali ──────────────── ● online ───────────────────────────────┐│
│  │ Projects: api, mobile  │  Policy v47  │  312 active patterns  ││
│  │ Last contribution: "Route blast 1-2 to qwen" (+0.18 delta)   ││
│  └───────────────────────────────────────────────────────────────┘│
│  ┌─ Sarah ────────────── ● online ───────────────────────────────┐│
│  │ Projects: web, infra   │  Policy v31  │  189 active patterns  ││
│  │ Last contribution: "mypy pre-check on all Python" (+0.22)    ││
│  └───────────────────────────────────────────────────────────────┘│
│  ┌─ Tom ─────────────── ● online ────────────────────────────────┐│
│  │ Projects: ml, data     │  Policy v22  │  156 active patterns  ││
│  │ Last contribution: "Gemini Flash for data pipeline tasks"     ││
│  └───────────────────────────────────────────────────────────────┘│
│  ┌─ Priya ──────────── ○ offline (2h) ──────────────────────────┐│
│  │ Projects: devops       │  Policy v18  │  98 active patterns   ││
│  │ Will sync on reconnect                                        ││
│  └───────────────────────────────────────────────────────────────┘│
│                                                                   │
│  ── Policy Proposals (2) ─────────────────────────────────────── │
│  │ "Route blast 1-2 to qwen"                                    │
│  │   From: Ali  │  Evidence: 31 tasks  │  Backtest: +0.18       │
│  │   Status: auto-applied (delta > +0.1)                        │
│  │                                                               │
│  │ "Add security scan pre-commit for auth files"                 │
│  │   From: Sarah  │  Evidence: 12 PRs flagged  │  Backtest: +0.31│
│  │   Status: applied on 3/4 peers, pending on Priya (offline)   │
│  └───────────────────────────────────────────────────────────────┘│
│                                                                   │
│  ── Team Intelligence Summary ────────────────────────────────── │
│  │ Total team patterns: 847 unique                               │
│  │ Total team task history: 523 tasks across 4 peers             │
│  │ Team-wide CI first-pass rate: 91% (up from 72% pre-Maggy)    │
│  │ Team-wide avg review rounds: 1.3 (down from 2.8 pre-Maggy)   │
│  │ Collective model ranking:                                     │
│  │   #1 Claude (auth, security, complex) — avg +0.82            │
│  │   #2 Kimi (tests, API routes, medium) — avg +0.71            │
│  │   #3 Gemini Flash (data, pipeline) — avg +0.68               │
│  │   #4 Qwen (docs, config, simple) — avg +0.65                 │
│  └───────────────────────────────────────────────────────────────┘│
└──────────────────────────────────────────────────────────────────┘
```

### 14.13 The Compound Effect

Week 1: 5 Maggy instances learn independently. Each discovers ~20 patterns.

Week 4 (without mesh): Each has ~80 patterns. Significant overlap. Total unique knowledge: ~150 patterns across the org, but no individual has more than 80.

Week 4 (with mesh): Each has ~150 patterns (the full team set). Total unique knowledge: ~150. But every individual has access to all of it. The team is 2x more optimized than any individual would be alone.

Week 12 (with mesh): The compound effect kicks in. Each new discovery is immediately tested across 5 different project contexts. Patterns that work everywhere get high confidence fast. Patterns that are project-specific get properly scoped. The collective model ranking has 500+ data points per model — more reliable than any benchmark.

```
Without mesh:  knowledge = n_developers × learning_rate × time
With mesh:     knowledge = n_developers × learning_rate × time × sharing_factor
               where sharing_factor ≈ n_developers (superlinear)
```

Each developer's Maggy becomes as smart as the entire team. The team doesn't just add knowledge linearly — it multiplies it. This is the network effect applied to AI engineering intelligence.

### 14.14 Security Model

| Concern | Mitigation |
|---------|-----------|
| Unauthorized peer | Org key challenge-response; first-seen peers raise a dashboard notification |
| Data interception | TLS on all WebSocket connections |
| Poisoning (bad data) | Quarantine system + anomaly detection (Section 14.9) |
| Stale data | Confidence decay over time; `trust_decay_days` (default 30) |
| Data leakage | Only aggregated scores/patterns cross the wire — never raw code, PR text, or secrets |
| Key compromise | Org key rotation: `/maggy mesh rotate-key` regenerates and pushes to all connected peers |
| Replay attacks | Nonce in handshake; timestamps in messages; reject messages > 5 min old |

### 14.15 Configuration

```yaml
# Added to ~/.maggy/policy.yaml
mesh:
  enabled: true
  sync_interval_minutes: 15    # full sync frequency
  min_peer_samples: 10         # ignore peer data with < 10 samples
  trust_decay_days: 30         # peer data confidence decays over time
  quarantine_days: 30          # unconfirmed patterns expire
  auto_promote_threshold: 3    # 3 independent peer confirmations → auto-promote
  auto_accept_scores: true     # model scores merge automatically (weighted)
  auto_accept_patterns: true   # patterns merge automatically (with quarantine)
  auto_accept_policies: true   # policy proposals auto-apply if backtest passes (+0.1)
  anomaly_threshold: 10        # flag peer sending 10x normal volume
  broadcast_on_l1: true        # broadcast score updates after each task
```

Note: `auto_accept_policies: true` — this is the aggressive default. Maggy is autonomous. If a policy proposal passes backtesting with > +0.1 delta, it applies automatically. The improvement ledger tracks everything for rollback. The team lead can override to `false` if they want manual review.

---

## 15. Engram — Cross-Session Memory

### 15.1 The Problem: Agent Amnesia

Maggy's Mnemos handles memory within a task. But when a session ends, everything learned about a project — conventions, reviewer preferences, codebase idioms, tool configurations — evaporates. The next session starts from scratch. This is agent amnesia, and it has seven distinct pathologies:

| Amnesia Type | What Gets Lost | Maggy Example |
|-------------|---------------|---------------|
| **Anterograde** | New memories fail to form across sessions | Maggy learns a project uses Zustand, forgets next session |
| **Retrograde** | Existing memories degrade over time | A CI fix pattern fades after weeks of disuse |
| **Temporal** | When something happened is lost | "The API was refactored" — but when? Before or after the auth change? |
| **Source** | Where a fact came from is lost | "Use 4-space indent" — was this from the linter config or user preference? |
| **Interference** | Memories from one context contaminate another | Project A's React patterns leak into Project B's Vue codebase |
| **Context-binding** | Right memory, wrong retrieval context | Project has error handling conventions stored under "testing", not found during "API route creation" |
| **Confabulation** | Inferred patterns presented as confirmed facts | Maggy "remembers" a convention it actually inferred from one example |

Without Engram, Maggy is a perpetual amnesiac — impressive in the moment, but unable to compound learning across sessions.

### 15.2 The EngramRecord

The EngramRecord is the persistence primitive — the unit of cross-session memory.

```python
@dataclass
class EngramRecord:
    engram_id: str              # UUID
    namespace: str              # Project isolation key
    memory_type: str            # "convention", "preference", "pattern",
                                # "tool_config", "reviewer_preference",
                                # "codebase_idiom", "process_rule"
    content: str                # The actual memory
    origin: Origin              # Where this came from
    confidence: float           # 0.0-1.0
    evidence_count: int         # How many times confirmed
    temporal_validity: Validity # When this is valid
    entity_links: list[str]     # Linked entities (files, functions, people)
    causal_links: list[str]     # Linked causes/effects
    created_at: str             # ISO timestamp
    last_verified: str          # When last confirmed still valid
    last_accessed: str          # When last retrieved

@dataclass
class Origin:
    source_type: str            # "mnemos_task", "user_explicit",
                                # "process_signal", "mesh_peer"
    source_id: str              # Task ID, user ID, or peer_id
    channel: str                # "cli", "dashboard", "mesh"
    original_evidence: str      # What prompted this memory

@dataclass
class Validity:
    valid_from: str             # ISO timestamp
    valid_until: str | None     # None = no expiry
    superseded_by: str | None   # engram_id of replacement
    decay_rate: float           # Confidence decay per day (default 0.001)
```

### 15.3 Three-Tier Namespace Model

Every EngramRecord belongs to exactly one namespace tier. Three tiers prevent both cross-project contamination and useful-pattern siloing:

```yaml
# ~/.maggy/engram_namespaces.yaml
tiers:
  # Tier 1: LOCAL — project-specific memories
  local:
    zensurveys-backend:
      language: python
      framework: fastapi
      isolation: strict        # No cross-namespace retrieval
    zensurveys-frontend:
      language: typescript
      framework: react
      isolation: strict

  # Tier 2: PORTFOLIO — abstracted cross-project patterns
  portfolio:
    python-conventions:
      scope: language          # All Python projects can read
      abstraction: required    # Patterns must be de-contextualized
    api-patterns:
      scope: framework         # All API projects can read
      abstraction: required
    shared-conventions:
      scope: org               # Org-wide conventions
      abstraction: optional

  # Tier 3: MESH — peer-derived memories (quarantined)
  mesh:
    isolation: quarantine      # Always quarantined on arrival
    trust_decay_days: 30       # Confidence decays if unvalidated
    auto_promote_threshold: 3  # 3 local confirmations → promote to portfolio
```

**Tier 1 (Local)** is project-scoped — a Python FastAPI project's conventions never contaminate a React project's patterns.

**Tier 2 (Portfolio)** holds abstracted patterns that transcend individual projects. When a local pattern proves useful across 3+ projects, it's promoted to portfolio — but only after de-contextualization (stripping project-specific names, paths, and configurations). This prevents the "works everywhere" illusion while enabling genuine cross-project learning.

**Tier 3 (Mesh)** holds peer-derived memories that arrive via Maggy Mesh. These always enter quarantine and must be locally validated before promotion. A mesh pattern from a peer's Python project goes to portfolio-level `python-conventions` only after local confirmation.

Retrieval queries search local first, then portfolio, then mesh — with confidence weighting per tier.

#### 15.3.1 Engram as Improvement Substrate

Engram absorbs the improvement ledger. The relationship:

- **Improvement ledger** = the mutation log (what changed, when, who proposed)
- **Engram** = the memory substrate (persists the "what" across sessions)
- **Reward registry** = the outcome signal (did the change work?)

Before Engram, the improvement ledger was ephemeral — mutations were logged but lost between sessions. Engram makes the ledger persistent: every L2/L3/L4 mutation becomes an EngramRecord with `memory_type: "mutation"`, carrying the original proposal, the delta metric, and the outcome reward. This means Maggy can remember not just what it learned, but what it tried, what worked, and what failed — the full self-improvement history.

### 15.4 Memory Lifecycle

```
Mnemos (within-task)
  → Task completes with high-confidence memories
  → Promotion filter: confidence > 0.8, evidence_count >= 3
  │
  ▼
Engram (cross-session, per-machine)
  → EngramRecord created with full Origin + Validity
  → Namespace-isolated per project
  → Multi-path retrieval: semantic + temporal + causal + entity links
  → Confidence decays with age unless revalidated
  │
  ▼
Mesh (cross-machine, per-org) [optional]
  → High-confidence EngramRecords distilled into Mesh typed memory
  → Shared with peers as patterns/scores with provenance
  → Incoming peer patterns enter quarantine (Section 14.9)
```

### 15.5 Multi-Path Retrieval

Single-path semantic retrieval fails when the retrieval query doesn't match the storage encoding. Engram retrieves across four paths simultaneously:

| Path | What It Finds | Example |
|------|-------------|---------|
| **Semantic** | Content-similar memories | Query "API route" finds "REST endpoint conventions" |
| **Temporal** | Recent or temporally-relevant memories | Query finds patterns from the same sprint/phase |
| **Causal** | Cause-effect linked memories | "Auth refactor" finds "session middleware change" it caused |
| **Entity** | Entity-linked memories | Query about `auth.py` finds all conventions touching that file |

Retrieval returns a merged, deduplicated set ranked by `confidence * recency * path_match_score`.

### 15.6 Amnesia Score Diagnostic

Each project gets a 7-dimension Amnesia Score (0.0 = perfect retention, 1.0 = total amnesia):

```python
@dataclass
class AmnesiaProfile:
    """Seven-dimension amnesia diagnostic for one project.

    Each score runs from 0.0 (perfect retention) to 1.0 (total amnesia).
    """
    anterograde: float    # new memories failing to form across sessions
    retrograde: float     # old memories degrading
    temporal: float       # loss of temporal context
    source: float         # loss of origin attribution
    interference: float   # cross-namespace contamination
    context_binding: float # memories not retrievable in the right context
    confabulation: float  # inferred patterns presented as facts

    @property
    def overall(self) -> float:
        """Return the unweighted mean of the seven dimension scores."""
        running_total = 0
        # Walk the instance attributes in declaration order and add them up.
        for dimension_score in vars(self).values():
            running_total += dimension_score
        return running_total / 7
```

The L3 weekly loop analyzes Amnesia Scores per project and patches memory encoding rules:
- High anterograde score → lower the promotion threshold (more memories get persisted)
- High interference score → tighten namespace isolation rules
- High confabulation score → require higher evidence_count before promotion

### 15.7 Integration with Control Loops

| Level | Engram Integration |
|-------|-------------------|
| **L0** | Check if current task context matches any EngramRecords — surface relevant conventions |
| **L1** | Promote high-confidence task memories to EngramRecords |
| **L2** | Daily: check for decayed records, run amnesia diagnostics |
| **L3** | Weekly: analyze Amnesia Scores, adjust promotion thresholds, patch encoding rules |
| **L4** | Monthly: evaluate whether Engram is reducing session startup time and improving consistency |

---

## 16. Lexon — Semantic Tool Binding

### 16.1 The Problem: Tool Selection Collapses at Scale

At 5-10 tools, models select correctly. At 20-30, confusion between similar-sounding tools emerges. At 50+, accuracy collapses: the model selects plausible-sounding but incorrect tools, hallucinates parameters, or conflates capabilities. This is well-documented in research (RAG-MCP: accuracy drops from 87% to 13% as tools grow from 10 to 100).

Maggy's tool count will grow aggressively:
- MCP Forge (Phase 9) auto-generates MCP servers from API docs
- Process Intelligence (Phase 8) adds signal collectors per integration
- Each project's toolchain adds environment-specific tools
- Mesh peers may surface tool recommendations

Without Lexon, Maggy's tool accuracy will degrade as it becomes more capable.

### 16.2 The LexonRecord

```python
@dataclass
class LexonRecord:
    """Record of one Lexon binding.

    Captures what was routed (phrase or structured intent), which tools
    were considered and selected (with version and schema hash), whether
    disambiguation was needed, and how the binding turned out.
    """
    lexon_id: str               # UUID
    phrase: str                 # Original user phrase (pre-translation)
    phrase_normalized: str      # Post-translation, lowercased
    language: str               # ISO 639-1 detected language
    is_mixed: bool              # Code-switching detected

    # Intent source — Lexon binds more than user phrases
    # NOTE(review): "mnemo_node" may be a typo for "mnemos_node" (Origin uses
    # "mnemos_task") — confirm the intended spelling.
    source_type: str            # "user_phrase" | "reason_node" | "mnemo_node"
                                # | "process_signal" | "mesh_policy"
    structured_intent: str | None  # iCPG ReasonNode ref (if source_type != "user_phrase")
    reason_node_ref: str | None    # Pointer to iCPG ReasonNode that triggered routing
    engram_refs: list[str]         # EngramRecord IDs used to resolve this binding

    # Routing results
    candidate_tools: list       # [{tool_name, tool_version, schema_hash, score, source}]
    selected_tool: str | None   # None if clarification required
    selected_tool_version: str | None  # Semantic version of selected tool
    selected_tool_schema_hash: str | None  # Hash of tool's input schema at bind time
    confidence: float           # 0.0-1.0
    ambiguity_class: str | None # "near_miss" | "vocabulary_gap" | "context_dependent"
    negative_bindings: list[str]  # Tool names explicitly excluded (NOT bindings)

    # Disambiguation
    was_clarified: bool         # Disambiguation was triggered
    # NOTE(review): the value to store when was_clarified is False is not
    # specified here — confirm (the annotation is a plain str, not optional).
    clarify_mode: str           # "self_clarify" | "user_clarify"

    # Outcome tracking
    correction: str | None      # If user corrected post-execution
    correction_source: str | None  # "user_explicit" | "ci_failure" | "review_comment"
    outcome_reward: float | None   # -1.0 to 1.0: did the binding produce good results?

    # Context
    context_snapshot: str       # Pointer to Mnemos ContextNode
    user_id: str
    created_at: str
```

The enhanced LexonRecord captures not just what was bound, but why (intent source), to which version (tool contract), whether the binding worked (outcome reward), and how errors were detected (correction source). This transforms Lexon from a lookup table into a reward-bearing learning system.

### 16.3 Five-Layer Pipeline

Every tool invocation passes through five layers:

```
Layer 1: LANGUAGE NORMALIZATION
  → Detect language (lightweight classifier)
  → Translate to English for routing only (response stays in user language)
  → Handle code-switching: extract English anchor terms from mixed-language input
  │
  ▼
Layer 2: TWO-TIER ROUTING
  → Tier A (fast LLM, <300ms): compact tool manifest (name + 1-line description)
    Returns 5-7 candidates with rationale. JSON schema constrained to valid tool names.
  → Tier B (semantic retriever): multilingual embedding search over tool registry
    Each tool indexed by: description, example queries, learned synonyms
    Returns 5-7 candidates with cosine similarity scores.
  → Union + deduplication. Tools in both lists get score bonus.
  │
  ▼
Layer 3: TERMINOLOGY MAP FILTER
  → Query three-level Terminology Map: user > org > system
  → Explicit user preferences override everything (confidence 1.0)
  → NOT bindings: "blast" explicitly does NOT mean "delete_all"
  → Context-conditioned: "follow up" → different tool depending on active entity
  │
  ▼
Layer 4: DISAMBIGUATION (dual-mode)
  → If top candidate confidence > 0.82 and gap to #2 > 0.15: proceed
  → Otherwise: choose clarify mode based on action reversibility:

  → MODE A — self_clarify (default, autonomous):
    Lexon resolves ambiguity without asking the user by consulting:
    - iCPG ReasonNode: structured sub-goal narrows candidate set
    - Mnemos ContextNode: active entity and recent tool history
    - Engram: past bindings for this phrase in this project
    - Process history: which tool succeeded last time in similar context
    - Mesh consensus: what do peers bind this phrase to?
    If any source resolves confidence above threshold → proceed silently.
    Logged as self_clarify in LexonRecord for audit.

  → MODE B — user_clarify (irreversible actions only):
    Triggered only when action is destructive, expensive, or irreversible
    (delete, deploy, billing, permission changes).
    Present 2-3 concrete options in user's language.
    User's selection becomes high-confidence Terminology Map entry.

  → Autonomous agents should almost never trigger user_clarify.
    The goal: 95%+ resolutions via self_clarify after 50+ interactions.
  │
  ▼
Layer 5: FEEDBACK + PERSONALIZATION
  → Five implicit learning signals update Terminology Map:
    1. Correction: user corrects → add NOT binding + positive binding
    2. Affirmation: user proceeds → increment confidence
    3. Repetition: same phrase→tool 5+ times → promote to high-confidence synonym
    4. Disambiguation selection: capture context + choice as user-level binding
    5. Clarification repetition: same phrase triggers 3+ disambiguations → prompt explicit preference
  → High-confidence bindings (>0.9, used >10 times) promoted to Engram for cross-session persistence
```

### 16.4 Terminology Map Structure

```python
@dataclass
class TerminologyEntry:
    """One phrase-to-tool binding stored in the Terminology Map."""
    phrase: str                 # "blast my list"
    tool_name: str              # "bulk_email_send"
    params: dict | None         # Default parameters if applicable
    NOT: list[str]              # ["delete_all"] — explicitly NOT this tool
    context: str | None         # "contact_selected" — binding condition
    level: str                  # "system" | "org" | "user"
    confidence: float           # 1.0 for explicit, <1.0 for learned
    user_id: str | None         # None for system/org level
```

Resolution order: explicit user-level (confidence 1.0) > org-level > system-level > router inference. An explicit user preference is ground truth and bypasses confidence scoring.

### 16.5 Org-Level Terminology via Mesh

The Terminology Map has an org level between system and user. In a Maggy Mesh deployment:
- Team leads can define shared vocabulary
- Org-level entries propagate to all peers as default bindings
- Individual users can override at user level
- New team members inherit org vocabulary on Mesh cold start

This is a natural extension of Mesh's typed memory: terminology entries are a new type alongside scores, patterns, policies, and gaps.

### 16.6 Integration with RFC Stack

```
iCPG (structured intent) → Lexon (routes to correct tool)
                              ↕
                          Mnemos (tracks tool selection quality via ToolCallNode)
                              ↕
                          Engram (persists learned vocabulary across sessions)
                              ↕
                          Mesh (shares org-level terminology across machines)
```

| Component | Lexon Reads From | Lexon Writes To |
|-----------|-----------------|----------------|
| **iCPG** | ReasonNode provides structured sub-goal (better routing signal than raw text) | — |
| **Mnemos** | ContextNode for active entity (disambiguation signal) | ToolCallNode logged per invocation |
| **Engram** | High-confidence user synonyms from past sessions | Promotes confirmed bindings for persistence |
| **Mesh** | Org-level terminology entries from peers | Shares learned org-level vocabulary |

### 16.7 Configuration

```yaml
# Added to ~/.maggy/policy.yaml
lexon:
  enabled: true
  fast_llm_model: "claude-haiku"      # Tier A: speed over depth
  embedding_model: "multilingual-e5-large"
  confidence_threshold: 0.82
  disambiguation_gap: 0.15
  max_candidates: 7
  personalization:
    implicit_learning: true
    promotion_threshold: 10           # Uses before promoting to Engram
    correction_weight: 2.0            # Corrections count double
  terminology_map:
    system_file: "lexon_system_terms.yaml"
    org_sync_via_mesh: true           # Share org terms through Mesh
```

---

## 17. Event Spine — Canonical Event Flow

### 17.1 Why an Event Spine

Maggy's components — iCPG, Mnemos, Lexon, Engram, Process Intelligence, Mesh — each generate their own events in their own formats. Without a canonical event spine, correlating "user said X → Lexon bound tool Y → execution failed → memory Z was created → mutation W was proposed → mesh peer P received it" requires stitching together six different log formats.

The Event Spine defines a single ordered event stream that every component writes to. Each event carries a common header and a typed payload. This enables end-to-end tracing, reward attribution, and replay for debugging.

### 17.2 Event Types

```
IntentEvent ──► BindingEvent ──► ExecutionEvent ──► MemoryEvent
                                                       │
                                                       ▼
MeshEvent ◄── MutationEvent ◄── OutcomeEvent ◄── PersistenceEvent
```

| Event | Emitted By | What It Captures |
|-------|-----------|-----------------|
| **IntentEvent** | iCPG | Structured sub-goal from ReasonNode decomposition |
| **BindingEvent** | Lexon | Tool selection: which tool, which version, confidence, clarify mode |
| **ExecutionEvent** | Pi / Agent | Tool invocation: input, output, duration, exit code |
| **MemoryEvent** | Mnemos | Within-task memory write: node type, confidence, entity links |
| **PersistenceEvent** | Engram | Cross-session memory promotion: namespace tier, memory type |
| **OutcomeEvent** | Process Intelligence | Task outcome: success/failure, metric delta, reward signal |
| **MutationEvent** | L2/L3/L4 Loops | Self-modification: what changed, why, expected delta |
| **MeshEvent** | Mesh | Cross-machine sharing: what was sent/received, quarantine status |

### 17.3 Common Event Header

Every event carries a standard header for correlation and audit:

```python
@dataclass
class EventHeader:
    event_id: str           # UUID — unique per event
    event_type: str         # "intent" | "binding" | "execution" | ...
    task_id: str            # Links all events in a single task
    project_id: str         # Engram namespace key
    agent_id: str           # Which agent (Pi instance) emitted this
    model_id: str           # Which LLM was active
    confidence: float       # Event-level confidence (0.0-1.0)
    namespace: str          # Engram namespace tier (local/portfolio/mesh)
    policy_version: str     # Which policy.yaml version was active
    reward_delta: float | None  # Outcome signal (-1.0 to 1.0)
    timestamp: str          # ISO 8601
    parent_event_id: str | None  # Causal parent (enables event DAG)
```

### 17.4 Typed Payloads

```python
@dataclass
class IntentEvent:
    header: EventHeader
    reason_node_id: str     # iCPG ReasonNode that decomposed this
    sub_goal: str           # Natural language sub-goal
    blast_radius: int       # iCPG blast radius estimate
    drift_score: float      # iCPG drift from original intent

@dataclass
class BindingEvent:
    header: EventHeader
    lexon_record_id: str    # LexonRecord UUID
    source_type: str        # "user_phrase" | "reason_node" | ...
    selected_tool: str
    tool_version: str
    schema_hash: str
    clarify_mode: str       # "self_clarify" | "user_clarify"
    ambiguity_class: str | None

@dataclass
class OutcomeEvent:
    header: EventHeader
    success: bool
    metric_name: str        # "tests_passed", "ci_green", "pr_merged"
    metric_before: float
    metric_after: float
    reward: float           # Computed reward signal
```

### 17.5 What the Event Spine Enables

| Capability | How |
|-----------|-----|
| **End-to-end tracing** | Follow task_id across all 8 event types |
| **Reward attribution** | OutcomeEvent.reward propagates back to BindingEvent (was tool selection good?) and MutationEvent (was self-modification good?) |
| **Replay debugging** | Replay event stream to reproduce failures without re-executing |
| **Amnesia diagnosis** | Compare MemoryEvent → PersistenceEvent conversion rate per project |
| **Mesh audit** | Track exactly what crossed the wire and whether quarantine was justified |
| **Self-improvement validation** | MutationEvent + OutcomeEvent = evidence for whether L3/L4 changes helped |

### 17.6 Storage and Retention

```yaml
# Added to ~/.maggy/policy.yaml
event_spine:
  enabled: true
  storage: "~/.maggy/events.db"    # SQLite — append-only event log
  retention_days: 90               # Events older than 90 days → archive
  archive_format: "jsonl.gz"       # Compressed JSONL for cold storage
  index_fields:                    # Fields indexed for fast queries
    - task_id
    - event_type
    - project_id
    - timestamp
```

### 17.7 Integration Summary

```
User speaks → IntentEvent (iCPG decomposes)
           → BindingEvent (Lexon routes to tool)
           → ExecutionEvent (Pi executes)
           → MemoryEvent (Mnemos records)
           → PersistenceEvent (Engram persists)
           → OutcomeEvent (Process Intelligence scores)
           → MutationEvent (L2/L3/L4 self-modifies)
           → MeshEvent (Mesh shares with peers)

Every step is typed, correlated by task_id, and carries a reward signal.
This is the nervous system of an autonomous engineering agent.
```

---

## 18. Benchmark Validation — Maggy vs Claude Code

> Full results: [`docs/benchmark-results.md`](benchmark-results.md)

### 18.1 Test Protocol

Built an **Expense Tracker** (FastAPI + SQLite + vanilla JS) using 6 identical tasks:
- **Runner A (Maggy):** 4-tier routing via blast score, 4 CLIs auto-discovered
- **Runner B (Claude Code):** All 6 tasks through `claude -p` only

Environment: Mac Studio M4 Max, 128 GB RAM. CLIs: Claude Code 2.1.42, Codex 0.129.0, Kimi 1.41.0, Ollama 0.23.2 (qwen2.5-coder:32b).

### 18.2 Results Summary

| Metric | Maggy | Claude Code |
|--------|-------|-------------|
| Success rate | 6/6 (100%) | 6/6 (100%) |
| Total time | 907.6s | 681.0s |
| Quality score | 7.4/10 | 7.8/10 |
| Claude subscription burn | 17% (1/6 tasks) | 100% (6/6 tasks) |
| Models used | 4 (ollama, kimi, codex, claude) | 1 (claude) |
| Fallbacks needed | 0 | N/A |
| Security depth | 7 issues found + fixed | No dedicated review |
| Test generation | None | 3 test files, 11+ cases |

### 18.3 Routing in Action

```
EXP-1 (docs, blast 2)     → ollama  50.4s   ← FREE (local GPU)
EXP-2 (schema, blast 3)   → kimi    86.6s   ← cheap subscription
EXP-3 (CRUD, blast 5)     → codex  147.1s   ← separate subscription
EXP-4 (API, blast 5)      → codex  133.9s   ← separate subscription
EXP-5 (frontend, blast 6) → codex  280.1s   ← separate subscription
EXP-6 (security, blast 8) → claude 209.5s   ← premium (only when needed)
```

### 18.4 What This Validates

1. **CLI auto-discovery works end-to-end.** Maggy probed 4 CLIs via `--help`, extracted flags, built correct commands, and spawned all 4 successfully with zero manual configuration.

2. **Blast-score routing is functional.** Low-complexity tasks went to cheap/free models; high-complexity tasks went to premium. The routing decisions were defensible.

3. **Fallback chain was never triggered.** Zero fallbacks needed — all 4 CLIs completed their assigned tasks. The chain is wired and ready for quota exhaustion scenarios, but this run did not exercise it, so its reliability is not yet validated.

4. **Cost efficiency is real.** 83% reduction in Claude usage. Only the security review (blast 8) touched the premium model.

5. **Quality is competitive.** Maggy scored 7.4 vs Claude's 7.8 — a small gap driven by missing tests and product spec (routing issue, not capability issue).

### 18.5 Gaps to Close

| Gap | Root Cause | Fix |
|-----|-----------|-----|
| No tests generated | No TDD pipeline step in benchmark | Wire executor's `_run_tdd()` to add RED-GREEN step |
| Ollama missed product spec | Coding model assigned prose task | Route `task_type: docs` to kimi/claude regardless of blast |
| Codex slow on frontend (280s vs 122s) | Codex overhead for complex UI tasks | Consider routing blast 6 frontend to claude |
| Claude had better architecture | Single model sees full context | Multi-model loses cross-task context — address via checkpoint sharing |

### 18.6 Post-Benchmark Improvements

After the benchmark, three systems were built to close the identified gaps:

#### A. Routing Rules (`maggy/routing_rules.py`)

A YAML-backed self-updating rules file at `~/.maggy/routing-rules.yaml`. Rules are checked **before** blast-score routing, enforcing that specific task types and pipeline phases always use the right model.

**Task-type overrides** (from benchmark evidence):

| Task Type | Forced Model | Confidence | Source |
|-----------|-------------|-----------|--------|
| `docs` | claude | 0.9 | benchmark — local models are code-optimized, not prose |
| `security` | claude | 1.0 | rule — security review needs deep reasoning |
| `architecture` | claude | 0.8 | rule — architecture needs cross-context awareness |
| `tests` | claude | 0.9 | benchmark — only claude generated test files |
| `planning` | claude | 0.8 | rule — planning requires structured reasoning |

**Pipeline phase overrides** (from TDD workflow):

| Phase | Forced Model | Reason |
|-------|-------------|--------|
| `spec` | claude | SPEC phase needs comprehensive docs |
| `tdd_red` | claude | RED phase needs test design expertise |
| `tdd_green` | auto | GREEN phase uses blast-score routing |
| `review` | claude | Review needs security + architecture depth |

**Self-learning API:**
- `record_outcome(rules, model, task_type, success)` — updates rolling success rates from task results
- `learn_override(rules, task_type, model, reason, confidence)` — Maggy can add new overrides when data supports it
- Manual edits to the YAML are preserved; Maggy only appends learned entries

This directly addresses:
- **"Ollama missed product spec"** → `docs` tasks now forced to claude
- **"No tests generated"** → `tests` and `tdd_red` phases now forced to claude

#### B. Team Conventions (embedded in routing rules)

Conventions from claude-bootstrap's CLAUDE.md and skill files are embedded in the routing rules and injected into every prompt sent to any CLI:

```yaml
conventions:
  - text: "Build minimum wowable product (mWP). Ship the smallest thing that makes someone say 'wow'."
    applies_to: [all]
    source: claude-bootstrap
  - text: "Follow TDD: RED → GREEN → VALIDATE. Coverage >= 80%."
    applies_to: [feature, bug, refactor]
    source: claude-bootstrap
  - text: "No secrets in code. Parameterized SQL only. Validate all input at API boundaries."
    applies_to: [all]
    source: claude-bootstrap
  - text: "Quality gates: max 20 lines/function, max 3 params, max 2 nesting levels, max 200 lines/file."
    applies_to: [all]
    source: claude-bootstrap
  - text: "Use existing patterns. Read the codebase before changing it."
    applies_to: [all]
    source: claude-bootstrap
```

Every executor prompt method (`_plan_prompt`, `_analysis_prompt`, `_tests_prompt`, `_impl_prompt`) now calls `conventions_for(rules, task_type)` and appends the matching conventions block. This means kimi, codex, ollama, and claude all receive the same team rules — standardizing quality expectations across all models.

#### C. Routing Rules + Conventions Flow

```
Task arrives → apply_override(task_type, phase)
                ↓ forced?
              ┌─YES─→ use forced model
              └─NO──→ reward table → blast-score routing
                        ↓
              build prompt + conventions_for(task_type)
                        ↓
              send to CLI with team conventions embedded
                        ↓
              record_outcome() → update YAML success rates
```

#### D. Expected Impact on Re-run

If the benchmark were re-run with these improvements:

| Gap (Before) | Expected Result (After) |
|-------------|----------------------|
| No product spec from ollama | EXP-1 (`docs`) now routes to claude → spec generated |
| No tests from any model | TDD pipeline with `tdd_red` → claude → tests generated |
| Inconsistent quality | All models receive team conventions (mWP, quality gates, security rules) |
| No self-improvement | Outcome recording feeds back into routing rules YAML |

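**Net effect:** Quality score expected to converge with Claude Code's 7.8+ while retaining most of the cost reduction (forcing `docs` and the test phases to claude raises Claude usage above the benchmark's 1/6 tasks, so the saving will be somewhat below 83%).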


================================================
FILE: docs/benchmark-results.md
================================================
# Maggy v5 Benchmark Results

**Date:** 2026-05-11
**App:** Personal Expense Tracker (FastAPI + SQLite + vanilla HTML/JS)
**Environment:** Mac Studio M4 Max, 128 GB RAM, macOS Darwin 24.6.0
**CLIs:** Claude Code 2.1.42, Codex 0.129.0, Kimi 1.41.0, Ollama 0.23.2 (qwen2.5-coder:32b)

---

## 1. Test Protocol

6 identical tasks run sequentially through two pipelines:

- **Runner A (Maggy):** 4-tier routing via blast score. Auto-discovers CLI flags at startup.
- **Runner B (Claude Code):** All tasks run through `claude -p` only.

Both pipelines use `--dangerously-skip-permissions` / equivalent flags, 25 max turns, and subprocess spawning into isolated build directories.

---

## 2. Task Definitions

| ID | Task | Blast | Maggy Route | Type |
|----|------|-------|-------------|------|
| EXP-1 | Write product spec | 2 | local (ollama) | docs |
| EXP-2 | Design database schema | 3 | kimi | architecture |
| EXP-3 | Build expense CRUD API | 5 | gpt (codex) | feature |
| EXP-4 | Build category API + monthly summary | 5 | gpt (codex) | feature |
| EXP-5 | Build frontend dashboard | 6 | gpt (codex) | frontend |
| EXP-6 | Security review + input validation | 8 | claude | security |

---

## 3. Speed Results

| Task | Blast | Maggy Model | Maggy (s) | Claude (s) | Winner |
|------|-------|-------------|-----------|------------|--------|
| EXP-1 | 2 | ollama (local) | 50.4 | 48.6 | Claude |
| EXP-2 | 3 | kimi | 86.6 | 67.2 | Claude |
| EXP-3 | 5 | codex | 147.1 | 160.6 | **Maggy** |
| EXP-4 | 5 | codex | 133.9 | 130.8 | Claude |
| EXP-5 | 6 | codex | 280.1 | 121.9 | Claude |
| EXP-6 | 8 | claude | 209.5 | 151.9 | Claude |
| **Total** | | | **907.6** | **681.0** | **Claude (33% faster)** |

### Routing Distribution (Maggy)

| Model | Tasks | % |
|-------|-------|---|
| codex (gpt) | 3 | 50% |
| ollama (local) | 1 | 17% |
| kimi | 1 | 17% |
| claude | 1 | 17% |

---

## 4. Success Rate

| Pipeline | Passed | Failed | Fallbacks | Rate |
|----------|--------|--------|-----------|------|
| Maggy | 6 | 0 | 0 | 100% |
| Claude | 6 | 0 | 0 | 100% |

---

## 5. Output Quality Assessment

### 5.1 File Inventory

**Maggy (10 source files, 1,641 lines):**

| File | Lines | Model | Assessment |
|------|-------|-------|------------|
| `SECURITY.md` | 134 | claude | Thorough: 7 findings with fixes, 3 recommendations |
| `backend/app/database.py` | 74 | kimi | Correct schema, parameterized queries, FK + cascade, seed data |
| `backend/app/main.py` | 36 | kimi | Lifespan init, CORS from env var (not wildcard), 3 routers |
| `backend/app/validation.py` | 25 | claude | Shared YYYY-MM regex validator, extracted from duplication |
| `backend/app/routes/expenses.py` | 148 | codex | Full CRUD, Pydantic models, parameterized SQL, FK check |
| `backend/app/routes/categories.py` | 107 | codex | CRUD, hex color validator, unique constraint handling |
| `backend/app/routes/summary.py` | 52 | codex | Monthly aggregation with COALESCE, GROUP BY |
| `frontend/index.html` | 121 | codex | Dark theme, responsive, all sections present |
| `frontend/css/style.css` | 472 | codex | CSS bar charts, dark palette, mobile breakpoints |
| `frontend/js/app.js` | 472 | codex | State management, fetch API, DOM via textContent (XSS-safe) |

**Claude (18 source files, ~1,500 app lines + 457K with venv):**

| File | Lines | Assessment |
|------|-------|------------|
| `specs/product-spec.md` | 206 | Comprehensive: vision, schema, Pydantic examples, project structure |
| `backend/app/database.py` | 68 | Correct schema, parameterized queries, FK, seed data |
| `backend/app/main.py` | 42 | Lifespan init, CORS from env var, 3 routers |
| `backend/app/models.py` | 51 | Centralized Pydantic schemas (better separation) |
| `backend/app/routes/expenses.py` | 159 | Full CRUD, partial update support, category JOIN |
| `backend/app/routes/categories.py` | 90 | CRUD, referential integrity check on delete |
| `backend/app/routes/summary.py` | 44 | Monthly aggregation |
| `backend/tests/conftest.py` | 18 | Temp DB fixture with patch |
| `backend/tests/test_expenses.py` | 108 | 11 test cases covering CRUD + edge cases |
| `backend/tests/test_categories.py` | ~50 | Category CRUD tests |
| `backend/tests/test_summary.py` | ~40 | Summary endpoint tests |
| `frontend/index.html` | 79 | Clean layout, modal-based form |
| `frontend/css/style.css` | 323 | Dark theme, responsive |
| `frontend/js/app.js` | 320 | API wrapper, currency formatting, chart rendering |

### 5.2 Quality Scoring

| Dimension | Maggy | Claude | Notes |
|-----------|-------|--------|-------|
| **Functional completeness** | 9/10 | 10/10 | Both implement all endpoints. Claude adds partial updates. |
| **Security** | 10/10 | 7/10 | Maggy's security review (EXP-6) hardened CORS, added amount bounds, path param validation, color format validation. Claude left CORS with `allow_credentials=True`, no amount ceiling, no color validation. |
| **SQL safety** | 10/10 | 10/10 | Both use parameterized queries exclusively. |
| **XSS prevention** | 10/10 | 10/10 | Both use textContent for DOM rendering. No innerHTML. |
| **Input validation** | 9/10 | 7/10 | Maggy: Pydantic + custom validators (hex color, amount ceiling, path ge=1). Claude: Pydantic regex patterns but less thorough. |
| **Error handling** | 9/10 | 8/10 | Maggy: context manager with rollback, 409 on duplicate, 404 on missing. Claude: try/finally, 409 on duplicate, referential integrity check. |
| **Test coverage** | 0/10 | 9/10 | Maggy produced zero tests. Claude created conftest + 3 test files (~200 lines). |
| **Architecture** | 8/10 | 9/10 | Claude separated models into dedicated file. Maggy inlined models per route. Both wire correctly. |
| **Product spec** | 0/10 | 10/10 | Maggy's ollama did not produce a spec file. Claude's spec is comprehensive (206 lines). |
| **Frontend quality** | 9/10 | 8/10 | Maggy's frontend is larger (472+472+121 = 1065 lines) with more CSS detail. Claude's is cleaner (320+323+79 = 722 lines) with modal UX. |
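| **Weighted avg** | **7.4/10** | **7.8/10** | Weights are not specified; the unweighted means of the rows above are 7.4 (Maggy) and 8.8 (Claude). |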

### 5.3 Key Differences

**Maggy strengths:**
- Security review caught and fixed 7 issues (CORS wildcard, missing bounds, color validation, duplicated validation)
- Multi-model approach applied right tool to right task (security by Claude, CRUD by Codex, schema by Kimi)
- Larger frontend with more CSS polish
- Each model contributed its strength: Claude for security depth, Codex for feature implementation

**Claude strengths:**
- Product spec created (comprehensive 206-line document)
- Test suite included (conftest + 3 test files, ~200 lines, 11+ test cases)
- Better code organization (centralized models.py)
- Partial update support on expenses (PATCH-style PUT)
- Referential integrity check on category delete (prevents orphaned expenses)
- Full venv with dependencies installed

**Maggy weaknesses:**
- No product spec file generated (ollama didn't create it or placed it elsewhere)
- No test files at all — a significant gap for production readiness
- Import paths use `backend.app.` which requires specific project structure to run

**Claude weaknesses:**
- No dedicated security review — CORS uses `allow_credentials=True` (risky with dynamic origins)
- No amount ceiling on expenses (could submit `1e308`)
- No hex color format validation on categories
- `get_db()` returns connection without context manager (manual close in every route)

---

## 6. Cost Analysis

| Pipeline | Claude Usage | Free/Cheap Usage | Est. Subscription Burn |
|----------|-------------|------------------|----------------------|
| **Maggy** | 1/6 tasks (17%) | 2/6 tasks (33%) | Low — spread across 3 subscriptions |
| **Claude** | 6/6 tasks (100%) | 0/6 tasks (0%) | High — 100% on premium model |

Maggy used Claude only for the security review (blast 8). The other 5 tasks consumed cheaper or free models:
- EXP-1: ollama (free, local GPU)
- EXP-2: kimi (free tier / cheap subscription)
- EXP-3/4/5: codex (separate subscription)

This represents ~83% reduction in Claude subscription consumption.

---

## 7. Routing Observations

### What worked
- **Blast 8 → Claude** for security review was correct. Claude produced the most thorough audit.
- **Blast 5 → Codex** for CRUD implementation delivered working endpoints.
- **Blast 3 → Kimi** for database schema was successful and correct.
- **Zero fallbacks** — all 4 CLIs completed tasks without needing to escalate.
- **Auto-discovery** — CLI flags probed from `--help`, not hardcoded.

### What needs tuning
- **Codex is slow on frontend** — EXP-5 took 280s vs Claude's 122s (2.3x slower). Consider routing blast 6 frontend tasks to Claude.
- **Ollama missed the spec task** — EXP-1 (docs) was routed to local model but no spec file was generated. Ollama's qwen2.5-coder is optimized for code, not prose. Consider routing `task_type: docs` to kimi or claude regardless of blast score.
- **No test generation by any Maggy model** — None of the 4 models produced tests. This could be addressed by adding a TDD step (write tests first) as a follow-up task routed to Claude.

---

## 8. Conclusions

| Metric | Maggy | Claude | Verdict |
|--------|-------|--------|---------|
| Speed | 907.6s | 681.0s | Claude 33% faster |
| Success rate | 100% | 100% | Tie |
| Quality (weighted) | 7.4/10 | 7.8/10 | Claude slightly better |
| Security depth | Stronger | Weaker | Maggy (dedicated review step) |
| Test coverage | None | Good | Claude (significant gap for Maggy) |
| Cost efficiency | 83% savings | Baseline | Maggy |
| Subscription risk | Distributed | Single point | Maggy |
| Model diversity | 4 models | 1 model | Maggy |

**Summary:** Claude Code is faster and produces marginally higher overall quality (driven by tests and spec). Maggy's multi-model approach provides cost efficiency and subscription risk distribution, plus deeper security review via dedicated model routing. The main gaps to close: add TDD pipeline (test generation step), and improve docs routing (don't send prose tasks to coding-optimized local models).

---

## 9. Raw Throughput Benchmarks (tokens/sec)

Standalone generation speed measured with identical prompts across all four model tiers. Each model ran 3 iterations (1 cold, 2 hot).

**Prompt:** "Write a Python function that implements a binary search tree with insert, delete, search, and in-order traversal."

### 9.1 Results

| Model | Run 1 | Run 2 | Run 3 | Avg tok/s | Notes |
|-------|-------|-------|-------|-----------|-------|
| **Ollama qwen2.5-coder:32b** | 22.3 | 21.8 | 22.1 | **22.1** | Local GPU (M4 Max), consistent across runs |
| **Claude (claude -p)** | 44.6 (API) / 18.6 (wall) | 41.9 / 14.3 | 25.7 / 6.8 | **37.4 API / 13.2 wall** | API time excludes network overhead; wall-clock includes CLI startup |
| **Kimi (kimi CLI)** | ~1.8 | ~2.8 | ~3.3 | **~2.6** | Agentic mode — writes files, runs tools; tok/s reflects execution time |
| **Codex (codex exec)** | ~0.8 | ~0.7 | ~0.6 | **~0.7** | Agentic mode — full-auto file creation; tok/s reflects execution time |

### 9.2 Interpretation

- **Ollama (local):** Stable 22 tok/s on M4 Max 128GB. No network latency, no rate limits, no cost. Best for blast 1-2 tasks where speed-to-first-token matters.
- **Claude:** Fastest raw generation at ~37 tok/s (API). Wall-clock is lower (~13 tok/s) due to CLI startup overhead and streaming.
- **Kimi / Codex:** Low tok/s numbers are misleading — both operate in agentic mode (writing files, running commands, iterating). Their throughput reflects end-to-end task execution, not pure generation speed. Codex in particular spends most time on sandboxed execution rather than generation.

### 9.3 Routing Implications

| Tier | Model | tok/s | Cost | Best For |
|------|-------|-------|------|----------|
| Local | Ollama qwen2.5-coder:32b | 22 | Free | Blast 1-2: simple scaffolding and boilerplate code (not prose docs — see Section 7) |
| Mid | Kimi | 2.6 (agentic) | Cheap | Blast 3-4: schema design, CRUD |
| Premium-Auto | Codex | 0.7 (agentic) | Mid | Blast 5-6: feature implementation |
| Premium | Claude | 37 (API) | High | Blast 7+: security, architecture, TDD |

---

## 10. Post-Benchmark Fixes (Routing Rules + Conventions)

Three systems were built immediately after the benchmark to close the gaps above.

### 10.1 Routing Rules (`~/.maggy/routing-rules.yaml`)

A self-updating YAML config that overrides blast-score routing for specific task types and pipeline phases. Rules are checked **before** the reward table or blast-score tier.

**Task-type overrides seeded from benchmark evidence:**

| Task Type | Forced To | Why |
|-----------|----------|-----|
| `docs` | claude | Ollama (code-optimized) produced no spec file |
| `security` | claude | Security review needs deep reasoning |
| `tests` | claude | Only claude generated test files in benchmark |
| `architecture` | claude | Architecture needs cross-context awareness |
| `planning` | claude | Planning requires structured reasoning |

**Pipeline phase overrides from TDD workflow:**

| Phase | Forced To | Why |
|-------|----------|-----|
| `spec` | claude | SPEC phase needs comprehensive docs |
| `tdd_red` | claude | RED phase needs test design expertise |
| `tdd_green` | auto | GREEN uses blast-score routing (cheap models can implement) |
| `review` | claude | Review needs security + architecture depth |

**Self-learning:** `record_outcome()` updates rolling success rates per model. `learn_override()` lets Maggy add new rules when outcome data supports it. Manual YAML edits are preserved.

### 10.2 Team Conventions Injection

Five conventions from claude-bootstrap's CLAUDE.md are embedded in routing rules and injected into every prompt sent to any CLI:

1. **mWP** — Build minimum wowable product. No feature flags, no premature abstractions.
2. **TDD** — RED → GREEN → VALIDATE. Coverage >= 80%.
3. **Security** — No secrets in code. Parameterized SQL. Validate input at boundaries.
4. **Quality gates** — 20 lines/fn, 3 params, 2 nesting levels, 200 lines/file.
5. **Existing patterns** — Read codebase before changing. Keep changes minimal.

All four executor prompt methods (`_plan_prompt`, `_analysis_prompt`, `_tests_prompt`, `_impl_prompt`) now append matching conventions. This standardizes quality expectations across kimi, codex, ollama, and claude.

### 10.3 Expected Re-run Improvements

| Benchmark Gap | Root Cause | Fix Applied | Expected Result |
|--------------|-----------|-------------|-----------------|
| No product spec (EXP-1) | `docs` routed to ollama | `docs → claude` override | Claude generates spec |
| No tests from any model | No TDD step in pipeline | `tdd_red → claude` + `tests → claude` overrides | Claude writes failing tests |
| Inconsistent quality across models | No shared standards | Conventions injected into all prompts | mWP + quality gates enforced everywhere |
| No learning from outcomes | Static routing only | `record_outcome()` + `learn_override()` | Routing improves with each task |

**Projected scores if re-run:**

| Dimension | Before | After (est.) | Change |
|-----------|--------|-------------|--------|
| Product spec | 0/10 | 9/10 | `docs → claude` |
| Test coverage | 0/10 | 8/10 | `tdd_red → claude` |
| Security | 10/10 | 10/10 | No change (already strong) |
| Architecture | 8/10 | 9/10 | Conventions enforce patterns |
| **Weighted avg** | **7.4/10** | **~8.5/10** | **+1.1 points** |

Cost efficiency would stay high but fall below the benchmark's ~83% savings — the new overrides force claude for `docs` (EXP-1, raising Claude's share from 1 to 2 of the 6 original tasks) and `tests` (new TDD step), but not for CRUD/API/frontend work.


================================================
FILE: docs/mnemos-implementation.md
================================================
# Mnemos Implementation Addendum

Implementation details for the Mnemos RFC (Task-Scoped Memory Lifecycle for Autonomous Agents) as deployed in Maggy.

## 1. Signal Access in Claude Code

### Token Utilization (Primary Fatigue Signal)

Claude Code exposes context window metrics through **statusline scripts**. When configured, the statusline script receives JSON on stdin for every API call:

```json
{
  "context_window": {
    "used_percentage": 42.5,
    "remaining_percentage": 57.5,
    "used_tokens": 85000,
    "total_tokens": 200000,
    "remaining_tokens": 115000
  }
}
```

**Key discovery**: Hooks (PreToolUse, PreCompact, etc.) do NOT receive context data directly. The solution is a two-stage pipeline:

1. **Statusline script** receives token data on every API call, writes to `.mnemos/fatigue.json`
2. **Hooks** read `.mnemos/fatigue.json` from disk when they fire

This gives near-real-time fatigue monitoring without requiring direct hook access to context metrics.

### Hook System Integration

| Hook | Trigger | Mnemos Action |
|------|---------|--------------|
| Statusline | Every API call | Write `fatigue.json` with token metrics |
| PreToolUse (Edit/Write) | Before file edits | Read fatigue, auto-checkpoint at 0.60+, auto-consolidate at 0.40+ |
| PreCompact | Before compaction | Emergency checkpoint, typed preservation instructions |
| SessionStart | Session begins | Load checkpoint, bridge iCPG state |
| Stop | Agent stops | Write final checkpoint |

## 2. MnemoGraph Architecture

### Node Types and Eviction Policies

| Type | Eviction Policy | Purpose |
|------|----------------|---------|
| GoalNode | NEVER | Task's primary objective |
| ConstraintNode | NEVER | Invariants, contracts, must-not-violate rules |
| ContextNode | EVICTABLE | File contents, tool outputs, ephemeral context |
| WorkingNode | COMPRESS_FIRST | In-progress reasoning, current approach |
| ResultNode | COMPRESS_FIRST | Completed sub-task results |
| SkillNode | COMPRESS_FIRST | Learned patterns (Tier 1+: promotable to persistent) |
| CheckpointNode | NEVER | Serialized session state |
| HandoffNode | NEVER | Task completion summary for successor |

### Activation Weight Decay

All evictable/compressible nodes undergo exponential decay:
- Factor: 0.95 per consolidation pass
- GoalNodes, ConstraintNodes, CheckpointNodes, HandoffNodes exempt
- Touching a node (access) resets weight via `touch_node()`

### Storage

SQLite at `.mnemos/mnemo.db`:
- `mnemo_nodes` — MnemoGraph nodes with type, weight, status, scope_tags
- `checkpoints` — Serialized session state
- `fatigue_log` — Historical fatigue measurements for trending

## 3. Fatigue Model (4 Dimensions — All Passively Observable)

All 4 dimensions are derived from actual hook data. No agent cooperation needed.

### Signal Collection

Hooks log behavioral signals to `.mnemos/signals.jsonl` (append-only JSONL):
- **PreToolUse** logs: `{tool, event: "pre", file_path, ts}` — captures what files the agent touches
- **PostToolUse** logs: `{tool, event: "post", file_path, success, ts}` — captures tool outcomes
- **Statusline** writes: `.mnemos/fatigue.json` with token metrics — captures context window state

Fatigue computation reads the last 30 entries from `signals.jsonl` + `fatigue.json`.

### Dimension Weights

```
composite = 0.40 * token_utilization
          + 0.25 * scope_scatter
          + 0.20 * reread_ratio
          + 0.15 * error_density
```

### Dimension Details

**Token Utilization (0.40)**: `context_window.used_percentage / 100`. Direct from statusline. Most reliable signal — measures how full the context window is.

**Scope Scatter (0.25)**: Ratio of unique directories touched in the last 30 tool calls. Agent editing `src/auth/` exclusively = 0.0 (focused). Agent bouncing across `src/auth/`, `tests/`, `docs/`, `config/`, `lib/` = 0.7+ (scattered, unfocused). Derived from PreToolUse `tool_input.file_path`.

**Re-read Ratio (0.20)**: Proportion of Read tool calls that target files already read in the session. Agent reading `middleware.ts` once then moving on = 0.0 (remembers what it read). Agent reading `middleware.ts` 5 times = 0.8 (4 of the 5 reads are re-reads: lost context, needs to re-read). Derived from PreToolUse when `tool_name=Read`. This is the strongest signal of actual context degradation.

**Error Density (0.15)**: Ratio of failed tool calls to total tool calls in the rolling window. Agent with 100% success = 0.0 (productive). Agent with 50% failures = 0.5 (struggling, confused). Derived from PostToolUse `tool_response` error detection.

### State Thresholds

| State | Score Range | Auto-Actions |
|-------|------------|-------------|
| FLOW | 0.00 to <0.40 | None |
| COMPRESS | 0.40 to <0.60 | Micro-consolidation (compress 3 ResultNodes, evict 1 cold ContextNode, decay weights) |
| PRE-SLEEP | 0.60 to <0.75 | Checkpoint written + consolidation |
| REM | 0.75 to <0.90 | Emergency checkpoint, warning to agent |
| EMERGENCY | 0.90+ | Emergency checkpoint, handoff instruction |

## 4. Checkpoint/Resume Protocol

### CheckpointNode Contents

```json
{
  "id": "uuid",
  "task_id": "session-1",
  "goal": "Implement authentication module",
  "active_constraints": [
    "INV: API backward compatibility",
    "POST: All endpoints require auth token"
  ],
  "active_results": [
    "JWT middleware implemented and tested",
    "User model created with email/password"
  ],
  "current_subgoal": "Add password reset flow",
  "working_memory": "Considering email vs SMS for reset codes...",
  "fatigue_at_checkpoint": 0.62,
  "git_state": {
    "branch": "feat/auth",
    "uncommitted": ["src/auth/middleware.ts", "src/auth/routes.ts"]
  },
  "icpg_state": {
    "active_reason": "abc12345 -- Implement user authentication",
    "unresolved_drift": 2,
    "stats": {"reasons": 5, "symbols": 42, "edges": 48}
  },
  "node_summary": {
    "total": 15, "active": 10, "compressed": 3,
    "by_type": {"goal": 1, "constraint": 3, "result": 4, "working": 2}
  }
}
```

### Resume Format

The SessionStart hook loads `checkpoint-latest.json` and formats it as structured markdown:

```markdown
## Mnemos Session Resume
Checkpoint: abc12345
Fatigue at checkpoint: 0.62

### Goal
Implement authentication module

### Active Constraints (DO NOT VIOLATE)
- INV: API backward compatibility
- POST: All endpoints require auth token

### Current Sub-Goal
Add password reset flow

### Progress So Far
- JWT middleware implemented and tested
- User model created with email/password

### Git State
Branch: feat/auth
Uncommitted files:
  - src/auth/middleware.ts
  - src/auth/routes.ts
```

## 5. iCPG Bridge

Mnemos imports iCPG state via `mnemos bridge-icpg`:

| iCPG Entity | Mnemos Node | Notes |
|-------------|-------------|-------|
| ReasonNode (active) | GoalNode | Content includes iCPG ID reference |
| ReasonNode.invariants | ConstraintNode | Linked to GoalNode |
| ReasonNode.postconditions | ConstraintNode | Linked to GoalNode |
| Unresolved drift count | CheckpointNode.icpg_state | Summary only |
| Graph stats | CheckpointNode.icpg_state | Reasons/symbols/edges counts |

Bridge runs automatically on SessionStart (background) and on-demand via CLI.

## 6. Micro-Consolidation (Tier 0)

Rule-based, no LLM, <100ms target:

1. **Compress**: Take 3 oldest active ResultNodes, set status=COMPRESSED, store first 200 chars as summary, clear content
2. **Evict**: Take 1 cold ContextNode (weight < 0.2, access_count < 3, no scope overlap), set status=EVICTED
3. **Decay**: Apply 0.95 exponential decay to all evictable node weights

Triggered automatically by PreToolUse hook when fatigue >= 0.40.

## 7. Deployment

### Files

```
scripts/mnemos/
  __init__.py          # Package init
  models.py            # MnemoNode, FatigueState, CheckpointNode
  store.py             # SQLite storage (MnemosStore)
  fatigue.py           # 4-dimension fatigue from observable signals
  signals.py           # Behavioral signal collection from hooks
  checkpoint.py        # Checkpoint write/load
  consolidation.py     # Micro-consolidation
  __main__.py          # CLI (mnemos command)

templates/
  mnemos-statusline.sh      # Statusline: writes fatigue.json (token metrics)
  mnemos-pre-edit.sh        # PreToolUse: logs file signal + fatigue check + iCPG
  mnemos-post-tool.sh       # PostToolUse: logs success/failure for error density
  mnemos-session-start.sh   # SessionStart: checkpoint resume
  mnemos-pre-compact.sh     # PreCompact: emergency checkpoint + typed preservation
  mnemos-stop-checkpoint.sh # Stop: final checkpoint

skills/mnemos/SKILL.md      # Skill documentation
commands/mnemos-status.md   # /mnemos-status slash command
commands/mnemos-checkpoint.md # /mnemos-checkpoint slash command
```

### Configuration (settings.json)

Hooks are configured in `.claude/settings.json`. The Mnemos hooks replace the standalone iCPG hooks (mnemos-pre-edit.sh includes iCPG context queries).

### Dependencies

Zero external dependencies. Uses only Python stdlib (sqlite3, json, pathlib, subprocess, dataclasses).

## 8. Future Work (Tier 1+)

Not implemented in this release:
- **Mini-REM consolidation**: LLM-based summarization of WorkingNodes during high fatigue
- **Full REM consolidation**: Cross-task pattern extraction, SkillNode promotion algebra
- **Multi-agent orchestrator protocol**: Checkpoint exchange between agent instances
- **SkillNode promotion**: Automatic promotion of repeated patterns to persistent storage
- **Fatigue prediction**: Use fatigue_log history to predict when checkpoints will be needed


================================================
FILE: docs/polyphony-spec.md
================================================
# Polyphony v0.1 — Multi-Agent Orchestration Specification

## Overview

Polyphony is a container-isolated multi-agent orchestration system for Maggy. Each agent session runs in its own Docker container with a full git clone on its own branch, enabling true parallel execution without conflicts.

## Architecture

Six layers, each with a single responsibility:

```
┌─────────────────────────────────────────┐
│  1. Work Source (GitHub Issues / Local)  │
├─────────────────────────────────────────┤
│  2. Orchestrator (Supervisor Loop)       │
├─────────────────────────────────────────┤
│  3. Router (Task x Policy -> RunSpec)    │
├─────────────────────────────────────────┤
│  4. Identity Broker (Credentials)        │
├─────────────────────────────────────────┤
│  5. Workspace Manager (Git Clones)       │
├─────────────────────────────────────────┤
│  6. Worker Runtime (Docker Containers)   │
└─────────────────────────────────────────┘
```

## §1 — Guiding Principles

- Container isolation per agent session
- Subscription-based auth (not API keys)
- Full git clones (not worktrees) for independence
- Pure function routing (deterministic, testable)
- State machine enforcement for task lifecycle
- Proof-of-work verification before landing

## §2 — Work Sources

Tasks enter the system from:

- **GitHub Issues**: Polled via `gh api`, filtered by label (default: `agent-ready`)
- **Local Queue**: SQLite-backed task queue at `~/.polyphony/queue.db`

Each source implements `poll() -> list[Task]` and `mark_claimed(task_id)`.

## §3 — Domain Models

### Task (§3.1)
Unit of work from a source. Fields: title, source, source_ref, state, task_type, scope, risk, context_tokens, requires_web, metadata.

### Identity (§3.2)
Named credential bundle. Fields: name, volumes (agent_type -> host_path), api_keys, cost_ceiling_usd_per_day.

### AgentProfile (§3.3)
Agent harness configuration. Fields: name, agent_type, cli_command, context_window_tokens, strengths, event_protocol, auth_path.

### RunSpec (§3.4)
Immutable execution specification for one attempt. Fields: task_id, agent, identity, workspace, image, attempt, model, fallback, max_turns, env_overlay, volume_mounts, deadline_seconds.

### Result (§3.5)
Outcome of a single run. Fields: task_id, run_spec_id, agent, status, turns, duration_seconds, cost_usd, artifacts, events.

## §4 — Task State Machine

```
DISCOVERED -> CLAIMED -> ROUTED -> PROVISIONED -> RUNNING -> VERIFYING -> LANDED
                                                     |           |
                                                     v           v
                                                   FAILED --> BLOCKED
                                                     |
                                                     v
                                                   CLAIMED (retry)
```

Terminal states: LANDED, BLOCKED.

Transitions are enforced by `can_transition(current, target)`. Invalid transitions raise `ValueError`.

## §5 — Routing

### §5.1 — Complexity Scoring

Five dimensions, each 0-2, total 0-10:

| Dimension | 0 | 1 | 2 |
|-----------|---|---|---|
| Cyclomatic depth | <10 LOC, 0-1 files | 10-50 LOC, 2-4 files | 50+ LOC, 5+ files |
| Fan-out | 0-2 callers | 3-10 callers | 11+ callers |
| Security boundary | No auth keywords | 1 keyword | 2+ keywords |
| Concurrency | No lock/transaction | 1 keyword | 2+ keywords |
| Domain invariants | Low risk, simple | Medium risk or refactor | High risk |

### §5.2-5.6 — Rule Evaluation

Rules are evaluated top-down. First match wins. Each rule has:
- `match`: Predicate fields (all must match)
- `agent`: Target agent name
- `fallback`: Ordered fallback chain

Default rule applies when no rules match.

## §6 — Workspace Manager

Each task+attempt gets:
- Directory at `{workspace_root}/{sanitized_task_id}/{attempt}/`
- Full `git clone` (with `--reference` and `--dissociate` if mirror available)
- Branch checkout to the specified ref
- Cleanup via `shutil.rmtree`

## §7 — Identity Broker

Resolves named identities to:
- **Volume mounts**: `{host_path}:/home/worker/{path}:ro` per agent type
- **Env overlays**: Environment variable pass-through from api_keys
- **Validation**: Name required, at least one volume required

## §8 — Worker Runtime

### Docker Lifecycle

```
docker create --name polyphony-{task_id}-{attempt} \
  -v {workspace}:/workspace \
  -v {auth_path}:/home/worker/{auth_path}:ro \
  -e {env_vars} \
  {image}

docker start {container_id}
docker wait {container_id}  # blocks until exit
docker logs {container_id}  # collect output
docker rm {container_id}    # cleanup
```

### §8.1 — Claude Adapter
Command: `claude -p --output-format stream-json`
Completion: `{"type": "result"}`
Quota: "rate limit" in output

### §8.2 — Codex Adapter
Command: `codex exec --full-auto`
Completion: `{"status": "completed"}`
Quota: "quota" in output

### §8.3 — Kimi Adapter
Command: `kimi --print -y`
Completion: `{"done": true}`
Quota: "rate limit" in output

## §9 — Event Protocol

Agent output is parsed as NDJSON (newline-delimited JSON). Each line is classified into a `TaskEvent` with kind (message, result, error, unknown) and data.

## §10 — Proof of Work

Before landing, the orchestrator verifies:
- Result status is "succeeded"
- Tests pass (if configured)
- Lint passes (if configured)
- Type check passes (if configured)

A failed verification transitions the task to FAILED (from which it can be retried) or to BLOCKED.

## §11 — Configuration

All configuration in `~/.polyphony/`:

- `config.yaml` — Global settings (workspace root, poll interval, concurrency)
- `identities.yaml` — Named credential bundles
- `agents.yaml` — Agent profiles and CLI commands
- `routing.yaml` — Routing rules and fallback chains

## §12 — Implementation

Core package: `scripts/polyphony/`

Modules: models, state_machine, store, config, scoring, router, identity, workspace, runtime, events, orchestrator, sources/*, adapters/*

CLI entry: `python3 -m polyphony {init|spawn|status|cleanup}`


================================================
FILE: evals/README.md
================================================
# Behavioral Evals

Behavioral evals test whether skills produce the expected coding patterns when loaded into Claude Code. Each eval is a realistic coding task with a rubric.

## Structure

```
evals/
├── run-evals.sh              # Runner script
├── README.md                 # This file
├── {skill-name}/
│   └── scenario-N/
│       ├── task.md            # Coding task description
│       └── criteria.json      # Weighted rubric
```

## Scenario Format

### task.md

A realistic coding task that the skill should influence. Write it as you would a ticket or user request.

### criteria.json

```json
{
  "criteria": [
    {
      "name": "Short description",
      "type": "deterministic",
      "weight": 1.0,
      "check": "grep -q 'pattern' output.py"
    },
    {
      "name": "Code quality description",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Does the output follow X pattern? Answer yes/no with explanation."
    }
  ]
}
```

**Types:**
- `deterministic`: grep/regex/AST checks that can be automated
- `llm_judged`: requires LLM evaluation of output quality

## Running Evals

```bash
# All evals
./run-evals.sh

# Single skill
./run-evals.sh base

# With baseline comparison (with vs without skill)
./run-evals.sh --baseline base
```

## Adding New Evals

1. Create `evals/{skill-name}/scenario-N/`
2. Write `task.md` with a realistic coding task
3. Write `criteria.json` with weighted rubric
4. Test: `./run-evals.sh {skill-name}`

## Coverage

| Skill | Scenarios | Focus |
|-------|-----------|-------|
| base | 2 | Function length, TDD order |
| security | 2 | No hardcoded secrets, proper hashing |
| python | 1 | Type hints, pytest, ruff |
| typescript | 1 | Strict mode, barrel exports |
| react-web | 1 | Component structure, Zustand |
| session-management | 1 | Checkpoint creation |
| code-review | 1 | Review process |
| commit-hygiene | 1 | Atomic commits |
| agent-teams | 1 | Pipeline ordering |
| database-schema | 1 | Schema read before query |
| llm-patterns | 1 | Structured output, retry |
| supabase | 1 | RLS, migrations |
| credentials | 1 | Access.txt, .env.example |
| project-tooling | 1 | CLI verification |
| existing-repo | 1 | Repo analysis before changes |


================================================
FILE: evals/agent-teams/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Pipeline ordering respected",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Was work organized in a logical pipeline (schema/API first, then frontend, then integration)? Or was everything done in a jumbled order? Answer ordered/jumbled."
    },
    {
      "name": "Backend before frontend",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Was the backend API implemented before the frontend component that consumes it? Answer yes/no."
    }
  ]
}


================================================
FILE: evals/agent-teams/scenario-1/task.md
================================================
# Task: Build a REST API with Frontend

Create a full-stack feature:
- Backend: FastAPI endpoint for managing bookmarks (CRUD)
- Frontend: React component to display and manage bookmarks
- Tests for both backend and frontend

This should be broken into clear pipeline stages if using multiple agents.


================================================
FILE: evals/base/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Functions under 50 lines",
      "type": "deterministic",
      "weight": 1.0,
      "check": "No function body exceeds 50 lines"
    },
    {
      "name": "Tests written before or alongside implementation",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Were tests written as part of the implementation? Check for pytest test functions that cover the main endpoints. Answer yes/no."
    },
    {
      "name": "Input validation present",
      "type": "deterministic",
      "weight": 0.5,
      "check": "URL validation exists (regex or pydantic HttpUrl)"
    },
    {
      "name": "No god functions",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Is the code modular with single-responsibility functions, or does it have monolithic handler functions doing everything? Answer modular/monolithic."
    }
  ]
}


================================================
FILE: evals/base/scenario-1/task.md
================================================
# Task: Build a URL Shortener Service

Create a Python URL shortener with these endpoints:
- POST /shorten — accepts a URL, returns a short code
- GET /{code} — redirects to the original URL
- GET /stats/{code} — returns click count

Use FastAPI. Store data in-memory (dict). Include input validation.


================================================
FILE: evals/base/scenario-2/criteria.json
================================================
{
  "criteria": [
    {
      "name": "TDD order followed",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent write or plan tests before the implementation, or at least alongside it? Check tool call order. Answer yes/no."
    },
    {
      "name": "Cursor-based pagination implemented",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Response model includes next_cursor and has_more fields"
    },
    {
      "name": "Limit validation",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Limit parameter has max=100 constraint"
    }
  ]
}


================================================
FILE: evals/base/scenario-2/task.md
================================================
# Task: Add Pagination to an Existing API

You have a FastAPI endpoint that returns all items from a database. Refactor it to support cursor-based pagination with:
- `limit` parameter (default 20, max 100)
- `cursor` parameter (opaque string)
- Response includes `next_cursor` and `has_more`

Write the implementation and tests.


================================================
FILE: evals/code-review/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Self-review performed",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent perform a code review (re-read code, check for issues) before considering the task done? Answer yes/no."
    },
    {
      "name": "File size validation",
      "type": "deterministic",
      "weight": 0.5,
      "check": "File size check exists (10MB limit)"
    },
    {
      "name": "File type validation",
      "type": "deterministic",
      "weight": 0.5,
      "check": "MIME type or extension validation for image types"
    }
  ]
}


================================================
FILE: evals/code-review/scenario-1/task.md
================================================
# Task: Implement a File Upload API

Create a FastAPI file upload endpoint:
- Accept multipart file uploads up to 10MB
- Validate file types (images only: jpg, png, webp)
- Store files locally with unique names
- Return upload metadata (filename, size, path)

After implementation, perform a self-review before committing.


================================================
FILE: evals/commit-hygiene/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Atomic commits",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Were changes committed as separate atomic commits (one per feature: search, sort, URL sync) rather than one big commit? Answer yes/no."
    },
    {
      "name": "Descriptive commit messages",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Do commit messages describe the 'why' not just the 'what'? Are they concise and follow conventional format? Answer yes/no."
    }
  ]
}


================================================
FILE: evals/commit-hygiene/scenario-1/task.md
================================================
# Task: Add Search and Sort to a Product List

You have an existing product listing page. Add:
1. Search by product name (debounced input)
2. Sort by price (asc/desc) and name (A-Z/Z-A)
3. URL query parameter sync for filters

Make atomic commits for each feature.


================================================
FILE: evals/credentials/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Checks Access.txt or .env for keys",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent look for existing API keys in Access.txt, .env, or environment variables before asking the user for them? Answer yes/no."
    },
    {
      "name": ".env.example created or updated",
      "type": "deterministic",
      "weight": 0.5,
      "check": ".env.example file exists with STRIPE_SECRET_KEY placeholder"
    },
    {
      "name": "No hardcoded keys",
      "type": "deterministic",
      "weight": 1.0,
      "check": "No Stripe keys (sk_test_, sk_live_) hardcoded in source"
    }
  ]
}


================================================
FILE: evals/credentials/scenario-1/task.md
================================================
# Task: Integrate Stripe Payment Processing

Add Stripe checkout to an existing e-commerce app:
- Create checkout session endpoint
- Handle webhook for payment confirmation
- Update order status on successful payment

You'll need Stripe API keys to integrate.


================================================
FILE: evals/database-schema/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Schema read before writing queries",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent read existing database schema/models before writing new models or queries? Check tool call order. Answer yes/no."
    },
    {
      "name": "Foreign keys defined",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Comment model has foreign keys to post and user tables"
    },
    {
      "name": "Migration created",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Alembic migration file created for new table"
    }
  ]
}


================================================
FILE: evals/database-schema/scenario-1/task.md
================================================
# Task: Add a Comments Feature to a Blog

An existing blog app has posts. Add comments:
- Each comment belongs to a post and a user
- Support nested replies (one level)
- Add API endpoints for CRUD operations

Use SQLAlchemy with an existing database.


================================================
FILE: evals/existing-repo/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Repo analyzed before changes",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent read and analyze existing code structure (components, styles, state management) before making changes? Answer yes/no."
    },
    {
      "name": "Existing patterns followed",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Do the changes follow the existing codebase conventions (same state management, same styling approach, same file structure)? Answer yes/no."
    },
    {
      "name": "System preference detected",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Uses prefers-color-scheme media query or matchMedia"
    }
  ]
}


================================================
FILE: evals/existing-repo/scenario-1/task.md
================================================
# Task: Add Dark Mode to an Existing React App

An existing React app needs dark mode support:
- Toggle button in the header
- Persist preference in localStorage
- Apply theme to all existing components
- Respect system preference on first visit

Do not break any existing functionality.


================================================
FILE: evals/llm-patterns/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Structured output used",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Uses Pydantic model or JSON schema for LLM response parsing"
    },
    {
      "name": "Retry with backoff",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Retry logic present with exponential backoff (tenacity or manual)"
    },
    {
      "name": "API responses mocked in tests",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Tests mock the OpenAI API rather than making real calls"
    }
  ]
}


================================================
FILE: evals/llm-patterns/scenario-1/task.md
================================================
# Task: Build a Content Classifier

Create a Python service that:
- Takes text input and classifies it into categories (news, opinion, tutorial, review)
- Uses OpenAI API with structured output
- Includes retry logic for API failures
- Returns confidence scores per category

Include tests with mocked API responses.


================================================
FILE: evals/project-tooling/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "CLI tools verified",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent verify that tools (pytest, ruff, etc.) actually work by running them, not just installing them? Answer yes/no."
    },
    {
      "name": "pyproject.toml created",
      "type": "deterministic",
      "weight": 0.5,
      "check": "pyproject.toml exists with project metadata"
    },
    {
      "name": "Ruff configured",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Ruff configuration exists (in pyproject.toml or ruff.toml)"
    }
  ]
}


================================================
FILE: evals/project-tooling/scenario-1/task.md
================================================
# Task: Set Up a New Python Project

Initialize a new Python project with:
- pyproject.toml with dev dependencies
- pytest configuration
- ruff linting configuration
- Pre-commit hooks
- Basic CI workflow

Verify all tools work before committing.


================================================
FILE: evals/python/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Type hints on all public functions",
      "type": "deterministic",
      "weight": 1.0,
      "check": "All public function signatures include type annotations"
    },
    {
      "name": "pytest tests present",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Test file uses pytest (not unittest) with descriptive test names"
    },
    {
      "name": "Ruff-compatible code",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Would this code pass ruff linting with default rules? Check for common issues: unused imports, bare excepts, mutable default args. Answer yes/no."
    }
  ]
}


================================================
FILE: evals/python/scenario-1/task.md
================================================
# Task: Build a CSV Data Processor

Create a Python module that:
- Reads CSV files with configurable delimiters
- Validates rows against a schema (column types, required fields)
- Outputs cleaned data as JSON
- Handles malformed rows gracefully (log and skip)

Include type hints and tests.


================================================
FILE: evals/react-web/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Zustand store used",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Uses zustand create() for state management"
    },
    {
      "name": "Functional components only",
      "type": "deterministic",
      "weight": 0.5,
      "check": "No class components, all function/arrow function components"
    },
    {
      "name": "Proper component decomposition",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Are components properly decomposed (TodoItem, TodoList, FilterBar, etc.) or is everything in one large component? Answer decomposed/monolithic."
    }
  ]
}


================================================
FILE: evals/react-web/scenario-1/task.md
================================================
# Task: Build a Todo App with Filters

Create a React todo app with:
- Add/remove/toggle todos
- Filter: all, active, completed
- Persist to localStorage
- Show count of remaining items

Use functional components, hooks, and Zustand for state.


================================================
FILE: evals/run-evals.sh
================================================
#!/usr/bin/env bash
# Run behavioral evals for Maggy skills.
#
# Usage:
#   ./run-evals.sh                   # Run all evals
#   ./run-evals.sh base              # Run evals for a specific skill
#   ./run-evals.sh --baseline base   # Run with baseline comparison
#
# Requires: tessl CLI (https://tessl.io)

set -euo pipefail

# Absolute path of the directory containing this script, resolved by
# cd-ing into it so that relative invocations still yield a full path.
EVALS_DIR=$(
    cd "$(dirname "$0")" && pwd
)
# Skill definitions are looked up in the "skills" directory that sits
# next to the evals directory.
SKILLS_DIR="$(dirname "${EVALS_DIR}")/skills"

# Option defaults: no baseline comparison, no skill filter (run everything).
BASELINE=false
SKILL_FILTER=""

# Parse command-line arguments.
#   --baseline   run each scenario both without and with the skill loaded
#   --help, -h   print usage and exit 0
#   SKILL_NAME   only run scenarios whose skill directory has this name
#                (if several names are given, the last one wins)
# Any other dash-prefixed argument is rejected with exit status 2, so a
# mistyped flag is not silently taken as a skill name that matches nothing.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --baseline)
            BASELINE=true
            shift
            ;;
        --help|-h)
            echo "Usage: $0 [--baseline] [SKILL_NAME]"
            echo ""
            echo "Options:"
            echo "  --baseline    Compare with/without skill loaded"
            echo "  SKILL_NAME    Run evals for a specific skill only"
            exit 0
            ;;
        -*)
            echo "Error: unknown option: $1" >&2
            echo "Usage: $0 [--baseline] [SKILL_NAME]" >&2
            exit 2
            ;;
        *)
            SKILL_FILTER="$1"
            shift
            ;;
    esac
done

# Check tessl is installed
if ! command -v tessl &>/dev/null; then
    echo "Error: tessl CLI not found. Install from https://tessl.io"
    exit 1
fi

RESULTS_DIR="$EVALS_DIR/.results"
mkdir -p "$RESULTS_DIR"

PASS=0
FAIL=0
SKIP=0

for scenario_dir in "$EVALS_DIR"/*/scenario-*; do
    [ -d "$scenario_dir" ] || continue

    skill_name="$(basename "$(dirname "$scenario_dir")")"

    # Apply filter
    if [[ -n "$SKILL_FILTER" && "$skill_name" != "$SKILL_FILTER" ]]; then
        continue
    fi

    scenario_name="$(basename "$scenario_dir")"
    task_file="$scenario_dir/task.md"
    criteria_file="$scenario_dir/criteria.json"

    if [[ ! -f "$task_file" || ! -f "$criteria_file" ]]; then
        echo "SKIP $skill_name/$scenario_name (missing task.md or criteria.json)"
        ((SKIP++))
        continue
    fi

    echo "--- $skill_name/$scenario_name ---"

    result_file="$RESULTS_DIR/${skill_name}_${scenario_name}.json"

    if $BASELINE; then
        echo "  Running WITHOUT skill..."
        tessl eval run \
            --task "$task_file" \
            --criteria "$criteria_file" \
            --output "$RESULTS_DIR/${skill_name}_${scenario_name}_baseline.json" \
            2>&1 | sed 's/^/  /' || true

        echo "  Running WITH skill..."
        tessl eval run \
            --task "$task_file" \
            --criteria "$criteria_file" \
            --skill "$SKILLS_DIR/$skill_name" \
            --output "$result_file" \
            2>&1 | sed 's/^/  /' || true
    else
        tessl eval run \
            --task "$task_file" \
            --criteria "$criteria_file" \
            --skill "$SKILLS_DIR/$skill_name" \
            --output "$result_file" \
            2>&1 | sed 's/^/  /' || true
    fi

    if [[ -f "$result_file" ]]; then
        ((PASS++))
    else
        ((FAIL++))
    fi
done

echo ""
echo "=== Eval Summary ==="
echo "Pass: $PASS  Fail: $FAIL  Skip: $SKIP"


================================================
FILE: evals/security/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Password hashed with bcrypt or argon2",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Uses bcrypt, argon2, or passlib for password hashing (not md5/sha256)"
    },
    {
      "name": "No hardcoded secrets",
      "type": "deterministic",
      "weight": 1.0,
      "check": "No hardcoded API keys, JWT secrets, or database passwords in source"
    },
    {
      "name": "Password not in response",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Response model excludes password/hash field"
    },
    {
      "name": "Environment variables for secrets",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Are secrets (DB URL, JWT secret) loaded from environment variables or a config file, not hardcoded? Answer yes/no."
    }
  ]
}


================================================
FILE: evals/security/scenario-1/task.md
================================================
# Task: Build User Registration

Create a user registration endpoint:
- Accept email and password
- Store user in database
- Return user ID and email (not password)

Use FastAPI and SQLAlchemy. Include a login endpoint that checks credentials.


================================================
FILE: evals/security/scenario-2/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Keys not logged or exposed",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Are API keys properly protected? Check: not logged in plain text, not returned in full after creation, stored hashed. Answer yes/no with details."
    },
    {
      "name": "Timing-safe comparison",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Uses hmac.compare_digest or secrets.compare_digest for key comparison"
    },
    {
      "name": "Rate limiting implemented",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Rate limiting logic exists with per-key tracking"
    }
  ]
}


================================================
FILE: evals/security/scenario-2/task.md
================================================
# Task: Add API Key Authentication

Add API key authentication middleware to an existing FastAPI app:
- Keys stored in database with user association
- Rate limiting per key (100 req/min)
- Key rotation support (old key valid for 24h after rotation)
- Admin endpoint to create/revoke keys


================================================
FILE: evals/session-management/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Session state checkpoint created",
      "type": "llm_judged",
      "weight": 1.0,
      "prompt": "Did the agent create or update session state files (current-state.md or similar) during implementation? Answer yes/no."
    },
    {
      "name": "State persisted across refresh",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Uses localStorage, sessionStorage, or similar persistence for form state"
    }
  ]
}


================================================
FILE: evals/session-management/scenario-1/task.md
================================================
# Task: Build a Multi-Step Form Wizard

Create a React multi-step form (3 steps: personal info, address, review) with:
- Step navigation (next/back)
- Data persistence across steps
- Validation per step
- Summary on final step

The session should be resumable if the user refreshes.


================================================
FILE: evals/supabase/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "RLS policies created",
      "type": "deterministic",
      "weight": 1.0,
      "check": "SQL includes CREATE POLICY or ALTER TABLE ... ENABLE ROW LEVEL SECURITY"
    },
    {
      "name": "Migration file created",
      "type": "deterministic",
      "weight": 1.0,
      "check": "Migration file exists in supabase/migrations/"
    },
    {
      "name": "Profile linked to auth.users",
      "type": "deterministic",
      "weight": 0.5,
      "check": "Foreign key reference to auth.users(id)"
    }
  ]
}


================================================
FILE: evals/supabase/scenario-1/task.md
================================================
# Task: Build a User Profile System

Create a Supabase-backed user profile system:
- profiles table linked to auth.users
- RLS policies: users can only read/update their own profile
- Edge function for profile avatar upload
- Migration file for the table

Use the Supabase CLI for migrations.


================================================
FILE: evals/typescript/scenario-1/criteria.json
================================================
{
  "criteria": [
    {
      "name": "Strict TypeScript mode",
      "type": "deterministic",
      "weight": 1.0,
      "check": "tsconfig.json has strict: true"
    },
    {
      "name": "Barrel export from index.ts",
      "type": "deterministic",
      "weight": 0.5,
      "check": "index.ts exists with re-exports"
    },
    {
      "name": "Proper generic types",
      "type": "llm_judged",
      "weight": 0.5,
      "prompt": "Does the task queue use proper TypeScript generics for task payloads and results, avoiding 'any' type? Answer yes/no."
    }
  ]
}


================================================
FILE: evals/typescript/scenario-1/task.md
================================================
# Task: Build a Task Queue Library

Create a TypeScript task queue that:
- Accepts async functions with priority levels
- Processes tasks with configurable concurrency
- Supports retry with exponential backoff
- Emits events: task:start, task:complete, task:fail

Export types and the main class from an index.ts barrel file.


================================================
FILE: hooks/post-commit-graph
================================================
#!/bin/bash

# Post-Commit Graph Update Hook
#
# Triggers incremental codebase-memory-mcp graph update after each commit.
# This hook is LIGHTWEIGHT (~10ms) — it does NOT run the MCP server or
# any heavy process. It touches a marker file that the already-running
# codebase-memory-mcp file watcher picks up.
#
# Installed by: /initialize-project or ~/.claude/install-hooks.sh
# Remove with: rm .git/hooks/post-commit (or remove the code-graph section)
#
# Always exits 0.

# Skip if code graph is not configured for this project
if [ ! -f ".mcp.json" ] || ! grep -q "codebase-memory" ".mcp.json" 2>/dev/null; then
    exit 0
fi

# Get list of committed files. --root makes a commit without a parent (the
# first commit of a repository) list its files as well; without it diff-tree
# prints nothing for such a commit and the hook would silently do nothing.
COMMITTED_FILES=$(git diff-tree --root --no-commit-id --name-only -r HEAD 2>/dev/null)

if [ -z "$COMMITTED_FILES" ]; then
    exit 0
fi

# Filter to code files only (skip configs, docs, images, etc.)
CODE_EXTENSIONS='\.ts$|\.tsx$|\.js$|\.jsx$|\.py$|\.go$|\.rs$|\.java$|\.rb$|\.php$|\.swift$|\.kt$|\.c$|\.cpp$|\.h$|\.hpp$|\.cs$|\.scala$|\.lua$|\.vue$|\.svelte$'

# grep -c prints the number of matching paths (0 when none match).
FILE_COUNT=$(printf '%s\n' "$COMMITTED_FILES" | grep -cE "$CODE_EXTENSIONS")

if [ "${FILE_COUNT:-0}" -eq 0 ]; then
    exit 0
fi

# Touch marker file for codebase-memory-mcp file watcher
# This is the lightest possible signal — no blocking, no spawning processes
if [ -d ".code-graph" ]; then
    touch ".code-graph/.needs-update" 2>/dev/null || true
fi

echo "code-graph: update queued ($FILE_COUNT code files changed)"

exit 0


================================================
FILE: hooks/pre-push
================================================
#!/bin/bash

# Claude Code Review - Pre-Push Hook
# Runs /code-review on changes before pushing to remote
# Blocks push if Critical or High severity issues are found
#
# Git calls this hook with the remote name and URL as $1 and $2 (not used
# here) and writes one line per pushed ref to stdin:
#   <local ref> <local sha> <remote ref> <remote sha>
# Exit status: 0 allows the push, 1 blocks it.

set -e

# Colors
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Temp file holding the review of the ref currently being processed.
# The EXIT trap removes it on every exit path, including a blocked push.
review_output=""
cleanup() {
    if [ -n "$review_output" ]; then
        rm -f "$review_output"
    fi
}
trap cleanup EXIT

# Print up to five advisory (medium/low) lines from the review, if any.
show_advisories() {
    if grep -qE '🟡|🟢' "$review_output"; then
        echo ""
        echo -e "${YELLOW}ℹ️  Advisory issues (non-blocking):${NC}"
        grep -E '🟡|🟢' "$review_output" | head -5
    fi
}

# Print the full review under the headline given in $1 and abort the push.
block_push() {
    echo -e "${RED}❌ PUSH BLOCKED - $1${NC}"
    echo ""
    cat "$review_output"
    echo ""
    echo -e "${RED}Fix critical/high issues before pushing.${NC}"
    exit 1
}

# Print the first number that follows the severity label $1 in the review
# ("| Critical | 2 |" or "Critical: 2"), or 0 when there is none.
# Uses only -o/-E so it also works with greps that lack -P (PCRE).
severity_count() {
    local count
    count=$(grep -oE "$1[:[:space:]|]+[0-9]+" "$review_output" | head -1 | grep -oE '[0-9]+$' || true)
    echo "${count:-0}"
}

echo ""
echo "🔍 Running Claude Code Review before push..."
echo ""

# Read stdin to get refs being pushed
while read -r local_ref local_sha remote_ref remote_sha; do
    # An all-zero object name means "no object"; matching any run of zeros
    # covers both 40-digit and 64-digit hashes.
    if [[ "$local_sha" =~ ^0+$ ]]; then
        # Branch is being deleted, skip
        continue
    fi

    if [[ "$remote_sha" =~ ^0+$ ]]; then
        # New branch, compare against default branch. The lookup result is
        # checked explicitly: piping it through sed would hide a failure and
        # leave an empty branch name instead of the "main" fallback.
        head_ref=$(git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null || true)
        base_ref=${head_ref#refs/remotes/origin/}
        range="origin/${base_ref:-main}...$local_sha"
    else
        # Existing branch, compare against remote
        range="$remote_sha...$local_sha"
    fi

    # Get changed files
    changed_files=$(git diff --name-only "$range" 2>/dev/null | grep -E '\.(ts|tsx|js|jsx|py|go|rs|java|rb|php|swift|kt)$' || true)

    if [ -z "$changed_files" ]; then
        echo -e "${GREEN}✅ No code files to review${NC}"
        # Move on to the next ref instead of exiting, so every pushed ref
        # gets reviewed.
        continue
    fi

    file_count=$(echo "$changed_files" | wc -l | tr -d ' ')
    echo "📁 Reviewing $file_count file(s)..."
    echo ""

    # Without the CLI no ref can be reviewed; allow the push.
    if ! command -v claude &> /dev/null; then
        echo -e "${YELLOW}⚠️  Claude CLI not found. Skipping code review.${NC}"
        echo "   Install: npm install -g @anthropic-ai/claude-code"
        exit 0
    fi

    review_output=$(mktemp)

    # Run code review with --print flag for non-interactive output.
    # stdin is redirected from /dev/null so the child process cannot consume
    # the remaining ref lines this loop still has to read.
    if claude --print "/code-review $changed_files" > "$review_output" 2>&1 < /dev/null; then
        # Check the explicit Status line first (most reliable)
        if grep -q "Status: ✅ PASS" "$review_output"; then
            echo -e "${GREEN}✅ Code review passed${NC}"
            show_advisories
        elif grep -q "Status: ❌" "$review_output"; then
            block_push "Critical/High issues found"
        else
            # Fallback: parse severity counts from the summary table
            critical_count=$(severity_count "Critical")
            high_count=$(severity_count "High")

            if [ "$critical_count" -gt 0 ] || [ "$high_count" -gt 0 ]; then
                block_push "Critical: $critical_count, High: $high_count"
            else
                echo -e "${GREEN}✅ Code review passed${NC}"
                show_advisories
            fi
        fi
    else
        echo -e "${YELLOW}⚠️  Code review failed to run. Allowing push.${NC}"
        echo "   Check Claude CLI configuration."
    fi

    rm -f "$review_output"
    review_output=""
done

echo ""
exit 0


================================================
FILE: hooks/workspace/check-contract-freshness.sh
================================================
#!/bin/bash

# Contract Freshness Check - Session Start Hook
# Checks if workspace contracts are stale and advises user
# Run time: ~5 seconds
#
# Advisory only: prints hints and always exits 0.

WORKSPACE_DIR="_project_specs/workspace"
STALENESS_THRESHOLD=86400  # 24 hours in seconds
WARNING_THRESHOLD=604800   # 7 days in seconds

# Colors
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# Print the modification time of file $1 in seconds since the epoch.
# GNU syntax (-c %Y) is tried first. With GNU stat, "stat -f %m FILE" means
# "file-system status": it fails on the literal name "%m" but still prints a
# multi-line report for FILE to stdout, which would corrupt the captured
# value. BSD/macOS stat rejects -c without writing to stdout, so falling back
# to -f %m afterwards is clean.
file_mtime() {
    stat -c %Y "$1" 2>/dev/null || stat -f %m "$1" 2>/dev/null
}

# Check if workspace is configured
if [ ! -f "$WORKSPACE_DIR/CONTRACTS.md" ]; then
    # No workspace configured - silent exit
    exit 0
fi

if [ ! -f "$WORKSPACE_DIR/.contract-sources" ]; then
    echo -e "${YELLOW}⚠️  Workspace configured but no contract sources defined${NC}"
    echo "   Run /analyze-workspace to set up contract monitoring"
    exit 0
fi

# Get last analysis timestamp
LAST_ANALYSIS=$(file_mtime "$WORKSPACE_DIR/CONTRACTS.md")
if ! [[ "$LAST_ANALYSIS" =~ ^[0-9]+$ ]]; then
    # Timestamp unavailable: nothing meaningful to compare against.
    exit 0
fi
NOW=$(date +%s)
AGE=$((NOW - LAST_ANALYSIS))

# Check for stale analysis
if [ "$AGE" -gt "$WARNING_THRESHOLD" ]; then
    DAYS=$((AGE / 86400))
    echo -e "${RED}📅 Workspace contracts are ${DAYS} days old${NC}"
    echo "   Run /analyze-workspace for full refresh"
    echo ""
fi

# Check if any contract sources changed since last sync
CHANGED_FILES=""
CHANGED_COUNT=0

# "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
while IFS= read -r source || [ -n "$source" ]; do
    # Skip comments and empty lines
    [[ "$source" =~ ^#.*$ ]] && continue
    [[ -z "$source" ]] && continue

    if [ -f "$source" ]; then
        SOURCE_MTIME=$(file_mtime "$source")
        # Skip sources whose timestamp could not be read.
        [[ "$SOURCE_MTIME" =~ ^[0-9]+$ ]] || continue
        if [ "$SOURCE_MTIME" -gt "$LAST_ANALYSIS" ]; then
            CHANGED_FILES="$CHANGED_FILES\n  - $source"
            CHANGED_COUNT=$((CHANGED_COUNT + 1))
        fi
    fi
done < "$WORKSPACE_DIR/.contract-sources"

# Report changes
if [ "$CHANGED_COUNT" -gt 0 ]; then
    echo -e "${YELLOW}🔄 Contract sources changed since last sync:${NC}"
    echo -e "$CHANGED_FILES"
    echo ""
    echo -e "   Run ${BLUE}/sync-contracts${NC} to update"
    echo ""
elif [ "$AGE" -gt "$STALENESS_THRESHOLD" ]; then
    HOURS=$((AGE / 3600))
    echo -e "${YELLOW}📅 Last contract sync: ${HOURS} hours ago${NC}"
    echo -e "   Consider running ${BLUE}/sync-contracts${NC}"
    echo ""
else
    # Fresh - silent success
    :
fi

exit 0


================================================
FILE: hooks/workspace/check-graph-freshness.sh
================================================
#!/bin/bash

# Check Graph Freshness - Session Start Advisory
#
# Compares the timestamp of the latest commit with the time the code graph
# was last updated and prints a one-line colored verdict.
# Meant to run at session start; always exits 0.

YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m'

graph_dir=".code-graph"
stamp_file="$graph_dir/.last-updated"

# Nothing to do unless .mcp.json exists and mentions codebase-memory
[ -f ".mcp.json" ] && grep -q "codebase-memory" ".mcp.json" 2>/dev/null || exit 0

# Graph configured but never built: say so and stop
if [ ! -d "$graph_dir" ]; then
    echo -e "${YELLOW}code-graph: No graph data found. Run index_repository via MCP to build.${NC}"
    exit 0
fi

# Commit time of HEAD in epoch seconds ("0" when git cannot tell)
head_ts=$(git log -1 --format=%ct 2>/dev/null || echo "0")

# Graph timestamp: content of the marker file when present, otherwise the
# directory's modification time read with the platform's stat syntax
if [ -f "$stamp_file" ]; then
    graph_ts=$(cat "$stamp_file" 2>/dev/null || echo "0")
else
    case "$(uname)" in
        Darwin)
            # macOS: stat -f %m
            graph_ts=$(stat -f %m "$graph_dir/" 2>/dev/null || echo "0")
            ;;
        *)
            # Linux: stat -c %Y
            graph_ts=$(stat -c %Y "$graph_dir/" 2>/dev/null || echo "0")
            ;;
    esac
fi

# Seconds by which the graph trails the latest commit
lag=$((head_ts - graph_ts))

if [ "$lag" -gt 300 ]; then
    # Over five minutes behind
    lag_minutes=$((lag / 60))
    echo -e "${YELLOW}code-graph: Graph may be stale (~${lag_minutes}m behind latest commit)${NC}"
    echo "  The MCP file watcher should auto-update."
    echo "  If stale, use index_repository to rebuild."
elif [ "$lag" -gt 60 ]; then
    # Between one and five minutes behind: informational only
    echo -e "${YELLOW}code-graph: Graph is slightly behind latest commit (auto-updating)${NC}"
else
    echo -e "${GREEN}code-graph: Graph data is fresh${NC}"
fi

exit 0


================================================
FILE: hooks/workspace/post-commit-contracts.sh
================================================
#!/bin/bash

# Post-Commit Contract Sync Hook
# Automatically syncs contracts when contract source files are committed
# Run time: ~15 seconds (only when contracts change)
#
# Reads the list of contract sources (one path per line, "#" comments and
# blank lines ignored) from $WORKSPACE_DIR/.contract-sources.
# Always exits 0.

WORKSPACE_DIR="_project_specs/workspace"

# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Check if workspace is configured
if [ ! -f "$WORKSPACE_DIR/.contract-sources" ]; then
    exit 0
fi

# Get list of committed files. --root makes a commit without a parent (the
# repository's first commit) list its files too.
COMMITTED_FILES=$(git diff-tree --root --no-commit-id --name-only -r HEAD 2>/dev/null)

if [ -z "$COMMITTED_FILES" ]; then
    exit 0
fi

# Succeeds when contract source $1 was touched by the commit: either a
# committed path equals it, or it names a directory containing a committed
# path. Paths are compared literally, not as regular expressions, so
# "api.ts" does not match "myapi.tsx".
source_committed() {
    local wanted=${1%/}
    local path
    while IFS= read -r path; do
        if [[ "$path" == "$wanted" || "$path" == "$wanted"/* ]]; then
            return 0
        fi
    done <<< "$COMMITTED_FILES"
    return 1
}

# Check if any committed files are contract sources.
# An array keeps paths containing spaces intact.
CHANGED_SOURCES=()

# "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
while IFS= read -r source || [ -n "$source" ]; do
    # Skip comments and empty lines
    [[ "$source" =~ ^#.*$ ]] && continue
    [[ -z "$source" ]] && continue

    if source_committed "$source"; then
        CHANGED_SOURCES+=("$source")
    fi
done < "$WORKSPACE_DIR/.contract-sources"

# If contracts changed, run lightweight sync
if [ "${#CHANGED_SOURCES[@]}" -gt 0 ]; then
    echo ""
    echo -e "${YELLOW}📝 Contract files changed in this commit:${NC}"
    for src in "${CHANGED_SOURCES[@]}"; do
        echo "   - $src"
    done
    echo ""

    # Check if Claude CLI is available
    if command -v claude &> /dev/null; then
        echo -e "${BLUE}⚡ Running lightweight contract sync...${NC}"

        # Run sync in silent/lightweight mode
        if claude --print "/sync-contracts --lightweight" > /dev/null 2>&1; then
            echo -e "${GREEN}✅ Contracts synced${NC}"
        else
            echo -e "${YELLOW}⚠️  Contract sync failed - run /sync-contracts manually${NC}"
        fi
    else
        echo -e "${YELLOW}⚠️  Claude CLI not found${NC}"
        echo "   Run /sync-contracts manually to update contracts"
    fi
    echo ""
fi

exit 0


================================================
FILE: hooks/workspace/pre-push-contracts.sh
================================================
#!/bin/bash

# Pre-Push Contract Validation Hook
# Validates contract consistency before pushing
# Blocks push if contracts are out of sync
# Run time: ~10 seconds
#
# Exit status: 1 when a contract source is newer than CONTRACTS.md,
# 0 otherwise (warnings never block the push).

WORKSPACE_DIR="_project_specs/workspace"

# Colors
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

# Print the modification time of file $1 in seconds since the epoch.
# GNU syntax (-c %Y) is tried first. With GNU stat, "stat -f %m FILE" means
# "file-system status": it fails on the literal name "%m" but still prints a
# multi-line report for FILE to stdout, which would corrupt the captured
# value. BSD/macOS stat rejects -c without writing to stdout, so falling back
# to -f %m afterwards is clean.
file_mtime() {
    stat -c %Y "$1" 2>/dev/null || stat -f %m "$1" 2>/dev/null
}

# Check if workspace is configured
if [ ! -f "$WORKSPACE_DIR/CONTRACTS.md" ]; then
    exit 0
fi

if [ ! -f "$WORKSPACE_DIR/.contract-sources" ]; then
    exit 0
fi

echo ""
echo -e "${BLUE}🔍 Validating workspace contracts...${NC}"

VALIDATION_ERRORS=""
WARNING_COUNT=0
ERROR_COUNT=0

# Get last sync timestamp
LAST_SYNC=$(file_mtime "$WORKSPACE_DIR/CONTRACTS.md")

# Check if any contract sources changed since last sync
STALE_SOURCES=""
# "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
while IFS= read -r source || [ -n "$source" ]; do
    # Skip comments and empty lines
    [[ "$source" =~ ^#.*$ ]] && continue
    [[ -z "$source" ]] && continue

    if [ -f "$source" ]; then
        SOURCE_MTIME=$(file_mtime "$source")
        # Compare only when both timestamps were read successfully.
        if [[ "$SOURCE_MTIME" =~ ^[0-9]+$ && "$LAST_SYNC" =~ ^[0-9]+$ ]] \
            && [ "$SOURCE_MTIME" -gt "$LAST_SYNC" ]; then
            STALE_SOURCES="$STALE_SOURCES\n   - $source"
            ERROR_COUNT=$((ERROR_COUNT + 1))
        fi
    else
        VALIDATION_ERRORS="$VALIDATION_ERRORS\n⚠️  Contract source missing: $source"
        WARNING_COUNT=$((WARNING_COUNT + 1))
    fi
done < "$WORKSPACE_DIR/.contract-sources"

# Check OpenAPI consistency (if exists)
OPENAPI_FILE=""
if [ -f "apps/api/openapi.json" ]; then
    OPENAPI_FILE="apps/api/openapi.json"
elif [ -f "openapi.json" ]; then
    OPENAPI_FILE="openapi.json"
fi

if [ -n "$OPENAPI_FILE" ] && command -v jq &> /dev/null; then
    ACTUAL_ENDPOINTS=$(jq -r '.paths | keys | length' "$OPENAPI_FILE" 2>/dev/null || echo "0")
    # grep -c already prints "0" (with exit status 1) when nothing matches,
    # so only an empty result (unreadable file) needs a default; appending a
    # second "0" would make the value "0\n0".
    DOCUMENTED_ENDPOINTS=$(grep -cE "^\| (GET|POST|PUT|PATCH|DELETE)" "$WORKSPACE_DIR/CONTRACTS.md" 2>/dev/null)
    DOCUMENTED_ENDPOINTS=${DOCUMENTED_ENDPOINTS:-0}

    if [ "$ACTUAL_ENDPOINTS" != "0" ] && [ "$DOCUMENTED_ENDPOINTS" != "0" ]; then
        if [ "$ACTUAL_ENDPOINTS" != "$DOCUMENTED_ENDPOINTS" ]; then
            VALIDATION_ERRORS="$VALIDATION_ERRORS\n⚠️  Endpoint count mismatch: OpenAPI has $ACTUAL_ENDPOINTS, CONTRACTS.md has $DOCUMENTED_ENDPOINTS"
            WARNING_COUNT=$((WARNING_COUNT + 1))
        fi
    fi
fi

# Report results
if [ "$ERROR_COUNT" -gt 0 ]; then
    echo -e "${RED}❌ Contract sources changed but not synced:${NC}"
    echo -e "$STALE_SOURCES"
    echo ""
    echo -e "${RED}Run /sync-contracts before pushing${NC}"
    echo -e "Or bypass with: ${YELLOW}git push --no-verify${NC}"
    echo ""
    exit 1
fi

if [ "$WARNING_COUNT" -gt 0 ]; then
    echo -e "${YELLOW}⚠️  Validation warnings:${NC}"
    echo -e "$VALIDATION_ERRORS"
    echo ""
    echo -e "${YELLOW}Consider running /sync-contracts${NC}"
    echo ""
    # Warnings don't block push
fi

# Reaching this point means no blocking errors were found.
echo -e "${GREEN}✅ Contracts validated${NC}"

exit 0


================================================
FILE: install.sh
================================================
#!/bin/bash

# Maggy Installer
#
# Copies commands, skills, rules, hooks and templates from this checkout
# into ~/.claude. Aborts on the first failing command (set -e).

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CLAUDE_DIR="$HOME/.claude"

echo "Installing Maggy v4.0.0..."
echo ""

# Create directories first: on a fresh machine ~/.claude does not exist yet,
# and writing into it before creating it would abort the install under set -e.
mkdir -p \
    "$CLAUDE_DIR/commands" \
    "$CLAUDE_DIR/skills" \
    "$CLAUDE_DIR/hooks" \
    "$CLAUDE_DIR/rules"

# Save bootstrap directory location for other scripts
printf '%s\n' "$SCRIPT_DIR" > "$CLAUDE_DIR/.bootstrap-dir"

# Copy all commands
cp "$SCRIPT_DIR/commands/"*.md "$CLAUDE_DIR/commands/"
echo "✓ Installed commands:"
# List installed commands as "/name" (prefix added, .md suffix stripped)
ls -1 "$CLAUDE_DIR/commands/" | sed -e 's/^/  - \//' -e 's/\.md$//'

# Copy skills (folder structure with SKILL.md)
# The skills directory is recreated from scratch on every install.
echo ""
echo "Installing skills..."
rm -rf "${CLAUDE_DIR:?}/skills"
mkdir -p "$CLAUDE_DIR/skills"
skill_count=0
for skill_dir in "$SCRIPT_DIR/skills"/*/; do
    # Only folders that contain a SKILL.md count as skills
    if [ -d "$skill_dir" ] && [ -f "$skill_dir/SKILL.md" ]; then
        # Strip the trailing slash so cp copies the folder itself
        cp -r "${skill_dir%/}" "$CLAUDE_DIR/skills/"
        skill_count=$((skill_count + 1))
    fi
done
echo "✓ Installed $skill_count skills (folder/SKILL.md structure)"

# Cross-tool skill installation (Kimi CLI, Codex CLI)
# Detection is best-effort: a missing or failing detector yields an empty list.
DETECTED_AGENTS=$("$SCRIPT_DIR/scripts/detect-agents.sh" 2>/dev/null || true)

if echo "$DETECTED_AGENTS" | grep -q "kimi"; then
    "$SCRIPT_DIR/scripts/install-skills.sh" "$HOME/.kimi/skills" "$SCRIPT_DIR/skills"
    echo "  Also installed skills to ~/.kimi/skills/ (Kimi CLI)"
fi

if echo "$DETECTED_AGENTS" | grep -q "codex"; then
    "$SCRIPT_DIR/scripts/install-skills.sh" "$HOME/.codex/skills" "$SCRIPT_DIR/skills"
    echo "  Also installed skills to ~/.codex/skills/ (Codex CLI)"
fi

# Copy conditional rules
# The rules directory is recreated from scratch on every install.
echo ""
echo "Installing conditional rules..."
rm -rf "${CLAUDE_DIR:?}/rules"
mkdir -p "$CLAUDE_DIR/rules"
rule_count=0
for rule_file in "$SCRIPT_DIR/rules/"*.md; do
    # Guards against the unexpanded pattern when no rule files exist
    if [ -f "$rule_file" ]; then
        cp "$rule_file" "$CLAUDE_DIR/rules/"
        rule_count=$((rule_count + 1))
    fi
done
echo "✓ Installed $rule_count conditional rules (with paths: frontmatter)"
ls -1 "$CLAUDE_DIR/rules/" | sed -e 's/^/  - /' -e 's/\.md$//'

# Copy hooks (best-effort; cp without -r skips subdirectories)
cp "$SCRIPT_DIR/hooks/"* "$CLAUDE_DIR/hooks/" 2>/dev/null || true
chmod +x "$CLAUDE_DIR/hooks/"* 2>/dev/null || true
echo ""
echo "✓ Installed git hooks (templates)"

# Copy templates (best-effort)
echo ""
echo "Installing templates..."
mkdir -p "$CLAUDE_DIR/templates"
cp "$SCRIPT_DIR/templates/"* "$CLAUDE_DIR/templates/" 2>/dev/null || true
for template_script in tdd-loop-check.sh pre-compact.sh codex-auto-review.sh; do
    chmod +x "$CLAUDE_DIR/templates/$template_script" 2>/dev/null || true
done
echo "✓ Installed templates (CLAUDE.md, AGENTS.md, CLAUDE.local.md, settings.json, config.toml)"

# Cross-tool config installation
if echo "$DETECTED_AGENTS" | grep -q "kimi"; then
    mkdir -p "$HOME/.kimi"
    cp "$SCRIPT_DIR/templates/config.toml" "$HOME/.kimi/config.toml.bootstrap" 2>/dev/null || true
    echo "  Kimi: hooks template at ~/.kimi/config.toml.bootstrap"
fi

if echo "$DETECTED_AGENTS" | grep -q "codex"; then
    # Create the target directory up front instead of letting the first copy
    # fail and retrying afterwards.
    mkdir -p "$HOME/.codex/templates"
    cp "$SCRIPT_DIR/templates/AGENTS.md" "$HOME/.codex/templates/AGENTS.md"
    echo "  Codex: AGENTS.md template at ~/.codex/templates/"
fi

# Copy hook installer script
cp "$SCRIPT_DIR/scripts/install-hooks.sh" "$CLAUDE_DIR/" 2>/dev/null || true
chmod +x "$CLAUDE_DIR/install-hooks.sh" 2>/dev/null || true

# Copy graph tools installer
cp "$SCRIPT_DIR/scripts/install-graph-tools.sh" "$CLAUDE_DIR/" 2>/dev/null || true
chmod +x "$CLAUDE_DIR/install-graph-tools.sh" 2>/dev/null || true

# Install Polyphony CLI shim
# Writes ~/.local/bin/polyphony, a small launcher that puts this checkout's
# scripts/ directory on Python's import path and runs polyphony's main().
POLYPHONY_SRC="$SCRIPT_DIR/scripts/polyphony"
if [ -f "$POLYPHONY_SRC/__main__.py" ]; then
    INSTALL_DIR="$HOME/.local/bin"
    mkdir -p "$INSTALL_DIR"
    # The scripts directory is handed to Python as a shell-quoted argument
    # rather than spliced into the Python source, so a checkout path that
    # contains quotes or backslashes cannot break the generated launcher.
    # sys.argv.pop(1) removes it again before main() looks at the arguments.
    POLYPHONY_PATH_QUOTED=$(printf '%q' "$SCRIPT_DIR/scripts")
    cat > "$INSTALL_DIR/polyphony" << SHIM
#!/bin/bash
exec python3 -c "import sys; sys.path.insert(0, sys.argv.pop(1)); from polyphony.__main__ import main; sys.exit(main())" $POLYPHONY_PATH_QUOTED "\$@"
SHIM
    chmod +x "$INSTALL_DIR/polyphony"
    echo ""
    echo "✓ Installed polyphony CLI shim"

    # Create default config if missing (cp -n never overwrites existing files)
    if [ ! -d "$HOME/.polyphony" ]; then
        mkdir -p "$HOME/.polyphony"
        cp -n "$SCRIPT_DIR/templates/polyphony-config.yaml" "$HOME/.polyphony/config.yaml" 2>/dev/null || true
        cp -n "$SCRIPT_DIR/templates/polyphony-identities.yaml" "$HOME/.polyphony/identities.yaml" 2>/dev/null || true
        cp -n "$SCRIPT_DIR/templates/polyphony-agents.yaml" "$HOME/.polyphony/agents.yaml" 2>/dev/null || true
        cp -n "$SCRIPT_DIR/templates/polyphony-routing.yaml" "$HOME/.polyphony/routing.yaml" 2>/dev/null || true
        echo "✓ Created ~/.polyphony/ config"
    fi
fi

# Post-install check: run the quick structure validation when the script
# ships with this checkout; a failure only prints a hint and never aborts.
echo ""
echo "Running validation..."
if [ -f "$SCRIPT_DIR/tests/validate-structure.sh" ]; then
    validation_status=0
    "$SCRIPT_DIR/tests/validate-structure.sh" --quick || validation_status=$?
    echo ""
    if [ "$validation_status" -ne 0 ]; then
        echo "⚠ Validation found issues. Run full validation:"
        echo "  $SCRIPT_DIR/tests/validate-structure.sh --full"
    fi
fi

# Final summary: release notes, usage, and per-tool availability.

# Succeeds when the detection output captured earlier mentions tool $1.
agent_detected() {
    echo "$DETECTED_AGENTS" | grep -q "$1"
}

printf '%s\n' \
    "" \
    "================================================================" \
    "  Installation complete! (v4.0.0)" \
    "================================================================" \
    "" \
    "What's new in v4.0.0:" \
    "  - Polyphony: container-isolated parallel agents (Docker/OrbStack)" \
    "  - /spawn-team now uses Polyphony by default (fallback to native)" \
    "  - polyphony CLI: init, spawn, status, cleanup" \
    "  - Cross-tool support: Claude Code + Kimi CLI + Codex CLI" \
    "" \
    "Usage:" \
    "  1. Open any project folder" \
    "  2. Run: claude (or kimi, or codex)" \
    "  3. Type: /initialize-project" \
    "" \
    "Commands installed:" \
    "  /initialize-project   - Full project setup (includes Polyphony)" \
    "  /spawn-team           - Spawn agent team (containers by default)" \
    "  /sync-agents          - Sync config between Claude/Kimi/Codex" \
    "  /check-contributors   - Team coordination" \
    "  /update-code-index    - Regenerate code index" \
    "" \
    "Polyphony CLI:" \
    "  polyphony init        - Create ~/.polyphony/ config" \
    "  polyphony spawn       - Create and route a task" \
    "  polyphony status      - Show task states" \
    "  polyphony cleanup     - Remove completed workspaces" \
    "" \
    "Container isolation (Polyphony):"

# Docker is reported in preference to OrbStack when both are detected
if agent_detected "docker"; then
    echo "  [OK] Docker    - container isolation available"
elif agent_detected "orbstack"; then
    echo "  [OK] OrbStack  - container isolation available"
else
    echo "  [--] Docker    - not found (brew install --cask docker)"
fi

if agent_detected "polyphony"; then
    echo "  [OK] Polyphony - CLI installed"
else
    echo "  [--] Polyphony - CLI shim not on PATH (add ~/.local/bin to PATH)"
fi

printf '%s\n' \
    "" \
    "Cross-tool compatibility:"

if agent_detected "kimi"; then
    echo "  [OK] Kimi CLI  - skills + hooks installed"
else
    echo "  [--] Kimi CLI  - not found (curl -L code.kimi.com/install.sh | bash)"
fi

if agent_detected "codex"; then
    echo "  [OK] Codex CLI - skills + AGENTS.md installed"
else
    echo "  [--] Codex CLI - not found (npm i -g @openai/codex)"
fi

printf '%s\n' \
    "" \
    "Git Hooks (per-project):" \
    "  cd your-project && ~/.claude/install-hooks.sh" \
    "" \
    "Code Graph Tools:" \
    "  ~/.claude/install-graph-tools.sh            - Install Tier 1 (default)" \
    "  ~/.claude/install-graph-tools.sh --joern     - Also install Tier 2 (CPG)" \
    "  ~/.claude/install-graph-tools.sh --codeql    - Also install Tier 3 (security)" \
    "  ~/.claude/install-graph-tools.sh --all       - Install all tiers" \
    "" \
    "Validation:" \
    "  $SCRIPT_DIR/tests/validate-structure.sh --full" \
    ""


================================================
FILE: maggy/.gitignore
================================================
__pycache__/
*.py[cod]
*$py.class
.pytest_cache/
.mypy_cache/
.ruff_cache/
*.egg-info/


================================================
FILE: maggy/PLAN.md
================================================
# Maggy — Generic AI Engineering Command Center

Ships as a core component of Maggy. One install, works with any team.

## What Maggy Is

A local, self-improving AI agent that turns your issue tracker into an AI-prioritized inbox with one-click execution. Uses Maggy's iCPG for codebase intelligence and spawns `claude -p` for implementation.

Not a cloud service — runs on your machine, talks to your APIs, uses your Claude Code.

## Vision

```
$ maggy init
Org name: Acme Corp
Issue tracker? (github / asana / linear) → github
GitHub org: acmecorp
Repos to monitor: api, web, mobile
Competitor domain (for intelligence): fintech
Paste your OKRs (or skip): ...

✓ Config saved to ~/.maggy/config.yaml
✓ Bootstrapping iCPG for 3 repos...
✓ Discovering competitors in "fintech"...  (found 28)
✓ Ready: http://localhost:8080
```

That's it. Works the same for any org.

## Architecture

```
maggy/
├── maggy/                          # The Maggy dashboard app
│   ├── PLAN.md                     # this file
│   ├── README.md                   # user docs
│   ├── install.sh                  # one-line install
│   ├── pyproject.toml              # deps
│   ├── config.example.yaml         # config template
│   ├── maggy/                      # Python package (importable as `maggy`)
│   │   ├── main.py                 # FastAPI entry
│   │   ├── config.py               # loads ~/.maggy/config.yaml
│   │   ├── providers/
│   │   │   ├── base.py             # IssueTrackerProvider Protocol
│   │   │   ├── github_issues.py    # GitHub Issues impl
│   │   │   └── asana.py            # Asana impl (linear deferred)
│   │   ├── services/
│   │   │   ├── inbox.py            # AI-prioritized ranking
│   │   │   ├── competitor.py       # discovery + monitoring + briefing
│   │   │   └── executor.py         # TDD pipeline with iCPG enrichment
│   │   ├── api/
│   │   │   └── routes.py           # REST endpoints
│   │   └── static/
│   │       ├── index.html          # dashboard
│   │       └── app.js              # vanilla JS
├── commands/
│   ├── maggy.md                    # /maggy → launch dashboard
│   └── maggy-init.md               # /maggy-init → setup wizard
├── skills/
│   └── maggy/
│       └── SKILL.md                # Maggy capabilities reference
└── scripts/icpg/                   # ALREADY EXISTS — Maggy calls this
```

## Key Design Decisions

### 1. Config-driven, not hardcoded

A single `~/.maggy/config.yaml` drives everything. No hardcoded board IDs, repo names, team members, OKRs, or competitor lists. All that stuff lives in config.

```yaml
org:
  name: "Acme Corp"
  domain: "fintech"                  # drives competitor category + system prompt

issue_tracker:
  provider: "github"                 # "github" | "asana" (linear = stub)
  github:
    org: "acmecorp"
    repos: ["acmecorp/api", "acmecorp/web"]
    # PAT read from env: GITHUB_TOKEN

codebases:
  - path: "~/dev/acmecorp/api"
    key: "api"
  - path: "~/dev/acmecorp/web"
    key: "web"

competitors:
  categories: ["fintech", "embedded-finance"]
  # Maggy auto-discovers. Stores in ~/.maggy/competitors.json

ai:
  provider: "anthropic"
  model: "claude-sonnet-4-5-20250929"
  # API key from ANTHROPIC_API_KEY env

storage:
  # SQLite by default — zero setup. Supabase optional.
  backend: "sqlite"
  path: "~/.maggy/maggy.db"

dashboard:
  port: 8080
  auth_mode: "local"                 # no auth for single-user local use
```

### 2. Provider abstraction for issue trackers

The #1 coupling in the zenloop version is Asana. Generic Maggy defines a Protocol and all services use it:

```python
class IssueTrackerProvider(Protocol):
    async def list_tasks(self, board: str | None = None, state: str = "open") -> list[Task]
    async def get_task(self, task_id: str) -> Task
    async def add_comment(self, task_id: str, text: str) -> None
    async def update_status(self, task_id: str, status: str) -> None
    async def list_followed(self, user_id: str | None = None) -> list[Task]
    async def search_tasks(self, query: str) -> list[Task]
```

`GitHubIssuesProvider` and `AsanaProvider` both implement this. Services call `provider.list_tasks()` — they don't care what's underneath.

### 3. Reuses the existing iCPG

Don't duplicate iCPG. Maggy shells out to the iCPG CLI:

```python
# executor.py
async def _get_icpg_context(title: str, notes: str) -> str:
    keywords = extract_keywords(title + notes)
    context = []
    for kw in keywords[:5]:
        result = await run_cmd(["icpg", "query", "symbols", "--keyword", kw, "--json"])
        context.append(result)
    return format_icpg_block(context)
```

This means the dashboard automatically benefits from iCPG upgrades. No duplicate symbol indexing.

### 4. SQLite-first storage

The zenloop version used Supabase for P2P coordination. For a single-user local install, SQLite is simpler and zero-setup. P2P and multi-user support stay optional:

- **Default (SQLite):** `~/.maggy/maggy.db`. Zero setup.
- **Optional (Supabase):** For teams that want shared state and P2P handoff.

### 5. Dashboard is minimal but real

Not a React SPA — Tailwind CDN + vanilla JS. Matches Maggy's philosophy (no build step, dead simple). Three views:

1. **Inbox** — AI-prioritized issues with Execute/Plan/Comment buttons
2. **Competitor News** — daily AI briefing + news feed
3. **Settings** — view/edit config, health check

### 6. Ships with Maggy

User installs Maggy, runs `/maggy-init` in Claude Code, and the dashboard is configured + running. `/maggy` in any Claude Code session opens the dashboard.

## MVP Scope (what I'm building now)

**In scope:**
- [x] Directory structure
- [ ] Config loader + example
- [ ] IssueTrackerProvider Protocol + GitHub Issues + Asana impls
- [ ] Inbox service (AI-prioritized)
- [ ] Competitor service (AI-discovered, daily briefing)
- [ ] Executor service (TDD pipeline with iCPG enrichment)
- [ ] FastAPI server + 8 endpoints
- [ ] Minimal HTML dashboard
- [ ] install.sh + pyproject.toml + README
- [ ] /maggy and /maggy-init commands
- [ ] skills/maggy/SKILL.md

**Deferred to v2 (not MVP):**
- Meeting bot (voice)
- Slack integration
- P2P network + session handoff
- Self-improvement (`/improve-maggy`)
- Heartbeat service (background processing)
- BambooHR integration
- Auto-review (PRs, tickets)
- 27 AI tools → starts with 5 core tools
- Linear provider (stub only)

## How to test independently

After install:

```bash
cd ~/Documents/AI-Playground/maggy/maggy
./install.sh

# Configure
cp config.example.yaml ~/.maggy/config.yaml
# Edit ~/.maggy/config.yaml with your GitHub org/repos

# Set env vars
export ANTHROPIC_API_KEY=sk-ant-...
export GITHUB_TOKEN=ghp_...

# Run
python -m maggy.main

# Open http://localhost:8080
```

Or from inside Claude Code (after bootstrap install):
```
/maggy-init    # interactive setup
/maggy         # launch dashboard
```

Should work out-of-the-box for any GitHub-based team.

## Success criteria

1. Fresh install on a machine that never saw zenloop → works
2. Points at any GitHub org → inbox populates with issues
3. AI prioritization runs → issues ranked
4. Click Execute → TDD pipeline spawns `claude -p` with iCPG context injected
5. Competitor discovery for any domain → competitors found + daily briefing
6. No hardcoded zenloop anything anywhere in the code

That's the bar.


================================================
FILE: maggy/README.md
================================================
# Maggy

**Autonomous AI engineering command center.**

Install once, point it at your codebases and issue tracker, and get:

- **Interactive Chat** — auto-connects to all active Claude/Codex/Kimi sessions and lets you take over from the web UI with full session continuity (`--resume`)
- **AI-prioritized Tasks** — ranks open issues by urgency + OKR alignment
- **One-click Execute** — spawns `claude -p` with iCPG-enriched prompts, runs TDD pipeline
- **Competitor Intelligence** — auto-discovers competitors, daily AI briefing
- **Process Insights** — CLI session history analysis, health signals, self-improvement recommendations
- **P2P Mesh** — multi-node session sync and handoff across machines
- **Auto-Bootstrap** — all services seed themselves on startup (history, CIKG, events)

## Install

```bash
cd maggy/maggy
./install.sh
```

## Configure

Edit `~/.maggy/config.yaml`:

```yaml
org:
  name: "Acme Corp"
  domain: "fintech"

issue_tracker:
  provider: "github"
  github:
    org: "acmecorp"
    repos: ["acmecorp/api", "acmecorp/web"]

codebases:
  - { path: "~/dev/acmecorp/api", key: "api" }
  - { path: "~/dev/acmecorp/web", key: "web" }

competitors:
  categories: ["fintech", "embedded-finance"]
```

Set credentials:

```bash
export GITHUB_TOKEN=ghp_...
export ANTHROPIC_API_KEY=sk-ant-...
```

## Run

```bash
python3 -m maggy.main
```

Open `http://localhost:8080`.

## Dashboard

Navigation is grouped by intent:

| Group | Tabs | Purpose |
|-------|------|---------|
| **Work** | Chat, Tasks, Watching | Do things — chat with Claude, triage issues |
| **Intel** | Competitors, Insights | Learn things — competitor news, session analytics |
| **System** | Budget, Models, Forge, Settings | Configure — spend limits, model routing, MCP gaps |

Chat is the default tab — auto-connects to all running CLI sessions on load.

## From inside Claude Code

```
/maggy-init   # interactive setup wizard
/maggy        # launch dashboard
```

## Features

- **Interactive Chat** — SSE streaming, session continuity via `--resume`, path-based history matching, auto-connect to active CLI sessions
- **Activity Scanner** — detects running `claude`, `codex`, `kimi` processes via `ps aux` + `lsof`
- **History Analysis** — parses 260+ CLI sessions, topic extraction, session patterns
- **Self-Improvement** — signal collection, health scoring, actionable recommendations
- **CIKG Knowledge Graph** — codebase nodes, technology detection, landscape queries
- **Event Spine** — structured event emission and querying across all services
- **Engram Memory** — write/query/expire memory entries with metadata
- **Budget Tracking** — daily spend limits with per-provider breakdown
- **Model Routing** — reward-based heatmap for model selection by task type
- **MCP Forge** — detects capability gaps from filesystem, suggests MCP tools
- **P2P Mesh** — WebSocket sync, peer discovery, state quarantine, org-scoped networks
- **Heartbeat** — scheduled jobs (history refresh, engram expiry, self-improve, mesh sync)

## Hardening

- **Working dir whitelist** — Execute and Chat both validate paths against configured codebase roots
- **Chat streaming lock** — per-session `asyncio.Lock` prevents concurrent subprocess spawning
- **SSRF protection** — RSS/blog feed URLs validated before fetch (blocks loopback, private-network)
- **CLAUDECODE env stripping** — subprocess spawning removes `CLAUDECODE` to allow nested Claude sessions
- **Process lifecycle** — Claude subprocesses killed on timeout; non-zero exits marked failed
- **Input validation** — Execute mode `Literal["tdd", "plan"]`; malformed IDs return 404
- **503 onboarding mode** — unconfigured state returns 503 with setup pointer
- **Safe external links** — scheme allowlist + `rel="noopener noreferrer"`
- **No-cache static files** — `Cache-Control: no-store` prevents stale JS in browser

## Architecture

See [PLAN.md](./PLAN.md) for the full architecture rationale.

1. **Provider abstraction** — `IssueTrackerProvider` Protocol (GitHub, Asana, Linear stub)
2. **Config-driven** — zero hardcoded IDs, orgs, or competitor lists
3. **iCPG integration** — context enrichment from code property graph
4. **SQLite-first** — single-user local install, zero setup
5. **Auto-bootstrap** — all services seed on startup, no empty tabs
6. **Grouped UI** — Work / Intel / System navigation by intent

## License

MIT


================================================
FILE: maggy/config.example.yaml
================================================
# Maggy configuration
# Copy this to ~/.maggy/config.yaml and customize.

org:
  name: "Your Org"
  # Drives competitor auto-discovery and system prompt phrasing.
  # Examples: "fintech", "devtools", "cx-feedback", "healthcare", "marketplaces"
  domain: "your-domain"

issue_tracker:
  # Currently supported: "github" | "asana"
  # ("linear" is a stub and not selectable yet — tracking via #TODO)
  provider: "github"

  github:
    # Your GitHub org or user
    org: "your-org"
    # Repos to monitor (full name: "org/repo")
    repos:
      - "your-org/api"
      - "your-org/web"
    # Optional: only show issues with these labels (empty = all)
    labels: []
    # Read-only token from env: GITHUB_TOKEN

  asana:
    # Used when provider: "asana". Ignore if using GitHub.
    workspace_id: ""
    # Project GIDs for each "board" that appears in the sidebar
    boards:
      dev: ""
      bugs: ""
    # Token from env: ASANA_API_KEY

codebases:
  # Paths to repos Maggy can execute in. When you click Execute on a ticket,
  # Maggy picks the right repo based on keyword matching.
  - path: "~/dev/your-org/api"
    key: "api"
    # Optional: default working_dir override per repo
  - path: "~/dev/your-org/web"
    key: "web"

competitors:
  # Maggy auto-discovers competitors in these categories using AI + G2/Capterra research.
  # Results stored in ~/.maggy/competitors.json — edit freely.
  categories:
    - "your-primary-category"
  # Optional: seed with specific competitor names to ensure they're tracked
  seed:
    - "CompetitorOne"
    - "CompetitorTwo"

okrs:
  # Two ways to provide OKRs:
  #   source: "yaml"  → list them inline below
  #   source: "skip"  → no OKR tracking
  source: "skip"
  # If source == "yaml":
  items: []
  # Example items structure:
  # - id: "Q2-1"
  #   title: "Reduce p95 latency to 200ms"
  #   keywords: ["latency", "performance", "slow"]

ai:
  provider: "anthropic"
  model: "claude-sonnet-4-5-20250929"
  # API key from env: ANTHROPIC_API_KEY
  max_budget_usd_per_execute: 5.0

storage:
  # SQLite by default — zero setup. For multi-user/P2P, use Supabase (not yet supported in MVP).
  backend: "sqlite"
  path: "~/.maggy/maggy.db"

dashboard:
  host: "127.0.0.1"
  port: 8080
  # "local" = no auth (single-user local install).
  # "token" = require X-API-Key header matching MAGGY_API_KEY env var.
  auth_mode: "local"

# Path to the Maggy installation — auto-detected; you usually don't need to change it.
bootstrap:
  # If omitted, Maggy looks at ~/.claude/.bootstrap-dir written by install.sh
  path: ""


================================================
FILE: maggy/docs/benchmark-results.md
================================================
# Maggy v5 Benchmark Results

**Date:** 2026-05-11
**App:** Personal Expense Tracker (FastAPI + SQLite + vanilla HTML/JS)
**Environment:** Mac Studio M4 Max, 128 GB RAM, macOS Darwin 24.6.0
**CLIs:** Claude Code 2.1.42, Codex 0.129.0, Kimi 1.41.0, Ollama 0.23.2 (qwen2.5-coder:32b)

---

## 1. Test Protocol

6 identical tasks run sequentially through two pipelines:

- **Runner A (Maggy):** 4-tier routing via blast score. Auto-discovers CLI flags at startup.
- **Runner B (Claude Code):** All tasks run through `claude -p` only.

Both pipelines use `--dangerously-skip-permissions` / equivalent flags, 25 max turns, and subprocess spawning into isolated build directories.

---

## 2. Task Definitions

| ID | Task | Blast | Maggy Route | Type |
|----|------|-------|-------------|------|
| EXP-1 | Write product spec | 2 | local (ollama) | docs |
| EXP-2 | Design database schema | 3 | kimi | architecture |
| EXP-3 | Build expense CRUD API | 5 | gpt (codex) | feature |
| EXP-4 | Build category API + monthly summary | 5 | gpt (codex) | feature |
| EXP-5 | Build frontend dashboard | 6 | gpt (codex) | frontend |
| EXP-6 | Security review + input validation | 8 | claude | security |

---

## 3. Speed Results

| Task | Blast | Maggy Model | Maggy (s) | Claude (s) | Winner |
|------|-------|-------------|-----------|------------|--------|
| EXP-1 | 2 | ollama (local) | 50.4 | 48.6 | Claude |
| EXP-2 | 3 | kimi | 86.6 | 67.2 | Claude |
| EXP-3 | 5 | codex | 147.1 | 160.6 | **Maggy** |
| EXP-4 | 5 | codex | 133.9 | 130.8 | Claude |
| EXP-5 | 6 | codex | 280.1 | 121.9 | Claude |
| EXP-6 | 8 | claude | 209.5 | 151.9 | Claude |
| **Total** | | | **907.6** | **681.0** | **Claude (33% faster)** |

### Routing Distribution (Maggy)

| Model | Tasks | % |
|-------|-------|---|
| codex (gpt) | 3 | 50% |
| ollama (local) | 1 | 17% |
| kimi | 1 | 17% |
| claude | 1 | 17% |

---

## 4. Success Rate

| Pipeline | Passed | Failed | Fallbacks | Rate |
|----------|--------|--------|-----------|------|
| Maggy | 6 | 0 | 0 | 100% |
| Claude | 6 | 0 | 0 | 100% |

---

## 5. Output Quality Assessment

### 5.1 File Inventory

**Maggy (10 source files, 1,641 lines):**

| File | Lines | Model | Assessment |
|------|-------|-------|------------|
| `SECURITY.md` | 134 | claude | Thorough: 7 findings with fixes, 3 recommendations |
| `backend/app/database.py` | 74 | kimi | Correct schema, parameterized queries, FK + cascade, seed data |
| `backend/app/main.py` | 36 | kimi | Lifespan init, CORS from env var (not wildcard), 3 routers |
| `backend/app/validation.py` | 25 | claude | Shared YYYY-MM regex validator, extracted from duplication |
| `backend/app/routes/expenses.py` | 148 | codex | Full CRUD, Pydantic models, parameterized SQL, FK check |
| `backend/app/routes/categories.py` | 107 | codex | CRUD, hex color validator, unique constraint handling |
| `backend/app/routes/summary.py` | 52 | codex | Monthly aggregation with COALESCE, GROUP BY |
| `frontend/index.html` | 121 | codex | Dark theme, responsive, all sections present |
| `frontend/css/style.css` | 472 | codex | CSS bar charts, dark palette, mobile breakpoints |
| `frontend/js/app.js` | 472 | codex | State management, fetch API, DOM via textContent (XSS-safe) |

**Claude (18 source files, ~1,500 app lines + 457K with venv):**

| File | Lines | Assessment |
|------|-------|------------|
| `specs/product-spec.md` | 206 | Comprehensive: vision, schema, Pydantic examples, project structure |
| `backend/app/database.py` | 68 | Correct schema, parameterized queries, FK, seed data |
| `backend/app/main.py` | 42 | Lifespan init, CORS from env var, 3 routers |
| `backend/app/models.py` | 51 | Centralized Pydantic schemas (better separation) |
| `backend/app/routes/expenses.py` | 159 | Full CRUD, partial update support, category JOIN |
| `backend/app/routes/categories.py` | 90 | CRUD, referential integrity check on delete |
| `backend/app/routes/summary.py` | 44 | Monthly aggregation |
| `backend/tests/conftest.py` | 18 | Temp DB fixture with patch |
| `backend/tests/test_expenses.py` | 108 | 11 test cases covering CRUD + edge cases |
| `backend/tests/test_categories.py` | ~50 | Category CRUD tests |
| `backend/tests/test_summary.py` | ~40 | Summary endpoint tests |
| `frontend/index.html` | 79 | Clean layout, modal-based form |
| `frontend/css/style.css` | 323 | Dark theme, responsive |
| `frontend/js/app.js` | 320 | API wrapper, currency formatting, chart rendering |

### 5.2 Quality Scoring

| Dimension | Maggy | Claude | Notes |
|-----------|-------|--------|-------|
| **Functional completeness** | 9/10 | 10/10 | Both implement all endpoints. Claude adds partial updates. |
| **Security** | 10/10 | 7/10 | Maggy's security review (EXP-6) hardened CORS, added amount bounds, path param validation, color format validation. Claude left CORS with `allow_credentials=True`, no amount ceiling, no color validation. |
| **SQL safety** | 10/10 | 10/10 | Both use parameterized queries exclusively. |
| **XSS prevention** | 10/10 | 10/10 | Both use textContent for DOM rendering. No innerHTML. |
| **Input validation** | 9/10 | 7/10 | Maggy: Pydantic + custom validators (hex color, amount ceiling, path ge=1). Claude: Pydantic regex patterns but less thorough. |
| **Error handling** | 9/10 | 8/10 | Maggy: context manager with rollback, 409 on duplicate, 404 on missing. Claude: try/finally, 409 on duplicate, referential integrity check. |
| **Test coverage** | 0/10 | 9/10 | Maggy produced zero tests. Claude created conftest + 3 test files (~200 lines). |
| **Architecture** | 8/10 | 9/10 | Claude separated models into dedicated file. Maggy inlined models per route. Both wire correctly. |
| **Product spec** | 0/10 | 10/10 | Maggy's ollama did not produce a spec file. Claude's spec is comprehensive (206 lines). |
| **Frontend quality** | 9/10 | 8/10 | Maggy's frontend is larger (472+472+121 = 1065 lines) with more CSS detail. Claude's is cleaner (320+323+79 = 722 lines) with modal UX. |
| **Weighted avg** | **7.4/10** | **7.8/10** | |

### 5.3 Key Differences

**Maggy strengths:**
- Security review caught and fixed 7 issues (CORS wildcard, missing bounds, color validation, duplicated validation)
- Multi-model approach applied right tool to right task (security by Claude, CRUD by Codex, schema by Kimi)
- Larger frontend with more CSS polish
- Each model contributed its strength: Claude for security depth, Codex for feature implementation

**Claude strengths:**
- Product spec created (comprehensive 206-line document)
- Test suite included (conftest + 3 test files, ~200 lines, 11+ test cases)
- Better code organization (centralized models.py)
- Partial update support on expenses (PATCH-style PUT)
- Referential integrity check on category delete (prevents orphaned expenses)
- Full venv with dependencies installed

**Maggy weaknesses:**
- No product spec file generated (ollama didn't create it or placed it elsewhere)
- No test files at all — a significant gap for production readiness
- Import paths use `backend.app.`, which requires a specific project structure to run

**Claude weaknesses:**
- No dedicated security review — CORS uses `allow_credentials=True` (risky with dynamic origins)
- No amount ceiling on expenses (could submit `1e308`)
- No hex color format validation on categories
- `get_db()` returns connection without context manager (manual close in every route)

---

## 6. Cost Analysis

| Pipeline | Claude Usage | Free/Cheap Usage | Est. Subscription Burn |
|----------|-------------|------------------|----------------------|
| **Maggy** | 1/6 tasks (17%) | 2/6 tasks (33%) | Low — spread across 3 subscriptions |
| **Claude** | 6/6 tasks (100%) | 0/6 tasks (0%) | High — 100% on premium model |

Maggy used Claude only for the security review (blast 8). The other 5 tasks consumed cheaper or free models:
- EXP-1: ollama (free, local GPU)
- EXP-2: kimi (free tier / cheap subscription)
- EXP-3/4/5: codex (separate subscription)

This represents an ~83% reduction in Claude subscription consumption.

---

## 7. Routing Observations

### What worked
- **Blast 8 → Claude** for security review was correct. Claude produced the most thorough audit.
- **Blast 5 → Codex** for CRUD implementation delivered working endpoints.
- **Blast 3 → Kimi** for database schema was successful and correct.
- **Zero fallbacks** — all 4 CLIs completed tasks without needing to escalate.
- **Auto-discovery** — CLI flags probed from `--help`, not hardcoded.

### What needs tuning
- **Codex is slow on frontend** — EXP-5 took 280s vs Claude's 122s (2.3x slower). Consider routing blast 6 frontend tasks to Claude.
- **Ollama missed the spec task** — EXP-1 (docs) was routed to local model but no spec file was generated. Ollama's qwen2.5-coder is optimized for code, not prose. Consider routing `task_type: docs` to kimi or claude regardless of blast score.
- **No test generation by any Maggy model** — None of the 4 models produced tests. This could be addressed by adding a TDD step (write tests first) as a follow-up task routed to Claude.

---

## 8. Conclusions

| Metric | Maggy | Claude | Verdict |
|--------|-------|--------|---------|
| Speed | 907.6s | 681.0s | Claude 33% faster |
| Success rate | 100% | 100% | Tie |
| Quality (weighted) | 7.4/10 | 7.8/10 | Claude slightly better |
| Security depth | Stronger | Weaker | Maggy (dedicated review step) |
| Test coverage | None | Good | Claude (significant gap for Maggy) |
| Cost efficiency | 83% savings | Baseline | Maggy |
| Subscription risk | Distributed | Single point | Maggy |
| Model diversity | 4 models | 1 model | Maggy |

**Summary:** Claude Code is faster and produces marginally higher overall quality (driven by tests and spec). Maggy's multi-model approach provides cost efficiency and subscription risk distribution, plus deeper security review via dedicated model routing. The main gaps to close: add TDD pipeline (test generation step), and improve docs routing (don't send prose tasks to coding-optimized local models).

---

## 9. Raw Throughput Benchmarks (tokens/sec)

Standalone generation speed measured with identical prompts across all four model tiers. Each model ran 3 iterations (1 cold, 2 hot).

**Prompt:** "Write a Python function that implements a binary search tree with insert, delete, search, and in-order traversal."

### 9.1 Results

| Model | Run 1 | Run 2 | Run 3 | Avg tok/s | Notes |
|-------|-------|-------|-------|-----------|-------|
| **Ollama qwen2.5-coder:32b** | 22.3 | 21.8 | 22.1 | **22.1** | Local GPU (M4 Max), consistent across runs |
| **Ollama qwen3-coder:30b-a3b-q8_0** | 75.3 | 75.4 | 76.3 | **75.7** | MoE (3.3B active/30B total), Q8_0, **3.4x faster than qwen2.5** |
| **Claude (claude -p)** | 44.6 (API) / 18.6 (wall) | 41.9 / 14.3 | 25.7 / 6.8 | **37.4 API / 13.2 wall** | API time excludes network overhead; wall-clock includes CLI startup |
| **Kimi (kimi CLI)** | ~1.8 | ~2.8 | ~3.3 | **~2.6** | Agentic mode — writes files, runs tools; tok/s reflects execution time |
| **Codex (codex exec)** | ~0.8 | ~0.7 | ~0.6 | **~0.7** | Agentic mode — full-auto file creation; tok/s reflects execution time |

### 9.2 Interpretation

- **Ollama qwen3-coder (local):** **75.7 tok/s** — 3.4x faster than qwen2.5-coder:32b (22.1 tok/s) and **2x faster than Claude's API rate** (37.4 tok/s). MoE architecture (3.3B active / 30B total params) means only a fraction of parameters are computed per token. Cold start adds ~13s for model load; hot runs start in <100ms. This makes qwen3-coder the fastest model in the fleet for pure generation.
- **Ollama qwen2.5-coder (retired):** Was 22 tok/s. Replaced by qwen3-coder which is 3.4x faster with comparable quality.
- **Claude:** 37 tok/s API generation. Still the strongest for reasoning-heavy tasks (security, architecture, TDD).
- **Kimi / Codex:** Low tok/s numbers are misleading — both operate in agentic mode (writing files, running commands, iterating). Their throughput reflects end-to-end task execution, not pure generation speed.

### 9.3 Routing Implications

| Tier | Model | tok/s | Cost | Best For |
|------|-------|-------|------|----------|
| Local | Ollama qwen3-coder:30b-a3b-q8_0 | 75.7 | Free | Blast 1-3: simple edits, CRUD, code generation |
| Mid | Kimi | 2.6 (agentic) | Cheap | Blast 3-4: schema design, CRUD |
| Premium-Auto | Codex | 0.7 (agentic) | Mid | Blast 5-6: feature implementation |
| Premium | Claude | 37 (API) | High | Blast 7+: security, architecture, TDD |

### 9.4 Qwen3-Coder Quality Assessment

Two coding tasks evaluated for correctness and code quality:

**Task 1: Binary Search Tree** (same prompt as throughput benchmark)
- Insert, delete (leaf/internal/root), search, in-order traversal — all correct
- Clean class structure, recursive helpers, inorder-successor delete
- Handles duplicate-ignore semantics correctly
- **Score: 10/10** — functionally identical to Claude's output

**Task 2: Async Rate Limiter** (token bucket, concurrent-safe)
- `asyncio.Lock` for concurrency safety
- `_refill()` based on elapsed time — correct token bucket math
- `acquire()` waits in loop, `try_acquire()` returns immediately
- Burst exhaustion + refill timing verified within 1ms of expected
- 10 concurrent tasks completed without deadlock
- **Score: 9/10** — correct and safe; minor: polling loop at 1ms instead of event-driven wait

**Quality Summary:**

| Dimension | qwen3-coder | qwen2.5-coder | Claude |
|-----------|-------------|---------------|--------|
| Correctness | 10/10 | 9/10 | 10/10 |
| Code structure | 9/10 | 8/10 | 10/10 |
| Concurrency safety | 9/10 | N/A | 10/10 |
| Generation speed | **75.7 tok/s** | 22.1 tok/s | 37.4 tok/s |
| Cost | Free | Free | $$$ |

**Verdict:** qwen3-coder is a major upgrade — 3.4x faster than qwen2.5 with equal or better code quality. At 75.7 tok/s it's the fastest model in the fleet, making it ideal for blast 1-4 tasks where speed matters and deep reasoning isn't required.

---

## 10. Post-Benchmark Fixes (Routing Rules + Conventions)

Three systems were built immediately after the benchmark to close the gaps above.

### 10.1 Routing Rules (`~/.maggy/routing-rules.yaml`)

A self-updating YAML config that overrides blast-score routing for specific task types and pipeline phases. Rules are checked **before** the reward table or blast-score tier.

**Task-type overrides seeded from benchmark evidence:**

| Task Type | Forced To | Why |
|-----------|----------|-----|
| `docs` | claude | Ollama (code-optimized) produced no spec file |
| `security` | claude | Security review needs deep reasoning |
| `tests` | claude | Only claude generated test files in benchmark |
| `architecture` | claude | Architecture needs cross-context awareness |
| `planning` | claude | Planning requires structured reasoning |

**Pipeline phase overrides from TDD workflow:**

| Phase | Forced To | Why |
|-------|----------|-----|
| `spec` | claude | SPEC phase needs comprehensive docs |
| `tdd_red` | claude | RED phase needs test design expertise |
| `tdd_green` | auto | GREEN uses blast-score routing (cheap models can implement) |
| `review` | claude | Review needs security + architecture depth |

**Self-learning:** `record_outcome()` updates rolling success rates per model. `learn_override()` lets Maggy add new rules when outcome data supports it. Manual YAML edits are preserved.

### 10.2 Team Conventions Injection

Five conventions from claude-bootstrap's CLAUDE.md are embedded in routing rules and injected into every prompt sent to any CLI:

1. **mWP** — Build minimum wowable product. No feature flags, no premature abstractions.
2. **TDD** — RED → GREEN → VALIDATE. Coverage >= 80%.
3. **Security** — No secrets in code. Parameterized SQL. Validate input at boundaries.
4. **Quality gates** — 20 lines/fn, 3 params, 2 nesting levels, 200 lines/file.
5. **Existing patterns** — Read codebase before changing. Keep changes minimal.

All four executor prompt methods (`_plan_prompt`, `_analysis_prompt`, `_tests_prompt`, `_impl_prompt`) now append matching conventions. This standardizes quality expectations across kimi, codex, ollama, and claude.

### 10.3 Expected Re-run Improvements

| Benchmark Gap | Root Cause | Fix Applied | Expected Result |
|--------------|-----------|-------------|-----------------|
| No product spec (EXP-1) | `docs` routed to ollama | `docs → claude` override | Claude generates spec |
| No tests from any model | No TDD step in pipeline | `tdd_red → claude` + `tests → claude` overrides | Claude writes failing tests |
| Inconsistent quality across models | No shared standards | Conventions injected into all prompts | mWP + quality gates enforced everywhere |
| No learning from outcomes | Static routing only | `record_outcome()` + `learn_override()` | Routing improves with each task |

**Projected scores if re-run:**

| Dimension | Before | After (est.) | Change |
|-----------|--------|-------------|--------|
| Product spec | 0/10 | 9/10 | `docs → claude` |
| Test coverage | 0/10 | 8/10 | `tdd_red → claude` |
| Security | 10/10 | 10/10 | No change (already strong) |
| Architecture | 8/10 | 9/10 | Conventions enforce patterns |
| **Weighted avg** | **7.4/10** | **~8.5/10** | **+1.1 points** |

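Cost efficiency would drop somewhat from the ~83% savings measured in this run: the `docs` and `architecture` overrides move EXP-1 and EXP-2 to claude, and the `tests` override adds a new claude-routed TDD step. CRUD/API/frontend work (EXP-3/4/5) is still routed to cheaper models.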


================================================
FILE: maggy/docs/maggy-rfc.md
================================================
# Maggy: An Autonomous AI Engineering Platform

**RFC — Request for Comments**
**Author:** Ali Shaheen, Protaige
**Date:** May 2026
**Version:** 5.0

---

## 1. Executive Summary

Maggy is a local-first, self-improving AI engineering platform that transforms how development teams build software. Unlike code assistants that wait for prompts, Maggy is an autonomous agent that observes, learns, and optimizes — continuously improving its own effectiveness across models, workflows, and team knowledge.

**What makes Maggy different:**

- **Multi-model orchestration** — Maggy routes tasks to the best model (Claude, GPT-4o, Gemini, Kimi, DeepSeek, local Qwen) based on learned performance data, not static rules. When one model hits quota, work continues seamlessly on the next.
- **Self-improving closed-loop control** — Every task Maggy completes generates reward signals that improve its future decisions. Model routing, inbox ordering, workflow steps, and fatigue management all optimize automatically.
- **Process intelligence** — Maggy doesn't just write code. It learns from CI results, PR reviews, CodeRabbit findings, and merge patterns to preemptively fix issues before they reach reviewers.
- **Maggy Mesh** — A peer-to-peer network connecting Maggy instances across a team. One developer's hard-won CI fix becomes the entire team's knowledge. Autonomously. Instantly.
- **Local-first, no vendor lock-in** — All data stays on developer machines. No cloud dependency. No vendor seeing your code. Works offline with local models.

**The value proposition:** A team of 5 developers running Maggy Mesh for 6 months accumulates 4x the learning of a solo developer. New team members inherit collective intelligence on day one. CI pass rates go up, review rounds go down, and the system gets smarter every week — without anyone configuring it.

---

## 2. Vision: Autonomous Engineering, Not Code Generation

The current generation of AI coding tools — Copilot, Cursor, Devin — are fundamentally reactive. They complete code when prompted, suggest edits when asked, and run tasks when instructed. They're sophisticated typeaheads, not engineers.

An engineer doesn't just write code. An engineer:

- **Prioritizes** — Which ticket matters most right now?
- **Plans** — What's the blast radius? What could break?
- **Validates** — Does this feature align with the market? Do competitors have it?
- **Executes** — Write the code, with the right model for the task
- **Verifies** — Did CI pass? Did reviewers approve? Did it deploy cleanly?
- **Learns** — What worked? What didn't? How do I do it better next time?

Maggy does all of this. It's the first AI platform designed around the full software development lifecycle, not just the "write code" step.

### The Autonomy Spectrum

```
Level 0: Autocomplete (Copilot, TabNine)
  → Completes the current line
  → No context beyond the file
  → No learning

Level 1: Chat Assistant (ChatGPT, Claude)
  → Answers questions about code
  → No project context
  → No memory between sessions

Level 2: Project-Aware Assistant (Cursor, Continue)
  → Understands the codebase
  → Can edit multiple files
  → Limited memory (rules, preferences)

Level 3: Task Agent (Devin, Claude Code Agent)
  → Executes multi-step tasks
  → Uses tools (terminal, browser)
  → Single-model, single-project

Level 4: Autonomous Engineering Platform (Maggy) ← WE ARE HERE
  → Multi-model, multi-project orchestration
  → Self-improving from every task
  → Process intelligence (learns from CI, reviews, deploys)
  → Team intelligence via P2P mesh
  → Market validation before engineering
```

---

## 3. Architecture Overview

### The Component Map

```
┌─────────────────────────────────────────────────────────────┐
│                    MAGGY WEB DASHBOARD                        │
│  ┌──────────┐ ┌─────────┐ ┌────────┐ ┌───────┐ ┌────────┐ │
│  │  Inbox   │ │ Budget  │ │ Agents │ │  CIKG │ │Process │ │
│  │ (ranked) │ │ (live)  │ │(status)│ │ (gaps)│ │(health)│ │
│  └──────────┘ └─────────┘ └────────┘ └───────┘ └────────┘ │
└──────────────────────────┬──────────────────────────────────┘
                           │
              ┌────────────┴────────────┐
              │    ORCHESTRATOR LAYER    │
              │                         │
              │  Pi Agent (universal    │
              │  harness, RPC mode)     │
              │                         │
              │  Token Budget Manager   │
              │  Model Router (learned) │
              │  Dual-Model Planner     │
              └────────┬────────────────┘
                       │
        ┌──────────────┼──────────────┐
        │              │              │
   ┌────▼────┐   ┌────▼────┐   ┌────▼────┐
   │Container│   │Container│   │Container│
   │  1      │   │  2      │   │  3      │
   │ Claude  │   │ GPT-4o  │   │  Qwen   │
   │ (auth)  │   │ (front) │   │ (docs)  │
   └─────────┘   └─────────┘   └─────────┘
        │              │              │
   ┌────┴──────────────┴──────────────┴────┐
   │         INTELLIGENCE LAYER             │
   │                                        │
   │  iCPG — blast radius, drift, intent    │
   │  Mnemos — memory, fatigue, checkpoints │
   │  codebase-memory-mcp — code graph      │
   │  CIKG — competitive intelligence       │
   │  Process Intelligence — CI/PR/deploy   │
   │  MCP Forge — capability expansion      │
   │  Maggy Mesh — P2P team learning        │
   └────────────────────────────────────────┘
```

### Pi: The Universal Agent Harness

Pi replaces per-CLI adapters with a single interface to every model. It runs inside Polyphony containers in RPC mode over stdin/stdout. The same PiAdapter code controls Claude, GPT-4o, Gemini, Kimi, DeepSeek, or a local Qwen — with identical tool interfaces.

**Model fallback chain:**

```
Claude → GPT-4o → Gemini → Kimi → DeepSeek → Qwen (local, unlimited)
```

When a model hits quota or rate limits:
1. Mnemos writes a structured checkpoint (goal, constraints, progress, state)
2. Pi switches to the next model
3. The checkpoint is injected as context
4. The new model verifies it understands the task before continuing
5. If verification fails, escalate to the next tier — don't retry on a weaker model

**The user never notices the switch.** Work continues. That's the wow.

### Token Budget Manager

```yaml
providers:
  anthropic:
    daily_limit_usd: 50.00
    used_today_usd: 32.15
    model_preference: claude-sonnet-4
  openai:
    daily_limit_usd: 30.00
    used_today_usd: 5.20
    model_preference: gpt-4o
  local:
    daily_limit_usd: 0  # free: local usage is unlimited, 0 is not a spending cap
    model_preference: qwen2.5-coder:32b
```

The budget manager prevents runaway costs. When Anthropic hits its $50 daily limit, Maggy doesn't stop — it routes to OpenAI. When OpenAI hits its $30 daily limit, it routes to local Qwen. Work never stops.

---

## 4. Self-Improvement: Multi-Level Closed-Loop Control

This is Maggy's core differentiator. Every task teaches Maggy something. Every CI failure, every review comment, every deploy result feeds back into the system. Maggy gets smarter every day — without anyone configuring it.

### The Objective Function

```
efficiency = (value_delivered / time_spent) x quality_multiplier

where:
  value_delivered   = tickets landed + features shipped + bugs fixed
  time_spent        = wall clock from ticket selection to merge
  quality_multiplier = 1.0 - (bug_escape_rate + revert_rate + incident_rate)
```

### Five Control Levels

| Level | Frequency | What It Does |
|-------|-----------|-------------|
| **L0 — Real-time** | Seconds | Catches tool failures, test failures, fatigue spikes, scope drift *as they happen*. Switches models mid-task when quality degrades. |
| **L1 — Task** | Minutes | Computes task reward score. Updates model performance table. Logs process signals. |
| **L2 — Daily** | Hours | Catches operational degradation: CI pass rate drops, model failure spikes, budget burn rate anomalies. Disables failing models. |
| **L3 — Weekly** | Days | Strategic optimization: evolves skill files, adjusts workflow steps, triggers MCP Forge for capability gaps, patches prompts. |
| **L4 — Monthly** | Weeks | Meta-optimization: recalibrates reward signals, adjusts tier boundaries, tunes exploration rate, changes the improvement process itself. |

**Key principle:** Inner loops provide stability. Outer loops provide optimization. L0 catches a failing model in seconds — the user barely notices. L3 makes routing smarter over weeks — the system quietly improves. L4 makes the improvement process itself better over months.

### What Gets Optimized

**Model routing** — Maggy tracks reward per `(model x task_type x blast_tier)` triple. After 50+ tasks, routing outperforms random assignment by 20%+.

```
(claude, auth, high):       +0.92  ← claude excels at auth
(qwen, docs, low):          +0.85  ← qwen is fast and free for docs
(gpt-4o, frontend, medium): +0.78  ← gpt-4o is strong on frontend
```

**Inbox ordering** — Learns which tickets the user actually picks first. Adjusts urgency weights to match user behavior.

**Workflow steps** — Drops steps that never catch issues (e.g., Codex counter-check on blast < 3). Re-enables them when they become valuable again.

**Fatigue management** — Learns each user's optimal session length and pre-checkpoints at the right moment. Not at a generic threshold — at *your* threshold.

---

## 5. Process Intelligence: Learning from the Full SDLC

Most AI tools optimize code generation. Maggy optimizes the **entire development process**.

### Environment Discovery

On first run per project, Maggy auto-discovers the developer's workflow — no configuration:

- **Ticketing:** GitHub Issues, Asana, Linear, Jira
- **CI/CD:** GitHub Actions, Jenkins, CircleCI
- **Code quality:** ESLint, ruff, mypy, pre-commit, coverage
- **Review process:** Required reviewers, CODEOWNERS, branch protection
- **Integrations:** CodeRabbit, Dependabot, Renovate, Vercel

### Signal Collection

Maggy continuously collects signals from the SDLC:

| Signal Source | What Maggy Learns |
|--------------|-------------------|
| CI results | Which code patterns cause test failures |
| PR review comments | What reviewers consistently flag |
| CodeRabbit findings | Security and quality issues by pattern |
| Merge patterns | How many rounds of review, time to merge |
| Deploy results | Which changes cause deploy failures |

### Preemptive Fixes

The pattern engine correlates `(code_pattern, review_feedback)` pairs:

> "Your reviewer always flags missing error handling in API routes. Maggy added it before the PR was created. Review rounds dropped from 2.8 to 1.1."

This is not prompt engineering. This is autonomous process optimization — Maggy observed a pattern, validated it statistically, and changed its behavior to prevent the issue. No human told it to.

---

## 6. Engram: Cross-Session Memory

### The Amnesia Problem

Every AI coding tool today is an amnesiac. When a session ends, everything the agent learned — project conventions, reviewer preferences, codebase idioms, tool configurations — evaporates. The next session starts from scratch. This isn't a minor inconvenience; it's the fundamental bottleneck preventing AI agents from becoming genuinely useful over time.

Engram identifies seven distinct amnesia pathologies:

| Amnesia Type | What Gets Lost | Impact |
|-------------|---------------|--------|
| **Anterograde** | New memories fail to form across sessions | Every session restarts from zero |
| **Retrograde** | Existing memories degrade over time | Learned patterns fade |
| **Temporal** | When something happened is lost | Can't track how things changed |
| **Source** | Where a fact came from is lost | Can't trust or audit memories |
| **Interference** | Memories from one context contaminate another | Project A's patterns leak into Project B |
| **Context-binding** | Right memory, wrong retrieval context | Conventions exist but aren't surfaced when needed |
| **Confabulation** | Inferred patterns presented as confirmed facts | Agent "remembers" things it actually guessed |

### The Memory Lifecycle

Engram completes Maggy's memory stack:

```
Mnemos (within-task)     → What the agent remembers during a single task
     ↓ promote (confidence > 0.8, evidence >= 3)
Engram (cross-session)   → What survives between sessions, per machine
     ↓ distill to typed memory
Mesh (cross-machine)     → What's shared across the team, P2P
```

Without Engram, Maggy has a 10-minute memory. With Engram, knowledge compounds across every session. After 100 sessions, Maggy knows your project's conventions, your reviewers' preferences, your CI failure patterns — and applies them automatically.

### Three-Tier Namespace Model

Memory is organized into three tiers to prevent both cross-project contamination and useful-pattern siloing:

1. **Local** — project-specific memories (strict isolation). A Python FastAPI project's conventions never contaminate a React project's patterns.
2. **Portfolio** — abstracted cross-project patterns. When a local pattern proves useful across 3+ projects, it's promoted — but only after de-contextualization (stripping project-specific names and paths).
3. **Mesh** — peer-derived memories (quarantined on arrival). Must be locally validated before promotion to portfolio.

This three-tier model means Engram gets smarter across projects without cross-contamination.

### Engram as Improvement Substrate

Engram absorbs the improvement ledger. The ledger is the mutation log (what changed), Engram is the memory substrate (persists it across sessions), and the reward registry tracks whether it worked. Every self-modification becomes a persistent, queryable memory — Maggy remembers not just what it learned, but what it tried and what failed.

### Amnesia Score

Each project gets a 7-dimension diagnostic score (0.0 = perfect retention, 1.0 = total amnesia). The L3 weekly loop analyzes Amnesia Scores and adjusts encoding rules: if anterograde score is high, lower the promotion threshold; if interference is high, tighten namespace isolation.

### Research Basis

Engram builds on validated research: Mem0 (186M API calls, memory-as-object model), Zep/Graphiti (temporal validity windows), Hindsight (91.4% on LongMemEval, fact vs opinion separation), MAGMA (multi-graph retrieval with 45.5% higher reasoning accuracy), and A-MEM (Zettelkasten-style associative encoding). What none of these systems address is the combination of namespace isolation, origin tracking, temporal validity, and amnesia diagnosis in a single architecture designed for multi-project AI agents.

---

## 7. Maggy Mesh: Peer-to-Peer Team Intelligence

### The Problem

A solo developer's Maggy learns from their tasks. But teams have 5, 10, 50 developers — each independently discovering the same CI fixes, the same reviewer preferences, the same model performance patterns. That's wasted learning.

### The Solution

Maggy Mesh connects instances across a team into a peer-to-peer network. Each Maggy autonomously shares learned intelligence with other Maggys in the same organization.

```
┌──────────────────────────────────────────────────────────┐
│                    ORGANIZATION                            │
│                                                           │
│  ┌─────────┐    ┌─────────┐    ┌─────────┐              │
│  │ Maggy-A │◄──►│ Maggy-B │◄──►│ Maggy-C │              │
│  │ (Ali)   │    │ (Sarah) │    │ (John)  │              │
│  │ Python  │    │ React   │    │ DevOps  │              │
│  └─────────┘    └─────────┘    └─────────┘              │
│       ▲              ▲              ▲                    │
│       └──────────────┴──────────────┘                    │
│            Full mesh — everyone sees                      │
│            everyone's learnings                           │
└──────────────────────────────────────────────────────────┘
```

### What Gets Shared

Not everything. Maggy Mesh shares **typed memory classes** with different merge rules:

| Type | Example | Merge Rule |
|------|---------|-----------|
| **Scores** | "Claude scores 0.92 on auth tasks" | Weighted average by sample count |
| **Patterns** | "Add error handling before PR" | Union-merge with frequency tracking |
| **Policies** | "Route blast 7+ to premium only" | Backtest-gated — must pass on local data |
| **Gaps** | "No Linear integration" | Additive accumulation |

### Provenance

Every shared memory carries full provenance:

- **Who:** peer_id, peer_name
- **Where:** project_key, language, toolchain
- **When:** created_at, last_verified
- **How much:** evidence_count, confidence (decays with age)

This enables intelligent filtering: "Only accept Python patterns from peers working on Python projects."

### Quarantine System

Incoming peer data doesn't go live immediately. It enters quarantine and leaves it through one of three paths:

1. **Self-confirmed:** Local data validates the pattern within 30 days
2. **Crowd-confirmed:** 3+ peers independently report the same pattern
3. **Human override:** Developer manually promotes or rejects

This prevents poisoning, stale data propagation, and context collapse. A bad pattern from one node can't silently corrupt the entire team.

### Cold Start

A new team member installs Maggy, discovers peers via mDNS, and receives the entire team's collective intelligence — quarantined until locally validated. Day one, they have the benefit of months of team learning.

### The Compound Effect

```
Individual Maggy:    knowledge = learning_rate x time
Team Mesh (n peers): knowledge = n x learning_rate x time x sharing_factor

5 developers, 6 months:
  Solo:  1 x 1.0 x 180 = 180 learning units
  Mesh:  5 x 1.0 x 180 x 0.8 = 720 learning units (4x multiplier)
```

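The sharing_factor (0.8) accounts for context mismatch and quarantine filtering, which is why 5 peers yield a 4x multiplier rather than 5x. This simple model is linear in team size; in practice the effect can exceed it because peers validate each other's patterns through crowd confirmation.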

---

## 8. Lexon: Semantic Tool Binding

### The Tool Overload Problem

As Maggy's capabilities grow — MCP Forge auto-generates servers, Process Intelligence adds signal collectors, each project adds environment-specific tools — the tool count will cross 50, then 100. Research shows tool selection accuracy collapses at this scale: RAG-MCP demonstrated accuracy dropping from 87% to 13% as tools grew from 10 to 100.

A second failure mode persists even with retrieval: the **vocabulary gap**. Tool descriptions are written by engineers. Users speak in their own vocabulary. "I want to blast my leads" doesn't match `create_campaign` by any lexical metric. Maggy needs to learn that for *this user*, "blast" means bulk email send.

### Two-Tier Routing

Lexon solves this with a two-tier pipeline that runs in parallel:

1. **Tier A — Fast LLM Router** (<300ms): A compact tool manifest (name + 1-line description, ~400 tokens for 80 tools) fed to a fast model. Returns 5-7 candidates with rationale. JSON schema constrained to valid tool names — no hallucinated tools.

2. **Tier B — Multilingual Semantic Retriever**: Vector search over the full tool registry, indexed by description, example queries, and learned synonyms. Multilingual embedding model ensures queries in any language match correctly.

Candidates from both tiers are unioned and deduplicated. Each tier compensates for the other's failure mode: the LLM captures intent-level reasoning; the retriever captures lexical variants and multilingual matches.

### Terminology Map

A three-level vocabulary store that learns over time:

- **System level**: Built-in tool descriptions (baseline)
- **Org level**: Team-shared vocabulary, propagated via Mesh (e.g., "follow up" = specific CRM workflow)
- **User level**: Personal shortcuts and preferences (e.g., "morning sequence" = campaign with time=09:00)

Resolution: user overrides org overrides system. **NOT bindings** encode negative matches — "blast" is explicitly NOT "delete_all" — preventing recurring mis-selections.

### Dual-Mode Disambiguation

When confidence is ambiguous, Lexon has two resolution modes:

**Self-clarify (default, autonomous):** Lexon resolves ambiguity without asking the user by consulting iCPG's structured intent, Mnemos context, Engram's past bindings, process history, and Mesh consensus. If any source resolves confidence above threshold, proceed silently. The goal: 95%+ resolutions via self-clarify after 50+ interactions.

**User-clarify (high-stakes actions only):** Triggered only for destructive, expensive, or irreversible actions (delete, deploy, billing changes). Presents 2-3 concrete options. The user's selection becomes a permanent binding.

Autonomous agents should almost never trigger user-clarify. This is what separates Maggy from tools that interrupt you constantly.

### Personalization

Five implicit learning signals update the Terminology Map without user effort:
1. **Correction** → add NOT binding + positive binding
2. **Affirmation** → increment confidence
3. **Repetition** (5+) → promote to high-confidence synonym
4. **Disambiguation selection** → capture as user-level binding
5. **Clarification repetition** (3+) → escalate to explicit preference prompt

High-confidence bindings persist via Engram across sessions and propagate to the org via Mesh.

### Tool Contract Binding

Lexon doesn't just bind phrases to tool names — it binds to tool contracts. Each LexonRecord records the tool version and schema hash at bind time. When a tool's API changes, Lexon detects the schema drift and re-evaluates bindings rather than silently calling a tool with a different interface. This matters because MCP Forge auto-generates tools from API docs that evolve.

### Outcome-Bearing Records

Every LexonRecord carries an outcome reward (-1.0 to 1.0): did the binding produce good results? Corrections are tracked with their source (user explicit, CI failure, review comment). This transforms Lexon from a static lookup table into a reward-bearing learning system that gets measurably better at tool selection over time.

### Research Basis

Lexon builds on: RAG-MCP (Anthropic, 2025 — retrieval-based tool selection), Tool2Vec (2024 — example queries as embedding targets), ToolTree (ICLR 2026 — MCTS-style tool planning), Tool-MVR (2025 — self-correction loops), and Gorilla (Berkeley, 2023 — fine-tuned tool LLMs). Lexon's contribution is the unified architecture combining retrieval, disambiguation, multilingual support, and adaptive personalization — no prior system addresses all four.

---

## 9. Event Spine: The Nervous System

### Why an Event Spine

Maggy's components — iCPG, Mnemos, Lexon, Engram, Process Intelligence, Mesh — each generate events in their own formats. Without a canonical event spine, correlating "user said X → Lexon bound tool Y → execution failed → memory Z was created → mutation W was proposed" requires stitching together six different log formats.

The Event Spine defines a single ordered event stream that every component writes to:

```
IntentEvent → BindingEvent → ExecutionEvent → MemoryEvent
                                                   ↓
MeshEvent ← MutationEvent ← OutcomeEvent ← PersistenceEvent
```

Eight typed events, each carrying a common header (event_id, task_id, project_id, agent_id, model_id, confidence, namespace, policy_version, reward_delta). This enables:

- **End-to-end tracing**: follow a task_id across all 8 event types
- **Reward attribution**: OutcomeEvent.reward propagates back to BindingEvent (was tool selection good?) and MutationEvent (was self-modification good?)
- **Replay debugging**: reproduce failures from the event stream without re-executing
- **Amnesia diagnosis**: compare MemoryEvent → PersistenceEvent conversion rate per project
- **Self-improvement validation**: MutationEvent + OutcomeEvent = evidence for whether L3/L4 changes helped

### The Positioning Statement

> Maggy understands intent through iCPG. Maggy survives task execution through Mnemos. Maggy chooses the right capability through Lexon. Maggy remembers consequences through Engram. Maggy evolves behavior through rewards. Maggy spreads successful mutations through Mesh.
>
> The Event Spine connects all six into a single typed, correlated, reward-bearing event stream. This is the nervous system of an autonomous engineering agent.

---

## 10. Competitive Landscape

The AI coding tool market has exploded into distinct categories. Understanding where Maggy fits — and where it doesn't compete — is critical for positioning.

### 10.1 Market Taxonomy

The landscape breaks into five categories, each with different value propositions:

```
┌─────────────────────────────────────────────────────────────────┐
│                   AI CODING TOOL TAXONOMY (2026)                  │
│                                                                  │
│  1. CLOUD AGENT PLATFORMS (autonomous, cloud-hosted)             │
│     Codex (OpenAI), Devin (Cognition), Copilot Cloud Agent      │
│     Claude Managed Agents                                        │
│                                                                  │
│  2. AI-NATIVE IDEs (editor-first, multi-model)                   │
│     Cursor, Windsurf (Codeium/Cognition)                         │
│                                                                  │
│  3. CLI AGENTS (terminal-first, model-agnostic)                  │
│     Claude Code, Codex CLI, Aider, OpenCode, Cline              │
│                                                                  │
│  4. APP BUILDERS (prompt-to-app, no-code/low-code)               │
│     Lovable, Bolt.new, Replit Agent, v0 (Vercel)                 │
│                                                                  │
│  5. AUTONOMOUS ENGINEERING PLATFORMS                             │
│     Maggy ← ONLY ENTRY                                           │
│     (self-improving + process intelligence + team mesh)          │
└─────────────────────────────────────────────────────────────────┘
```

Maggy is not competing with Lovable (app builders) or Cursor (IDE experience). Maggy competes on a different axis: **autonomous improvement over time**. The question isn't "which tool writes better code today?" — it's "which tool writes better code *next month* than it did *this month*?"

### 10.2 Cloud Agent Platforms

#### OpenAI Codex (Cloud)

Codex is OpenAI's cloud-hosted autonomous coding agent, launched May 2025. Each task runs in its own sandboxed cloud environment preloaded with your GitHub repository. It can write features, fix bugs, run tests, and submit PRs — all in parallel.

| Capability | Codex Cloud | Maggy |
|-----------|-------------|-------|
| Execution model | Cloud sandbox (internet disabled) | Local containers (full network) |
| Model | codex-1 (o3 variant), GPT-5.3-Codex | 6+ models, learned routing |
| Parallel tasks | Yes (multiple cloud sandboxes) | Yes (Polyphony containers) |
| Self-improvement | No | 5-level closed-loop control |
| Process intelligence | No | Full SDLC learning |
| Team learning | No cross-instance learning | Mesh (P2P, autonomous) |
| SWE-bench Verified | 85% (GPT-5.3-Codex) | Model-dependent (routes to best) |
| Cost | ChatGPT Pro/Enterprise subscription | Self-hosted, pay-per-model-use |
| Data privacy | Code sent to OpenAI cloud | Local-first, code stays on machine |
| Trigger automation | Codex Jobs (on GitHub push) | Process Intelligence (on any signal) |

**Codex's strength:** Cloud-native parallel execution with strong sandboxing. The upcoming Codex Jobs feature (automated triggers on git events) is compelling for CI/CD workflows.

**Maggy's edge:** Codex treats each task as independent — it doesn't learn from past tasks, doesn't track reviewer patterns, and doesn't share knowledge across team members. Maggy's L1-L4 control loops mean task #100 is handled significantly better than task #1.

#### Devin (Cognition)

Devin is an autonomous cloud-based AI software engineer. It reached $73M ARR by early 2026, with 67% of PRs merged autonomously. Cognition also acquired Windsurf for ~$250M.

| Capability | Devin | Maggy |
|-----------|-------|-------|
| Execution model | Cloud VM with browser | Local containers |
| Knowledge system | Playbooks + Knowledge docs (manual) | Dynamic typed memory (automatic) |
| Cross-instance learning | No — knowledge is per-org, manually curated | Yes — Mesh shares automatically |
| Multi-model | Limited | 6+ models with auto-routing |
| Self-improvement | Playbooks improve via manual updates | 5-level automatic control loops |
| Process intelligence | No | CI, reviews, deploys, merge patterns |
| Managed Devins | Yes (parallel orchestration) | Yes (Polyphony containers) |
| SWE-bench Verified | 45.8% (Devin 2.0, unassisted) | Model-dependent |
| Cost | $500/mo Teams, custom Enterprise | Self-hosted |
| Scheduling | Recurring/one-time scheduled sessions | Continuous background operation |

**Devin's strength:** Enterprise organization structure, admin controls, playbook management. The acquisition of Windsurf gives them an IDE play too.

**Maggy's edge:** Devin's knowledge system is manually curated — someone writes playbooks and knowledge docs. Maggy's intelligence is learned automatically from task outcomes. Devin doesn't share learnings across team members' instances; Maggy Mesh does this autonomously.

#### Claude Managed Agents

Anthropic's cloud agent platform, updated May 2026 with three significant features: dreaming, outcomes, and multi-agent orchestration.

| Capability | Claude Managed Agents | Maggy |
|-----------|----------------------|-------|
| Execution model | Secure cloud containers | Local containers |
| Dreaming | Yes — reviews past sessions, extracts patterns | Similar to L3/L4 loops |
| Memory | Per-agent + cross-agent via dreaming | Typed memory (scores, patterns, policies, gaps) |
| Multi-agent | Orchestration + webhooks | Polyphony containers + cross-agent delegation |
| Self-improvement | Dreaming (research preview) | 5-level closed-loop control (designed in) |
| Process intelligence | No | Full SDLC learning |
| Team learning | Cross-agent dreaming (same org) | Mesh (P2P, cross-machine) |
| Local execution | No (cloud only) | Yes (local-first) |

**Claude Managed Agents' strength:** Dreaming is the closest any competitor comes to Maggy's self-improvement concept. Harvey (legal AI) saw 6x task completion improvement after implementing dreaming. The cross-agent pattern extraction is genuinely novel.

**Maggy's edge:** Dreaming is cloud-only and Anthropic-locked. Maggy's control loops work locally, across any model, and share learnings across developer machines — not just across agent sessions in the cloud.

#### GitHub Copilot (Cloud Agent + Agent Mode)

Copilot evolved from autocomplete to a multi-layered platform: inline suggestions, chat, agent mode (IDE), and cloud agent (autonomous).

| Capability | Copilot | Maggy |
|-----------|---------|-------|
| Code completion | Best-in-class inline suggestions | Via Pi (any model) |
| Cloud agent | Yes — autonomous PRs from issues | Yes — local containers |
| Agent mode | IDE-integrated (VS Code, Visual Studio) | CLI + web dashboard |
| Custom agents | User-level + repo-level definitions | Skills + iCPG + Mnemos |
| Multi-model | Yes (GPT-4o, Claude, Gemini via settings) | Yes (6+ models, learned routing) |
| Security tools | Security Reviewer agent (beta) | iCPG drift detection |
| Self-improvement | No | 5-level closed-loop control |
| Process intelligence | No | Full SDLC learning |
| Team learning | Spaces (cloud-mediated, admin-controlled) | Mesh (P2P, autonomous) |
| Debugger agent | Yes (Visual Studio, runtime validation) | L0 real-time control |
| Ecosystem | GitHub-native (Issues, PRs, Actions) | GitHub API + any ticketing system |

**Copilot's strength:** Deepest IDE integration. The debugger agent validating fixes against runtime behavior is unique. GitHub ecosystem integration is unmatched. Custom agents with workspace awareness, MCP connections, and model selection are powerful.

**Maggy's edge:** Copilot doesn't learn from its mistakes. It doesn't track which model does best on which task type. It doesn't observe CI results to preemptively fix reviewer complaints. And Spaces is admin-curated knowledge — not automatically learned intelligence.

### 10.3 AI-Native IDEs

#### Cursor

Cursor is the leading AI-native IDE (~$100M+ ARR), a fork of VS Code with deep AI integration.

| Capability | Cursor | Maggy |
|-----------|--------|-------|
| IDE experience | Native (fork of VS Code) | CLI + web dashboard |
| Background agents | 8 parallel cloud agents | Polyphony local containers |
| Memories | Project-scoped, persisted across sessions | Typed memory with provenance |
| Rules | `.cursorrules`, project rules | Skills (`.md`), iCPG, Mnemos |
| Security review | Always-on PR security agents (beta) | iCPG constraints + drift |
| Team features | Centralized billing, usage analytics | Mesh (P2P intelligence sharing) |
| Model routing | Manual selection | Learned from reward data |
| Self-improvement | Memories (passive) | 5-level active control loops |
| Process intelligence | No | Full SDLC learning |
| Context management | Rules, skills, MCPs, subagents | Skills, iCPG, Mnemos, code graph |

**Cursor's strength:** UX polish, background agents at scale (8 parallel), and the always-on security review agents. The context usage breakdown (rules, skills, MCPs) shows mature observability.

**Maggy's edge:** Cursor's memories are passive ("remember this fact"). Maggy's memory is active — it observes outcomes and adjusts behavior. Cursor doesn't learn from CI failures, doesn't track reviewer patterns, and doesn't share intelligence P2P.

#### Windsurf (Codeium → Cognition)

Windsurf's Cascade agent plans and executes multi-file edits with a dedicated planning agent running in the background. Acquired by Cognition (Devin) for ~$250M in December 2025.

| Capability | Windsurf | Maggy |
|-----------|----------|-------|
| Agent | Cascade (plan + execute) | Multi-level control loops |
| Codemaps | AI-annotated visual code maps | codebase-memory-mcp graph |
| Built-in browser | Yes (web context for Cascade) | Process Intelligence API hooks |
| Self-improvement | No | 5-level closed-loop control |
| Cost | $15/mo Pro | Self-hosted |

### 10.4 CLI Agents

#### Claude Code

Anthropic's terminal-first coding agent. Runs locally, supports multi-agent orchestration via Task tool with teams.

| Capability | Claude Code | Maggy |
|-----------|-------------|-------|
| Multi-agent | Task tool, teams, SendMessage | Polyphony containers + Pi |
| Model | Claude only | 6+ models with auto-routing |
| IDE integration | VS Code, JetBrains, desktop app | CLI + web dashboard |
| Hooks | PreToolUse, PostToolUse, Stop | Skills + hooks + L0 real-time |
| Self-improvement | No | 5-level closed-loop control |
| MCP support | Native | Native + MCP Forge (auto-generate) |

**Note:** Maggy is *built on* Claude Code's infrastructure (skills, hooks, MCP). It extends Claude Code with self-improvement, multi-model routing, process intelligence, and team mesh.

#### Codex CLI (OpenAI)

Open-source (Apache-2.0), Rust-based terminal agent. 81K+ GitHub stars. Runs locally, authenticates via ChatGPT account or API key.

| Capability | Codex CLI | Maggy |
|-----------|-----------|-------|
| Open source | Yes (Apache-2.0, 81K stars) | Yes |
| Language | Rust (96.3%) | Python |
| Model | OpenAI models only | 6+ providers |
| Self-improvement | No | 5-level closed-loop control |
| Team learning | No | Mesh (P2P) |

#### Aider

Open-source CLI pair programmer. 39K+ GitHub stars, 4.1M+ installations. Model-agnostic with an architect/editor dual-model approach.

| Capability | Aider | Maggy |
|-----------|-------|-------|
| Open source | Yes (39K stars) | Yes |
| Multi-model | Yes (75+ providers) | Yes (6+ with auto-routing) |
| Architect mode | Dual-model: strong planner + cheap editor | Dual-model planning (Phase 6) |
| Git integration | Every edit = reviewable commit | iCPG + Polyphony branches |
| Auto-lint/test | Yes (on every change) | L0 real-time control |
| Self-improvement | No | 5-level closed-loop control |
| Team learning | No | Mesh (P2P) |

**Aider's strength:** The architect/editor mode is clever cost optimization — expensive model plans, cheap model executes. Maggy's Phase 6 dual-model planning is similar but adds conflict resolution and outcome tracking.

#### OpenCode

Was a Go-based CLI with TUI (Bubble Tea), 12K+ stars. **Archived September 2025**, now continued as "Crush" by the original author (Charm team). Supported 75+ LLM providers, SQLite session storage, LSP integration.

### 10.5 App Builders

These tools target a different audience (non-developers, designers, rapid prototyping) but are worth understanding as they represent the "opposite end" of the autonomy spectrum.

#### Lovable

Prompt-to-full-stack-app builder. 2.3M users, $100M ARR, $6.6B valuation (Series B, Dec 2025, backed by Nvidia/Salesforce).

| Capability | Lovable | Maggy |
|-----------|---------|-------|
| Target user | Non-developers, designers | Professional developers |
| Output | Full-stack app from prompt | Code changes to existing codebase |
| Stack | React + TypeScript + Supabase | Any stack |
| Agent mode | Autonomous development mode | Multi-level control loops |
| GitHub sync | Yes | Native (git-first) |
| Self-improvement | No | 5-level closed-loop control |

#### Bolt.new, Replit Agent, v0

- **Bolt.new** — Browser-based JS app generator. 1M+ websites generated in 5 months.
- **Replit Agent 4** (March 2026) — Handles auth, databases, parallel task execution, Design Mode, checkpoint rollback. Richest ecosystem (50+ languages).
- **v0** (Vercel) — Specializes in React components with Tailwind/shadcn/ui. Precision frontend generation.

These are complementary to Maggy, not competitive. A developer might use Lovable to prototype, then bring the codebase into Maggy for professional development with CI integration, code quality tracking, and team collaboration.

### 10.6 Summary Comparison Matrix

| Capability | Codex Cloud | Devin | Claude Managed | Copilot | Cursor | Claude Code | Aider | Maggy |
|-----------|------------|-------|---------------|---------|--------|-------------|-------|-------|
| **Self-improvement** | - | - | Dreaming (preview) | - | - | - | - | 5-level control |
| **Process intelligence** | - | - | - | - | - | - | - | Full SDLC |
| **Team learning** | - | - | Cross-agent dreaming | Spaces | Org memories | - | - | P2P Mesh |
| **Multi-model routing** | - | Limited | - | Manual | Manual | - | Manual | Learned |
| **Local-first** | - | - | - | - | Partial | Yes | Yes | Yes |
| **Cloud agents** | Yes | Yes | Yes | Yes | Yes | - | - | - |
| **IDE integration** | VS Code | Browser | - | Native | Native | VS Code | Terminal | Dashboard |
| **Open source** | CLI only | - | - | - | - | - | Yes | Yes |
| **Vendor lock-in** | OpenAI | Cognition | Anthropic | GitHub | Cursor | Anthropic | None | None |

### 10.7 Where Maggy Wins

1. **Self-improvement is the product** — No other tool has a formal multi-level control system. Claude's dreaming is the closest, but it's cloud-only and single-vendor.
2. **Process intelligence is unique** — Nobody else learns from CI results, reviewer comments, and merge patterns to preemptively fix code.
3. **Autonomous team learning** — Mesh shares typed, provenanced intelligence P2P without a central server. Everyone else's "team features" are admin-curated knowledge or cloud-mediated memory.
4. **Model-agnostic by design** — Not locked to any provider. Learns which model is best for which task type automatically.
5. **Local-first with no compromises** — Code never leaves developer machines. Works offline with local models. No vendor sees your proprietary codebase.

### 10.8 Where Competitors Win Today

- **Copilot:** Deepest IDE integration, GitHub ecosystem, largest user base
- **Cursor:** Best editor UX, background agents at scale, security review agents
- **Devin:** Enterprise controls, playbooks, $73M ARR proves market demand
- **Claude Managed Agents:** Dreaming is genuinely novel, cloud scalability
- **Codex Cloud:** Parallel cloud sandboxes, upcoming Codex Jobs automation
- **Lovable:** Prompt-to-app for non-developers, $6.6B validates the broader market
- **Aider:** Open-source community (39K stars), architect/editor cost optimization

---

## 11. Migration Roadmap

### Phase Dependencies

```
Phase 1: PiAdapter + Token Budget ──────────────────┐
    │                                                 │
    ├── Phase 2: Model Routing (blast→model)          │
    ├── Phase 3: Mnemos Multi-Model Fatigue           │
    ├── Phase 6: Dual-Model Planning                  │
    │                                                 │
Phase 4: CIKG Extract ────────────────┐               │
    │                                  │              │
    └───────────┬──────────────────────┘              │
                │                                     │
Phase 5: Maggy v2 Dashboard ◄─────────────────────────┘
    │
    ├── Phase 7: Vercel Deploy Containers (Docker)
    ├── Phase 8: Process Intelligence ──────┐
    ├── Phase 9: MCP Forge                  │
    │                                       │
    └── Phase 11: Maggy Mesh ◄──────────────┘
                                            │
Phase 10: Integration Testing ◄─────────────┘
                                            │
Phase 3 + Phase 5 ──► Phase 12: Engram ─────┘
                                    │
Phase 9 + Phase 12 ─► Phase 13: Lexon
                                    │
Phase 12 + Phase 13 ─► Phase 14: Event Spine
```

### Phase Summary

| Phase | What | Priority | Effort | Dependencies |
|-------|------|----------|--------|-------------|
| 1 | PiAdapter + token budget | P0 | Large | Pi installed |
| 2 | Model routing (blast→model) | P0 | Medium | Phase 1 + iCPG |
| 3 | Mnemos multi-model fatigue | P1 | Medium | Phase 1 |
| 4 | CIKG extraction | P1 | Medium | Supabase |
| 5 | Maggy v2 dashboard | P0 | Large | Phases 1-4 |
| 6 | Dual-model planning | P2 | Medium | Phase 1 |
| 7 | Vercel deploy containers | P2 | Medium | Docker |
| 8 | Process intelligence | P1 | Large | Phase 5 + GitHub API |
| 9 | MCP Forge | P2 | Large | Phase 5 |
| 10 | Integration testing + docs | P1 | Large | All phases |
| 11 | Maggy Mesh (P2P) | P2 | XL | Phase 5 + Phase 8 |
| 12 | Engram (cross-session memory) | P1 | Large | Phase 3 + Phase 5 |
| 13 | Lexon (semantic tool binding) | P2 | Large | Phase 9 + Phase 12 |
| 14 | Event Spine (canonical event flow) | P2 | Medium | Phase 12 + Phase 13 |

---

## 12. Research Foundations & Prior Art

Maggy's architecture draws from five distinct research streams. This isn't a tool assembled from hype — each component maps to validated research with production evidence.

### 12.1 Self-Evolving Agent Systems

The field of self-improving AI agents has exploded in 2025-2026. The number of papers mentioning "AI Agent" or "Agentic AI" published in 2025 was more than double the combined total from 2020-2024.

**Key papers and systems:**

- **SICA — Self-Improving Coding Agent (ICLR 2025 Workshop)** — An agent that autonomously edits its own codebase, climbing from 17% to 53% on SWE-bench Verified through self-modification. This validates Maggy's core thesis: agents that modify their own behavior based on outcomes dramatically outperform static agents. ([Paper](https://openreview.net/pdf?id=rShJCyLsOr))

- **Godel Agent (ACL 2025)** — Uses runtime monkey-patching with safety verification. The agent modifies both its task-solving policy and its own learning algorithm, guided by high-level objectives while formal invariant checking prevents unsafe changes. Maggy's L3/L4 control loops use a similar principle: change the improvement process itself, but with rollback safeguards.

- **SAGE — Skill Augmented GRPO (December 2025)** — Agents accumulate reusable function libraries across task chains, achieving 8.9% goal completion gains while reducing output tokens by 59%. This directly parallels Maggy's skill evolution in L3, where successful patterns get codified into reusable skills.

- **HyperAgents (2026)** — Makes the meta-level itself editable. Agents improve *how they improve*, discovering domain-general skills (memory management, prompt engineering, exploration strategies) that transfer across coding, mathematics, and scientific domains. Maggy's L4 monthly evolution loop is designed for exactly this: improving the improvement process.

- **SWE-RL (Meta, 2025)** — Uses self-play where agents alternate between bug injection and fixing roles, gaining +10.4 points on SWE-bench Verified without human-labeled data. This reinforcement-based approach validates Maggy's reward registry concept.

- **AlphaEvolve (Google DeepMind)** — Recovered 0.7% of Google's worldwide compute through automated algorithm optimization. This is the first evidence of hyperscale ROI from self-improving agents — validating that autonomous optimization can deliver measurable economic value.

**Maggy's position:** Maggy applies self-evolution at the *operational* level (routing, workflows, process patterns) rather than at the model-weight level. This is more practical for a local-first system — you don't need GPU clusters to improve model routing decisions based on task rewards.

### 12.2 Agent Memory Systems

Memory has emerged as the central bottleneck for autonomous agents. A comprehensive 2025-2026 survey ("Memory in the Age of AI Agents") offers a structured taxonomy of how memory is designed, implemented, and evaluated in modern LLM-based agents.

**Key developments:**

- **Mem0 (2025-2026)** — Dominates commercially with 186 million API calls quarterly. The graph-enhanced variant (Mem0g) builds a directed, labeled knowledge graph alongside the vector store. Maggy's typed memory system (scores, patterns, policies, gaps) is similarly structured but uses domain-specific merge rules rather than a general-purpose graph.

- **Collaborative Memory (2025)** — A framework for multi-user, multi-agent environments with asymmetric, time-evolving access controls. Maintains private memory (per-user) and shared memory (selectively shared). This directly validates Maggy Mesh's approach of personal memory + team memory with provenance-based filtering.

- **MAGMA: Multi-Graph Agentic Memory Architecture (2026)** — Uses multiple graph structures for different memory types. Parallels Maggy's typed memory classes where scores, patterns, and policies each have different storage and merge semantics.

- **SimpleMem (2025)** — Achieved 26.4% average F1 improvement over baselines with 30x token reduction. Demonstrates that structured memory management produces dramatically better results than naive context stuffing.

**Maggy's position:** Most memory systems are passive stores. Maggy's memory is active — the L1-L4 control loops continuously update, prune, and evolve stored knowledge based on outcomes. The Mesh adds a distributed dimension that no other agent memory system currently implements.

### 12.3 Federated & Distributed AI

- **Federated AI Agents** — Intelligent software systems that learn collaboratively across multiple devices while keeping data localized. This is the theoretical foundation for Maggy Mesh: share learned intelligence, not raw data.

- **Agentic Federated Learning (ICML 2025)** — Autonomous agents collaborate on distributed learning tasks, each contributing local expertise to a shared model. Maggy adapts this from model training to operational intelligence: instead of sharing gradients, Maggy shares typed memory (scores, patterns, policies) with provenance.

- **Multi-Agent Collaboration Surveys (ACM DEAI 2025)** — A unified taxonomy decomposing AI agents into Perception, Brain, Planning, Action, Tool Use, and Collaboration subsystems. Surveys show collaborative architectures outperform isolated agents by 30-60% on complex tasks. Gartner reported a 1,445% surge in multi-agent system inquiries from Q1 2024 to Q2 2025.

- **CRDT-inspired merge** — Conflict-free replicated data types allow distributed systems to merge state without coordination. Maggy uses type-specific merge rules (weighted average for scores, union for patterns, backtest-gated for policies) inspired by CRDT semantics.

### 12.4 Self-Improving Coding in Production

The research isn't just theoretical. Production deployments validate that self-improving agents deliver measurable value:

| System | Result | Relevance to Maggy |
|--------|--------|-------------------|
| **Meta's REA** | Doubled model accuracy; 3 engineers improved 8 models simultaneously | Multi-model optimization works at scale |
| **Cognition (Devin)** | $73M ARR, 67% of PRs merged autonomously | Market demand for autonomous engineering is real |
| **Harvey + Claude Dreaming** | 6x task completion improvement | Cross-session pattern extraction works |
| **Karpathy's autoresearch** | 630-line script, 700 experiments in 2 days, 20 optimizations, 11% efficiency gain | Automated experimentation finds real improvements |
| **AlphaEvolve** | 0.7% of Google's worldwide compute recovered | Self-improvement produces hyperscale ROI |

**Claude Managed Agents — Dreaming (May 2026):** Anthropic's most relevant competitive move. Dreaming is a scheduled process that reviews past agent sessions, extracts patterns, and curates memories so agents improve over time. It surfaces insights no single session could see: recurring mistakes, workflows that multiple agents converge on, and team-shared preferences. This is the closest any competitor comes to Maggy's L3/L4 control loops — but it's cloud-only, Anthropic-locked, and doesn't include process intelligence (CI/review/deploy learning).

### 12.5 Control Theory Foundations

- **Inner-outer loop control** — Industrial control systems use fast inner loops for stability and slow outer loops for optimization. Maggy's L0 (seconds) through L4 (months) hierarchy mirrors this established engineering pattern. The key insight: outer loops NEVER override inner loop stability. L3 can change routing policy, but L0 still catches in-task failures regardless.

- **Reinforcement learning from task outcomes** — Maggy's reward registry applies RLHF principles at the system level, using task outcomes (CI pass, review rounds, deploy success) and user behavior (overrides, re-dos, reverts) as reward signals. Unlike RLHF for model training, this operates at the operational level without any model fine-tuning.

### 12.6 Local-First Software

- **Local-first principles (Ink & Switch, 2019)** — Software that works offline, keeps data on user devices, and syncs peer-to-peer. Maggy's architecture is explicitly local-first: SQLite databases, local filesystem storage, optional P2P sync.

- **Privacy-first trend (2026)** — Multiple tools now emphasize data privacy. OpenCode stores no code or context data. Aider runs entirely locally. The market is moving toward local execution as enterprises grow wary of sending proprietary code to cloud services. Maggy was designed local-first from day one — this isn't a retrofit.

### 12.7 Market Context

The AI coding tool market is at an inflection point:

- **Gartner predicts 40% of enterprise apps will include task-specific AI agents by 2026**, up from less than 5% in 2025.
- **57% of organizations** report measurable impact from AI agents in software development (2025 industry survey).
- The explosion of coding CLIs (30+ tools in 2026) reflects a shift from IDE-native AI to terminal-first agents that understand codebases, git history, and development workflows.
- **SWE-bench scores** continue to climb: Claude Mythos Preview hits 93.9% on Verified, 77.8% on Pro. But raw coding ability is becoming commoditized. The differentiation is moving to *what surrounds the model*: memory, learning, process integration, and team collaboration.

**The implication for Maggy:** Raw code generation quality is converging across models. The next competitive frontier is *what happens around the generation*: learning from outcomes, optimizing processes, sharing intelligence across teams. This is exactly where Maggy's architecture is positioned.

---

## 13. How to Get Started

### Installation

```bash
git clone https://github.com/alinaqi/maggy.git
cd maggy
./install.sh
```

### Current State (v4.0)

Today, Maggy includes:
- **Skills system** — Markdown-based instructions for AI agents (TDD, security, iCPG, Mnemos, etc.)
- **Polyphony** — Container-isolated multi-agent orchestration (173 tests, 14 modules)
- **iCPG** — Intent-augmented code property graph with blast radius scoring
- **Mnemos** — Task-scoped memory lifecycle with typed MnemoGraph
- **Cross-agent delegation** — Complexity-based task routing to Codex, Kimi, etc.
- **Skill-lint** — Quality gates for skill files
- **Behavioral evals** — Test framework for skill effectiveness

### Roadmap to v5.0

The 14-phase migration path takes Maggy from a single-project, single-model toolkit to the multi-project, multi-model, self-improving, team-learning platform described in this RFC.

---

## Contact

**Ali Shaheen** — ali@protaige.com
**Protaige** — Building the future of autonomous AI engineering

---

*This document describes the Maggy v5 architecture as designed. Implementation follows the 14-phase migration path. For technical details, see `docs/architecture-v5.md`. For phase-level task specs, see `_project_specs/phases/`.*


================================================
FILE: maggy/install.sh
================================================
#!/usr/bin/env bash
# Maggy installer — sets up deps and copies config template.
#
# Usage: ./install.sh
#
# Environment:
#   MAGGY_HOME  Directory that receives config.yaml (default: $HOME/.maggy).
#
# Progress is written to stdout; fatal errors and warnings go to stderr.

set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MAGGY_HOME="${MAGGY_HOME:-$HOME/.maggy}"
RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

echo "$RULE"
echo "  Maggy — Generic AI Engineering Command Center"
echo "  Installing to: $MAGGY_HOME"
echo "$RULE"
echo

# 1. Check Python — enforce the 3.11+ minimum from pyproject.toml's requires-python.
if ! command -v python3 >/dev/null 2>&1; then
  echo "❌ python3 not found. Install Python 3.11 or later first." >&2
  exit 1
fi
# Use %-formatting rather than an f-string: an f-string is a SyntaxError on
# python3 < 3.6, which under `set -e` would abort here with a traceback
# instead of reaching the friendly "3.11 or later is required" message below.
PY_VERSION=$(python3 -c 'import sys; print("%d.%d" % sys.version_info[:2])')
if ! python3 -c 'import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)'; then
  echo "❌ Python 3.11 or later is required. Found Python $PY_VERSION." >&2
  echo "   Install a newer Python (e.g. via pyenv, homebrew, or python.org)." >&2
  exit 1
fi
echo "✓ Python $PY_VERSION"

# 2. Check claude CLI (optional — only a warning when missing)
if ! command -v claude >/dev/null 2>&1; then
  echo "⚠  claude CLI not found on PATH. Maggy can still run, but Execute won't work until you install Claude Code." >&2
else
  echo "✓ claude CLI found"
fi

# 3. Install Python deps
echo
echo "Installing Python dependencies..."
# Best effort: a failed pip self-upgrade must not abort the install.
python3 -m pip install --upgrade pip >/dev/null 2>&1 || true
# Try an editable install first, then requirements.txt, then an explicit list.
python3 -m pip install -e "$HERE" || python3 -m pip install -r "$HERE/requirements.txt" 2>/dev/null || {
  # Fallback: explicit install of runtime deps
  python3 -m pip install 'fastapi>=0.115' 'uvicorn[standard]>=0.30' 'httpx>=0.27' 'anthropic>=0.40' 'pyyaml>=6.0' 'feedparser>=6.0' 'pydantic>=2.6'
}
echo "✓ Dependencies installed"

# 4. Config directory + template (an existing config is never overwritten)
mkdir -p "$MAGGY_HOME"
if [ ! -f "$MAGGY_HOME/config.yaml" ]; then
  cp "$HERE/config.example.yaml" "$MAGGY_HOME/config.yaml"
  echo "✓ Wrote config template to $MAGGY_HOME/config.yaml"
  NEEDS_CONFIG=1
else
  echo "✓ Config already exists at $MAGGY_HOME/config.yaml (not overwritten)"
  NEEDS_CONFIG=0
fi

# 5. Remember bootstrap location for iCPG integration
BOOTSTRAP_MARKER="$HOME/.claude/.bootstrap-dir"
if [ ! -f "$BOOTSTRAP_MARKER" ]; then
  # Maggy lives in <bootstrap>/maggy — one level up is bootstrap root.
  # Resolve the path before touching the marker so a failed `cd` aborts the
  # script instead of leaving an empty marker that later runs would keep.
  BOOTSTRAP_ROOT="$(cd "$HERE/.." && pwd)"
  mkdir -p "$HOME/.claude"
  printf '%s\n' "$BOOTSTRAP_ROOT" > "$BOOTSTRAP_MARKER"
  echo "✓ Marked bootstrap location for iCPG access"
fi

echo
echo "$RULE"
if [ "$NEEDS_CONFIG" = "1" ]; then
  echo "Next steps:"
  echo "  1. Edit $MAGGY_HOME/config.yaml"
  echo "     - Set your org name, domain, GitHub org + repos"
  echo "     - Set codebase paths for each repo you want Maggy to execute in"
  echo
  echo "  2. Export credentials:"
  echo "     export GITHUB_TOKEN=ghp_...           # repo + issues scopes"
  echo "     export ANTHROPIC_API_KEY=sk-ant-..."
  echo
  echo "  3. Run:"
  echo "     cd $HERE && python3 -m maggy.main"
  echo
  echo "  4. Open http://localhost:8080"
else
  echo "Ready to run:"
  echo "  cd $HERE && python3 -m maggy.main"
  echo "  Then open http://localhost:8080"
fi
echo "$RULE"


================================================
FILE: maggy/maggy/__init__.py
================================================
"""Maggy — generic AI engineering command center."""

__version__ = "0.1.0"


================================================
FILE: maggy/maggy/adapters/__init__.py
================================================
"""Unified agent adapters for multi-model execution."""


================================================
FILE: maggy/maggy/adapters/cli_discovery.py
================================================
"""Auto-discover installed AI CLIs and their command-line flags.

Probes each CLI via --help, parses capabilities, and builds
command templates that PiAdapter uses to spawn prompts.
"""

from __future__ import annotations

import logging
import re
import shutil
import subprocess
from dataclasses import dataclass, field

logger = logging.getLogger(__name__)

_HELP_TIMEOUT = 10


@dataclass
class CliProfile:
    """Flags and invocation shape discovered for one AI CLI binary."""

    # Identity
    name: str
    binary: str
    version: str = ""
    installed: bool = False
    # Flag spellings; an empty string means "not available / do not emit"
    prompt_flag: str = ""
    work_dir_flag: str = ""
    auto_approve_flag: str = ""
    output_format_flag: str = ""
    max_turns_flag: str = ""
    afk_flag: str = ""
    # Invocation shape
    uses_exec_subcommand: bool = False
    uses_run_subcommand: bool = False
    run_model: str = ""
    prompt_is_positional: bool = False

    def build_command(
        self, prompt: str, wd: str, max_turns: int,
    ) -> list[str]:
        """Assemble the argv list for running ``prompt`` with this CLI.

        Order: binary, optional subcommand (``exec`` wins over ``run``),
        prompt flag (if any) followed by the prompt, working-directory pair,
        auto-approve flag, AFK flag, output-format pair (always ``json``),
        and the max-turns pair when ``max_turns`` is positive.
        """
        argv: list[str] = [self.binary]

        if self.uses_exec_subcommand:
            argv.append("exec")
        elif self.uses_run_subcommand:
            argv.extend(("run", self.run_model))

        # Positional or not, the prompt flag (when set) directly precedes
        # the prompt text.
        if self.prompt_flag:
            argv.append(self.prompt_flag)
        argv.append(prompt)

        if self.work_dir_flag:
            argv.extend((self.work_dir_flag, wd))

        # Bare switches, emitted only when discovered.
        argv.extend(
            switch
            for switch in (self.auto_approve_flag, self.afk_flag)
            if switch
        )

        if self.output_format_flag:
            argv.extend((self.output_format_flag, "json"))
        if self.max_turns_flag and max_turns > 0:
            argv.extend((self.max_turns_flag, str(max_turns)))
        return argv


@dataclass
class DiscoveryResult:
    """Result of scanning all known CLI tools."""

    # Keyed by CLI name. discover_all() stores one profile per probed name,
    # including names whose binary was not found (installed=False).
    profiles: dict[str, CliProfile] = field(default_factory=dict)
    # One "<name>: not found" entry per CLI that discover_all() could not locate.
    errors: list[str] = field(default_factory=list)


# Binary names that discover_all() probes, in this order.
_KNOWN_CLIS = ["claude", "codex", "kimi", "deepseek", "ollama"]


def discover_all() -> DiscoveryResult:
    """Probe every CLI in ``_KNOWN_CLIS`` and collect the profiles.

    Each name gets a profile in the result; names whose binary is missing
    additionally get a ``"<name>: not found"`` entry in ``errors``.
    """
    profiles: dict[str, CliProfile] = {}
    missing: list[str] = []
    for cli_name in _KNOWN_CLIS:
        probed = profiles[cli_name] = discover_cli(cli_name)
        if probed.installed:
            continue
        missing.append(f"{cli_name}: not found")
    return DiscoveryResult(profiles=profiles, errors=missing)


def discover_cli(name: str) -> CliProfile:
    """Locate ``name`` on PATH and build its capability profile.

    Returns an uninstalled placeholder profile (``binary`` set to ``name``)
    when the binary cannot be found. Otherwise the profile is filled from
    the top-level ``--help`` output, refined from the ``exec`` / ``run``
    subcommand help when those subcommands were detected, and finally
    normalized by ``_post_process``.
    """
    located = shutil.which(name)
    if not located:
        return CliProfile(name=name, binary=name)

    profile = CliProfile(
        name=name,
        binary=located,
        installed=True,
        version=_get_version(located),
    )
    _extract_flags(profile, _get_help(located, ""))

    # Subcommand-specific help is only fetched when the top-level help
    # advertised that subcommand.
    refinements = (
        ("uses_exec_subcommand", "exec", _refine_from_exec),
        ("uses_run_subcommand", "run", _refine_from_run),
    )
    for enabled_attr, subcommand, refine in refinements:
        if getattr(profile, enabled_attr):
            refine(profile, _get_help(located, subcommand))

    _post_process(profile)
    return profile


def _extract_flags(profile: CliProfile, text: str) -> None:
    """Populate ``profile`` from flag names found in top-level help text.

    For each profile attribute the candidate patterns are tried in order and
    the first one present in ``text`` decides the value; attributes with no
    matching pattern are left untouched.
    """
    flag_table = (
        # Print/prompt mode
        ("prompt_flag", (
            (r"-p,\s*--print\b", "--print"),
            (r"(-p|--prompt)\b", "-p"),
        )),
        # Working directory (note: --work-dir is recorded as its -w short form)
        ("work_dir_flag", (
            (r"--work-dir\b", "-w"),
            (r"-C,\s*--cd\b", "-C"),
            (r"--cwd\b", "--cwd"),
        )),
        # Auto-approve / skip permissions
        ("auto_approve_flag", (
            (r"--dangerously-skip-permissions\b", "--dangerously-skip-permissions"),
            (r"--dangerously-bypass-approvals", "--dangerously-bypass-approvals-and-sandbox"),
            (r"--yolo\b", "--yolo"),
            (r"--auto-approve\b", "--auto-approve"),
        )),
        # Output format
        ("output_format_flag", (
            (r"--output-format\b", "--output-format"),
        )),
        # Max turns / steps
        ("max_turns_flag", (
            (r"--max-turns\b", "--max-turns"),
            (r"--max-steps-per", "--max-steps-per-turn"),
            (r"--max-steps\b", "--max-steps"),
        )),
        # AFK mode
        ("afk_flag", (
            (r"--afk\b", "--afk"),
        )),
    )
    for attr, candidates in flag_table:
        for pattern, value in candidates:
            if _has(text, pattern):
                setattr(profile, attr, value)
                break

    # Exec subcommand for non-interactive use
    if _has(text, r"\bexec\b.*non-interactive"):
        profile.uses_exec_subcommand = True
    # Run subcommand (ollama-style: "run  Run a model")
    if _has(text, r"\brun\s+Run a model\b"):
        profile.uses_run_subcommand = True


def _refine_from_exec(profile: CliProfile, text: str) -> None:
    """Let flags seen in the ``exec`` subcommand help override earlier picks."""
    overrides = (
        (r"-C,\s*--cd\b", "work_dir_flag", "-C"),
        (
            r"--dangerously-bypass-approvals",
            "auto_approve_flag",
            "--dangerously-bypass-approvals-and-sandbox",
        ),
    )
    for pattern, attr, value in overrides:
        if _has(text, pattern):
            setattr(profile, attr, value)


def _refine_from_run(profile: CliProfile, text: str) -> None:
    """Extract flags from run subcommand help (ollama-style)."""
    profile.prompt_is_positional = True
    profile.prompt_flag = ""


def _post_process(profile: CliProfile) -> None:
    """Apply heuristics after flag extraction."""
    # --print means non-interactive mode; prompt is positional
    if profile.prompt_flag == "--print":
        profile.prompt_is_positional = True
        profile.prompt_flag = "-p"
    # exec subcommand: prompt is also positional
    if profile.uses_exec_subcommand:
        profile.prompt_is_positional = True
        profile.prompt_flag = ""
    # run subcommand (ollama): prompt is positional, need model
    if profile.uses_run_subcommand:
        profile.prompt_is_positional = True
        profile.prompt_flag = ""
        if not profile.run_model:
            profile.run_model = _detect_ollama_model(profile)
    # Claude uses subprocess cwd, not a --cd flag
    if "claude" in profile.name.lower():
        profile.work_dir_flag = ""
    # If -p is a prompt arg (not print mode), --output-format
    # is likely tied to --print mode and will error in -p mode
    if not profile.prompt_is_positional and profile.output_format_flag:
        profile.output_format_flag = ""


def _detect_ollama_model(profile: CliProfile) -> str:
    """Pick the best coding model that is actually installed in ollama.

    Runs ``<binary> list`` and reads the first column of each row as an
    installed model name. Families are tried in preference order; within the
    first family that is present, an exactly matching preferred tag wins,
    otherwise the first installed model of that family is used. If no
    preferred family is present, the first listed model is returned.

    The returned name always comes from the listing when any model is
    installed, so the caller never asks ``ollama run`` for a tag that is
    missing locally. Only the family name used to be checked while the
    hard-coded preferred tag was returned, which could name a model the
    user does not have.

    Args:
        profile: Profile whose ``binary`` is the ollama executable.

    Returns:
        An installed model name, or ``"qwen3-coder:30b-a3b-q8_0"`` when the
        listing cannot be obtained or is empty.
    """
    default_model = "qwen3-coder:30b-a3b-q8_0"
    try:
        out = subprocess.run(
            [profile.binary, "list"],
            capture_output=True, text=True,
            timeout=_HELP_TIMEOUT,
        )
    except (subprocess.TimeoutExpired, OSError):
        return default_model

    # First column of every row, skipping blank lines and the header row.
    installed: list[str] = []
    for line in (out.stdout or "").splitlines():
        columns = line.split()
        if not columns or columns[0].upper() == "NAME":
            continue
        installed.append(columns[0])
    if not installed:
        return default_model

    # Prefer Qwen3-Coder (MoE, 3.3B active), then older models
    prefs = [
        "qwen3-coder:30b-a3b-q8_0",
        "qwen3-coder:30b",
        "qwen2.5-coder:32b", "qwen2.5-coder:14b",
        "qwen2.5-coder:7b", "deepseek-coder-v2",
        "codellama:34b", "codellama:13b",
        "qwen3:32b", "llama3.1:70b", "llama3.1:8b",
    ]
    for preferred in prefs:
        family = preferred.split(":")[0]
        # Substring match keeps the original's tolerance for namespaced
        # names such as "registry/user/<family>:tag".
        in_family = [m for m in installed if family in m.lower()]
        if not in_family:
            continue
        by_lower = {m.lower(): m for m in in_family}
        for wanted in prefs:
            if wanted.split(":")[0] == family and wanted in by_lower:
                return by_lower[wanted]
        return in_family[0]

    # Fallback: first listed model
    return installed[0]


def _has(text: str, pattern: str) -> bool:
    """Check if pattern exists in text (case-insensitive)."""
    return bool(re.search(pattern, text, re.IGNORECASE))


def _get_version(binary: str) -> str:
    """Return the first line of the CLI's version output, or "" if none.

    Tries ``--version``, ``-V`` and ``-v`` in turn. A probe counts when its
    combined stdout+stderr is non-empty and shorter than 200 characters;
    probes that time out or cannot be executed are skipped.
    """
    for probe in ("--version", "-V", "-v"):
        try:
            proc = subprocess.run(
                [binary, probe],
                capture_output=True, text=True,
                timeout=_HELP_TIMEOUT, env=_clean_env(),
            )
        except (subprocess.TimeoutExpired, OSError):
            continue
        combined = (proc.stdout + proc.stderr).strip()
        if 0 < len(combined) < 200:
            return combined.split("\n")[0]
    return ""


def _get_help(binary: str, subcommand: str) -> str:
    """Return combined stdout+stderr of ``<binary> [subcommand] --help``.

    An empty ``subcommand`` requests the top-level help. Returns "" (and
    logs at debug level) when the command times out or cannot be executed.
    """
    argv = [binary, *([subcommand] if subcommand else []), "--help"]
    try:
        proc = subprocess.run(
            argv, capture_output=True, text=True,
            timeout=_HELP_TIMEOUT, env=_clean_env(),
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        logger.debug("Help failed for %s: %s", binary, exc)
        return ""
    return (proc.stdout + proc.stderr).strip()


def _clean_env() -> dict[str, str]:
    """Return env without CLAUDECODE to avoid nesting block."""
    import os
    env = os.environ.copy()
    env.pop("CLAUDECODE", None)
    return env


================================================
FILE: maggy/maggy/adapters/pi.py
================================================
"""Unified adapter for CLI prompts and Pi RPC control.

Auto-discovers installed AI CLIs and their flags at init time
so Maggy can orchestrate any subscription-based tool (claude,
codex, kimi, etc.) without hardcoded command templates.
"""

from __future__ import annotations

import asyncio
import json
import logging
import os
import shutil
import subprocess
from dataclasses import dataclass
from typing import AsyncIterator

from maggy.adapters.cli_discovery import (
    CliProfile,
    DiscoveryResult,
    discover_all,
)

logger = logging.getLogger(__name__)


def _extract_usage(raw: str) -> tuple[float, int, int, str]:
    """Parse JSON CLI output for cost/tokens; fall back to raw text."""
    try:
        d = json.loads(raw)
        u = d.get("usage") or {}
        return (
            float(d.get("cost_usd") or 0),
            int(u.get("input_tokens") or 0),
            int(u.get("output_tokens") or 0),
            str(d.get("result", raw)),
        )
    except (json.JSONDecodeError, ValueError, TypeError):
        return 0.0, 0, 0, raw


@dataclass
class ModelEntry:
    """Registry record describing one model and the CLI used to reach it."""
    name: str  # registry key; PiAdapter indexes its models by this
    provider: str  # vendor label, e.g. "anthropic" or "openai"
    model_id: str  # provider-side model identifier
    tier: str  # tier label; DEFAULT_MODELS uses e.g. "local", "cheap", "premium"
    cost_per_1k: float = 0.0  # PiAdapter sorts its fallback order by this; presumably USD per 1k tokens — TODO confirm
    daily_limit_usd: float = 50.0
    cli_command: str = "claude"  # executable PiAdapter launches for this model
    context_window: int = 200_000  # presumably the context size in tokens — TODO confirm


# Built-in registry used when PiAdapter is constructed without ``models``.
DEFAULT_MODELS: list[ModelEntry] = [
    ModelEntry(
        name="local", provider="ollama", model_id="qwen3-coder:30b-a3b-q8_0",
        tier="local", cost_per_1k=0.0, daily_limit_usd=0.0,
        cli_command="ollama", context_window=32_000,
    ),
    ModelEntry(
        name="kimi", provider="moonshot", model_id="kimi-k2",
        tier="cheap", cost_per_1k=0.001, daily_limit_usd=10.0,
        cli_command="kimi", context_window=128_000,
    ),
    ModelEntry(
        name="deepseek", provider="deepseek", model_id="deepseek-v3",
        tier="cheap", cost_per_1k=0.002, daily_limit_usd=10.0,
        cli_command="deepseek", context_window=128_000,
    ),
    ModelEntry(
        name="gpt", provider="openai", model_id="gpt-4o",
        tier="medium", cost_per_1k=0.01, daily_limit_usd=20.0,
        cli_command="codex", context_window=128_000,
    ),
    ModelEntry(
        name="claude", provider="anthropic", model_id="claude-sonnet-4",
        tier="premium", cost_per_1k=0.03, daily_limit_usd=50.0,
        cli_command="claude", context_window=200_000,
    ),
    ModelEntry(
        name="codex", provider="openai", model_id="codex",
        tier="validator", cost_per_1k=0.02, daily_limit_usd=30.0,
        cli_command="codex", context_window=200_000,
    ),
]

# Lower-case substrings that flag CLI output as a quota / rate-limit failure.
QUOTA_MARKERS = frozenset((
    "429",
    "capacity",
    "overloaded",
    "quota",
    "rate limit",
    "too many requests",
))

@dataclass
class RunResult:
    """Outcome of one prompt run, as returned by PiAdapter.send_prompt."""
    model: str  # name of the model the prompt was sent to
    success: bool  # PiAdapter sets this to True only for exit code 0
    output: str = ""  # extracted result text, or the raw CLI output
    error: str = ""  # failure description, e.g. "Timed out" or "Exit code 2"
    cost_usd: float = 0.0  # cost parsed from the CLI's JSON output, if any
    input_tokens: int = 0
    output_tokens: int = 0
    turns: int = 0  # never populated by the adapter code visible in this module
    quota_hit: bool = False  # True when the output contained a QUOTA_MARKERS substring


class PiAdapter:
    """Send prompts to discovered AI CLIs and talk to the Pi RPC process.

    One-shot prompts run as short-lived subprocesses (``send_prompt`` /
    ``send_with_fallback``). The RPC side keeps one long-lived process and
    exchanges newline-delimited JSON over its stdin/stdout (``send_rpc`` /
    ``stream_events``).
    """

    def __init__(
        self,
        models: list[ModelEntry] | None = None,
        rpc_command: str = "pi",
        discovery: DiscoveryResult | None = None,
    ):
        """Index the models and record which CLIs were discovered.

        Args:
            models: Registry entries; ``DEFAULT_MODELS`` when omitted or empty.
            rpc_command: Executable started for the RPC channel.
            discovery: Pre-computed discovery result; ``discover_all()`` is
                called when this is omitted.
        """
        entries = models or DEFAULT_MODELS
        self._models = {entry.name: entry for entry in entries}
        # Cheapest first, so a fallback chain only moves towards pricier models.
        self._fallback_order = [
            entry.name for entry in sorted(entries, key=lambda m: m.cost_per_1k)
        ]
        self._rpc_command = rpc_command
        self._rpc_process: subprocess.Popen[str] | None = None
        self._streaming = False
        self._discovery = discovery or discover_all()
        self._profiles: dict[str, CliProfile] = self._discovery.profiles
        self._log_discovery()

    def get_model(self, name: str) -> ModelEntry | None:
        """Return the registry entry called ``name``, or ``None`` if unknown."""
        return self._models.get(name)

    def list_models(self) -> list[ModelEntry]:
        """Return all registered models as a new list."""
        return list(self._models.values())

    def fallback_chain(self, start: str) -> list[str]:
        """Return the model names to try after ``start`` has failed.

        These are the models that come after ``start`` in ascending
        ``cost_per_1k`` order. An unknown ``start`` yields the whole order.
        The returned list is always a fresh copy.
        """
        try:
            idx = self._fallback_order.index(start)
        except ValueError:
            # Copy so a caller mutating the result cannot alter the adapter's
            # internal ordering (the slice below already copies).
            return list(self._fallback_order)
        return self._fallback_order[idx + 1 :]

    async def send_prompt(
        self,
        model_name: str,
        prompt: str,
        working_dir: str,
        max_turns: int = 20,
        timeout: int = 600,
    ) -> RunResult:
        """Run one prompt through the CLI of ``model_name``.

        Args:
            model_name: Key of a registered model.
            prompt: Prompt text handed to the CLI.
            working_dir: Directory the CLI is started in.
            max_turns: Passed on to the CLI profile's command builder.
            timeout: Seconds to wait for the CLI to finish.

        Returns:
            A ``RunResult``. Unknown models, timeouts and a missing
            executable are reported as ``success=False`` with ``error`` set
            rather than raised.
        """
        model = self._models.get(model_name)
        if not model:
            return RunResult(model=model_name, success=False, error=f"Unknown model: {model_name}")
        proc = None
        try:
            proc = await self._spawn_prompt(model, prompt, max_turns, working_dir)
            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
            return self._prompt_result(model_name, proc.returncode or 0, stdout or b"")
        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); the child itself would
            # keep running, so terminate and reap it explicitly.
            if proc is not None:
                await self._kill_process(proc)
            return RunResult(model=model_name, success=False, error="Timed out")
        except FileNotFoundError:
            return RunResult(
                model=model_name, success=False, error=f"CLI '{model.cli_command}' not found"
            )

    async def send_with_fallback(
        self,
        model_name: str,
        prompt: str,
        working_dir: str,
        max_turns: int = 20,
    ) -> RunResult:
        """Try ``model_name`` first, then each model in its fallback chain.

        Returns:
            The first successful ``RunResult``, or the last failed one when
            every attempt fails.
        """
        result = await self.send_prompt(model_name, prompt, working_dir, max_turns)
        if result.success:
            return result
        for fallback in self.fallback_chain(model_name):
            logger.info("Falling back from %s to %s", model_name, fallback)
            result = await self.send_prompt(fallback, prompt, working_dir, max_turns)
            if result.success:
                return result
        return result

    def send_rpc(self, command: dict[str, object]) -> dict[str, object]:
        """Write one JSON command to the RPC process and read one reply line.

        Returns:
            The decoded reply; ``{}`` when the process returned no line.

        Raises:
            RuntimeError: If ``stream_events`` is active, or the process has
                no usable stdin/stdout.
        """
        # Checked first so a rejected call never spawns an RPC process.
        if self._streaming:
            raise RuntimeError("Cannot send RPC while streaming")
        proc = self._ensure_rpc_process()
        stdin = self._require_stream(proc.stdin, "stdin")
        stdout = self._require_stream(proc.stdout, "stdout")
        stdin.write(f"{json.dumps(command, separators=(',', ':'))}\n")
        stdin.flush()
        line = stdout.readline()
        return json.loads(line or "{}")

    def switch_model(self, provider: str, model: str) -> bool:
        """Send a ``set_model`` command; True when the reply's ``ok`` is truthy."""
        payload = {"command": "set_model", "provider": provider, "model": model}
        return bool(self.send_rpc(payload).get("ok"))

    async def stream_events(self) -> AsyncIterator[dict[str, object]]:
        """Yield JSON events from the RPC process stdout until it closes.

        Raises:
            RuntimeError: If another stream is already active.
        """
        if self._streaming:
            raise RuntimeError("Already streaming events")
        stdout = self._require_stream(self._ensure_rpc_process().stdout, "stdout")
        self._streaming = True
        try:
            while True:
                # readline blocks, so it runs in a worker thread.
                line = await asyncio.to_thread(stdout.readline)
                if not line:
                    break
                yield json.loads(line)
        finally:
            self._streaming = False

    def _build_command(
        self, model: ModelEntry, prompt: str, max_turns: int, wd: str,
    ) -> list[str]:
        """Build the argv for ``model``.

        Uses the discovered CLI profile when one is installed, otherwise
        falls back to ``<cli> -p <prompt>``.
        """
        profile = self._profiles.get(model.cli_command)
        if profile and profile.installed:
            return profile.build_command(prompt, wd, max_turns)
        return [model.cli_command, "-p", prompt]

    def _detect_quota(self, text: str) -> bool:
        """Return True when ``text`` contains any ``QUOTA_MARKERS`` substring."""
        return any(marker in text.lower() for marker in QUOTA_MARKERS)

    def _detect_pi(self) -> bool:
        """Return True when the RPC command is found by ``shutil.which``."""
        return shutil.which(self._rpc_command) is not None

    async def _spawn_prompt(
        self,
        model: ModelEntry,
        prompt: str,
        max_turns: int,
        working_dir: str,
    ) -> asyncio.subprocess.Process:
        """Start the CLI for ``model`` in ``working_dir``.

        stderr is merged into stdout, and ``CLAUDECODE`` is removed from the
        child's environment.
        """
        env = os.environ.copy()
        env.pop("CLAUDECODE", None)
        return await asyncio.create_subprocess_exec(
            *self._build_command(model, prompt, max_turns, working_dir),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=working_dir,
            env=env,
        )

    @staticmethod
    async def _kill_process(proc: asyncio.subprocess.Process) -> None:
        """Kill a child process and wait for it so it is reaped."""
        try:
            proc.kill()
        except ProcessLookupError:
            # The child exited between the timeout firing and the kill.
            pass
        await proc.wait()

    def _log_discovery(self) -> None:
        """Log each discovered CLI: INFO when installed, DEBUG when missing."""
        for name, p in self._profiles.items():
            level = logging.INFO if p.installed else logging.DEBUG
            logger.log(level, "CLI %s: %s v%s", "OK" if p.installed else "missing", name, p.version)

    @property
    def discovered_profiles(self) -> dict[str, CliProfile]:
        """A shallow copy of the discovered CLI profiles keyed by command."""
        return dict(self._profiles)

    def _prompt_result(self, model_name: str, code: int, stdout: bytes) -> RunResult:
        """Turn a finished CLI run into a ``RunResult``.

        Success means exit code 0. The quota flag is derived from the raw
        output, before any JSON extraction.
        """
        raw = stdout.decode("utf-8", errors="replace")
        quota = self._detect_quota(raw)
        cost, in_t, out_t, text = _extract_usage(raw)
        return RunResult(
            model=model_name, success=code == 0, output=text,
            error="" if code == 0 else f"Exit code {code}",
            quota_hit=quota, cost_usd=cost,
            input_tokens=in_t, output_tokens=out_t,
        )

    def _ensure_rpc_process(self) -> subprocess.Popen[str]:
        """Return the running RPC process, starting a new one if needed."""
        proc = self._rpc_process
        # An object without poll() is treated as still running (presumably
        # to accommodate test doubles — TODO confirm).
        if proc and getattr(proc, "poll", lambda: None)() is None:
            return proc
        self._rpc_process = subprocess.Popen(
            [self._rpc_command], stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            text=True, bufsize=1,
        )
        return self._rpc_process

    def _require_stream(self, stream: object, name: str):
        """Return ``stream``, raising RuntimeError when it is ``None``."""
        if stream is None:
            raise RuntimeError(f"Pi RPC {name} is unavailable")
        return stream


================================================
FILE: maggy/maggy/api/__init__.py
================================================


================================================
FILE: maggy/maggy/api/auth.py
================================================
"""Shared authentication and configuration guards."""

from __future__ import annotations

from fastapi import HTTPException, Request


def check_auth(
    request: Request, x_api_key: str | None,
) -> None:
    """Reject the request unless it carries the configured API key.

    The check is skipped entirely when ``cfg.dashboard.auth_mode`` is
    ``"local"``.

    Args:
        request: Incoming request; the config is read from
            ``request.app.state.cfg``.
        x_api_key: Value of the ``X-API-Key`` header, or ``None`` if absent.

    Raises:
        HTTPException: 401 when no key is configured, or the supplied key is
            missing or does not match.
    """
    import hmac  # function-scope import keeps the module header untouched

    cfg = request.app.state.cfg
    if cfg.dashboard.auth_mode == "local":
        return
    expected = cfg.dashboard.api_key
    authorized = (
        isinstance(expected, str)
        and bool(expected)
        and isinstance(x_api_key, str)
        # Constant-time comparison so response timing does not reveal how
        # much of the key matched. Compared as bytes because
        # compare_digest rejects non-ASCII str arguments.
        and hmac.compare_digest(
            x_api_key.encode("utf-8"), expected.encode("utf-8"),
        )
    )
    if not authorized:
        raise HTTPException(
            status_code=401,
            detail="Invalid or missing X-API-Key",
        )


def require_configured(request: Request) -> None:
    """Guard that answers 503 until ``app.state.configured`` is truthy.

    A missing ``configured`` attribute counts as not configured.
    """
    is_configured = getattr(request.app.state, "configured", False)
    if is_configured:
        return
    raise HTTPException(
        status_code=503,
        detail="Maggy is not configured yet.",
    )


def require_provider(request: Request) -> None:
    """Guard that answers 503 unless the app runs in ``"full"`` mode.

    ``app.state.mode`` defaults to ``"local"`` when unset, which is
    rejected like any other non-full mode.
    """
    lacks_provider = getattr(request.app.state, "mode", "local") != "full"
    if lacks_provider:
        raise HTTPException(
            status_code=503,
            detail="Provider credentials required. "
            "Set GITHUB_TOKEN or configure Asana.",
        )


================================================
FILE: maggy/maggy/api/routes.py
================================================
"""REST API routes — wraps services. All routes under /api/*."""

from __future__ import annotations

import logging
from typing import Literal

from fastapi import APIRouter, Header, HTTPException, Query, Request
from pydantic import BaseModel

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api", tags=["maggy"])


def _auth(request: Request, x_api_key: str | None) -> None:
    """Simple token check. Bypassed when auth_mode='local'."""
    cfg = request.app.state.cfg
    if cfg.dashboard.auth_mode == "local":
        return
    expected = cfg.dashboard.api_key
    if not expected or x_api_key != expected:
        raise HTTPException(status_code=401, detail="Invalid or missing X-API-Key")


def _require_configured(request: Request) -> None:
    """Abort 503 if no provider credentials (Tier 2)."""
    mode = getattr(request.app.state, "mode", "local")
    if mode != "full":
        raise HTTPException(
            status_code=503,
            detail="Provider credentials required. "
            "Set GITHUB_TOKEN or configure Asana.",
        )


# ── Health + Config ──────────────────────────────────────────────────────

@router.get("/health")
async def health(request: Request) -> dict:
    """Report service status, run mode and a few config-derived counters."""
    state = request.app.state
    cfg = state.cfg
    payload: dict = {"status": "ok", "version": "0.1.0"}
    payload["mode"] = getattr(state, "mode", "local")
    payload["provider"] = cfg.issue_tracker.provider
    payload["org"] = cfg.org.name
    payload["codebases"] = len(cfg.codebases)
    payload["competitors_enabled"] = bool(cfg.competitors.categories)
    return payload


@router.get("/activity")
async def get_activity(request: Request) -> dict:
    """Live CLI sessions and recent prompts; no API key is checked here."""
    tracker = request.app.state.activity
    return tracker.get_activity()


@router.get("/discovery")
async def get_discovery(request: Request) -> dict:
    """Run environment discovery and return what was found.

    NOTE(review): no API-key check happens on this route although the
    payload includes a ``tokens`` entry — confirm that field never carries
    secret values.
    """
    from maggy.discovery import full_discovery

    found = full_discovery()
    exposed = ("clis", "repos", "active_projects", "tokens", "github_org")
    return {field: getattr(found, field) for field in exposed}


@router.get("/config")
async def get_config(request: Request, x_api_key: str | None = Header(None)) -> dict:
    """Return a redacted view of the loaded configuration.

    The AI API key itself is never included; ``has_key`` only reports
    whether one is set.
    """
    _auth(request, x_api_key)
    cfg = request.app.state.cfg
    org = {"name": cfg.org.name, "domain": cfg.org.domain}
    tracker = {"provider": cfg.issue_tracker.provider}
    codebases = [{"key": cb.key, "path": cb.path} for cb in cfg.codebases]
    competitors = {
        "categories": cfg.competitors.categories,
        "seed": cfg.competitors.seed,
    }
    okrs = {"source": cfg.okrs.source, "count": len(cfg.okrs.items)}
    ai = {
        "provider": cfg.ai.provider,
        "model": cfg.ai.model,
        "has_key": bool(cfg.ai.api_key),
    }
    return {
        "org": org,
        "issue_tracker": tracker,
        "codebases": codebases,
        "competitors": competitors,
        "okrs": okrs,
        "ai": ai,
    }


# ── Inbox ────────────────────────────────────────────────────────────────

@router.get("/inbox")
async def get_inbox(
    request: Request,
    refresh: bool = Query(False),
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the prioritized inbox; ``refresh`` is passed on as ``force_refresh``."""
    _auth(request, x_api_key)
    _require_configured(request)
    inbox = request.app.state.inbox
    prioritized = await inbox.get_prioritized(force_refresh=refresh)
    return {"items": prioritized, "total": len(prioritized)}


@router.get("/followed")
async def get_followed(request: Request, x_api_key: str | None = Header(None)) -> dict:
    """List up to 50 followed tasks from the issue tracker.

    Any provider failure is logged and reported as 502.
    """
    _auth(request, x_api_key)
    _require_configured(request)
    try:
        followed = await request.app.state.provider.list_followed(limit=50)
    except Exception as exc:
        logger.warning("list_followed failed: %s", exc)
        raise HTTPException(status_code=502, detail="Issue tracker unavailable")
    columns = ("id", "title", "board", "url", "assignee", "updated_at", "labels")
    rows = []
    for entry in followed:
        rows.append({column: getattr(entry, column) for column in columns})
    return {"items": rows, "total": len(followed)}


# ── Task detail + comments ───────────────────────────────────────────────

@router.get("/task/{task_id:path}")
async def get_task(request: Request, task_id: str, x_api_key: str | None = Header(None)) -> dict:
    """Return one task together with its comments.

    A failing task lookup yields 502 and a missing task 404. A failing
    comment lookup is only logged; the task is then returned with an empty
    comment list.
    """
    _auth(request, x_api_key)
    _require_configured(request)
    try:
        found = await request.app.state.provider.get_task(task_id)
    except Exception as exc:
        logger.warning("get_task(%s) failed: %s", task_id, exc)
        raise HTTPException(status_code=502, detail="Issue tracker unavailable")
    if not found:
        raise HTTPException(status_code=404, detail="Task not found")
    try:
        thread = await request.app.state.provider.get_comments(task_id)
    except Exception as exc:
        logger.warning("get_comments(%s) failed: %s", task_id, exc)
        thread = []
    task_fields = (
        "id", "title", "description", "status", "assignee", "url",
        "labels", "board", "created_at", "updated_at",
    )
    comment_fields = ("id", "author", "text", "created_at")
    task_view = {field: getattr(found, field) for field in task_fields}
    comment_views = [
        {field: getattr(entry, field) for field in comment_fields}
        for entry in thread
    ]
    return {"task": task_view, "comments": comment_views}


# Request body for POST /task/{task_id}/comment.
class CommentRequest(BaseModel):
    text: str  # comment body; the route answers 400 when it is blank after strip()


@router.post("/task/{task_id:path}/comment")
async def post_comment(
    request: Request,
    task_id: str,
    body: CommentRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Add a comment to a task.

    Blank text yields 400; a provider error or a falsy provider result
    yields 502.
    """
    _auth(request, x_api_key)
    _require_configured(request)
    if not body.text.strip():
        raise HTTPException(status_code=400, detail="Comment text is required")
    try:
        created = await request.app.state.provider.add_comment(task_id, body.text)
    except Exception as exc:
        logger.warning("add_comment(%s) failed: %s", task_id, exc)
        raise HTTPException(status_code=502, detail="Issue tracker unavailable")
    if not created:
        raise HTTPException(status_code=502, detail="Issue tracker rejected the comment")
    comment_view = {
        "id": created.id,
        "text": created.text,
        "created_at": created.created_at,
    }
    return {"ok": True, "comment": comment_view}


# Request body for POST /task/{task_id}/status.
class StatusRequest(BaseModel):
    status: str  # handed to provider.update_status as-is


@router.post("/task/{task_id:path}/status")
async def update_status(
    request: Request,
    task_id: str,
    body: StatusRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Set a task's status via the provider; its result is returned as ``ok``.

    Any provider exception is logged and reported as 502.
    """
    _auth(request, x_api_key)
    _require_configured(request)
    try:
        outcome = await request.app.state.provider.update_status(task_id, body.status)
    except Exception as exc:
        logger.warning("update_status(%s) failed: %s", task_id, exc)
        raise HTTPException(status_code=502, detail="Issue tracker unavailable")
    return {"ok": outcome}


# ── Execute ──────────────────────────────────────────────────────────────

# Request body for POST /execute.
class ExecuteRequest(BaseModel):
    task_id: str  # task handed to the executor
    mode: Literal["tdd", "plan"] = "tdd"  # execution mode; only these two values validate
    working_dir: str | None = None  # override; otherwise auto-picked


@router.post("/execute")
async def execute(request: Request, body: ExecuteRequest, x_api_key: str | None = Header(None)) -> dict:
    """Start an executor session for a task and return its id.

    A ``ValueError`` from the executor is surfaced as 400 with its message.
    """
    _auth(request, x_api_key)
    _require_configured(request)
    try:
        launch = {
            "task_id": body.task_id,
            "mode": body.mode,
            "working_dir": body.working_dir,
        }
        new_id = await request.app.state.executor.start(**launch)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"session_id": new_id, "status": "running"}


@router.get("/execute/sessions")
async def list_sessions(request: Request, x_api_key: str | None = Header(None)) -> list[dict]:
    """Return whatever the executor reports as its session list."""
    _auth(request, x_api_key)
    _require_configured(request)
    executor = request.app.state.executor
    return executor.list_sessions()


@router.get("/execute/sessions/{session_id}")
async def get_session(request: Request, session_id: str, x_api_key: str | None = Header(None)) -> dict:
    """Return one executor session, or 404 when the executor has none for the id."""
    _auth(request, x_api_key)
    _require_configured(request)
    found = request.app.state.executor.get_session(session_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Session not found")


# ── Competitors ──────────────────────────────────────────────────────────

@router.get("/competitors")
async def list_competitors(request: Request, x_api_key: str | None = Header(None)) -> list[dict]:
    """Return all competitors known to the competitor service."""
    _auth(request, x_api_key)
    _require_configured(request)
    service = request.app.state.competitors
    return service.list_all()


@router.post("/competitors/discover")
async def discover_competitors(request: Request, x_api_key: str | None = Header(None)) -> dict:
    """Trigger competitor discovery and return the service's result."""
    _auth(request, x_api_key)
    _require_configured(request)
    service = request.app.state.competitors
    outcome = await service.discover()
    return outcome


@router.post("/competitors/monitor")
async def trigger_monitoring(request: Request, x_api_key: str | None = Header(None)) -> dict:
    """Run monitoring across all competitors and return the service's result."""
    _auth(request, x_api_key)
    _require_configured(request)
    service = request.app.state.competitors
    outcome = await service.monitor_all()
    return outcome


@router.get("/competitors/news")
async def get_competitor_news(
    request: Request,
    limit: int = Query(100),
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Return competitor news; ``limit`` is passed straight to the service."""
    _auth(request, x_api_key)
    _require_configured(request)
    service = request.app.state.competitors
    return service.get_news(limit=limit)


@router.get("/competitors/news/summary")
async def get_briefing(
    request: Request,
    refresh: bool = Query(False),
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the daily competitor briefing; ``refresh`` is passed to the service."""
    _auth(request, x_api_key)
    _require_configured(request)
    service = request.app.state.competitors
    briefing = await service.get_daily_briefing(refresh=refresh)
    return briefing


================================================
FILE: maggy/maggy/api/routes_budget.py
================================================
"""Budget REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, Request

from .auth import check_auth

router = APIRouter(prefix="/api/budget", tags=["budget"])


@router.get("")
async def get_budget(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Budget status, or ``{"status": "unconfigured"}`` when no budget is set."""
    check_auth(request, x_api_key)
    tracker = request.app.state.budget
    return tracker.budget_status() if tracker else {"status": "unconfigured"}


@router.get("/by-provider")
async def by_provider(
    request: Request,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Per-provider spend breakdown; an empty list when no budget is set."""
    check_auth(request, x_api_key)
    tracker = request.app.state.budget
    return tracker.by_provider() if tracker else []


================================================
FILE: maggy/maggy/api/routes_chat.py
================================================
"""Chat API routes — interactive Claude sessions via SSE."""

from __future__ import annotations

import json
import logging
from dataclasses import asdict

from fastapi import APIRouter, Header, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from maggy.api.auth import check_auth

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/chat", tags=["chat"])


def _require_chat(request: Request):
    chat = getattr(request.app.state, "chat", None)
    if chat is None:
        raise HTTPException(
            status_code=503,
            detail="Chat service not available.",
        )
    return chat


# Request body for POST /api/chat/sessions.
class CreateSessionRequest(BaseModel):
    project_key: str  # first argument to chat.create_session
    project_path: str | None = None  # optional; forwarded as project_path=


# Request body for POST /api/chat/sessions/{session_id}/send.
class SendMessageRequest(BaseModel):
    message: str  # the route answers 400 when this is blank after strip()


# Request body for POST /api/chat/sessions/{session_id}/send-routed.
class RoutedMessageRequest(BaseModel):
    message: str  # the route answers 400 when this is blank after strip()
    blast_score: int | None = None  # optional; passed to RoutedChat.decide
    task_type: str | None = None  # optional; passed to RoutedChat.decide
    allowed_models: list[str] | None = None  # if set and the routed model is not listed, the first entry is used


@router.post("/auto-connect")
async def auto_connect(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Create or attach chat sessions for every currently active project.

    Returns an empty session list when no activity tracker is available.
    """
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    tracker = getattr(request.app.state, "activity", None)
    if not tracker:
        return {"sessions": []}
    snapshot = tracker.get_activity()
    live = snapshot.get("sessions", [])
    recent_prompts = snapshot.get("recent", [])
    connected = chat.auto_connect(live)
    history = getattr(request.app.state, "history", None)
    # _enrich_session runs first for each session, so the summary reflects
    # the context and resume id it just stored.
    summaries = [
        _session_summary(sess, _enrich_session(sess, history, recent_prompts))
        for sess in connected
    ]
    return {"sessions": summaries}


def _enrich_session(s, history, recent: list[dict]) -> str:
    """Attach project context to ``s`` and fill in a missing resume id.

    Stores the built context on ``s.history_context``. When ``s`` has no
    ``claude_session_id`` yet, one is looked up from its working directory
    and stored if found.

    Returns:
        The context string that was stored on the session.
    """
    from maggy.services.chat_context import (
        build_project_context,
        resolve_claude_session_id,
    )
    context = build_project_context(
        history, s.working_dir, s.project_key, recent,
    )
    s.history_context = context
    if s.claude_session_id:
        return context
    resolved = resolve_claude_session_id(s.working_dir)
    if resolved:
        s.claude_session_id = resolved
    return context


def _session_summary(s, context: str) -> dict:
    """Format session for API response."""
    return {
        "id": s.id,
        "project_key": s.project_key,
        "working_dir": s.working_dir,
        "status": s.status,
        "messages": len(s.messages),
        "history_context": context,
        "has_resume_id": bool(s.claude_session_id),
    }


@router.post("/sessions")
async def create_session(
    request: Request,
    body: CreateSessionRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Create a chat session for a project.

    A ``ValueError`` from the chat service is surfaced as 400.
    """
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    try:
        created = chat.create_session(
            body.project_key, project_path=body.project_path,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    exposed = ("id", "project_key", "working_dir", "status")
    return {field: getattr(created, field) for field in exposed}


@router.get("/sessions")
async def list_sessions(
    request: Request,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """List chat sessions, with a message count instead of the messages."""
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    rows: list[dict] = []
    for sess in chat.list_sessions():
        rows.append({
            "id": sess.id,
            "project_key": sess.project_key,
            "status": sess.status,
            "created_at": sess.created_at,
            "messages": len(sess.messages),
        })
    return rows


@router.get("/sessions/{session_id}")
async def get_session(
    request: Request,
    session_id: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return one chat session with its full message history, or 404."""
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    found = chat.get_session(session_id)
    if not found:
        raise HTTPException(status_code=404, detail="Session not found")
    scalar_fields = (
        "id", "project_key", "working_dir",
        "status", "created_at", "history_context",
    )
    payload = {field: getattr(found, field) for field in scalar_fields}
    payload["messages"] = [asdict(msg) for msg in found.messages]
    return payload


@router.post("/sessions/{session_id}/send")
async def send_message(
    request: Request,
    session_id: str,
    body: SendMessageRequest,
    x_api_key: str | None = Header(None),
):
    """Send a message to a chat session and stream the reply as SSE.

    Each chunk from the chat service becomes one ``data:`` frame, and a
    final ``{"type": "done"}`` frame closes the stream. ``result`` chunks
    are also recorded against the budget when one is configured.
    """
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    if not chat.get_session(session_id):
        raise HTTPException(status_code=404, detail="Session not found")
    if not body.message.strip():
        raise HTTPException(status_code=400, detail="Message required")
    budget = getattr(request.app.state, "budget", None)

    async def event_stream():
        async for chunk in chat.send(session_id, body.message):
            is_result = chunk.get("type") == "result"
            if budget and is_result:
                _record_chat_spend(budget, chunk)
            yield "data: " + json.dumps(chunk) + "\n\n"
        yield 'data: {"type": "done"}\n\n'

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
    )


@router.post("/sessions/{session_id}/send-routed")
async def send_routed(
    request: Request,
    session_id: str,
    body: RoutedMessageRequest,
    x_api_key: str | None = Header(None),
):
    """Send a message with a routing decision and stream the reply as SSE.

    When a routing engine is configured, a ``routing`` frame describing the
    chosen model is emitted first. The chat chunks follow, then a final
    ``{"type": "done"}`` frame. After the chat stream ends, the routing
    outcome is recorded; any ``error`` chunk counts as a failure.
    """
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    if not chat.get_session(session_id):
        raise HTTPException(
            status_code=404, detail="Session not found",
        )
    if not body.message.strip():
        raise HTTPException(
            status_code=400, detail="Message required",
        )
    routing = getattr(request.app.state, "routing", None)
    budget = getattr(request.app.state, "budget", None)

    async def event_stream():
        from maggy.services.chat_router import RoutedChat
        decision = None
        if routing:
            decision = RoutedChat(routing, budget).decide(
                body.message, body.blast_score, body.task_type,
            )
            permitted = body.allowed_models
            if permitted and decision.model not in permitted:
                # Override the router's pick with the first permitted model.
                decision.model = permitted[0]
                decision.reason = f"restricted to {','.join(permitted)}"
            routing_frame = {
                "type": "routing",
                "model": decision.model,
                "blast": decision.blast,
                "task_type": decision.task_type,
                "reason": decision.reason,
            }
            yield "data: " + json.dumps(routing_frame) + "\n\n"
        saw_error = False
        async for chunk in chat.send(session_id, body.message):
            chunk_type = chunk.get("type")
            if budget and chunk_type == "result":
                _record_chat_spend(budget, chunk)
            if chunk_type == "error":
                saw_error = True
            yield "data: " + json.dumps(chunk) + "\n\n"
        _record_routing_outcome(
            routing, decision, had_error=saw_error,
        )
        yield 'data: {"type": "done"}\n\n'

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
    )


def _record_chat_spend(budget, chunk: dict) -> None:
    """Record token/cost data from a result chunk."""
    cost = chunk.get("cost_usd", 0)
    in_t = chunk.get("input_tokens", 0)
    out_t = chunk.get("output_tokens", 0)
    if cost or in_t or out_t:
        budget.record_spend("anthropic", "claude", cost, in_t, out_t)


def _record_routing_outcome(routing, decision, *, had_error: bool) -> None:
    """Record routing reward after chat completes."""
    if not routing or not decision:
        return
    reward = 0.0 if had_error else 1.0
    routing.record_outcome(
        decision.model, decision.task_type,
        decision.blast, reward,
    )


@router.delete("/sessions/{session_id}")
async def delete_session(
    request: Request,
    session_id: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Delete a chat session; 404 when the chat service reports no deletion."""
    check_auth(request, x_api_key)
    chat = _require_chat(request)
    if chat.delete_session(session_id):
        return {"ok": True}
    raise HTTPException(status_code=404, detail="Session not found")


================================================
FILE: maggy/maggy/api/routes_cikg.py
================================================
"""CIKG REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, Request

from .auth import check_auth

router = APIRouter(prefix="/api/cikg", tags=["cikg"])


@router.get("/landscape")
async def landscape(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Competitive landscape summary from the CIKG graph.

    When no graph is configured, an ``error`` payload is returned in a
    normal response rather than as an HTTP error.
    """
    check_auth(request, x_api_key)
    cikg = request.app.state.cikg
    if cikg:
        # Imported only once a graph is known to exist.
        from maggy.cikg.queries import get_landscape
        return get_landscape(cikg)
    return {"error": "cikg not configured"}


@router.get("/gaps/{feature}")
async def feature_gaps(
    request: Request,
    feature: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Score ``feature`` against the competitive landscape.

    When no graph is configured, an ``error`` payload is returned in a
    normal response rather than as an HTTP error.
    """
    check_auth(request, x_api_key)
    cikg = request.app.state.cikg
    if not cikg:
        return {"error": "cikg not configured"}
    from maggy.cikg.queries import find_gaps
    from dataclasses import asdict
    gap_report = find_gaps(cikg, feature)
    return asdict(gap_report)


================================================
FILE: maggy/maggy/api/routes_deploy.py
================================================
"""Deploy REST endpoints."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, Request
from pydantic import BaseModel, Field

from .auth import check_auth

router = APIRouter(prefix="/api/deploy", tags=["deploy"])


# Request body for POST /api/deploy/sessions.
class CreateSessionRequest(BaseModel):
    project: str = Field(..., min_length=1, max_length=200)  # required, 1-200 characters
    branch: str = Field(default="main", max_length=200)  # defaults to "main", at most 200 characters


@router.get("/sessions")
async def list_sessions(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """List deploy sessions as plain dicts.

    When no deploy service is configured, an ``error`` payload is returned
    in a normal response rather than as an HTTP error.
    """
    check_auth(request, x_api_key)
    deployer = request.app.state.deploy
    if not deployer:
        return {"error": "deploy not configured"}
    serialized = []
    for session in deployer.list_sessions():
        serialized.append(asdict(session))
    return {"sessions": serialized}


@router.get("/sessions/{sid}")
async def get_session(
    request: Request,
    sid: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return one deploy session as a dict.

    A missing deploy service or an unknown session id is reported as an
    ``error`` payload in a normal response rather than as an HTTP error.
    """
    check_auth(request, x_api_key)
    deployer = request.app.state.deploy
    if not deployer:
        return {"error": "deploy not configured"}
    found = deployer.get_session(sid)
    return asdict(found) if found else {"error": "session not found"}


@router.post("/sessions")
async def create_session(
    request: Request,
    body: CreateSessionRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Create a deploy session for a project and branch, returned as a dict.

    When no deploy service is configured, an ``error`` payload is returned
    in a normal response rather than as an HTTP error.
    """
    check_auth(request, x_api_key)
    deployer = request.app.state.deploy
    if deployer:
        created = deployer.create_session(
            project=body.project,
            branch=body.branch,
        )
        return asdict(created)
    return {"error": "deploy not configured"}


================================================
FILE: maggy/maggy/api/routes_engram.py
================================================
"""Engram REST endpoints."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, Request

from .auth import check_auth

router = APIRouter(prefix="/api/engram", tags=["engram"])


@router.get("/query")
async def query_engrams(
    request: Request,
    namespace: str | None = None,
    memory_type: str | None = None,
    limit: int = 50,
    x_api_key: str | None = Header(None),
) -> dict:
    """Query engram records, optionally filtered by namespace and memory type.

    When no engram store is configured, an ``error`` payload is returned in
    a normal response rather than as an HTTP error.
    """
    check_auth(request, x_api_key)
    store = request.app.state.engram
    if not store:
        return {"error": "engram not configured"}
    filters = {
        "namespace": namespace,
        "memory_type": memory_type,
        "limit": limit,
    }
    matches = store.query(**filters)
    return {"records": [asdict(match) for match in matches]}


@router.get("/diagnostics")
async def diagnostics(
    request: Request,
    namespace: str | None = None,
    x_api_key: str | None = Header(None),
) -> dict:
    """Run ``diagnose`` over the engram store and return its profile.

    The result of ``diagnose(store, namespace)`` is converted with
    ``asdict``. An ``{"error": ...}`` payload is returned when the
    store is not configured.
    """
    check_auth(request, x_api_key)
    engram_store = request.app.state.engram
    if engram_store:
        # Imported lazily, only once a store is known to exist.
        from maggy.engram.diagnostics import diagnose
        return asdict(diagnose(engram_store, namespace))
    return {"error": "engram not configured"}


================================================
FILE: maggy/maggy/api/routes_escalation.py
================================================
"""Escalation REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, HTTPException, Request
from pydantic import BaseModel

from .auth import check_auth

router = APIRouter(prefix="/api/escalations", tags=["escalations"])


class _EscalationIn(BaseModel):
    """Request body for creating an escalation (``POST /api/escalations``)."""

    # Passed as the first argument to the escalator's ``escalate`` call.
    session_id: str
    # Passed as the second argument to ``escalate``.
    reason: str
    # Optional mapping passed as the third argument; empty when omitted.
    context: dict = {}


class _ResolveIn(BaseModel):
    """Request body for resolving an escalation."""

    # Text handed to the escalator's ``resolve`` call for the escalation.
    guidance: str


@router.get("")
async def list_pending(
    request: Request,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Summarise every pending escalation.

    Each entry carries ``id``, ``session_id``, ``reason`` and
    ``created_at``. An empty list is returned when no escalator is
    configured.
    """
    check_auth(request, x_api_key)
    escalator = request.app.state.escalator
    summaries: list[dict] = []
    if escalator:
        for packet in escalator.list_pending():
            summaries.append({
                "id": packet.id,
                "session_id": packet.session_id,
                "reason": packet.reason,
                "created_at": packet.created_at,
            })
    return summaries


@router.post("", status_code=201)
async def create_escalation(
    body: _EscalationIn,
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Raise a new escalation and report its id.

    Raises:
        HTTPException: 503 when no escalator is configured.
    """
    check_auth(request, x_api_key)
    escalator = request.app.state.escalator
    if not escalator:
        raise HTTPException(503, "Not configured")
    created = escalator.escalate(body.session_id, body.reason, body.context)
    return {"id": created.id, "status": "pending"}


@router.post("/{escalation_id}/resolve")
async def resolve_escalation(
    escalation_id: str,
    body: _ResolveIn,
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Resolve an escalation with guidance.

    Returns ``{"id": ..., "status": "resolved"}`` for the resolved
    packet.

    Raises:
        HTTPException: 503 when no escalator is configured; 404 when
            the escalator raises ``KeyError`` for ``escalation_id``.
    """
    check_auth(request, x_api_key)
    esc = request.app.state.escalator
    if not esc:
        raise HTTPException(503, "Not configured")
    try:
        packet = esc.resolve(escalation_id, body.guidance)
    except KeyError as exc:
        # Chain explicitly so the traceback shows the 404 as a translation
        # of the lookup failure, not an error raised while handling it.
        raise HTTPException(404, "Not found") from exc
    return {"id": packet.id, "status": "resolved"}


================================================
FILE: maggy/maggy/api/routes_events.py
================================================
"""Event Spine REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, Request

from .auth import check_auth

router = APIRouter(prefix="/api/events", tags=["events"])


@router.get("")
async def query_events(
    request: Request,
    task_id: str | None = None,
    event_type: str | None = None,
    project_id: str | None = None,
    limit: int = 100,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Return events matching the optional filters.

    All four filter arguments are passed positionally to the emitter's
    ``query`` method. An empty list is returned when no emitter is
    configured.
    """
    check_auth(request, x_api_key)
    spine = request.app.state.events
    if spine:
        return spine.query(task_id, event_type, project_id, limit)
    return []


@router.get("/trace/{task_id}")
async def trace_task(
    request: Request,
    task_id: str,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Return whatever the emitter's ``trace`` yields for ``task_id``.

    An empty list is returned when no emitter is configured.
    """
    check_auth(request, x_api_key)
    spine = request.app.state.events
    return spine.trace(task_id) if spine else []


@router.get("/count")
async def count_events(
    request: Request,
    event_type: str | None = None,
    project_id: str | None = None,
    x_api_key: str | None = Header(None),
) -> dict:
    """Report how many events match the optional filters.

    Returns ``{"count": n}``; the count is 0 when no emitter is
    configured.
    """
    check_auth(request, x_api_key)
    spine = request.app.state.events
    total = spine.count(event_type, project_id) if spine else 0
    return {"count": total}


================================================
FILE: maggy/maggy/api/routes_forge.py
================================================
"""Forge REST endpoints."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, Request
from pydantic import BaseModel, Field

from .auth import check_auth

router = APIRouter(prefix="/api/forge", tags=["forge"])


class GapReport(BaseModel):
    """JSON body accepted by ``POST /api/forge/gaps``."""

    # Required capability text, 1-200 characters.
    capability: str = Field(..., max_length=200, min_length=1)


@router.get("/status")
async def forge_status(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the Forge connector's ``status()`` as a dict.

    An ``{"error": ...}`` payload is returned when Forge is not
    configured.
    """
    check_auth(request, x_api_key)
    connector = request.app.state.forge
    if connector:
        return asdict(connector.status())
    return {"error": "forge not configured"}


@router.get("/search")
async def search_tools(
    request: Request,
    q: str = "",
    x_api_key: str | None = Header(None),
) -> dict:
    """Search Forge tools for the query string ``q``.

    Returns ``{"results": ...}`` with the connector's search output, or
    an ``{"error": ...}`` payload when Forge is not configured.
    """
    check_auth(request, x_api_key)
    connector = request.app.state.forge
    if connector:
        matches = connector.search_tools(q)
        return {"results": matches}
    return {"error": "forge not configured"}


@router.get("/gaps")
async def list_gaps(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the capability gaps known to Forge.

    Returns ``{"gaps": ...}`` with the connector's ``get_gaps()``
    output, or an ``{"error": ...}`` payload when Forge is not
    configured.
    """
    check_auth(request, x_api_key)
    connector = request.app.state.forge
    if connector:
        return {"gaps": connector.get_gaps()}
    return {"error": "forge not configured"}


@router.post("/gaps")
async def report_gap(
    request: Request,
    body: GapReport,
    x_api_key: str | None = Header(None),
) -> dict:
    """Submit a capability gap to Forge.

    Returns whatever the connector's ``report_gap`` returns, or an
    ``{"error": ...}`` payload when Forge is not configured.
    """
    check_auth(request, x_api_key)
    connector = request.app.state.forge
    if connector:
        return connector.report_gap(body.capability)
    return {"error": "forge not configured"}


================================================
FILE: maggy/maggy/api/routes_heartbeat.py
================================================
"""Heartbeat API routes — scheduler status and manual triggers."""

from __future__ import annotations

from fastapi import APIRouter, Header, HTTPException, Request

from maggy.api.auth import check_auth

router = APIRouter(prefix="/api", tags=["heartbeat"])


@router.get("/heartbeat/status")
async def heartbeat_status(
    request: Request,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Return the heartbeat scheduler's ``status()`` output.

    An empty list is returned when the application state has no
    ``heartbeat`` scheduler.
    """
    check_auth(request, x_api_key)
    heartbeat = getattr(request.app.state, "heartbeat", None)
    return heartbeat.status() if heartbeat else []


@router.post("/heartbeat/trigger/{job_name}")
async def trigger_job(
    request: Request,
    job_name: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Manually trigger the heartbeat job called ``job_name``.

    Returns the awaited result of the scheduler's ``trigger`` call.

    Raises:
        HTTPException: 503 when no heartbeat scheduler is present; 404
            when ``trigger`` raises ``KeyError``.
    """
    check_auth(request, x_api_key)
    scheduler = getattr(request.app.state, "heartbeat", None)
    if not scheduler:
        raise HTTPException(status_code=503, detail="Heartbeat not running")
    try:
        return await scheduler.trigger(job_name)
    except KeyError as exc:
        # Keep the original KeyError attached as the explicit cause.
        # NOTE(review): a KeyError raised inside the job itself would also
        # surface as this 404 — confirm trigger() cannot leak one.
        raise HTTPException(
            status_code=404, detail=f"Job '{job_name}' not found",
        ) from exc


================================================
FILE: maggy/maggy/api/routes_history.py
================================================
"""API routes for session history analysis."""

from __future__ import annotations

from fastapi import APIRouter, Header, HTTPException, Request

from maggy.api.auth import check_auth

router = APIRouter(
    prefix="/api/history", tags=["history"],
)


def _require_history(request: Request):
    svc = getattr(request.app.state, "history", None)
    if svc is None:
        raise HTTPException(
            status_code=503,
            detail="History service not available.",
        )
    return svc


@router.post("/analyze")
async def analyze_history(
    request: Request,
    x_api_key: str | None = Header(None),
):
    """Run the history service's ``analyze()`` and summarise the report.

    The response carries session and prompt totals, the number of
    providers, the detected patterns and the report summary.
    """
    check_auth(request, x_api_key)
    report = _require_history(request).analyze()
    payload = {"status": "ok"}
    payload["total_sessions"] = report.total_sessions
    payload["total_prompts"] = report.total_prompts
    payload["providers"] = len(report.providers)
    payload["patterns"] = report.patterns
    payload["summary"] = report.summary
    return payload


@router.get("/report")
async def get_report(
    request: Request,
    x_api_key: str | None = Header(None),
):
    """Return the history service's stored report.

    When ``get_report()`` yields nothing truthy, ``{"status":
    "no_data"}`` is returned instead.
    """
    check_auth(request, x_api_key)
    cached = _require_history(request).get_report()
    if cached:
        return cached
    return {"status": "no_data"}


@router.get("/sessions")
async def get_sessions(
    request: Request,
    provider: str | None = None,
    x_api_key: str | None = Header(None),
):
    """Return session records, optionally restricted to one provider.

    The response holds the records under ``sessions`` and their count
    under ``total``.
    """
    check_auth(request, x_api_key)
    history = _require_history(request)
    records = history.get_sessions(provider=provider)
    return {"sessions": records, "total": len(records)}


@router.get("/providers")
async def list_providers(
    request: Request,
    x_api_key: str | None = Header(None),
):
    """Return the history service's ``available_providers()`` output."""
    check_auth(request, x_api_key)
    history = _require_history(request)
    available = history.available_providers()
    return {"providers": available}


================================================
FILE: maggy/maggy/api/routes_improve.py
================================================
"""Self-improvement API routes — reports and manual analysis."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, HTTPException, Request

from maggy.api.auth import check_auth

router = APIRouter(prefix="/api", tags=["improve"])


@router.get("/improve/report")
async def get_report(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the introspector's current report.

    The report is converted with ``asdict``; ``{"report": None}`` is
    returned when there is none.

    Raises:
        HTTPException: 503 when no introspector is configured.
    """
    check_auth(request, x_api_key)
    engine = getattr(request.app.state, "introspector", None)
    if not engine:
        raise HTTPException(status_code=503, detail="Not configured")
    latest = engine.get_report()
    return {"report": asdict(latest) if latest else None}


@router.post("/improve/analyze")
async def run_analysis(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Run the introspector's ``analyze()`` and return the new report.

    Raises:
        HTTPException: 503 when no introspector is configured.
    """
    check_auth(request, x_api_key)
    engine = getattr(request.app.state, "introspector", None)
    if engine:
        return {"report": asdict(engine.analyze())}
    raise HTTPException(status_code=503, detail="Not configured")


================================================
FILE: maggy/maggy/api/routes_lexon.py
================================================
"""Lexon REST endpoints."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, Request
from pydantic import BaseModel, Field

from .auth import check_auth

router = APIRouter(prefix="/api/lexon", tags=["lexon"])


class LearnRequest(BaseModel):
    """JSON body accepted by ``POST /api/lexon/learn``."""

    # Required phrase, 1-500 characters.
    phrase: str = Field(..., max_length=500, min_length=1)
    # Required tool name the phrase maps to, 1-100 characters.
    tool: str = Field(..., max_length=100, min_length=1)


@router.get("/parse")
async def parse_intent(
    request: Request,
    q: str = "",
    x_api_key: str | None = Header(None),
) -> dict:
    """Route the phrase ``q`` through Lexon and return the record.

    The record from ``lexon.route(q)`` is converted with ``asdict``. An
    ``{"error": ...}`` payload is returned when Lexon is not configured.
    """
    check_auth(request, x_api_key)
    engine = request.app.state.lexon
    if engine:
        return asdict(engine.route(q))
    return {"error": "lexon not configured"}


@router.post("/learn")
async def learn_mapping(
    request: Request,
    body: LearnRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Teach Lexon that ``body.phrase`` maps to ``body.tool``.

    Returns ``{"status": "learned"}``, or an ``{"error": ...}`` payload
    when Lexon is not configured.
    """
    check_auth(request, x_api_key)
    engine = request.app.state.lexon
    if engine:
        engine.learn(body.phrase, body.tool)
        return {"status": "learned"}
    return {"error": "lexon not configured"}


================================================
FILE: maggy/maggy/api/routes_mesh.py
================================================
"""Mesh P2P REST endpoints — data operations."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field

from .auth import check_auth

router = APIRouter(prefix="/api/mesh", tags=["mesh"])


class AddPeerRequest(BaseModel):
    """JSON body accepted by ``POST /api/mesh/peers``."""

    # Org whose network receives the peer.
    org: str
    # Identifier recorded for the peer.
    peer_id: str
    # Optional name and address; both default to the empty string.
    name: str = ""
    address: str = ""
    # Port in the range 1-65535; 8080 when omitted.
    port: int = Field(8080, le=65535, ge=1)


class PromoteRequest(BaseModel):
    """Request body for ``POST /api/mesh/promote``."""

    # Org whose mesh network holds the quarantined item.
    org: str
    # Key passed to the network's ``promote_from_quarantine`` call.
    key: str


@router.get("/status")
async def mesh_status(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Summarise the mesh: enabled flag, peer total and networks.

    When no mesh is configured the response is
    ``{"enabled": False, "peers": 0}``.
    """
    check_auth(request, x_api_key)
    mesh = request.app.state.mesh
    if mesh:
        summary = {"enabled": True}
        summary["peers"] = mesh.total_peers
        summary["networks"] = mesh.list_networks()
        return summary
    return {"enabled": False, "peers": 0}


@router.get("/networks")
async def list_networks(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the mesh's ``list_networks()`` output.

    The ``networks`` list is empty when no mesh is configured.
    """
    check_auth(request, x_api_key)
    mesh = request.app.state.mesh
    networks = mesh.list_networks() if mesh else []
    return {"networks": networks}


@router.get("/peers")
async def list_peers(
    request: Request,
    org: str = "",
    x_api_key: str | None = Header(None),
) -> dict:
    """Return known peers as dicts.

    With ``org`` given, only that network's peers are returned, and an
    unknown org yields a 404 JSON response. Without it, peers from
    every listed network are gathered. A 503 JSON response is returned
    when the mesh is disabled.
    """
    check_auth(request, x_api_key)
    mesh = request.app.state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)

    if org:
        network = mesh.get_network(org)
        if network:
            return {
                "peers": [
                    asdict(peer) for peer in network.peers.list_peers()
                ],
            }
        return JSONResponse(
            {"error": f"unknown org: {org}"}, status_code=404,
        )

    # No org filter: walk every network and collect its peers.
    gathered = []
    for entry in mesh.list_networks():
        network = mesh.get_network(entry["org"])
        if not network:
            continue
        for peer in network.peers.list_peers():
            gathered.append(asdict(peer))
    return {"peers": gathered}


@router.post("/peers")
async def add_peer(
    request: Request,
    body: AddPeerRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Register a manually specified peer in an org's network.

    Returns ``{"status": "added", "peer_id": ...}``. A 503 JSON
    response is returned when the mesh is disabled and a 404 JSON
    response when ``body.org`` names no known network.
    """
    check_auth(request, x_api_key)
    mesh = request.app.state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)
    network = mesh.get_network(body.org)
    if not network:
        return JSONResponse(
            {"error": f"unknown org: {body.org}"}, status_code=404,
        )
    # Imported lazily, only once the target network has been resolved.
    from maggy.mesh.discovery import PeerInfo
    peers = network.peers
    peer = PeerInfo(
        peer_id=body.peer_id,
        name=body.name,
        address=body.address,
        port=body.port,
        org=body.org,
        manual=True,
    )
    peers.register(peer)
    return {"status": "added", "peer_id": body.peer_id}


@router.get("/quarantine")
async def quarantine_list(
    request: Request,
    org: str = "",
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the quarantined entries of one org's network.

    Returns ``{"items": [...]}``. JSON error responses: 503 when the
    mesh is disabled, 422 when ``org`` is empty, 404 when the org is
    unknown.
    """
    check_auth(request, x_api_key)
    mesh = request.app.state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)
    if not org:
        return JSONResponse(
            {"error": "org parameter required"}, status_code=422,
        )
    network = mesh.get_network(org)
    if network:
        entries = network.quarantine.list_all()
        return {"items": [asdict(entry) for entry in entries]}
    return JSONResponse({"error": f"unknown org: {org}"}, status_code=404)


@router.post("/promote")
async def promote(
    request: Request,
    body: PromoteRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Promote one quarantined item of an org's network.

    Returns ``{"promoted": ...}`` with the result of
    ``promote_from_quarantine``. A 503 JSON response is returned when
    the mesh is disabled and a 404 JSON response when the org is
    unknown.
    """
    check_auth(request, x_api_key)
    mesh = request.app.state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)
    network = mesh.get_network(body.org)
    if network:
        outcome = network.sync.promote_from_quarantine(body.key)
        return {"promoted": outcome}
    return JSONResponse(
        {"error": f"unknown org: {body.org}"}, status_code=404,
    )


================================================
FILE: maggy/maggy/api/routes_mesh_admin.py
================================================
"""Mesh P2P REST endpoints — admin operations."""

from __future__ import annotations

from fastapi import APIRouter, Header, Request
from fastapi.responses import JSONResponse

from .auth import check_auth

router = APIRouter(prefix="/api/mesh", tags=["mesh"])


@router.post("/announce")
async def announce(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Announce this node via the mesh's ``announce_all``.

    The configured GitHub token is passed to ``announce_all`` and the
    result is returned under ``announced``. JSON error responses: 503
    when the mesh is disabled, 422 when no token is configured.
    """
    check_auth(request, x_api_key)
    state = request.app.state
    mesh = state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)
    github_token = state.cfg.issue_tracker.github.token
    if github_token:
        return {"announced": await mesh.announce_all(github_token)}
    return JSONResponse({"error": "no github token"}, status_code=422)


@router.post("/discover")
async def discover(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Run peer discovery via the mesh's ``discover``.

    The configured GitHub token is passed to ``discover`` and the
    result is returned under ``discovered``. JSON error responses: 503
    when the mesh is disabled, 422 when no token is configured.
    """
    check_auth(request, x_api_key)
    state = request.app.state
    mesh = state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)
    github_token = state.cfg.issue_tracker.github.token
    if github_token:
        return {"discovered": await mesh.discover(github_token)}
    return JSONResponse({"error": "no github token"}, status_code=422)


@router.post("/setup")
async def setup(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Create mesh repositories via the mesh's ``setup_repos``.

    The configured GitHub token is passed to ``setup_repos`` and the
    result is returned under ``repos_created``. JSON error responses:
    503 when the mesh is disabled, 422 when no token is configured.
    """
    check_auth(request, x_api_key)
    state = request.app.state
    mesh = state.mesh
    if not mesh:
        return JSONResponse({"error": "mesh not enabled"}, status_code=503)
    github_token = state.cfg.issue_tracker.github.token
    if github_token:
        return {"repos_created": await mesh.setup_repos(github_token)}
    return JSONResponse({"error": "no github token"}, status_code=422)


================================================
FILE: maggy/maggy/api/routes_monitor.py
================================================
"""API routes for monitor service — tracker polling."""

from __future__ import annotations

from fastapi import APIRouter, Request

router = APIRouter(prefix="/api/monitor", tags=["monitor"])


@router.get("/status")
async def monitor_status(request: Request) -> dict:
    """Return the monitor service's ``status()`` output.

    When the application state has no monitor service, the response is
    ``{"active": 0, "monitors": []}``.
    """
    # NOTE(review): this handler performs no API-key check of its own.
    monitor = getattr(request.app.state, "monitor", None)
    if monitor:
        return monitor.status()
    return {"active": 0, "monitors": []}


@router.post("/start")
async def monitor_start(request: Request) -> dict:
    """Report monitor availability and the number of active monitors.

    Returns ``{"ok": True, "active": n}``, or an error payload when no
    monitor service is configured.
    """
    # NOTE(review): despite the route name, nothing is started here; the
    # handler only counts ``list_active()``. It also performs no API-key
    # check of its own.
    monitor = getattr(request.app.state, "monitor", None)
    if monitor:
        active_count = len(monitor.list_active())
        return {"ok": True, "active": active_count}
    return {"ok": False, "error": "monitor not configured"}


@router.post("/stop")
async def monitor_stop(request: Request) -> dict:
    """Stop all monitors.

    Calls the service's ``remove`` with the ``project_key`` of every
    entry from ``list_active()``. Returns ``{"ok": True}`` afterwards,
    or ``{"ok": False}`` when no monitor service is configured.
    """
    # NOTE(review): this handler performs no API-key check of its own.
    svc = getattr(request.app.state, "monitor", None)
    if not svc:
        return {"ok": False}
    # Snapshot before removing: if list_active() exposes the service's
    # live collection, removing during iteration would skip entries.
    for cfg in list(svc.list_active()):
        svc.remove(cfg.project_key)
    return {"ok": True}


================================================
FILE: maggy/maggy/api/routes_observability.py
================================================
"""Observability signal REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, HTTPException, Request
from pydantic import BaseModel

from .auth import check_auth

router = APIRouter(
    prefix="/api/observability", tags=["observability"],
)


class _SignalIn(BaseModel):
    """Request body for ``POST /api/observability/record``."""

    # The three fields are passed, in this order, to the observability
    # service's ``record_signal`` call.
    project: str
    signal_type: str
    value: float


@router.get("/signals/{project}")
async def get_signals(
    project: str,
    request: Request,
    x_api_key: str | None = Header(None),
    limit: int = 20,
) -> list[dict]:
    """Get recent signals for a project.

    ``limit`` is clamped to the range 0-100 before it is passed to
    ``recent_signals``. An empty list is returned when observability
    is not configured.
    """
    check_auth(request, x_api_key)
    obs = request.app.state.observability
    if not obs:
        return []
    # Capping only the top would let a negative limit reach the store.
    # NOTE(review): a SQL backend may treat a negative LIMIT as
    # "no limit" — confirm how recent_signals applies this value.
    bounded = max(0, min(limit, 100))
    return obs.recent_signals(project, bounded)


@router.post("/record", status_code=201)
async def record_signal(
    body: _SignalIn,
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Store one observability signal and acknowledge it.

    Raises:
        HTTPException: 503 when observability is not configured.
    """
    check_auth(request, x_api_key)
    recorder = request.app.state.observability
    if recorder:
        recorder.record_signal(body.project, body.signal_type, body.value)
        return {"status": "recorded"}
    raise HTTPException(503, "Not configured")


================================================
FILE: maggy/maggy/api/routes_planning.py
================================================
"""Planning REST endpoints."""

from __future__ import annotations

from dataclasses import asdict

from fastapi import APIRouter, Header, Request
from pydantic import BaseModel, Field

from .auth import check_auth

router = APIRouter(prefix="/api/planning", tags=["planning"])


class PlanGenerateRequest(BaseModel):
    """JSON body accepted by ``POST /api/planning/generate``."""

    # Required task text, 1-2000 characters.
    task: str = Field(..., max_length=2000, min_length=1)
    # Integer from 0 to 10; 0 when omitted.
    blast_score: int = Field(0, le=10, ge=0)
    # Optional list of strings, forwarded as the plan's ``file_context``.
    files: list[str] | None = None


@router.post("/generate")
async def generate_plan(
    request: Request,
    body: PlanGenerateRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Build a plan for the submitted task.

    The response carries ``mode`` and ``plan``, plus ``diff`` when the
    planning service supplies a truthy one. An ``{"error": ...}``
    payload is returned when planning is not configured.
    """
    check_auth(request, x_api_key)
    planner = request.app.state.planning
    if not planner:
        return {"error": "planning not configured"}
    # Imported lazily, only once a planning service is known to exist.
    from maggy.planning import PlanRequest
    outcome = planner.plan_task(
        PlanRequest(
            task=body.task,
            blast_score=body.blast_score,
            file_context=body.files,
        ),
    )
    plan = outcome["plan"]
    payload = {"mode": outcome["mode"], "plan": asdict(plan)}
    if outcome.get("diff"):
        payload["diff"] = asdict(outcome["diff"])
    return payload


================================================
FILE: maggy/maggy/api/routes_process.py
================================================
"""Process Intelligence REST routes — /api/process/*."""

from __future__ import annotations

import logging

from fastapi import APIRouter, Header, HTTPException, Request
from pydantic import BaseModel

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/process", tags=["process"])


def _auth(request: Request, x_api_key: str | None) -> None:
    cfg = request.app.state.cfg
    if cfg.dashboard.auth_mode == "local":
        return
    expected = cfg.dashboard.api_key
    if not expected or x_api_key != expected:
        raise HTTPException(401, "Invalid or missing X-API-Key")


def _require_process(request: Request) -> None:
    if not getattr(request.app.state, "process", None):
        raise HTTPException(503, "Process Intelligence not configured")


class AnalyzeRequest(BaseModel):
    """Request body for ``POST /api/process/analyze``."""

    # Key passed to the process service's ``analyze`` call.
    project_key: str


@router.post("/analyze")
async def analyze(
    request: Request,
    body: AnalyzeRequest,
    x_api_key: str | None = Header(None),
) -> dict:
    """Run a full PR analysis for a project and return its summary.

    The analysis is awaited inside the request, so the response is sent
    only after it has finished.

    Raises:
        HTTPException: 401 or 503 from the auth and service checks; 400
            when the service raises ``ValueError``; 502 for any other
            failure, which is also logged with its traceback.
    """
    _auth(request, x_api_key)
    _require_process(request)
    svc = request.app.state.process

    try:
        report = await svc.analyze(body.project_key)
    except ValueError as e:
        # Chain explicitly so the original error stays the stated cause.
        raise HTTPException(400, str(e)) from e
    except Exception as e:
        logger.exception("Analysis failed for %s", body.project_key)
        raise HTTPException(502, f"Analysis failed: {e}") from e

    return {
        "status": "completed",
        "project_key": body.project_key,
        "total_prs": report.total_prs,
        "summary": report.summary,
    }


@router.get("/report/{project_key}")
async def get_report(
    request: Request,
    project_key: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the process service's report for ``project_key``.

    Raises:
        HTTPException: 404 when the service has no report for the key.
    """
    _auth(request, x_api_key)
    _require_process(request)
    service = request.app.state.process
    stored = service.get_report(project_key)
    if stored:
        return stored
    raise HTTPException(404, "No report found. Run /api/process/analyze first.")


@router.get("/health/{project_key}")
async def get_health(
    request: Request,
    project_key: str,
    x_api_key: str | None = Header(None),
) -> dict:
    """Return the process service's health data for ``project_key``.

    Raises:
        HTTPException: 404 when the service has no health data for the
            key.
    """
    _auth(request, x_api_key)
    _require_process(request)
    service = request.app.state.process
    metrics = service.get_health(project_key)
    if metrics:
        return metrics
    raise HTTPException(404, "No health data. Run /api/process/analyze first.")


================================================
FILE: maggy/maggy/api/routes_projects.py
================================================
"""Project registry REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, HTTPException, Request
from pydantic import BaseModel

from .auth import check_auth

router = APIRouter(prefix="/api/projects", tags=["projects"])


class _ProjectIn(BaseModel):
    """Request body for registering a project (``POST /api/projects``)."""

    # All four fields are copied onto the ``ProjectConfig`` that is
    # added to the registry.
    name: str
    repo: str
    path: str
    # "main" when the client omits it.
    default_branch: str = "main"


@router.get("")
async def list_projects(
    request: Request,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Return name, repo and path for every registered project.

    An empty list is returned when no registry is configured.
    """
    check_auth(request, x_api_key)
    registry = request.app.state.registry
    summaries: list[dict] = []
    if registry:
        for project in registry.list():
            summaries.append({
                "name": project.name,
                "repo": project.repo,
                "path": project.path,
            })
    return summaries


@router.get("/{name}")
async def get_project(
    name: str,
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Get a single project by name.

    Returns the project's ``name``, ``repo`` and ``path``.

    Raises:
        HTTPException: 503 when no registry is configured; 404 when
            the registry has no project called ``name``.
    """
    check_auth(request, x_api_key)
    registry = request.app.state.registry
    if not registry:
        # A missing registry is a service problem, not a missing project;
        # 503 matches what the POST and DELETE handlers here return.
        raise HTTPException(503, "Not configured")
    project = registry.get(name)
    if not project:
        raise HTTPException(404, f"{name!r} not found")
    return {
        "name": project.name,
        "repo": project.repo,
        "path": project.path,
    }


@router.post("", status_code=201)
async def add_project(
    body: _ProjectIn,
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Add a project to the registry.

    Returns ``{"name": ..., "status": "created"}``.

    Raises:
        HTTPException: 503 when no registry is configured; 409 when
            the registry's ``add`` raises ``ValueError``.
    """
    check_auth(request, x_api_key)
    registry = request.app.state.registry
    if not registry:
        raise HTTPException(503, "Not configured")
    # Imported lazily, only once a registry is known to exist.
    from maggy.config import ProjectConfig
    entry = ProjectConfig(
        name=body.name,
        repo=body.repo,
        path=body.path,
        default_branch=body.default_branch,
    )
    try:
        registry.add(entry)
    except ValueError as exc:
        raise HTTPException(409, str(exc)) from exc
    else:
        return {"name": entry.name, "status": "created"}


@router.delete("/{name}")
async def remove_project(
    name: str,
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Delete the project called ``name`` from the registry.

    Returns ``{"name": ..., "status": "removed"}``.

    Raises:
        HTTPException: 503 when no registry is configured; 404 when
            the registry's ``remove`` returns a falsy value.
    """
    check_auth(request, x_api_key)
    registry = request.app.state.registry
    if not registry:
        raise HTTPException(503, "Not configured")
    removed = registry.remove(name)
    if removed:
        return {"name": name, "status": "removed"}
    raise HTTPException(404, f"{name!r} not found")


================================================
FILE: maggy/maggy/api/routes_routing.py
================================================
"""Routing REST endpoints."""

from __future__ import annotations

from fastapi import APIRouter, Header, Request

from .auth import check_auth

router = APIRouter(prefix="/api/routing", tags=["routing"])


@router.get("/heatmap")
async def heatmap(
    request: Request,
    x_api_key: str | None = Header(None),
) -> list[dict]:
    """Return the routing service's ``get_heatmap()`` output.

    An empty list is returned when routing is not configured.
    """
    check_auth(request, x_api_key)
    routing = request.app.state.routing
    return routing.get_heatmap() if routing else []


@router.get("/decide")
async def decide(
    request: Request,
    blast: int = 0,
    task_type: str = "general",
    security: bool = False,
    x_api_key: str | None = Header(None),
) -> dict:
    """Ask the routing service for a decision.

    A ``RoutingContext`` is built from ``blast``, ``task_type`` and
    ``security``. The response reports the decision's primary,
    validator, fallback chain and reason. An ``{"error": ...}``
    payload is returned when routing is not configured.
    """
    check_auth(request, x_api_key)
    routing = request.app.state.routing
    if routing:
        # Imported lazily, only once a routing service is known to exist.
        from maggy.routing import RoutingContext
        context = RoutingContext(blast, task_type, security)
        verdict = routing.route(context)
        return {
            "primary": verdict.primary,
            "validator": verdict.validator,
            "fallback": verdict.fallback_chain,
            "reason": verdict.reason,
        }
    return {"error": "routing not configured"}


@router.get("/rules")
async def rules(
    request: Request,
    x_api_key: str | None = Header(None),
) -> dict:
    """Summarise the routing rules.

    Reports the routing mode, the per-task-type overrides, the
    per-model performance figures and the number of conventions.
    ``{"mode": "unconfigured"}`` is returned when routing is not
    configured.
    """
    check_auth(request, x_api_key)
    routing = request.app.state.routing
    if not routing:
        return {"mode": "unconfigured"}

    ruleset = routing.rules

    overrides = {}
    for task_type, override in ruleset.task_type_overrides.items():
        overrides[task_type] = {
            "model": override.model,
            "reason": override.reason,
        }

    performance = {}
    for model_name, stats in ruleset.model_performance.items():
        performance[model_name] = {
            "strengths": stats.strengths,
            "success_rate": stats.success_rate,
            "tasks_completed": stats.tasks_completed,
        }

    return {
        "mode": routing.cfg.routing.mode,
        "task_type_overrides": overrides,
        "model_performance": performance,
        "conventions_count": len(ruleset.conventions),
    }


================================================
FILE: maggy/maggy/api/routes_setup.py
================================================
"""Setup and onboarding routes — detect missing config, guide users."""

from __future__ import annotations

from fastapi import APIRouter, Request
from pydantic import BaseModel, Field

from maggy import config as config_mod

router = APIRouter(prefix="/api/setup", tags=["setup"])


class ConfigureRequest(BaseModel):
    """JSON body accepted by ``POST /api/setup/configure``.

    Every field is optional; the handler leaves the matching config
    value unchanged when a field is empty.
    """

    org_name: str = ""
    github_org: str = ""
    github_repos: list[str] = Field(default_factory=list)
    competitor_categories: list[str] = Field(default_factory=list)


def _step(label: str, ok: bool, hint: str = "") -> dict:
    """Build a single setup step status."""
    return {
        "label": label,
        "status": "done" if ok else "missing",
        "hint": hint,
    }


def _build_steps(cfg) -> list[dict]:
    """List the setup steps with a done/missing status for each.

    The steps cover the GitHub token, organization and repositories,
    an AI provider (a configured API key, or else the claude CLI on
    PATH) and the configured codebases.
    """
    github = cfg.issue_tracker.github
    checks = [
        ("GitHub token", bool(github.token), ""),
        ("GitHub organization", bool(github.org), ""),
        (
            "GitHub repositories",
            bool(github.repos),
            "Select repos to track issues from",
        ),
        ("AI provider", bool(cfg.ai.api_key) or _has_claude_cli(), ""),
        ("Codebases", bool(cfg.codebases), ""),
    ]
    return [_step(label, ok, hint) for label, ok, hint in checks]


def _has_claude_cli() -> bool:
    """Check if claude CLI is available."""
    import shutil
    return shutil.which("claude") is not None


def _discover_summary() -> dict:
    """Collect the results of the three discovery probes into one dict.

    Keys: ``clis``, ``cli_auth`` and ``tokens``, filled from
    ``discover_clis``, ``discover_cli_auth`` and ``discover_env_tokens``.
    """
    from maggy.discovery import (
        discover_cli_auth,
        discover_clis,
        discover_env_tokens,
    )

    summary: dict = {}
    summary["clis"] = discover_clis()
    summary["cli_auth"] = discover_cli_auth()
    summary["tokens"] = discover_env_tokens()
    return summary


@router.get("/status")
async def setup_status(request: Request) -> dict:
    """What's configured, what's missing."""
    app_state = request.app.state
    cfg = app_state.cfg
    steps = _build_steps(cfg)
    # Steps whose status is "done" feed the "<done>/<total>" progress string.
    completed = [step for step in steps if step["status"] == "done"]
    discovery = _discover_summary()
    return {
        "configured": app_state.mode == "full",
        "mode": app_state.mode,
        "steps": steps,
        "progress": f"{len(completed)}/{len(steps)}",
        "codebases": len(cfg.codebases),
        "github_org": cfg.issue_tracker.github.org,
        "discovery": discovery,
    }


@router.post("/configure")
async def configure(
    request: Request, body: ConfigureRequest,
) -> dict:
    """Update config sections dynamically."""
    cfg = request.app.state.cfg
    # Each entry pairs a submitted value with the setter that stores it.
    # Setters run only for non-empty values, so omitted fields are left alone.
    updates = (
        (body.org_name, lambda v: setattr(cfg.org, "name", v)),
        (body.github_org,
         lambda v: setattr(cfg.issue_tracker.github, "org", v)),
        (body.github_repos,
         lambda v: setattr(cfg.issue_tracker.github, "repos", v)),
        (body.competitor_categories,
         lambda v: setattr(cfg.competitors, "categories", v)),
    )
    for value, apply in updates:
        if value:
            apply(value)
    config_mod.save(cfg)
    return {"saved": True}


@router.post("/reload")
async def reload_config(request: Request) -> dict:
    """Reload config and reinitialize services."""
    from maggy.main import reconfigure

    application = request.app
    reconfigure(application)
    # The mode is read after reconfigure() has run.
    return {"mode": application.state.mode, "reloaded": True}


@router.get("/discover-repos")
async def discover_repos(request: Request) -> dict:
    """Return repos found on disk, grouped by org."""
    from maggy.discovery import full_discovery

    found = full_discovery()
    payload: dict = {}
    payload["github_org"] = found.github_org
    payload["github_orgs"] = found.github_orgs
    # Only the "key" and "path" entries of each discovered repo are exposed.
    repo_rows = []
    for repo in found.repos:
        repo_rows.append({"key": repo["key"], "path": repo["path"]})
    payload["repos"] = repo_rows
    payload["cli_auth"] = found.cli_auth
    payload["clis"] = found.clis
    return payload


@router.post("/auto-configure")
async def auto_configure(request: Request) -> dict:
    """Run auto-discovery, save config, reload."""
    application = request.app
    cfg = config_mod.auto_configure()
    # Store the new config on app state before calling reconfigure().
    application.state.cfg = cfg
    from maggy.main import reconfigure
    reconfigure(application)
    github = cfg.issue_tracker.github
    return {
        "mode": application.state.mode,
        "codebases": len(cfg.codebases),
        "github_org": github.org,
        "github_repos": github.repos,
        "has_token": bool(github.token),
    }


@router.get("/cli-models")
async def cli_models() -> dict:
    """Auto-discover AI CLIs and their capabilities."""
    from maggy.adapters.cli_discovery import discover_all

    discovered = discover_all()
    profiles = [
        {
            "name": cli_name,
            "installed": profile.installed,
            "version": profile.version,
            "prompt_flag": profile.prompt_flag,
            "work_dir_flag": profile.work_dir_flag,
            "auto_approve": profile.auto_approve_flag,
            "afk": profile.afk_flag,
        }
        for cli_name, profile in discovered.profiles.items()
    ]
    # "ready" is true as soon as at least one profile reports installed.
    installed = [entry["name"] for entry in profiles if entry["installed"]]
    return {
        "profiles": profiles,
        "installed": installed,
        "ready": len(installed) > 0,
    }


================================================
FILE: maggy/maggy/budget.py
================================================
"""Token budget manager — tracks spend per provider with daily limits."""

from __future__ import annotations

import sqlite3
import tempfile
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator

from maggy.config import MaggyConfig


def _today_utc() -> str:
    return datetime.now(timezone.utc).date().isoformat()


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    try:
        conn = _open_conn(path)
    except sqlite3.OperationalError:
        fallback = Path(tempfile.gettempdir()) / "maggy" / path.name
        conn = _open_conn(fallback)
    try:
        yield conn
    finally:
        conn.close()


def _open_conn(path: Path) -> sqlite3.Connection:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    return conn

# DDL for the spend ledger: one row per recorded spend (provider, model, cost,
# token counts, day, timestamp) plus an index on (day, provider). Every
# statement uses IF NOT EXISTS, so running the script again is harmless.
SCHEMA = """
CREATE TABLE IF NOT EXISTS spend (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    cost_usd REAL NOT NULL,
    input_tokens INTEGER NOT NULL DEFAULT 0,
    output_tokens INTEGER NOT NULL DEFAULT 0,
    day TEXT NOT NULL,
    created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_spend_day
    ON spend(day, provider);
"""


@dataclass(frozen=True)
class ProviderBudget:
    """Budget limit and preferred model for a provider."""

    # Provider identifier; BudgetManager keys its per-provider lookup by it.
    provider: str
    # Limit that BudgetManager compares against the provider's spend for the
    # current UTC day.
    daily_limit_usd: float
    # Value returned by BudgetManager.cheapest_available() for this provider.
    model_preference: str


class TaskSpendTracker:
    """In-memory tally of one task's spend and of edits per file."""

    def __init__(self, max_spend: float):
        self.max_spend = max_spend
        self._spent = 0.0
        # file path -> number of times record_edit() was called for it
        self.files_edited: dict[str, int] = {}

    def record(self, cost: float) -> None:
        """Add ``cost`` to the running total."""
        self._spent = self._spent + cost

    def total(self) -> float:
        """Return the accumulated spend."""
        return self._spent

    def is_exceeded(self) -> bool:
        """Return True once the total has reached ``max_spend``."""
        return not (self._spent < self.max_spend)

    def record_edit(self, file_path: str) -> None:
        """Count one more edit of ``file_path``."""
        self.files_edited[file_path] = self.files_edited.get(file_path, 0) + 1

    def detect_loop(self, threshold: int = 3) -> list[str]:
        """Return the paths edited at least ``threshold`` times."""
        repeated = []
        for path, count in self.files_edited.items():
            if count >= threshold:
                repeated.append(path)
        return repeated


class BudgetManager:
    """Record spend in a SQLite ledger and compare daily totals to limits.

    "Today" is always the current UTC date. The ledger file is ``budget.db``
    in the parent directory of the configured storage path.
    """

    def __init__(self, cfg: MaggyConfig):
        budget_cfg = cfg.budget
        self.daily_limit = budget_cfg.daily_limit_usd
        self._plan = budget_cfg.plan
        self.providers = list(budget_cfg.providers)
        # provider name -> its budget entry, for direct lookup
        self._provider_budgets = {}
        for entry in self.providers:
            self._provider_budgets[entry.provider] = entry
        self.warning_threshold = budget_cfg.warning_threshold
        storage_path = Path(cfg.storage.path).expanduser()
        self._db_path = storage_path.parent / "budget.db"
        self._init_db()

    def _init_db(self) -> None:
        """Create the ledger table and index when they do not exist yet."""
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)

    def record_spend(
        self, provider: str, model: str, cost_usd: float,
        input_tokens: int = 0, output_tokens: int = 0,
    ) -> None:
        """Append one spend row stamped with the current UTC day and time."""
        stamp = datetime.now(timezone.utc)
        values = (
            provider, model, cost_usd, input_tokens, output_tokens,
            stamp.date().isoformat(), stamp.isoformat(),
        )
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT INTO spend "
                "(provider,model,cost_usd,input_tokens,output_tokens,day,created_at) "
                "VALUES (?,?,?,?,?,?,?)",
                values,
            )
            conn.commit()

    def today_spend(self, provider: str | None = None) -> float:
        """Return today's total cost, optionally for a single provider."""
        query = "SELECT COALESCE(SUM(cost_usd),0) FROM spend WHERE day=?"
        args: list = [_today_utc()]
        if provider:
            query = query + " AND provider=?"
            args.append(provider)
        with _connect(self._db_path) as conn:
            total = conn.execute(query, args).fetchone()[0]
        return float(total)

    def today_tokens(self, provider: str | None = None) -> dict:
        """Return today's ``input`` / ``output`` token sums."""
        query = ("SELECT COALESCE(SUM(input_tokens),0),"
                 "COALESCE(SUM(output_tokens),0) FROM spend WHERE day=?")
        args: list = [_today_utc()]
        if provider:
            query = query + " AND provider=?"
            args.append(provider)
        with _connect(self._db_path) as conn:
            sums = conn.execute(query, args).fetchone()
        return {"input": int(sums[0]), "output": int(sums[1])}

    def budget_status(self) -> dict:
        """Summarise today's spend against the overall daily limit.

        NOTE(review): a non-positive ``daily_limit`` yields utilization 0
        here, while ``is_exhausted`` reports True for the same limit.
        """
        spent = self.today_spend()
        limit = self.daily_limit
        if limit > 0:
            ratio = spent / limit
        else:
            ratio = 0
        if ratio >= 1.0:
            status = "exhausted"
        elif ratio >= self.warning_threshold:
            status = "warning"
        else:
            status = "ok"
        tokens = self.today_tokens()
        return {
            "spent_today_usd": round(spent, 4),
            "daily_limit_usd": self.daily_limit,
            "utilization": round(ratio, 3),
            "status": status,
            "plan": self._plan,
            "input_tokens": tokens["input"],
            "output_tokens": tokens["output"],
        }

    def by_provider(self) -> list[dict]:
        """Return today's spend per provider, rounded to four decimals."""
        with_day = (_today_utc(),)
        with _connect(self._db_path) as conn:
            rows = conn.execute(
                "SELECT provider, SUM(cost_usd) as total "
                "FROM spend WHERE day=? GROUP BY provider",
                with_day,
            ).fetchall()
        breakdown = []
        for row in rows:
            breakdown.append({
                "provider": row["provider"],
                "spent_usd": round(row["total"], 4),
            })
        return breakdown

    def is_exhausted(
        self, provider: str | None = None,
    ) -> bool:
        """Check if daily budget is exhausted."""
        return self.today_spend(provider) >= self.daily_limit

    def is_provider_exhausted(self, provider: str) -> bool:
        """Check the provider's own limit, or the global one when none is set."""
        entry = self._provider_budgets.get(provider)
        if entry is not None:
            return self.today_spend(provider) >= entry.daily_limit_usd
        return self.is_exhausted(provider)

    def cheapest_available(self) -> str | None:
        """Return the preferred model of the first provider with budget left.

        Providers are tried in configured order; None means all are exhausted.
        """
        return next(
            (
                entry.model_preference
                for entry in self.providers
                if not self.is_provider_exhausted(entry.provider)
            ),
            None,
        )


================================================
FILE: maggy/maggy/calibration/__init__.py
================================================
"""Calibration exports."""

from .tracker import CalibrationTracker

__all__ = ["CalibrationTracker"]


================================================
FILE: maggy/maggy/calibration/tracker.py
================================================
"""SQLite-backed model calibration tracking."""

from __future__ import annotations

import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator

# DDL for the calibration table: one row per (model, task_type) observation
# holding a predicted and an actual value, with an index on model. Every
# statement uses IF NOT EXISTS, so running the script again is harmless.
SCHEMA = """
CREATE TABLE IF NOT EXISTS calibration (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    model TEXT NOT NULL,
    task_type TEXT NOT NULL,
    predicted REAL NOT NULL,
    actual REAL NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_calibration_model
    ON calibration(model);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class CalibrationTracker:
    """Persist predicted-vs-actual pairs per model and summarise the gap."""

    def __init__(self, db_path: Path):
        self._db_path = db_path
        self._init_db()

    def record(
        self, model: str, task_type: str, predicted: float, actual: float,
    ) -> None:
        """Store one observation for ``model``."""
        values = (model, task_type, predicted, actual)
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT INTO calibration (model, task_type, predicted, actual) "
                "VALUES (?, ?, ?, ?)",
                values,
            )
            conn.commit()

    def accuracy(self, model: str) -> float:
        """Return the mean of ``1 - |predicted - actual|``, floored at 0 per row.

        Rounded to six decimals; 0.0 when the model has no rows.
        """
        errors = self._errors(model)
        if len(errors) == 0:
            return 0.0
        clipped = [max(0.0, 1.0 - gap) for gap in errors]
        return round(sum(clipped) / len(clipped), 6)

    def calibration_error(self, model: str) -> float:
        """Return the mean absolute error, rounded to six decimals.

        0.0 when the model has no rows.
        """
        errors = self._errors(model)
        if len(errors) == 0:
            return 0.0
        mean_gap = sum(errors) / len(errors)
        return round(mean_gap, 6)

    def _errors(self, model: str) -> list[float]:
        """Return ``|predicted - actual|`` for every row stored for ``model``."""
        with _connect(self._db_path) as conn:
            cursor = conn.execute(
                "SELECT predicted, actual FROM calibration WHERE model = ?",
                (model,),
            )
            rows = cursor.fetchall()
        gaps = []
        for row in rows:
            gaps.append(abs(row["predicted"] - row["actual"]))
        return gaps

    def _init_db(self) -> None:
        """Create the table and index when they do not exist yet."""
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)


================================================
FILE: maggy/maggy/checkpoint.py
================================================
"""JSON checkpoint persistence for fallback chains."""

from __future__ import annotations

import json
from pathlib import Path

# Directory used when CheckpointManager is created without an explicit one.
DEFAULT_DIR = Path.home() / ".maggy" / "checkpoints"


class CheckpointManager:
    """Store one JSON checkpoint file per session id under ``base_dir``."""

    def __init__(self, base_dir: Path = DEFAULT_DIR):
        self.base_dir = base_dir.expanduser()

    def write(self, session_id: str, data: dict) -> None:
        """Normalize ``data`` and save it as ``<session_id>.json``.

        The payload is written to a sibling ``.tmp`` file first and then moved
        over the target, so a reader never sees a half-written checkpoint.

        Raises:
            ValueError: if ``session_id`` is not a valid id.
        """
        self.base_dir.mkdir(parents=True, exist_ok=True)
        payload = _normalize(data)
        target = self._path(session_id)
        tmp = target.with_suffix(".tmp")
        tmp.write_text(json.dumps(payload, indent=2))
        tmp.replace(target)

    def read(self, session_id: str) -> dict | None:
        """Return the stored checkpoint, or None if missing or unreadable.

        Raises:
            ValueError: if ``session_id`` is not a valid id.
        """
        path = self._path(session_id)
        if not path.exists():
            return None
        try:
            return json.loads(path.read_text())
        except (json.JSONDecodeError, OSError):
            # A corrupt or unreadable file is treated like a missing one.
            return None

    def delete(self, session_id: str) -> bool:
        """Remove the checkpoint; return False when there was none."""
        path = self._path(session_id)
        if not path.exists():
            return False
        path.unlink()
        return True

    def list_checkpoints(self) -> list[str]:
        """Return the sorted session ids that have a ``.json`` file."""
        if not self.base_dir.exists():
            return []
        names = [path.stem for path in self.base_dir.glob("*.json")]
        return sorted(names)

    def _path(self, session_id: str) -> Path:
        """Map a session id to its resolved file path inside ``base_dir``.

        Raises:
            ValueError: if the id is invalid or the resolved path (after
                following symlinks) lies outside ``base_dir``.
        """
        safe_id = _sanitize_id(session_id)
        base = self.base_dir.resolve()
        target = (self.base_dir / f"{safe_id}.json").resolve()
        # Compare path components, not string prefixes: a string check would
        # accept a sibling such as "<base>-evil/..." reached through a symlink.
        if base not in target.parents:
            raise ValueError(f"Invalid session id: {session_id!r}")
        return target


def _sanitize_id(session_id: str) -> str:
    """Return ``session_id`` unchanged if it is made of ``[a-zA-Z0-9_-]`` only.

    Raises:
        ValueError: for an empty id or one containing any other character.
    """
    import re
    if not session_id or not re.fullmatch(r"[a-zA-Z0-9_\-]+", session_id):
        raise ValueError(f"Invalid session id: {session_id!r}")
    return session_id


def _normalize(data: dict) -> dict:
    """Reduce ``data`` to the six checkpoint fields with fixed types.

    Missing keys get an empty default; any other keys are dropped.
    """
    return {
        "goal": str(data.get("goal", "")),
        "constraints": list(data.get("constraints", [])),
        "progress": list(data.get("progress", [])),
        "model_history": list(data.get("model_history", [])),
        "current_subgoal": str(data.get("current_subgoal", "")),
        "fatigue_score": float(data.get("fatigue_score", 0.0)),
    }


================================================
FILE: maggy/maggy/cikg/__init__.py
================================================
"""Competitive Intelligence Knowledge Graph."""


================================================
FILE: maggy/maggy/cikg/graph.py
================================================
"""KnowledgeGraphService — CRUD operations for CIKG."""

from __future__ import annotations

import json
import sqlite3
from pathlib import Path

from .models import Edge, Node
from .storage import SCHEMA, _connect


class KnowledgeGraphService:
    """Create, read and delete graph nodes and edges in a SQLite file.

    Every call opens its own connection and closes it before returning.
    """

    def __init__(self, db_path: Path):
        self._db_path = db_path
        # Make sure the tables and indexes exist before first use.
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)

    def add_node(self, node: Node) -> None:
        """Insert ``node``, replacing any row with the same id."""
        values = (
            node.id, node.node_type, node.name,
            node.description, json.dumps(node.metadata),
            node.created_at,
        )
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT OR REPLACE INTO nodes VALUES (?,?,?,?,?,?)",
                values,
            )
            conn.commit()

    def get_node(self, node_id: str) -> Node | None:
        """Return the node with ``node_id``, or None when absent."""
        with _connect(self._db_path) as conn:
            cursor = conn.execute(
                "SELECT * FROM nodes WHERE id=?", (node_id,),
            )
            row = cursor.fetchone()
        return _row_to_node(row) if row else None

    def list_nodes(self, node_type: str | None = None) -> list[Node]:
        """Return all nodes, or only those of ``node_type`` when given."""
        with _connect(self._db_path) as conn:
            if not node_type:
                cursor = conn.execute("SELECT * FROM nodes")
            else:
                cursor = conn.execute(
                    "SELECT * FROM nodes WHERE node_type=?",
                    (node_type,),
                )
            rows = cursor.fetchall()
        nodes = []
        for row in rows:
            nodes.append(_row_to_node(row))
        return nodes

    def add_edge(self, edge: Edge) -> None:
        """Insert ``edge``, replacing one with the same source, target and type."""
        values = (
            edge.source_id, edge.target_id, edge.edge_type,
            edge.weight, json.dumps(edge.metadata),
        )
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT OR REPLACE INTO edges VALUES (?,?,?,?,?)",
                values,
            )
            conn.commit()

    def get_edges(self, node_id: str, direction: str = "out") -> list[Edge]:
        """Return edges touching ``node_id``.

        ``direction`` is ``"out"`` (node is the source), ``"in"`` (node is the
        target) or ``"both"`` (outgoing first, then incoming). Any other value
        yields an empty list.
        """
        statements = []
        if direction in ("out", "both"):
            statements.append("SELECT * FROM edges WHERE source_id=?")
        if direction in ("in", "both"):
            statements.append("SELECT * FROM edges WHERE target_id=?")
        found: list[Edge] = []
        with _connect(self._db_path) as conn:
            for sql in statements:
                rows = conn.execute(sql, (node_id,)).fetchall()
                found.extend(_row_to_edge(row) for row in rows)
        return found

    def neighbors(self, node_id: str) -> list[Node]:
        """Return the nodes joined to ``node_id`` by an edge in either direction.

        Ids with no stored node are skipped.
        """
        ids = set()
        for edge in self.get_edges(node_id, "both"):
            ids.update((edge.source_id, edge.target_id))
        ids.discard(node_id)
        adjacent = []
        for other_id in ids:
            node = self.get_node(other_id)
            if node:
                adjacent.append(node)
        return adjacent

    def delete_node(self, node_id: str) -> None:
        """Delete the node and every edge that starts or ends at it."""
        with _connect(self._db_path) as conn:
            conn.execute("DELETE FROM nodes WHERE id=?", (node_id,))
            conn.execute(
                "DELETE FROM edges WHERE source_id=? OR target_id=?",
                (node_id, node_id),
            )
            conn.commit()


def _row_to_node(r: sqlite3.Row) -> Node:
    """Build a ``Node`` from a ``nodes`` row, decoding its JSON metadata."""
    fields = {
        column: r[column]
        for column in ("id", "node_type", "name", "description")
    }
    fields["metadata"] = json.loads(r["metadata"])
    fields["created_at"] = r["created_at"]
    return Node(**fields)


def _row_to_edge(r: sqlite3.Row) -> Edge:
    """Build an ``Edge`` from an ``edges`` row, decoding its JSON metadata."""
    fields = {
        column: r[column]
        for column in ("source_id", "target_id", "edge_type", "weight")
    }
    fields["metadata"] = json.loads(r["metadata"])
    return Edge(**fields)


================================================
FILE: maggy/maggy/cikg/models.py
================================================
"""CIKG node and edge models."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone

# Allowed values for Node.node_type; Node.__post_init__ rejects anything else.
NODE_TYPES = (
    "codebase", "competitor", "feature", "market_segment",
    "product", "technology", "trend",
)

# Allowed values for Edge.edge_type; Edge.__post_init__ rejects anything else.
EDGE_TYPES = (
    "has_feature", "competes_with", "targets_market",
    "uses_technology", "protaige_has", "protaige_lacks",
    "threatens",
)


@dataclass
class Node:
    """A knowledge-graph node.

    ``node_type`` must be one of ``NODE_TYPES``. ``created_at`` defaults to
    the current UTC time in ISO format.

    Raises:
        ValueError: on construction with an unknown ``node_type``.
    """

    id: str
    node_type: str
    name: str
    description: str = ""
    metadata: dict = field(default_factory=dict)
    created_at: str = field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )

    def __post_init__(self) -> None:
        if self.node_type in NODE_TYPES:
            return
        raise ValueError(f"Invalid node_type: {self.node_type!r}")


@dataclass
class Edge:
    """A directed, typed and weighted link from ``source_id`` to ``target_id``.

    ``edge_type`` must be one of ``EDGE_TYPES``.

    Raises:
        ValueError: on construction with an unknown ``edge_type``.
    """

    source_id: str
    target_id: str
    edge_type: str
    weight: float = 1.0
    metadata: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        if self.edge_type in EDGE_TYPES:
            return
        raise ValueError(f"Invalid edge_type: {self.edge_type!r}")


@dataclass
class MarketScore:
    """Result of a market scoring query."""

    # Name of the feature that was scored.
    feature: str
    # Number of entities counted as lacking the feature.
    gap_count: int = 0
    threat_level: str = "low"  # low | medium | high
    # NOTE(review): nothing visible in this module assigns this field — confirm usage.
    trend_alignment: float = 0.0
    # Human-readable summary line.
    recommendation: str = ""


================================================
FILE: maggy/maggy/cikg/queries.py
================================================
"""CIKG query functions — gap analysis and market scoring."""

from __future__ import annotations

from .graph import KnowledgeGraphService
from .models import MarketScore, Node


def find_gaps(graph: KnowledgeGraphService, feature_name: str) -> MarketScore:
    """Score ``feature_name`` by how many competitors already have it.

    A competitor "has" the feature when one of its ``has_feature`` edges
    points at a feature node matching ``feature_name``. The result carries
    the number of competitors lacking it, a threat level and a recommendation.
    """
    wanted = _matching_ids(graph, "feature", feature_name)
    total = 0
    have_it = 0
    for competitor in graph.list_nodes("competitor"):
        total += 1
        if wanted & _targets_for(graph, competitor.id, "has_feature"):
            have_it += 1
    threat = _threat_level(have_it, total)
    advice = _recommend(feature_name, have_it, total, threat)
    return MarketScore(
        feature=feature_name,
        gap_count=total - have_it,
        threat_level=threat,
        recommendation=advice,
    )


def find_gaps_raw(graph: KnowledgeGraphService, feature: str) -> list[dict]:
    """List every competitor with ``"has"`` or ``"lacks"`` for ``feature``.

    Rows are sorted by competitor name.
    """
    wanted = _matching_ids(graph, "feature", feature)

    def row_for(competitor):
        owned = _targets_for(graph, competitor.id, "has_feature")
        status = "has" if wanted & owned else "lacks"
        return {
            "entity_id": competitor.id, "entity": competitor.name,
            "feature": feature, "status": status,
        }

    rows = [row_for(node) for node in graph.list_nodes("competitor")]
    rows.sort(key=lambda row: row["entity"])
    return rows


def compare_entities(graph: KnowledgeGraphService, id_a: str, id_b: str) -> dict:
    """Compare the ``has_feature`` targets of two entities.

    Returns sorted ``shared`` / ``only_a`` / ``only_b`` feature ids plus the
    outgoing edges of either entity whose endpoints are exactly the pair.
    """
    feats_a = _targets_for(graph, id_a, "has_feature")
    feats_b = _targets_for(graph, id_b, "has_feature")
    pair = {id_a, id_b}
    links = []
    for edge in graph.get_edges(id_a, "out") + graph.get_edges(id_b, "out"):
        if {edge.source_id, edge.target_id} == pair:
            links.append({
                "source_id": edge.source_id,
                "target_id": edge.target_id,
                "edge_type": edge.edge_type,
            })
    return {
        "shared": sorted(feats_a & feats_b),
        "only_a": sorted(feats_a - feats_b),
        "only_b": sorted(feats_b - feats_a),
        "relationships": links,
    }


def get_landscape(graph: KnowledgeGraphService) -> dict:
    """Count competitor, feature and technology nodes.

    Also returns the names of the first ten competitors in listing order.
    """
    competitors = graph.list_nodes("competitor")
    feature_count = len(graph.list_nodes("feature"))
    tech_count = len(graph.list_nodes("technology"))
    leading = []
    for competitor in competitors[:10]:
        leading.append(competitor.name)
    return dict(
        competitors=len(competitors),
        features_tracked=feature_count,
        technologies=tech_count,
        top_competitors=leading,
    )


def get_segment_landscape(graph: KnowledgeGraphService, segment: str) -> dict:
    """Summarise the competitors that target one market segment.

    The first ``market_segment`` node matching ``segment`` is used; when none
    matches, an all-zero landscape is returned. A competitor belongs to the
    segment when it has a ``targets_market`` edge into the segment node.

    Returns a dict with the segment name, the competitor count, the number of
    distinct features and technologies those competitors point at, the number
    of ``threatens`` edges between them, and up to ten competitor names in
    sorted order.
    """
    seg_nodes = _matching_nodes(graph, "market_segment", segment)
    if not seg_nodes:
        return _empty_landscape(segment)
    seg_node = seg_nodes[0]
    comp_ids = [
        e.source_id for e in graph.get_edges(seg_node.id, "in")
        if e.edge_type == "targets_market"
    ]
    members = set(comp_ids)  # constant-time membership for the threat count
    names: list[str] = []
    feats: set[str] = set()
    techs: set[str] = set()
    threats = 0
    for comp_id in comp_ids:
        # One node lookup and one edge query per competitor.
        node = graph.get_node(comp_id)
        if node:
            names.append(node.name)
        for edge in graph.get_edges(comp_id, "out"):
            if edge.edge_type == "has_feature":
                feats.add(edge.target_id)
            elif edge.edge_type == "uses_technology":
                techs.add(edge.target_id)
            elif edge.edge_type == "threatens" and edge.target_id in members:
                threats += 1
    return {
        "segment": seg_node.name,
        "competitors": len(comp_ids),
        "features_tracked": len(feats),
        "technologies": len(techs),
        "threat_count": threats,
        "top_competitors": sorted(names)[:10],
    }


def _matching_ids(graph: KnowledgeGraphService, node_type: str, query: str) -> set[str]:
    """Return the ids of the ``node_type`` nodes that match ``query``."""
    matched = set()
    for node in _matching_nodes(graph, node_type, query):
        matched.add(node.id)
    return matched


def _matching_nodes(graph: KnowledgeGraphService, node_type: str, query: str) -> list[Node]:
    val = query.lower()
    return [n for n in graph.list_nodes(node_type) if val in n.name.lower() or val == n.id.lower()]


def _targets_for(graph: KnowledgeGraphService, node_id: str, edge_type: str) -> set[str]:
    return {e.target_id for e in graph.get_edges(node_id, "out") if e.edge_type == edge_type}


def _threat_level(have_it: int, total: int) -> str:
    if total == 0:
        return "low"
    ratio = have_it / total
    if ratio > 0.7:
        return "high"
    return "medium" if ratio > 0.3 else "low"


def _recommend(feature: str, have: int, total: int, threat: str) -> str:
    if have == 0:
        return f"No competitor has '{feature}' — potential differentiator"
    suffix = {"high": "Table stakes — must have.", "medium": "Growing trend.",
              "low": "Differentiator opportunity."}[threat]
    return f"{have}/{total} competitors have this. {suffix}"


def _empty_landscape(segment: str) -> dict:
    return {
        "segment": segment, "competitors": 0,
        "features_tracked": 0, "technologies": 0,
        "threat_count": 0, "top_competitors": [],
    }


================================================
FILE: maggy/maggy/cikg/storage.py
================================================
"""SQLite helpers for the competitive graph."""

from __future__ import annotations

import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator

# DDL for the graph store: a nodes table keyed by id and an edges table keyed
# by (source_id, target_id, edge_type), each with lookup indexes. The metadata
# columns are TEXT with a '{}' default. Every statement uses IF NOT EXISTS,
# so running the script again is harmless.
SCHEMA = """
CREATE TABLE IF NOT EXISTS nodes (
    id TEXT PRIMARY KEY,
    node_type TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT DEFAULT '',
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(node_type);
CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);
CREATE TABLE IF NOT EXISTS edges (
    source_id TEXT NOT NULL,
    target_id TEXT NOT NULL,
    edge_type TEXT NOT NULL,
    weight REAL DEFAULT 1.0,
    metadata TEXT DEFAULT '{}',
    PRIMARY KEY (source_id, target_id, edge_type)
);
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


================================================
FILE: maggy/maggy/cli.py
================================================
"""Maggy CLI — terminal interface for the engineering platform."""

from __future__ import annotations

import typer

from maggy.cli_client import MaggyClient
from maggy.cli_output import (
    console,
    dump_json,
    render_budget,
    render_competitors,
    render_health,
    render_inbox,
    render_models,
    render_route,
    render_sessions,
)

# Root Typer application; the commands below register on it.
# no_args_is_help=False lets a bare `maggy` reach the callback instead of
# printing help.
app = typer.Typer(
    name="maggy",
    help="Maggy — AI Engineering Platform",
    no_args_is_help=False,
)
# Single client instance shared by every command in this module.
_client = MaggyClient()


def _ensure() -> bool:
    """Make sure the Maggy server is reachable before a command runs.

    Prints a notice when the health check fails, then asks the client to
    ensure the server. Returns True on success.

    Raises:
        typer.Exit: with code 1 when the server still cannot be reached.
    """
    healthy = _client._check_health()
    if not healthy:
        console.print("[dim]Starting Maggy server...[/dim]")
    if _client.ensure_server():
        return True
    console.print("[red]Cannot reach Maggy server.[/red]")
    raise typer.Exit(1)


@app.callback(invoke_without_command=True)
def main(ctx: typer.Context) -> None:
    """Interactive REPL (in project) or dashboard."""
    # Only act when no subcommand was given; otherwise Typer runs that command.
    if ctx.invoked_subcommand is None:
        _ensure()
        from maggy.cli_chat import detect_project, run_chat
        project = detect_project(_client)
        if not project:
            serve()
        else:
            run_chat(_client, project, routed=True)


@app.command()
def serve() -> None:
    """Start the Maggy server + web dashboard."""
    # Function-scope import: maggy.main is loaded only when this is called.
    from maggy.main import main as start_server
    start_server()


@app.command()
def status(json_out: bool = typer.Option(False, "--json")) -> None:
    """Show server health and config summary."""
    _ensure()
    data = _client.health()
    # --json dumps the raw payload; otherwise it is rendered for the terminal.
    if json_out:
        dump_json(data)
    else:
        render_health(data)


@app.command()
def inbox(
    refresh: bool = typer.Option(False, "--refresh"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    """Show AI-ranked task inbox."""
    _ensure()
    data = _client.inbox(refresh=refresh)
    if json_out:
        dump_json(data)
        return
    # An absent or empty "items" entry gets a short notice instead of a table.
    if data.get("items"):
        render_inbox(data)
    else:
        console.print("[dim]No tasks in inbox.[/dim]")


@app.command()
def sessions(json_out: bool = typer.Option(False, "--json")) -> None:
    """List active AI sessions across projects."""
    _ensure()
    data = _client.activity()
    # --json dumps the raw payload; otherwise it is rendered for the terminal.
    if json_out:
        dump_json(data)
    else:
        render_sessions(data)


@app.command()
def chat(
    project: str = typer.Argument(..., help="Project key"),
    direct: bool = typer.Option(False, "--direct"),
) -> None:
    """Interactive chat with a project's AI session."""
    _ensure()
    from maggy.cli_chat import run_chat
    # --direct turns routing off: run_chat receives routed=False.
    run_chat(_client, project, routed=not direct)


@app.command()
def spawn(
    task: str = typer.Argument(..., help="Task description"),
) -> None:
    """Spawn a background AI session."""
    _ensure()
    from maggy.cli_chat import detect_project
    from maggy.cli_sessions import spawn_session
    project = detect_project(_client)
    # A session is only spawned when a project was detected.
    if project:
        spawn_session(_client, task, project)
        return
    console.print("[red]Not in a project directory.[/red]")
    raise typer.Exit(1)


@app.command()
def ps() -> None:
    """List all managed sessions (chat + executor)."""
    _ensure()
    # Listing and output are delegated to maggy.cli_sessions.list_all.
    from maggy.cli_sessions import list_all
    list_all(_client)


@app.command()
def kill(
    session_id: str = typer.Argument(..., help="Session ID"),
) -> None:
    """Stop a managed session."""
    _ensure()
    # Stopping is delegated to maggy.cli_sessions.kill_session.
    from maggy.cli_sessions import kill_session
    kill_session(_client, session_id)


@app.command()
def execute(
    task_id: str = typer.Argument(..., help="Task ID"),
    plan: bool = typer.Option(False, "--plan"),
) -> None:
    """Execute a task via the TDD pipeline."""
    _ensure()
    # --plan switches the requested mode from "tdd" to "plan".
    mode = "tdd"
    if plan:
        mode = "plan"
    data = _client.execute(task_id, mode)
    session = data.get('session_id', '?')
    console.print(
        f"[green]Started[/green] session [bold]{session}[/bold] ({mode} mode)",
    )


@app.command()
def route(
    blast: int = typer.Argument(..., help="Complexity 1-10"),
    task_type: str = typer.Option("general", "--type"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    """Get routing decision for a complexity score."""
    _ensure()
    data = _client.route(blast, task_type)
    # --json dumps the raw payload; otherwise it is rendered for the terminal.
    if json_out:
        dump_json(data)
    else:
        render_route(data)


@app.command()
def budget(json_out: bool = typer.Option(False, "--json")) -> None:
    """Show per-provider token budget."""
    _ensure()
    data = _client.budget_summary()
    # --json dumps the raw payload; otherwise it is rendered for the terminal.
    if json_out:
        dump_json(data)
    else:
        render_budget(data)


@app.command()
def models(json_out: bool = typer.Option(False, "--json")) -> None:
    """Show model performance heatmap."""
    _ensure()
    data = _client.models_heatmap()
    # --json dumps the raw payload; otherwise it is rendered for the terminal.
    if json_out:
        dump_json(data)
    else:
        render_models(data)


@app.command()
def competitors(
    briefing: bool = typer.Option(False, "--briefing"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    """Show competitor intelligence."""
    _ensure()
    # --briefing selects the summary endpoint instead of the news list.
    fetch = _client.competitors_briefing if briefing else _client.competitors_news
    payload = fetch()
    if json_out:
        dump_json(payload)
        return
    if briefing:
        # The briefing is printed as plain text, with a fallback message.
        console.print(payload.get("summary", "No briefing available."))
        return
    render_competitors(payload)


@app.command()
def process(
    project: str = typer.Argument(..., help="Project key"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    """Show process health for a project."""
    _ensure()
    report = _client.process_health(project)
    # Both branches emit JSON: --json through dump_json, the default through
    # Rich's print_json.
    if json_out:
        dump_json(report)
    else:
        console.print_json(data=report)


@app.command()
def config(json_out: bool = typer.Option(False, "--json")) -> None:
    """Show current configuration (redacted)."""
    _ensure()
    # NOTE: json_out is accepted but never consulted here; the configuration
    # is always dumped as JSON.
    settings = _client.config()
    dump_json(settings)


================================================
FILE: maggy/maggy/cli_chat.py
================================================
"""Interactive chat REPL for Maggy CLI with model routing."""
from __future__ import annotations

import os

from rich.console import Console
from rich.live import Live
from rich.markdown import Markdown
from rich.prompt import Prompt
from rich.spinner import Spinner

from maggy.cli_repl_cmds import SessionState, dispatch
from maggy.cli_welcome import render_welcome
from maggy.services.session_detect import detect_all

# Module-level Rich console that every function in this module prints through.
console = Console()

# Inputs that end the chat loop; the loop compares the lower-cased input
# against this set ("/quit" is checked separately there).
EXIT_WORDS = frozenset({"exit", "bye", "quit", "/exit", "/bye"})
# Substrings searched for in a lower-cased error message to decide whether
# the account-switch guide should be shown.
_QUOTA_MARKERS = ("rate_limit", "quota", "exceeded", "429")


def detect_project(client) -> str | None:
    """Auto-detect project from current working directory."""
    return client.detect_project(os.getcwd())


def run_chat(
    client, project: str, routed: bool = True,
) -> None:
    """Run an interactive chat for ``project`` until the user leaves.

    Finds (or creates) the project's chat session, prints the welcome
    banner and recent context, then hands control to the REPL loop.
    ``routed`` is forwarded to the loop and selects the routed send path.
    """
    # The "was it resumed" flag is returned but not needed here.
    session, _resumed = _find_or_create(client, project)
    session_id = session.get("id", "?")
    working_dir = session.get("working_dir", "?")
    render_welcome(project, session, client)
    _show_resume_info(client, session_id, working_dir)
    _repl_loop(
        client,
        SessionState(session_id=session_id, working_dir=working_dir),
        routed,
    )
    console.print("[dim]Session saved. Bye.[/dim]")


def _find_or_create(client, project: str) -> tuple[dict, bool]:
    for s in client.chat_sessions():
        if s.get("project_key") == project:
            return s, True
    return client.chat_create(project), False


def _show_resume_info(client, sid: str, wd: str) -> None:
    """Print context for a session that is being picked up again.

    Shows the sessions that ``detect_all(wd)`` reports (CLI name plus the
    first 8 characters of each session id), followed by the last three
    messages of chat session ``sid``, each cut to 120 characters.
    """
    prior = detect_all(wd).sessions
    if prior:
        labels = ", ".join(f"{s.cli}({s.session_id[:8]})" for s in prior)
        console.print(f"[dim]Prior: {labels}[/dim]")
    history = client.chat_history(sid).get("messages", [])
    for entry in history[-3:]:
        # Anything that is not the user is labelled as Maggy.
        speaker = "[green]Maggy[/green]"
        if entry.get("role", "?") == "user":
            speaker = "[cyan]You[/cyan]"
        snippet = entry.get("content", "")[:120]
        console.print(f"  {speaker}: {snippet}")


def _repl_loop(client, state: SessionState, routed: bool) -> None:
    """Read user input until an exit word, Ctrl-C or EOF.

    Blank lines are ignored. Slash commands known to this module are handled
    inline, other commands are offered to ``dispatch``, and anything left is
    sent to the chat endpoint (routed or direct, depending on ``routed``)
    with the reply streamed to the console. A ``/blast`` override applies
    to the next message sent and is cleared afterwards.
    """
    pending_blast: int | None = None
    while True:
        try:
            raw = Prompt.ask("[bold cyan]>[/bold cyan]")
        except (KeyboardInterrupt, EOFError):
            # Finish the prompt line before leaving.
            console.print()
            return
        line = raw.strip()
        if not line:
            continue
        if line == "/quit" or line.lower() in EXIT_WORDS:
            return

        if line == "/history":
            _show_history(client, state.session_id)
        elif line == "/sessions":
            _show_sessions(client)
        elif line == "/clear":
            console.clear()
        elif line.startswith("/monitor"):
            status = _call_safe(client.monitor_status)
            console.print(f"[dim]Monitors: {status.get('active', 0)} active[/dim]")
        elif line.startswith("/screenshot"):
            _handle_screenshot(line)
        elif line.startswith("/blast"):
            # Stored for the next message; an unparsable value yields None.
            pending_blast = _parse_blast(line)
        elif dispatch(line, client, state):
            # Handled by one of the shared slash-command handlers.
            pass
        else:
            if routed:
                reply = client.chat_send_routed(
                    state.session_id, line,
                    blast=pending_blast,
                    # An empty restriction list means "no restriction".
                    allowed_models=state.allowed_models or None,
                )
            else:
                reply = client.chat_send_stream(
                    state.session_id, line,
                )
            _stream_chunks(reply)
            pending_blast = None


def _parse_blast(text: str) -> int | None:
    parts = text.split()
    if len(parts) >= 2:
        try:
            val = max(1, min(10, int(parts[1])))
            console.print(f"[dim]Blast override: {val}[/dim]")
            return val
        except ValueError:
            pass
    console.print("[dim]Usage: /blast N (1-10)[/dim]")
    return None


def _stream_chunks(chunks) -> None:
    full, err = "", ""
    try:
        with Live(
            Spinner("dots", text="Thinking..."),
            console=console, refresh_per_second=8,
        ) as live:
            for chunk in chunks:
                ct = chunk.get("type", "")
                if ct == "routing":
                    _show_routing(chunk)
                elif ct == "queued":
                    pos = chunk.get("position", "?")
                    live.update(Markdown(f"[dim]Queued (position {pos})[/dim]"))
                elif ct in ("warning", "agent_status"):
                    console.print(f"[dim]{chunk.get('content', chunk.get('status', ''))}[/dim]")
                elif ct in ("text", "result"):
                    full += chunk.get("content", "")
                    live.update(Markdown(full))
                elif ct == "error":
                    err = chunk.get("content", "")
                elif ct == "done":
                    break
    except KeyboardInterrupt:
        console.print("\n[dim]Interrupted[/dim]")
    except Exception as e:
        err = str(e)
    if err:
        console.print(f"[red]Error:[/red] {err}")
        if any(m in err.lower() for m in _QUOTA_MARKERS):
            from maggy.services.account_guide import render_switch_guide
            render_switch_guide("anthropic")


def _call_safe(fn, default=None):
    try:
        return fn()
    except (Exception, SystemExit):
        return default if default is not None else {}


def _handle_screenshot(text: str) -> None:
    """Send image to Qwen3-VL for analysis."""
    from maggy.services.vision import analyze_image
    parts = text.split(None, 2)
    if len(parts) < 2:
        console.print("[dim]Usage: /screenshot <path> [prompt][/dim]")
        return
    path = parts[1]
    prompt = parts[2] if len(parts) > 2 else None
    console.print(f"[dim]Analyzing {path}...[/dim]")
    _stream_chunks(analyze_image(path, prompt))


def _show_routing(chunk: dict) -> None:
    console.print(f"[dim][{chunk.get('model', '?')}] blast={chunk.get('blast', '?')} {chunk.get('reason', '')}[/dim]")


def _show_history(client, session_id: str) -> None:
    msgs = client.chat_history(session_id).get("messages", [])
    if not msgs:
        console.print("[dim]No messages yet.[/dim]")
        return
    for msg in msgs:
        role, content = msg.get("role", "?"), msg.get("content", "")
        tag = "[cyan]You[/cyan]" if role == "user" else "[green]Maggy[/green]"
        console.print(f"  {tag}: {content[:120]}")


def _show_sessions(client) -> None:
    sessions = client.chat_sessions()
    if not sessions:
        console.print("[dim]No chat sessions.[/dim]")
        return
    for s in sessions:
        sid = s.get("id", "?")[:8]
        proj = s.get("project_key", "?")
        n = s.get("messages", 0)
        console.print(f"  [bold]{sid}[/bold] {proj} ({n} msgs)")


================================================
FILE: maggy/maggy/cli_client.py
================================================
"""HTTP client for Maggy REST API."""

from __future__ import annotations

import json
import os
import signal
import subprocess
import sys
import time
from urllib.parse import urlparse

import httpx
import typer

from maggy.config import CONFIG_DIR

DEFAULT_URL = "http://127.0.0.1:8080"
HEALTH_TIMEOUT = 2.0
START_WAIT = 45.0
START_POLL = 1.0


class MaggyClient:
    """Thin wrapper over Maggy's REST API."""

    def __init__(self, base_url: str = DEFAULT_URL):
        self.base_url = base_url.rstrip("/")

    # ── Server lifecycle ─────────────────────────

    def _check_health(self) -> bool:
        try:
            r = httpx.get(
                f"{self.base_url}/api/health",
                timeout=HEALTH_TIMEOUT,
            )
            return r.status_code == 200
        except (httpx.ConnectError, httpx.ReadTimeout):
            return False

    def _get_port(self) -> int:
        parsed = urlparse(self.base_url)
        return parsed.port or 8080

    def _kill_stale_port(self) -> None:
        """Kill any process holding our port."""
        try:
            result = subprocess.run(
                ["lsof", "-ti", f":{self._get_port()}"],
                capture_output=True, text=True, timeout=5,
            )
        except (subprocess.SubprocessError, OSError):
            return
        for line in result.stdout.strip().splitlines():
            try:
                os.kill(int(line.strip()), signal.SIGTERM)
            except (ValueError, ProcessLookupError,
                    PermissionError):
                continue
        time.sleep(0.5)

    def _start_server(self) -> None:
        """Spawn server, logging to server.log."""
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        log = open(CONFIG_DIR / "server.log", "a")
        subprocess.Popen(
            [sys.executable, "-m", "maggy.main"],
            stdout=log, stderr=log,
        )

    def ensure_server(self) -> bool:
        """Return True if server is reachable."""
        if self._check_health():
            return True
        self._kill_stale_port()
        self._start_server()
        deadline = time.monotonic() + START_WAIT
        while time.monotonic() < deadline:
            time.sleep(START_POLL)
            if self._check_health():
                return True
        return False

    # ── API calls ────────────────────────────────

    def _handle_error(self, r: httpx.Response) -> None:
        if r.is_success:
            return
        try:
            detail = r.json().get("detail", r.text)
        except Exception:
            detail = r.text
        from rich.console import Console
        Console(stderr=True).print(
            f"[red]Error {r.status_code}:[/red] {detail}",
        )
        raise typer.Exit(1)

    def get(self, path: str, **params) -> dict | list:
        r = httpx.get(
            f"{self.base_url}{path}",
            params=params or None,
            timeout=30.0,
        )
        self._handle_error(r)
        return r.json()

    def post(self, path: str, body: dict) -> dict:
        r = httpx.post(
            f"{self.base_url}{path}",
            json=body,
            timeout=60.0,
        )
        self._handle_error(r)
        return r.json()

    def health(self) -> dict:
        return self.get("/api/health")

    def inbox(self, refresh: bool = False) -> dict:
        return self.get("/api/inbox", refresh=refresh)

    def activity(self) -> dict:
        return self.get("/api/activity")

    def route(self, blast: int, task_type: str) -> dict:
        return self.get(
            "/api/routing/decide",
            blast=blast,
            task_type=task_type,
        )

    def budget_summary(self) -> dict:
        return self.get("/api/budget")

    def competitors_news(self, limit: int = 50) -> list:
        return self.get("/api/competitors/news", limit=limit)

    def competitors_briefing(self) -> dict:
        return self.get("/api/competitors/news/summary")

    def models_heatmap(self) -> list:
        return self.get("/api/routing/heatmap")

    def routing_rules(self) -> dict:
        return self.get("/api/routing/rules")

    def budget_by_provider(self) -> list:
        return self.get("/api/budget/by-provider")

    def process_health(self, project: str) -> dict:
        return self.get(f"/api/process/health/{project}")

    def config(self) -> dict:
        return self.get("/api/config")

    def execute(self, task_id: str, mode: str) -> dict:
        return self.post(
            "/api/execute",
            {"task_id": task_id, "mode": mode},
        )

    def sessions(self) -> list:
        return self.get("/api/execute/sessions")

    # ── Chat ──────────────────────────────────────

    def chat_create(self, project_key: str) -> dict:
        return self.post(
            "/api/chat/sessions",
            {"project_key": project_key},
        )

    def chat_sessions(self) -> list:
        return self.get("/api/chat/sessions")

    def chat_history(self, session_id: str) -> dict:
        return self.get(f"/api/chat/sessions/{session_id}")

    def chat_send_stream(
        self, session_id: str, message: str,
    ):
        """Yield parsed SSE chunks from chat endpoint."""
        url = (
            f"{self.base_url}"
            f"/api/chat/sessions/{session_id}/send"
        )
        with httpx.stream(
            "POST", url,
            json={"message": message},
            timeout=120.0,
        ) as r:
            for line in r.iter_lines():
                if line.startswith("data: "):
                    yield json.loads(line[6:])

    def chat_send_routed(
        self, session_id: str, message: str,
        blast: int | None = None,
        allowed_models: list[str] | None = None,
    ):
        """Yield SSE chunks from routed chat endpoint."""
        url = (
            f"{self.base_url}"
            f"/api/chat/sessions/{session_id}/send-routed"
        )
        body: dict = {"message": message}
        if blast is not None:
            body["blast_score"] = blast
        if allowed_models:
            body["allowed_models"] = allowed_models
        with httpx.stream(
            "POST", url, json=body, timeout=120.0,
        ) as r:
            for line in r.iter_lines():
                if line.startswith("data: "):
                    yield json.loads(line[6:])

    def detect_project(self, cwd: str) -> str | None:
        """Match cwd against configured codebases."""
        try:
            cfg = self.config()
        except Exception:
            return None
        for cb in cfg.get("codebases", []):
            if cwd.startswith(cb.get("path", "")):
                return cb.get("key")
        return None

    # ── Session management ─────────────────────────

    def spawn(self, task: str, project: str) -> dict:
        return self.post(
            "/api/execute",
            {"task_id": task, "mode": "tdd",
             "project_key": project},
        )

    def all_sessions(self) -> list:
        """Merge chat + executor sessions."""
        chat = self.chat_sessions()
        executor = self.sessions()
        combined = []
        for s in chat:
            combined.append({
                "id": s.get("id"),
                "project": s.get("project_key", ""),
                "model": "claude",
                "status": s.get("status", ""),
                "type": "chat",
                "messages": s.get("messages", 0),
            })
        for s in executor:
            combined.append({
                "id": s.get("id"),
                "project": s.get("task_id", ""),
                "model": s.get("model", "?"),
                "status": s.get("status", ""),
                "type": "executor",
                "messages": 0,
            })
        return combined

    def kill_session(self, session_id: str) -> dict:
        r = httpx.delete(
            f"{self.base_url}"
            f"/api/chat/sessions/{session_id}",
            timeout=10.0,
        )
        self._handle_error(r)
        return r.json()

    # ── Monitor ────────────────────────────────────

    def monitor_status(self) -> dict:
        return self.get("/api/monitor/status")

    def monitor_start(self) -> dict:
        return self.post("/api/monitor/start", {})

    def monitor_stop(self) -> dict:
        return self.post("/api/monitor/stop", {})

    # ── Health ─────────────────────────────────────

    def health_dashboard(self) -> dict:
        return self.get("/api/engram/diagnostics")

    def engram_diagnostics(self) -> dict:
        return self.get("/api/engram/diagnostics")


================================================
FILE: maggy/maggy/cli_output.py
================================================
"""Rich terminal formatters for Maggy CLI output."""

from __future__ import annotations

import json
import sys

from rich.console import Console
from rich.panel import Panel
from rich.table import Table

# Module-level Rich console that the render_* functions below print through.
console = Console()


def _is_pipe() -> bool:
    return not sys.stdout.isatty()


def dump_json(data) -> None:
    """Write ``data`` to standard output as JSON indented by two spaces.

    Intended for the ``--json`` flag and for piping into other tools.
    """
    rendered = json.dumps(data, indent=2)
    sys.stdout.write(rendered + "\n")


# ── Status ──────────────────────────────────────


def render_health(data: dict) -> None:
    """Print the health payload as a two-column "Maggy Status" panel.

    Missing fields are shown as "?" (the codebase count as 0).
    """
    grid = Table(show_header=False, box=None, padding=(0, 2))
    grid.add_column(style="bold")
    grid.add_column()
    rows = (
        ("Status", f"[green]{data.get('status', '?')}[/green]"),
        ("Mode", data.get("mode", "?")),
        ("Org", data.get("org", "?")),
        ("Codebases", str(data.get("codebases", 0))),
        ("Provider", data.get("provider", "?")),
    )
    for label, value in rows:
        grid.add_row(label, value)
    console.print(Panel(grid, title="Maggy Status", border_style="blue"))


# ── Inbox ───────────────────────────────────────


def render_inbox(data: dict) -> None:
    """Print the inbox ``items`` as a table, or a notice when there are none.

    Per task: rank, title (first 60 characters), up to three labels and the
    AI reason (first 40 characters).
    """
    tasks = data.get("items", [])
    if not tasks:
        console.print("[dim]No tasks in inbox.[/dim]")
        return
    table = Table(title=f"Inbox ({len(tasks)} tasks)")
    table.add_column("#", style="bold", width=4)
    table.add_column("Title", min_width=30)
    table.add_column("Labels")
    table.add_column("Reason", style="dim")
    for task in tasks:
        shown_labels = task.get("labels", [])[:3]
        table.add_row(
            str(task.get("rank", "")),
            task.get("title", "")[:60],
            ", ".join(shown_labels),
            task.get("ai_reason", "")[:40],
        )
    console.print(table)


# ── Sessions ────────────────────────────────────


def render_sessions(data: dict | list) -> None:
    """Print active sessions as a table, or a notice when there are none.

    ``data`` is either the list of sessions itself or a dict carrying it
    under ``sessions``. The CLI column falls back from ``cli`` to ``tool``
    to "?".
    """
    if isinstance(data, list):
        entries = data
    else:
        entries = data.get("sessions", [])
    if not entries:
        console.print("[dim]No active sessions.[/dim]")
        return
    table = Table(title=f"Active Sessions ({len(entries)})")
    table.add_column("PID", width=8)
    for heading in ("CLI", "Project", "Status", "Agent"):
        table.add_column(heading)
    for entry in entries:
        tool_name = entry.get("cli") or entry.get("tool") or "?"
        agent_name = entry.get("agent_name") or ""
        table.add_row(
            str(entry.get("pid", "")),
            tool_name,
            entry.get("project", "?"),
            entry.get("status", "?"),
            agent_name,
        )
    console.print(table)


# ── Route ───────────────────────────────────────


def _model_name(val) -> str:
    if isinstance(val, dict):
        return val.get("name", "?")
    return str(val) if val else "?"


def render_route(data: dict) -> None:
    """Print a routing decision as a "Routing Decision" panel.

    Always shows the primary model and the reason; the validator and the
    fallback chain appear only when present.
    """
    grid = Table(show_header=False, box=None, padding=(0, 2))
    grid.add_column(style="bold")
    grid.add_column()
    grid.add_row("Primary", f"[green]{_model_name(data.get('primary'))}[/green]")
    validator = data.get("validator")
    if validator:
        grid.add_row("Validator", _model_name(validator))
    chain = data.get("fallback", [])
    if chain:
        grid.add_row("Fallback", " → ".join(map(_model_name, chain)))
    grid.add_row("Reason", str(data.get("reason", "")))
    console.print(Panel(grid, title="Routing Decision", border_style="yellow"))


# ── Budget ──────────────────────────────────────


def render_budget(data: dict) -> None:
    """Print the budget summary as a "Budget" panel.

    Shows today's spend, the daily limit, a 20-cell utilisation bar (red
    above 80 %) and the status, followed by one row per entry of
    ``providers`` when that list is present. Missing or null amounts are
    treated as 0.
    """
    spent = data.get("spent_today_usd", 0) or 0
    limit = data.get("daily_limit_usd", 0) or 0
    pct = (spent / limit * 100) if limit else 0
    # Keep the bar within its 20 cells: spending past the limit would
    # otherwise draw an ever longer bar.
    bar_len = max(0, min(20, int(pct / 5)))
    color = "red" if pct > 80 else "green"
    bar = f"[{color}]{'█' * bar_len}[/{color}]{'░' * (20 - bar_len)}"

    t = Table(show_header=False, box=None, padding=(0, 2))
    t.add_column(style="bold")
    t.add_column()
    t.add_row("Spent today", f"${spent:.2f}")
    t.add_row("Daily limit", f"${limit:.2f}")
    t.add_row("Utilization", f"{pct:.0f}%  {bar}")
    t.add_row("Status", data.get("status", "?"))

    # Per-provider breakdown if available
    providers = data.get("providers", [])
    if providers:
        # Blank row as a visual separator.
        t.add_row("", "")
        for p in providers:
            p_used = p.get("used", 0)
            p_limit = p.get("limit", 0)
            t.add_row(
                p.get("name", "?"),
                f"${p_used:.2f} / ${p_limit:.2f}",
            )
    console.print(Panel(t, title="Budget", border_style="green"))


# ── Competitors ─────────────────────────────────


def render_competitors(news: list) -> None:
    """Print competitor news as a table, or a notice when the list is empty.

    The title reports the total number of items, but only the first 20 are
    listed: date (first 10 characters), event type and headline (first 60
    characters).
    """
    if not news:
        console.print("[dim]No competitor news.[/dim]")
        return
    table = Table(title=f"Competitor Intel ({len(news)} items)")
    table.add_column("Date", width=12)
    table.add_column("Type")
    table.add_column("Headline", min_width=40)
    shown = news[:20]
    for story in shown:
        day = story.get("date", "?")[:10]
        kind = story.get("event_type", "?")
        headline = story.get("headline", "")[:60]
        table.add_row(day, kind, headline)
    console.print(table)


# ── Models ──────────────────────────────────────


def render_models(heatmap: list) -> None:
    """Print the model performance heatmap as a table.

    Each entry contributes its model, task type and reward; the reward is
    coloured green from 0.8, yellow from 0.5 and red below. A notice is
    printed when the heatmap is empty.
    """
    if not heatmap:
        console.print("[dim]No model performance data.[/dim]")
        return
    t = Table(title="Model Performance Heatmap")
    t.add_column("Model")
    t.add_column("Task Type")
    t.add_column("Reward", justify="right")
    for entry in heatmap:
        # The REPL commands read this same heatmap through "avg_reward";
        # accept that key first and keep "reward" as a fallback so either
        # payload shape shows real numbers instead of 0.00.
        reward = entry.get("avg_reward")
        if reward is None:
            reward = entry.get("reward")
        if reward is None:
            reward = 0
        color = "green" if reward >= 0.8 else "yellow" if reward >= 0.5 else "red"
        t.add_row(
            entry.get("model", "?"),
            entry.get("task_type", "?"),
            f"[{color}]{reward:.2f}[/{color}]",
        )
    console.print(t)


================================================
FILE: maggy/maggy/cli_repl_cmds.py
================================================
"""REPL slash command handlers for Maggy CLI."""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path

from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from rich.table import Table

# Module-level Rich console that every command handler below prints through.
console = Console()

# Model names /use accepts without a warning; also the placeholder rows
# /models shows when no heatmap data is available.
_KNOWN_MODELS = ("local", "kimi", "claude", "codex")


def _call(fn, d=None):
    try:
        return fn()
    except (Exception, SystemExit):
        return d if d is not None else {}


@dataclass
class SessionState:
    """Mutable session-level state for REPL."""

    # Id of the chat session this state belongs to.
    session_id: str = ""
    # Directory searched for CLAUDE.md by the /claude-md command.
    working_dir: str = ""
    # Models chosen with /use; an empty list means no restriction.
    allowed_models: list[str] = field(default_factory=list)


def dispatch(cmd: str, client, state: SessionState) -> bool:
    """Route a slash command. Returns True if handled.

    The command name is the first whitespace-separated token, compared
    case-insensitively; the remainder of the line is passed as arguments
    to handlers that take them. Empty or whitespace-only input is reported
    as not handled.
    """
    parts = cmd.strip().split(None, 1)
    if not parts:
        # Nothing to dispatch; indexing parts[0] would raise IndexError.
        return False
    name = parts[0].lower()
    args = parts[1] if len(parts) > 1 else ""
    # Handlers that only need the client.
    simple = {
        "/stats": cmd_stats, "/budget": cmd_budget,
        "/route": cmd_route, "/models": cmd_models,
        "/config": cmd_config, "/health": cmd_health,
    }
    if name in simple:
        simple[name](client)
        return True
    if name == "/use":
        cmd_use(args, state)
    elif name == "/claude-md":
        cmd_claude_md(state)
    elif name == "/help":
        cmd_help()
    else:
        return False
    return True


def cmd_stats(client) -> None:
    """Print a combined "Stats" table.

    Rows: spend against the daily limit, token counts (when either is
    non-zero), status, spend per provider, and the first eight heatmap
    entries with their average reward and sample count. Each client call
    is best-effort through ``_call``.
    """
    budget = _call(client.budget_summary)
    table = Table(title="Stats")
    table.add_column("Metric", style="bold")
    table.add_column("Value")
    spent = budget.get('spent_today_usd', 0)
    limit = budget.get('daily_limit_usd', 0)
    table.add_row("Spent", f"${spent:.2f} / ${limit:.2f}")
    tokens_in = budget.get("input_tokens", 0)
    tokens_out = budget.get("output_tokens", 0)
    if tokens_in or tokens_out:
        table.add_row("Tokens", f"{tokens_in:,} in / {tokens_out:,} out")
    table.add_row("Status", budget.get("status", "?"))
    for provider in _call(client.budget_by_provider, []):
        provider_name = provider.get('provider', '?')
        provider_spend = provider.get('spent_usd', 0)
        table.add_row(f"  {provider_name}", f"${provider_spend:.2f}")
    for entry in _call(client.models_heatmap, [])[:8]:
        reward = entry.get("avg_reward", 0)
        # Two colours only in this view: green from 0.8, yellow otherwise.
        colour = "green" if reward >= 0.8 else "yellow"
        heading = f"  {entry.get('model', '?')} ({entry.get('task_type', '')})"
        value = f"[{colour}]{reward:.2f}[/{colour}] ({entry.get('samples', 0)})"
        table.add_row(heading, value)
    console.print(table)


def cmd_budget(client) -> None:
    """Print the "Budget" panel for the REPL.

    Shows either the subscription plan or spend against the daily limit,
    then a usage bar capped at 20 cells (red above 80 %), the status and
    the spend per provider.
    """
    summary = _call(client.budget_summary)
    spent = summary.get("spent_today_usd", 0)
    limit = summary.get("daily_limit_usd", 0)
    usage_pct = (spent / limit * 100) if limit else 0
    filled = min(20, int(usage_pct / 5))
    colour = "red" if usage_pct > 80 else "green"
    meter = f"[{colour}]{'█' * filled}[/{colour}]{'░' * (20 - filled)}"
    grid = Table(show_header=False, box=None, padding=(0, 2))
    grid.add_column(style="bold")
    grid.add_column()
    if summary.get("plan") == "subscription":
        grid.add_row("Plan", "[green]Subscription[/green]")
    else:
        grid.add_row("Spent", f"${spent:.2f} / ${limit:.2f}")
    grid.add_row("Usage", f"{usage_pct:.0f}%  {meter}")
    grid.add_row("Status", summary.get("status", "?"))
    for provider in _call(client.budget_by_provider, []):
        provider_spend = provider.get('spent_usd', 0)
        grid.add_row(provider.get("provider", "?"), f"${provider_spend:.2f}")
    console.print(Panel(grid, title="Budget", border_style="green"))


def cmd_route(client) -> None:
    """Print the routing rules and, when present, per-model performance.

    The first table lists the task-type overrides (model and reason)
    under a title naming the routing mode, followed by a fixed blast-tier
    legend. The second table (strengths and success rate per model) is
    printed only if the payload carries ``model_performance``.
    """
    rules = _call(client.routing_rules)
    overrides = Table(title=f"Routing ({rules.get('mode', '?')})")
    overrides.add_column("Task Type", style="bold")
    overrides.add_column("Model")
    overrides.add_column("Reason", style="dim")
    for task_type, rule in rules.get("task_type_overrides", {}).items():
        overrides.add_row(task_type, rule.get("model", "?"), rule.get("reason", ""))
    console.print(overrides)
    console.print("[dim]Blast: 1-3 cheap | 4-6 medium | 7-10 premium[/dim]")
    performance = rules.get("model_performance", {})
    if performance:
        stats = Table(title="Model Performance")
        stats.add_column("Model", style="bold")
        stats.add_column("Strengths")
        stats.add_column("Rate", justify="right")
        for model, details in performance.items():
            strengths = ", ".join(details.get("strengths", []))
            rate = f"{details.get('success_rate', 0):.0%}"
            stats.add_row(model, strengths, rate)
        console.print(stats)


def cmd_models(client) -> None:
    """Print the "Model Rewards" table.

    One row per heatmap entry (model, task type, blast tier, coloured
    average reward, sample count). Without heatmap data a placeholder
    row is shown for each known model.
    """
    heatmap = _call(client.models_heatmap, [])
    table = Table(title="Model Rewards")
    table.add_column("Model")
    table.add_column("Task Type")
    table.add_column("Blast Tier")
    table.add_column("Reward", justify="right")
    table.add_column("N", justify="right")
    if heatmap:
        for entry in heatmap:
            reward = entry.get("avg_reward", 0)
            if reward >= 0.8:
                colour = "green"
            elif reward >= 0.5:
                colour = "yellow"
            else:
                colour = "red"
            table.add_row(
                entry.get("model", "?"),
                entry.get("task_type", "?"),
                entry.get("blast_tier", "?"),
                f"[{colour}]{reward:.2f}[/{colour}]",
                str(entry.get("samples", 0)),
            )
    else:
        for model in _KNOWN_MODELS:
            table.add_row(model, "-", "-", "-", "0")
    console.print(table)


def cmd_use(args: str, state: SessionState) -> None:
    """Set allowed models for this session.

    ``args`` is a comma-separated list of model names. An empty argument or
    ``all`` (any case) lifts the restriction. Names outside ``_KNOWN_MODELS``
    trigger a warning but are still stored.
    """
    lift_restriction = (not args) or args.strip().lower() == "all"
    if lift_restriction:
        state.allowed_models = []
        console.print("[dim]Routing: all models enabled[/dim]")
        return
    chosen = []
    for token in args.split(","):
        token = token.strip()
        if token:
            chosen.append(token)
    unknown = [name for name in chosen if name not in _KNOWN_MODELS]
    if unknown:
        console.print(f"[yellow]Unknown: {', '.join(unknown)}. Known: {', '.join(_KNOWN_MODELS)}[/yellow]")
    state.allowed_models = chosen
    console.print(f"[dim]Routing restricted to: {', '.join(chosen)}[/dim]")


def cmd_config(client) -> None:
    """Print a short "Config" panel.

    Lists the number of codebases, the first five of them (key and path),
    the routing mode (default "dynamic") and the daily budget limit.
    """
    settings = _call(client.config)
    grid = Table(show_header=False, box=None, padding=(0, 2))
    grid.add_column(style="bold")
    grid.add_column()
    codebases = settings.get("codebases", [])
    grid.add_row("Codebases", str(len(codebases)))
    for codebase in codebases[:5]:
        grid.add_row(f"  {codebase.get('key', '?')}", codebase.get("path", ""))
    routing_mode = settings.get("routing", {}).get("mode", "dynamic")
    grid.add_row("Routing", routing_mode)
    daily_limit = settings.get('budget', {}).get('daily_limit_usd', 0)
    grid.add_row("Limit", f"${daily_limit:.2f}")
    console.print(Panel(grid, title="Config", border_style="blue"))


def cmd_claude_md(state: SessionState) -> None:
    """Show project's CLAUDE.md.

    Looks for ``CLAUDE.md`` and then ``.claude/CLAUDE.md`` under
    ``state.working_dir`` and renders the first one found as Markdown.
    A notice is printed when neither file exists.
    """
    wd = Path(state.working_dir)
    for name in ("CLAUDE.md", ".claude/CLAUDE.md"):
        path = wd / name
        # is_file() rather than exists(): a directory with that name must
        # not be opened as if it were the document.
        if path.is_file():
            # Read explicitly as UTF-8 instead of the platform default, and
            # replace undecodable bytes rather than failing on them.
            text = path.read_text(encoding="utf-8", errors="replace")
            console.print(Markdown(text))
            return
    console.print("[dim]CLAUDE.md not found in project.[/dim]")


def cmd_health(client) -> None:
    """Print the memory "Health" panel.

    The Engram figures come from the payload itself when it carries
    ``health_score`` at the top level, otherwise from its ``engram``
    entry; the Mnemos figures come from its ``mnemos`` entry. The Engram
    score is green from 0.7, yellow from 0.4 and red below.
    """
    payload = _call(client.health_dashboard)
    if "health_score" in payload:
        engram = payload
    else:
        engram = payload.get("engram", {})
    mnemos = payload.get("mnemos", {})
    score = engram.get("health_score", 0)
    if score >= 0.7:
        colour = "green"
    elif score >= 0.4:
        colour = "yellow"
    else:
        colour = "red"
    grid = Table(show_header=False, box=None, padding=(0, 2))
    grid.add_column(style="bold")
    grid.add_column()
    grid.add_row("Engram", f"[{colour}]{score:.0%}[/{colour}] ({engram.get('active', 0)}/{engram.get('total', 0)})")
    grid.add_row("Mnemos", f"{mnemos.get('state', '?')} ({mnemos.get('composite', 0):.2f})")
    console.print(Panel(grid, title="Health", border_style="green"))


# Help text printed by /help. It is passed to the Rich console as-is, so the
# "[bold]" tags are console markup.
_HELP = """\
[bold]Commands:[/bold]
  /stats   Budget+perf      /budget  Breakdown       /route   Rules+tiers
  /models  Reward heatmap   /health  Memory health   /monitor Trackers
  /screenshot F  Analyze image with Qwen3-VL         /claude-md CLAUDE.md
  /use M   Restrict models  /config  Settings        /blast N Override
  /history Messages         /sessions List           /clear   Screen
  /quit    Exit             /help    This help"""


def cmd_help() -> None:
    """Print the slash-command overview stored in ``_HELP``."""
    console.print(_HELP)


================================================
FILE: maggy/maggy/cli_sessions.py
================================================
"""Session management for Maggy CLI — spawn, list, kill."""

from __future__ import annotations

from rich.console import Console
from rich.table import Table

# Module-level Rich console used by the session helpers below.
console = Console()


def spawn_session(client, task: str, project: str) -> None:
    """Start a background execution for ``task`` and report its session id.

    "?" is shown when the response carries no ``session_id``.
    """
    response = client.spawn(task, project)
    session_id = response.get("session_id", "?")
    message = f"[green]Spawned[/green] session [bold]{session_id}[/bold] for {project}"
    console.print(message)


def list_all(client) -> None:
    """Print every session the client reports (chat and executor) as a table.

    Columns: id, project, model, type and status. A notice is printed when
    there are no sessions.
    """
    entries = client.all_sessions()
    if not entries:
        console.print("[dim]No active sessions.[/dim]")
        return
    table = Table(title="All Sessions")
    table.add_column("ID", width=12)
    for heading in ("Project", "Model", "Type", "Status"):
        table.add_column(heading)
    for entry in entries:
        # Only the id is converted with str(); the other fields are passed
        # through as they are.
        cells = [str(entry.get("id", "?"))]
        cells.extend(
            entry.get(field_name, "?")
            for field_name in ("project", "model", "type", "status")
        )
        table.add_row(*cells)
    console.print(table)


def kill_session(client, session_id: str) -> None:
    """Ask the client to kill ``session_id`` and print a confirmation."""
    client.kill_session(session_id)
    notice = f"[yellow]Killed[/yellow] session [bold]{session_id}[/bold]"
    console.print(notice)


================================================
FILE: maggy/maggy/cli_welcome.py
================================================
"""Rich welcome banner for Maggy CLI startup."""

from __future__ import annotations

import os

from rich.console import Console
from rich.panel import Panel
from rich.table import Table

# Module-level Rich console used for the welcome banner.
console = Console()

# Version string shown in the banner title ("Maggy v<VERSION> - ...").
VERSION = "0.5"


def render_welcome(
    project: str, session: dict, client,
) -> None:
    """Print the startup panel followed by a one-line command hint.

    The panel is a header-less two-column table filled by
    ``_add_project_rows`` and ``_add_system_rows``. Its title says
    "Resuming" when the session already has messages, otherwise "New".
    """
    grid = Table(show_header=False, box=None, padding=(0, 2))
    grid.add_column(style="bold")
    grid.add_column()
    _add_project_rows(grid, project, session)
    _add_system_rows(grid, client, session)

    if session.get("messages", 0):
        label = "Resuming"
    else:
        label = "New"
    panel = Panel(
        grid, title=f"Maggy v{VERSION} - {label}", border_style="cyan",
    )
    console.print(panel)
    console.print("[dim]/help for commands | /stats for budget[/dim]\n")


def _add_project_rows(
    t: Table, project: str, session: dict,
) -> None:
    """Add the project, working-directory and session rows to ``t``.

    Args:
        t: Table being assembled for the welcome panel.
        project: Project name, rendered in bold.
        session: Session dict; reads ``working_dir`` (falls back to the
            current directory), ``messages`` and ``id``.
    """
    wd = session.get("working_dir") or os.getcwd()
    short_wd = _shorten(str(wd), 35)
    msgs = session.get("messages", 0)
    # The ID may be absent, None or a non-string value; coerce it before
    # slicing so the banner cannot fail with a TypeError.
    raw_id = session.get("id")
    sid = str("?" if raw_id is None else raw_id)[:8]
    t.add_row("Project", f"[bold]{project}[/bold]")
    t.add_row("Dir", short_wd)
    t.add_row("Session", f"{sid} ({msgs} msgs)")


_KNOWN_MODELS = ("local", "kimi", "gpt", "claude", "codex")


def _add_system_rows(
    t: Table, client, session: dict,
) -> None:
    """Add the budget, model-count, status and memory-health rows to ``t``.

    Every client lookup goes through ``_safe_call``, so a failing call
    degrades to placeholder values rather than raising. ``session`` is
    accepted but not read here.
    """
    budget = _safe_call(client.budget_summary)
    # _safe_call hands back [] on failure; treat anything but a dict as empty.
    info = budget if isinstance(budget, dict) else {}

    if info.get("plan") == "subscription":
        t.add_row("Budget", "[green]Subscription[/green]")
    else:
        spent = info.get("spent_today_usd", 0)
        limit = info.get("daily_limit_usd", 0)
        t.add_row("Budget", f"${spent:.2f} / ${limit:.2f}")

    models = _safe_call(client.models_heatmap)
    if models:
        label = f"{len(models)} tracked"
    else:
        label = f"{len(_KNOWN_MODELS)} available"
    t.add_row("Models", label)

    status = info.get("status", "?")
    t.add_row("Status", f"[green]{status}[/green]")
    _add_health_row(t, client)


def _add_health_row(t: Table, client) -> None:
    """Append a colour-coded "Memory" row with the engram health score.

    Nothing is added when the diagnostics lookup does not yield a dict.
    Scores of 0.7 and above are green, 0.4 and above yellow, the rest red.
    """
    diag = _safe_call(client.engram_diagnostics)
    if not isinstance(diag, dict):
        return
    score = diag.get("health_score", 0)
    if score >= 0.7:
        color = "green"
    elif score >= 0.4:
        color = "yellow"
    else:
        color = "red"
    t.add_row("Memory", f"[{color}]{score:.0%}[/{color}]")


def _safe_call(fn):
    """Call a client method, return empty on failure."""
    try:
        return fn() or []
    except Exception:
        return []


def _shorten(path: str, max_len: int) -> str:
    """Truncate long paths with ellipsis."""
    if len(path) <= max_len:
        return path
    return "..." + path[-(max_len - 3) :]


================================================
FILE: maggy/maggy/config.py
================================================
"""Config loader for Maggy — reads ~/.maggy/config.yaml with env overrides."""

from __future__ import annotations

import os
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any

import yaml

CONFIG_DIR = Path(os.environ.get("MAGGY_HOME", "~/.maggy")).expanduser()
CONFIG_PATH = CONFIG_DIR / "config.yaml"

if TYPE_CHECKING:
    from maggy.budget import ProviderBudget


def _default_storage_path() -> str:
    """Return the database path to use when the config does not name one.

    This is ``maggy.db`` under ``CONFIG_DIR``, passed through
    ``_safe_storage_path`` so an unwritable directory falls back to a
    temp location.
    """
    preferred = CONFIG_DIR / "maggy.db"
    return _safe_storage_path(preferred)


def _safe_storage_path(path: str | Path) -> str:
    target = Path(path).expanduser()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        probe = target.parent / ".write-test"
        probe.write_text("")
        probe.unlink()
        return str(target)
    except OSError:
        fallback = Path(tempfile.gettempdir()) / "maggy" / "maggy.db"
        fallback.parent.mkdir(parents=True, exist_ok=True)
        return str(fallback)


@dataclass
class GitHubConfig:
    """GitHub settings, loaded from the ``issue_tracker.github`` section."""

    org: str = ""  # GitHub organisation; auto_configure() fills it from discovery
    repos: list[str] = field(default_factory=list)  # auto_configure() stores discovered repo keys here
    labels: list[str] = field(default_factory=list)
    # Replaced by $GITHUB_TOKEN when set, else by the git credential helper
    # when empty (see _merge_env); save() removes it before writing.
    token: str = ""


@dataclass
class AsanaConfig:
    """Asana settings, loaded from the ``issue_tracker.asana`` section."""

    workspace_id: str = ""
    boards: dict[str, str] = field(default_factory=dict)  # presumably board name -> board id; confirm against the Asana provider
    # Replaced by $ASANA_API_KEY when set (see _merge_env); save() removes it.
    token: str = ""


@dataclass
class LinearConfig:
    """Linear settings, loaded from the ``issue_tracker.linear`` section."""

    workspace: str = ""
    # Replaced by $LINEAR_API_KEY when set (see _merge_env); save() removes it.
    token: str = ""


@dataclass
class IssueTrackerConfig:
    """Issue-tracker selection plus the per-provider settings blocks."""

    # Name of the active provider. _has_provider_credentials() recognises
    # "github" and "asana"; any other value is reported as not configured.
    provider: str = "github"
    github: GitHubConfig = field(default_factory=GitHubConfig)
    asana: AsanaConfig = field(default_factory=AsanaConfig)
    linear: LinearConfig = field(default_factory=LinearConfig)


@dataclass
class CodebaseConfig:
    """One local codebase entry from the ``codebases`` list. Both fields are required."""

    path: str  # filesystem path; "~" is expanded by MaggyConfig.codebase_paths()
    key: str  # identifier used as the dict key in MaggyConfig.codebase_paths()


@dataclass
class ProjectConfig:
    """One entry from the ``projects`` list; the first four fields are required."""

    name: str
    repo: str
    path: str
    default_branch: str
    icpg: bool = True  # feature toggle; presumably enables iCPG for this project (confirm)
    cikg: bool = False  # feature toggle; semantics not visible in this module (confirm)


@dataclass
class OKRItem:
    """A single OKR entry from ``okrs.items``; ``id`` and ``title`` are required."""

    id: str
    title: str
    keywords: list[str] = field(default_factory=list)  # presumably used to match work to this OKR (confirm)


@dataclass
class OKRConfig:
    """The ``okrs`` section: where OKRs come from and the listed items."""

    source: str = "skip"  # "skip" is the default when the section is absent
    items: list[OKRItem] = field(default_factory=list)


@dataclass
class CompetitorsConfig:
    """The ``competitors`` section of the config file."""

    categories: list[str] = field(default_factory=list)
    seed: list[str] = field(default_factory=list)  # presumably an initial list of competitor names (confirm)


@dataclass
class AIConfig:
    """The ``ai`` section: provider, model and spend cap per execution."""

    provider: str = "anthropic"
    model: str = "claude-sonnet-4-5-20250929"
    # Replaced by $ANTHROPIC_API_KEY when set (see _merge_env); save() removes it.
    api_key: str = ""
    max_budget_usd_per_execute: float = 5.0


@dataclass
class StorageConfig:
    """The ``storage`` section: backend type and database location."""

    backend: str = "sqlite"
    # Defaults to maggy.db under CONFIG_DIR, or a temp-dir fallback when that
    # directory is not writable (see _default_storage_path).
    path: str = field(default_factory=_default_storage_path)


@dataclass
class DashboardConfig:
    """The ``dashboard`` section: bind address and authentication settings."""

    host: str = "127.0.0.1"
    port: int = 8080
    auth_mode: str = "local"
    # Replaced by $MAGGY_API_KEY when set (see _merge_env); save() removes it.
    api_key: str = ""


@dataclass
class OrgConfig:
    """The ``org`` section: organisation display name and domain."""

    name: str = "Your Org"  # placeholder used until the user sets a name
    domain: str = ""


@dataclass
class BootstrapConfig:
    """The ``bootstrap`` section of the config file."""

    # Explicit install location; when empty, MaggyConfig.resolve_bootstrap_path()
    # falls back to the ~/.claude/.bootstrap-dir marker file.
    path: str = ""


@dataclass
class ModelTierConfig:
    """One routing tier, built from an entry of ``routing.tiers``."""

    name: str = ""
    provider: str = ""
    model: str = ""
    # Two-element list, default [0, 10]; presumably [min, max] task complexity
    # handled by this tier (confirm against the router).
    complexity_range: list[int] = field(default_factory=lambda: [0, 10])
    strengths: list[str] = field(default_factory=list)
    cost_per_1k: float = 0.0  # presumably USD per 1k tokens (confirm)


@dataclass
class BudgetConfig:
    """The ``budget`` section: spend limits and per-provider budgets."""

    daily_limit_usd: float = 10.0
    max_spend_per_task: float = 5.0
    warning_threshold: float = 0.8  # presumably a fraction of the daily limit (confirm)
    plan: str = "daily"
    # String annotation because ProviderBudget is imported only under
    # TYPE_CHECKING at module level.
    providers: list["ProviderBudget"] = field(default_factory=list)


@dataclass
class RoutingConfig:
    """The ``routing`` section: routing mode and the configured model tiers."""

    mode: str = "dynamic"
    tiers: list[ModelTierConfig] = field(default_factory=list)


@dataclass
class MeshConfig:
    """The ``mesh`` section: peer-to-peer sharing settings (disabled by default)."""

    enabled: bool = False
    peer_id: str = ""
    port: int = 8080  # same default as DashboardConfig.port
    # Replaced by $MAGGY_MESH_SECRET when set (see _merge_env). Unlike the
    # token and api_key fields, save() does not strip this value.
    org_key_secret: str = ""
    orgs: list[str] = field(default_factory=list)
    exclude_orgs: list[str] = field(default_factory=list)
    manual_peers: list[str] = field(default_factory=list)
    tunnel_url: str = ""
    git_discovery: bool = True
    share_interval: int = 600  # presumably seconds (confirm)


@dataclass
class HeartbeatConfig:
    """The ``heartbeat`` section: toggle plus one interval per periodic job.

    NOTE(review): the interval unit is not stated in this module; the values
    look like seconds. Confirm against the scheduler.
    """

    enabled: bool = True
    history_interval: int = 1800
    engram_interval: int = 3600
    improve_interval: int = 3600
    mesh_interval: int = 300


@dataclass
class MaggyConfig:
    """Root configuration object: one field per top-level config.yaml section.

    Every field has a default, so ``MaggyConfig()`` is a valid empty config.
    """

    org: OrgConfig = field(default_factory=OrgConfig)
    issue_tracker: IssueTrackerConfig = field(default_factory=IssueTrackerConfig)
    codebases: list[CodebaseConfig] = field(default_factory=list)
    projects: list[ProjectConfig] = field(default_factory=list)
    competitors: CompetitorsConfig = field(default_factory=CompetitorsConfig)
    okrs: OKRConfig = field(default_factory=OKRConfig)
    ai: AIConfig = field(default_factory=AIConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)
    dashboard: DashboardConfig = field(default_factory=DashboardConfig)
    bootstrap: BootstrapConfig = field(default_factory=BootstrapConfig)
    budget: BudgetConfig = field(default_factory=BudgetConfig)
    routing: RoutingConfig = field(default_factory=RoutingConfig)
    mesh: MeshConfig = field(default_factory=MeshConfig)
    heartbeat: HeartbeatConfig = field(default_factory=HeartbeatConfig)

    def codebase_paths(self) -> dict[str, Path]:
        """Return {key: expanded_path} for all configured codebases."""
        return {c.key: Path(c.path).expanduser() for c in self.codebases}

    def resolve_bootstrap_path(self) -> Path | None:
        """Find Maggy install. Checks config, then ~/.claude/.bootstrap-dir.

        Returns:
            The ``bootstrap.path`` setting if non-empty; otherwise the path
            recorded in the marker file; ``None`` when neither yields a path.
        """
        if self.bootstrap.path:
            return Path(self.bootstrap.path).expanduser()
        marker = Path.home() / ".claude" / ".bootstrap-dir"
        # is_file() rather than exists(): a directory at this location would
        # make read_text() raise.
        if marker.is_file():
            recorded = marker.read_text().strip()
            # An empty marker would become Path(""), i.e. the current working
            # directory, so treat it as "no install recorded".
            if recorded:
                return Path(recorded).expanduser()
        return None


def _merge_env(cfg: MaggyConfig) -> MaggyConfig:
    """Override config with env vars where defined. Env wins over file."""
    cfg.issue_tracker.github.token = os.environ.get("GITHUB_TOKEN", cfg.issue_tracker.github.token)
    # Fall back to git credential helper if no env var
    if not cfg.issue_tracker.github.token:
        cfg.issue_tracker.github.token = _git_credential_token()
    cfg.issue_tracker.asana.token = os.environ.get("ASANA_API_KEY", cfg.issue_tracker.asana.token)
    cfg.issue_tracker.linear.token = os.environ.get("LINEAR_API_KEY", cfg.issue_tracker.linear.token)
    cfg.ai.api_key = os.environ.get("ANTHROPIC_API_KEY", cfg.ai.api_key)
    cfg.dashboard.api_key = os.environ.get("MAGGY_API_KEY", cfg.dashboard.api_key)
    cfg.mesh.org_key_secret = os.environ.get("MAGGY_MESH_SECRET", cfg.mesh.org_key_secret)
    return cfg


def _git_credential_token() -> str:
    """Return the GitHub token reported by the git credential helper.

    Thin wrapper that imports ``maggy.discovery`` at call time instead of
    at module import.
    """
    from maggy.discovery import discover_git_token

    token = discover_git_token()
    return token


def _from_dict(data: dict[str, Any]) -> MaggyConfig:
    """Build MaggyConfig from loaded YAML dict. Tolerates missing sections.

    A section that is absent or null is treated as empty, so its dataclass
    defaults apply. Unknown keys inside a section are passed straight to the
    dataclass constructor and therefore raise ``TypeError``.

    Args:
        data: Mapping produced by ``yaml.safe_load`` for config.yaml.

    Returns:
        A fully populated ``MaggyConfig`` (before environment overrides).
    """
    from maggy.budget import ProviderBudget

    it_raw = data.get("issue_tracker") or {}
    tracker = IssueTrackerConfig(
        provider=it_raw.get("provider", "github"),
        github=GitHubConfig(**(it_raw.get("github") or {})),
        asana=AsanaConfig(**(it_raw.get("asana") or {})),
        linear=LinearConfig(**(it_raw.get("linear") or {})),
    )

    okr_raw = data.get("okrs") or {}
    okrs = OKRConfig(
        source=okr_raw.get("source", "skip"),
        items=[OKRItem(**item) for item in (okr_raw.get("items") or [])],
    )

    routing_raw = data.get("routing") or {}
    routing = RoutingConfig(
        mode=routing_raw.get("mode", "dynamic"),
        tiers=[
            ModelTierConfig(**t)
            for t in (routing_raw.get("tiers") or [])
        ],
    )

    budget_raw = data.get("budget") or {}
    providers = [
        ProviderBudget(**item)
        for item in (budget_raw.get("providers") or [])
    ]
    # Read every BudgetConfig field. save() writes all of them, so skipping
    # `plan` or `max_spend_per_task` here would silently reset them to their
    # defaults on the next load.
    budget = BudgetConfig(
        daily_limit_usd=budget_raw.get("daily_limit_usd", 10.0),
        max_spend_per_task=budget_raw.get("max_spend_per_task", 5.0),
        warning_threshold=budget_raw.get("warning_threshold", 0.8),
        plan=budget_raw.get("plan", "daily"),
        providers=providers,
    )

    storage_raw = data.get("storage") or {}
    # Resolve the default lazily: _default_storage_path() creates and probes
    # the config directory, which is unwanted when a path is configured. The
    # `or` also covers a null or empty `path:` entry.
    storage_path = storage_raw.get("path") or _default_storage_path()

    return MaggyConfig(
        org=OrgConfig(**(data.get("org") or {})),
        issue_tracker=tracker,
        codebases=[CodebaseConfig(**c) for c in (data.get("codebases") or [])],
        projects=[ProjectConfig(**p) for p in (data.get("projects") or [])],
        competitors=CompetitorsConfig(**(data.get("competitors") or {})),
        okrs=okrs,
        ai=AIConfig(**(data.get("ai") or {})),
        storage=StorageConfig(
            backend=storage_raw.get("backend", "sqlite"),
            path=_safe_storage_path(storage_path),
        ),
        dashboard=DashboardConfig(**(data.get("dashboard") or {})),
        bootstrap=BootstrapConfig(**(data.get("bootstrap") or {})),
        budget=budget,
        routing=routing,
        mesh=MeshConfig(**(data.get("mesh") or {})),
        heartbeat=HeartbeatConfig(**(data.get("heartbeat") or {})),
    )


_CACHED: MaggyConfig | None = None


def _has_provider_credentials(cfg: MaggyConfig) -> bool:
    """Check if config has full provider credentials."""
    if cfg.issue_tracker.provider == "github":
        gh = cfg.issue_tracker.github
        return bool(gh.org and gh.repos and gh.token)
    if cfg.issue_tracker.provider == "asana":
        az = cfg.issue_tracker.asana
        return bool(az.workspace_id and az.token)
    return False


def _has_cli_history(
    home: Path | None = None,
) -> bool:
    """Check if any CLI data directories exist."""
    root = home or Path.home()
    for d in (".claude", ".codex", ".kimi"):
        if (root / d).exists():
            return True
    return False


def auto_configure(
    home: Path | None = None,
    persist: bool = True,
) -> MaggyConfig:
    """Create a config from whatever local discovery finds.

    Discovered repos become ``codebases``. When a GitHub org was inferred it
    is recorded together with the repos whose remote belongs to that org.
    The config is written to disk when ``persist`` is true, and environment
    overrides are applied to the returned object.
    """
    from maggy.discovery import full_discovery

    found = full_discovery(home)
    codebases = [
        CodebaseConfig(path=repo["path"], key=repo["key"])
        for repo in found.repos
    ]
    cfg = MaggyConfig(codebases=codebases)

    org = found.github_org
    if org:
        github = cfg.issue_tracker.github
        github.org = org
        # Auto-populate repos matching the primary org.
        github.repos = _repos_for_org(found.repos, org)

    if persist:
        save(cfg)
    return _merge_env(cfg)


def _repos_for_org(
    repos: list[dict], org: str,
) -> list[str]:
    """Return the ``key`` of every repo whose inferred GitHub org equals ``org``.

    The org is inferred per repo from its ``path`` via
    ``maggy.discovery.infer_github_org``. Input order is preserved.
    """
    from maggy.discovery import infer_github_org

    return [
        repo["key"]
        for repo in repos
        if infer_github_org(Path(repo["path"])) == org
    ]


def load(refresh: bool = False) -> MaggyConfig:
    """Return the config from ``CONFIG_PATH`` with env overrides applied.

    The result is cached at module level; pass ``refresh=True`` to rebuild
    it. A missing file yields the defaults, and an empty file is treated as
    an empty mapping.
    """
    global _CACHED
    if refresh or _CACHED is None:
        if CONFIG_PATH.exists():
            with open(CONFIG_PATH) as handle:
                raw = yaml.safe_load(handle) or {}
            cfg = _from_dict(raw)
        else:
            cfg = MaggyConfig()
        _CACHED = _merge_env(cfg)
    return _CACHED


def save(cfg: MaggyConfig) -> None:
    """Serialise ``cfg`` to ``CONFIG_PATH`` as YAML and drop the cache.

    Issue-tracker tokens and the ``ai``/``dashboard`` API keys are removed
    before writing, since those are expected to come from the environment.
    Key order follows the dataclass field order.
    """
    global _CACHED
    from dataclasses import asdict

    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    payload = asdict(cfg)

    # Don't persist tokens; those come from env.
    tracker = payload.get("issue_tracker", {})
    for provider in ("github", "asana", "linear"):
        tracker.get(provider, {}).pop("token", None)
    for section in ("ai", "dashboard"):
        payload.get(section, {}).pop("api_key", None)

    with open(CONFIG_PATH, "w") as handle:
        yaml.safe_dump(payload, handle, sort_keys=False)
    _CACHED = None  # force reload on next load()


def is_configured() -> bool:
    """Check if Maggy has enough to be useful.

    Full mode: a config file exists and holds complete provider credentials.
    Local mode: CLI history dirs exist (zero-config).
    """
    has_credentials = CONFIG_PATH.exists() and _has_provider_credentials(
        load(refresh=True)
    )
    return has_credentials or _has_cli_history()


================================================
FILE: maggy/maggy/contracts/__init__.py
================================================
"""Contracts exports."""

from .generator import ContractGenerator

__all__ = ["ContractGenerator"]


================================================
FILE: maggy/maggy/contracts/generator.py
================================================
"""Generate lightweight contract tests from postconditions."""

from __future__ import annotations

import re


class ContractGenerator:
    def from_postcondition(self, postcondition: str, symbol: str) -> str:
        test_name = _test_name(symbol)
        return (
            f"def {test_name}() -> None:\n"
            f'    """Contract for {symbol}."""\n'
            f"    # Postcondition: {postcondition}\n"
            f"    raise NotImplementedError("
            f"\"Verify: {postcondition}\")\n"
        )


def _test_name(symbol: str) -> str:
    short = symbol.split(".")[-2:]
    slug = "_".join(short).lower()
    slug = re.sub(r"[^a-z0-9_]+", "_", slug)
    return f"test_{slug}_contract"


================================================
FILE: maggy/maggy/coordination/__init__.py
================================================


================================================
FILE: maggy/maggy/coordination/lock_manager.py
================================================
"""SQLite-backed file locks for multi-agent coordination."""

from __future__ import annotations

import sqlite3
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Iterator

LOCK_TTL = timedelta(minutes=30)
SCHEMA = """
CREATE TABLE IF NOT EXISTS locks (
    file_path TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    acquired_at TEXT NOT NULL,
    expires_at TEXT NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_locks_file_path
    ON locks(file_path);
CREATE INDEX IF NOT EXISTS idx_locks_expires_at
    ON locks(expires_at);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class LockManager:
    """Per-file locks stored in a SQLite database, with a time-to-live.

    Each row in ``locks`` maps one ``file_path`` to the ``agent_id`` holding
    it. The unique index on ``file_path`` (see ``SCHEMA``) is what makes a
    lock exclusive. Every public method opens its own short-lived connection
    and first deletes rows whose ``expires_at`` has passed.
    """

    def __init__(self, db_path: Path):
        """Remember ``db_path`` and create the schema if it does not exist."""
        self._db_path = db_path
        self._init_db()

    def acquire(self, file_path: str, agent_id: str) -> bool:
        """Try to lock ``file_path`` for ``agent_id``.

        Returns:
            True if the lock was newly taken, or was already held by the
            same agent (its expiry is then refreshed). False if another
            agent holds it.
        """
        now, expires = _timestamps()
        with _connect(self._db_path) as conn:
            self._expire_locks(conn, now)
            try:
                conn.execute(
                    "INSERT INTO locks(file_path, agent_id, acquired_at, expires_at) "
                    "VALUES (?, ?, ?, ?)",
                    (file_path, agent_id, now, expires),
                )
                conn.commit()
                return True
            except sqlite3.IntegrityError:
                # Unique index hit: the path is already locked. Find out by whom.
                row = conn.execute(
                    "SELECT agent_id FROM locks WHERE file_path = ?",
                    (file_path,),
                ).fetchone()
                if row and row["agent_id"] == agent_id:
                    # Re-acquire by the current holder: extend the lease.
                    conn.execute(
                        "UPDATE locks SET acquired_at = ?, expires_at = ? "
                        "WHERE file_path = ?",
                        (now, expires, file_path),
                    )
                    conn.commit()
                    return True
                # NOTE(review): no commit on this path, so the expiry DELETE
                # above is presumably discarded when the connection closes.
                return False

    def release(self, file_path: str, agent_id: str) -> bool:
        """Drop ``agent_id``'s lock on ``file_path``.

        Returns:
            True if a row was deleted, False if the agent held no such lock.
        """
        with _connect(self._db_path) as conn:
            self._expire_locks(conn, _now())
            cur = conn.execute(
                "DELETE FROM locks WHERE file_path = ? AND agent_id = ?",
                (file_path, agent_id),
            )
            conn.commit()
        return cur.rowcount > 0

    def release_all(self, agent_id: str) -> int:
        """Drop every lock held by ``agent_id`` and return how many were deleted."""
        with _connect(self._db_path) as conn:
            self._expire_locks(conn, _now())
            cur = conn.execute("DELETE FROM locks WHERE agent_id = ?", (agent_id,))
            conn.commit()
        return cur.rowcount

    def conflicts(self, file_paths: list[str]) -> list[str]:
        """Return the subset of ``file_paths`` that is currently locked.

        The result keeps the order (and any duplicates) of the input. Locks
        held by any agent count, including the caller's own.
        """
        if not file_paths:
            return []
        # One "?" placeholder per path; the values are bound, not interpolated.
        marks = ", ".join("?" for _ in file_paths)
        with _connect(self._db_path) as conn:
            # NOTE(review): there is no commit in this method, so this DELETE
            # only affects the SELECT below and is presumably rolled back on
            # close (confirm).
            self._expire_locks(conn, _now())
            rows = conn.execute(
                f"SELECT file_path FROM locks WHERE file_path IN ({marks})",
                file_paths,
            ).fetchall()
        locked = {row["file_path"] for row in rows}
        return [path for path in file_paths if path in locked]

    def _expire_locks(self, conn: sqlite3.Connection, now: str) -> None:
        """Delete locks whose ``expires_at`` is at or before ``now`` (no commit)."""
        # Timestamps are ISO-8601 strings, so this is a text comparison.
        conn.execute("DELETE FROM locks WHERE expires_at <= ?", (now,))

    def _init_db(self) -> None:
        """Run ``SCHEMA``; every statement in it is ``IF NOT EXISTS``."""
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


def _timestamps() -> tuple[str, str]:
    """Return ``(now, now + LOCK_TTL)`` as ISO-8601 UTC strings.

    Both values derive from a single clock read, so they differ by exactly
    ``LOCK_TTL``.
    """
    acquired = datetime.now(timezone.utc)
    expiry = acquired + LOCK_TTL
    return acquired.isoformat(), expiry.isoformat()


================================================
FILE: maggy/maggy/deploy.py
================================================
"""Deploy orchestrator — manages Vercel session containers."""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone

logger = logging.getLogger(__name__)


@dataclass
class DeploySession:
    """Represents a running deploy session."""

    session_id: str  # DeployService.create_session uses the first 8 chars of a uuid4
    project: str
    branch: str
    status: str = "pending"  # pending | building | live | failed
    url: str = ""  # filled in by DeployService.update_status when a non-empty URL is given
    # ISO-8601 UTC timestamp taken when the instance is constructed.
    created_at: str = field(
        default_factory=lambda: datetime.now(
            timezone.utc
        ).isoformat()
    )


class DeployService:
    """In-memory registry of deploy sessions (stub for container orchestration).

    Sessions live in a dict keyed by session ID and are lost when the
    process exits.
    """

    def __init__(self):
        self._sessions: dict[str, DeploySession] = {}

    def create_session(
        self, project: str, branch: str,
    ) -> DeploySession:
        """Register and return a new session in the ``building`` state.

        The ID is the first eight hex digits of a random UUID.
        """
        import uuid

        sid = uuid.uuid4().hex[:8]
        created = DeploySession(
            session_id=sid, project=project, branch=branch, status="building",
        )
        self._sessions[sid] = created
        logger.info("Deploy session %s created for %s:%s", sid, project, branch)
        return created

    def get_session(self, sid: str) -> DeploySession | None:
        """Return the session with ID ``sid``, or None if unknown."""
        return self._sessions.get(sid)

    def list_sessions(self) -> list[DeploySession]:
        """Return all sessions as a new list, in insertion order."""
        return [*self._sessions.values()]

    def update_status(
        self, sid: str, status: str, url: str = "",
    ) -> DeploySession | None:
        """Set a session's status, and its URL when a non-empty one is given.

        Returns the updated session, or None if ``sid`` is unknown.
        """
        target = self._sessions.get(sid)
        if not target:
            return None
        target.status = status
        if url:
            target.url = url
        return target

    def teardown(self, sid: str) -> bool:
        """Forget session ``sid``; return whether it existed."""
        try:
            del self._sessions[sid]
        except KeyError:
            return False
        return True


================================================
FILE: maggy/maggy/discovery.py
================================================
"""Auto-discovery — detects local CLIs, repos, and dev environment."""

from __future__ import annotations

import json
import logging
import os
import shutil
import subprocess
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path

logger = logging.getLogger(__name__)

SCAN_DIRS = [
    "Documents", "dev", "projects", "code", "src",
    "workspace", "repos", "work",
]

CLI_NAMES = ["claude", "codex", "kimi"]


@dataclass
class DiscoveryResult:
    """Everything auto-discovered about the local env."""

    # CLI name -> executable path, for the CLIs found on PATH.
    clis: dict[str, str] = field(default_factory=dict)
    # CLI name -> whether stored auth was detected for it.
    cli_auth: dict[str, bool] = field(
        default_factory=dict,
    )
    # One dict per git repo found, with "path" and "key" entries.
    repos: list[dict] = field(default_factory=list)
    # Project names ranked by how often they appear in Claude's history.
    active_projects: list[str] = field(
        default_factory=list,
    )
    # Credential name -> whether it is available (values are never stored).
    tokens: dict[str, bool] = field(
        default_factory=dict,
    )
    # full_discovery() sets this to the first entry of github_orgs, or "".
    github_org: str = ""
    # Sorted, de-duplicated GitHub orgs inferred from the repos' remotes.
    github_orgs: list[str] = field(
        default_factory=list,
    )
    # ISO-8601 UTC time of the discovery run; "" until set by full_discovery().
    timestamp: str = ""


def discover_clis() -> dict[str, str]:
    """Map each name in ``CLI_NAMES`` that is on PATH to its executable path.

    Names that ``shutil.which`` cannot resolve are left out.
    """
    located = ((name, shutil.which(name)) for name in CLI_NAMES)
    return {name: path for name, path in located if path}


def discover_cli_auth() -> dict[str, bool]:
    """Report, per CLI, whether stored auth is present in the home directory.

    - claude: ``~/.claude/projects`` is a directory
    - codex: ``~/.codex/auth.json`` has a truthy ``tokens`` entry
    - kimi: ``~/.kimi/credentials`` is a non-empty directory
    """
    home = Path.home()
    kimi_creds = home / ".kimi" / "credentials"
    return {
        "claude": (home / ".claude" / "projects").is_dir(),
        "codex": _has_json_key(home / ".codex" / "auth.json", "tokens"),
        "kimi": kimi_creds.is_dir() and any(kimi_creds.iterdir()),
    }


def _has_json_key(path: Path, key: str) -> bool:
    """Check if JSON file exists and has a key."""
    if not path.exists():
        return False
    try:
        with open(path) as f:
            return bool(json.load(f).get(key))
    except (json.JSONDecodeError, OSError):
        return False


def discover_git_token() -> str:
    """Read GitHub token from git credential helper.

    Runs ``git credential fill`` for ``https://github.com`` and returns the
    value of the first ``password=`` line in its output.

    Returns:
        The stored password/token, or ``""`` when git is unavailable, times
        out after 5 seconds, or reports no password.
    """
    # With no stored credential git would otherwise fall back to prompting
    # on the terminal, stalling startup until the timeout.
    env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}
    try:
        result = subprocess.run(
            ["git", "credential", "fill"],
            input="protocol=https\nhost=github.com\n\n",
            capture_output=True, text=True, timeout=5,
            env=env,
        )
    except (subprocess.SubprocessError, OSError):
        return ""
    for line in result.stdout.splitlines():
        if line.startswith("password="):
            # Split once only: the token itself may contain "=".
            return line.split("=", 1)[1]
    return ""


def discover_repos(
    home: Path | None = None,
) -> list[dict]:
    """Look for git repos under the ``SCAN_DIRS`` folders of ``home``.

    ``home`` defaults to the user's home directory. Folders are visited in
    ``SCAN_DIRS`` order, scanning stops once 30 repos are collected, and at
    most 30 entries (dicts with ``path`` and ``key``) are returned.
    """
    base = home or Path.home()
    found: list[dict] = []
    for name in SCAN_DIRS:
        candidate = base / name
        if candidate.exists():
            _scan_dir(candidate, found, depth=0)
            if len(found) >= 30:
                break
    return found[:30]


def _scan_dir(
    parent: Path, repos: list[dict], depth: int,
) -> None:
    """Recursively scan for .git dirs up to depth 3."""
    if depth > 3 or len(repos) >= 30:
        return
    try:
        for child in sorted(parent.iterdir()):
            if not child.is_dir():
                continue
            if child.name.startswith("."):
                continue
            git_dir = child / ".git"
            if git_dir.is_dir():
                repos.append({
                    "path": str(child),
                    "key": child.name,
                })
            else:
                _scan_dir(child, repos, depth + 1)
    except PermissionError:
        pass


def discover_active_projects(
    claude_dir: Path | None = None,
) -> list[str]:
    """Rank projects by prompt count from Claude history.

    Reads ``history.jsonl`` in ``claude_dir`` (default ``~/.claude``), counts
    entries per project directory name, and returns up to 15 names, most
    frequent first.

    Malformed lines are skipped: invalid JSON, JSON that is not an object,
    and entries whose ``project`` is missing, empty or not a string. A
    missing or unreadable file yields ``[]``.
    """
    cdir = claude_dir or (Path.home() / ".claude")
    history = cdir / "history.jsonl"
    if not history.exists():
        return []

    from collections import Counter
    counts: Counter[str] = Counter()
    try:
        # Decode explicitly as UTF-8 (the platform default may differ) and
        # stream the file rather than loading it whole.
        with open(history, encoding="utf-8", errors="replace") as f:
            for raw in f:
                line = raw.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line may still be a list, number or string.
                if not isinstance(entry, dict):
                    continue
                project = entry.get("project", "")
                if not isinstance(project, str) or not project:
                    continue
                name = Path(project).name
                if name:
                    counts[name] += 1
    except OSError:
        return []

    return [p for p, _ in counts.most_common(15)]


def discover_env_tokens() -> dict[str, bool]:
    """Report which credentials are available, without exposing their values.

    Covers the ``GITHUB_TOKEN``, ``ANTHROPIC_API_KEY`` and ``ASANA_API_KEY``
    environment variables. Only when ``GITHUB_TOKEN`` is unset or empty is a
    ``GIT_CREDENTIAL`` entry added, saying whether the git credential helper
    can supply a token.
    """
    tokens = {
        name: bool(os.environ.get(name))
        for name in ("GITHUB_TOKEN", "ANTHROPIC_API_KEY", "ASANA_API_KEY")
    }
    if not tokens["GITHUB_TOKEN"]:
        # Fall back to git credential helper for GitHub.
        tokens["GIT_CREDENTIAL"] = bool(discover_git_token())
    return tokens


def infer_github_org(repo_path: Path) -> str:
    """Return the GitHub org in the ``origin`` remote URL of ``repo_path``.

    Runs ``git remote get-url origin`` in the repo with a 5 s timeout.
    Returns ``""`` when git cannot be run, times out, or the URL does not
    point at GitHub.
    """
    command = ["git", "remote", "get-url", "origin"]
    try:
        proc = subprocess.run(
            command,
            capture_output=True, text=True,
            cwd=str(repo_path), timeout=5,
        )
    except (subprocess.SubprocessError, OSError):
        return ""
    remote_url = proc.stdout.strip()
    return _parse_org_from_url(remote_url)


def _parse_org_from_url(url: str) -> str:
    """Extract org from GitHub URL."""
    if "github.com:" in url:
        parts = url.split("github.com:")[-1]
        return parts.split("/")[0]
    if "github.com/" in url:
        parts = url.split("github.com/")[-1]
        return parts.split("/")[0]
    return ""


def discover_all_orgs(repos: list[dict]) -> list[str]:
    """Return the distinct GitHub orgs of ``repos``, sorted alphabetically.

    Each repo's org is inferred from its ``path``; repos without a GitHub
    remote contribute nothing.
    """
    inferred = (infer_github_org(Path(repo["path"])) for repo in repos)
    return sorted({org for org in inferred if org})


def full_discovery(
    home: Path | None = None,
) -> DiscoveryResult:
    """Run every discovery step and bundle the findings.

    ``home`` is forwarded to the repo scan only; the other steps use their
    own defaults. ``github_org`` is the first of the sorted orgs found, or
    ``""`` when there are none.
    """
    found_clis = discover_clis()
    auth_state = discover_cli_auth()
    found_repos = discover_repos(home)
    ranked_projects = discover_active_projects()
    token_state = discover_env_tokens()
    orgs = discover_all_orgs(found_repos)
    primary_org = orgs[0] if orgs else ""
    stamp = datetime.now(timezone.utc).isoformat()

    return DiscoveryResult(
        clis=found_clis,
        cli_auth=auth_state,
        repos=found_repos,
        active_projects=ranked_projects,
        tokens=token_state,
        github_org=primary_org,
        github_orgs=orgs,
        timestamp=stamp,
    )


================================================
FILE: maggy/maggy/engram/__init__.py
================================================
"""Engram — cross-session persistent memory."""


================================================
FILE: maggy/maggy/engram/diagnostics.py
================================================
"""AmnesiaProfile — 7-dimension memory diagnostics."""

from __future__ import annotations

from dataclasses import dataclass

from .store import EngramStore


@dataclass
class AmnesiaProfile:
    """7-dimension memory health assessment.

    Holds overall record counts plus per-type counts, and derives a single
    health score from them.
    """

    total_memories: int = 0
    active_count: int = 0
    superseded_count: int = 0
    facts: int = 0
    decisions: int = 0
    code_refs: int = 0
    handoffs: int = 0

    @property
    def health_score(self) -> float:
        """Overall memory health in the range 0.0-1.0, rounded to 3 places.

        Weighted sum of the share of active memories (60%) and the share of
        the four memory types that have at least one entry (40%). An empty
        store scores 0.0.
        """
        if self.total_memories == 0:
            return 0.0
        per_type = (self.facts, self.decisions, self.code_refs, self.handoffs)
        populated = len([count for count in per_type if count > 0])
        diversity = populated / 4.0
        active_ratio = self.active_count / self.total_memories
        score = active_ratio * 0.6 + diversity * 0.4
        return round(score, 3)


def diagnose(
    store: EngramStore, namespace: str | None = None,
) -> AmnesiaProfile:
    """Summarise the records of ``store`` as an ``AmnesiaProfile``.

    Queries up to 10000 records (active or not), optionally restricted to
    ``namespace``. Totals cover every record returned; the per-type counts
    cover active records only.
    """
    records = store.query(
        namespace=namespace, active_only=False, limit=10000,
    )
    active = [record for record in records if record.is_active]

    def tally(kind: str) -> int:
        return sum(1 for record in active if record.memory_type == kind)

    total = len(records)
    live = len(active)
    return AmnesiaProfile(
        total_memories=total,
        active_count=live,
        superseded_count=total - live,
        facts=tally("fact"),
        decisions=tally("decision"),
        code_refs=tally("code_ref"),
        handoffs=tally("handoff"),
    )


================================================
FILE: maggy/maggy/engram/record.py
================================================
"""EngramRecord — the unit of persistent memory."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum


class Origin(str, Enum):
    """Where a memory came from. Members are ``str`` subclasses, so they compare equal to their plain string values."""

    EXPLICIT = "explicit"  # default origin of a new EngramRecord
    INFERRED = "inferred"
    MESH = "mesh"


class Validity(str, Enum):
    """Lifecycle state of a memory. Members are ``str`` subclasses, so they compare equal to their plain string values."""

    ACTIVE = "active"  # default state; the only one EngramRecord.is_active accepts
    SUPERSEDED = "superseded"  # set by EngramRecord.supersede()
    EXPIRED = "expired"


@dataclass
class EngramRecord:
    """A single unit of persistent memory.

    The first four fields are required; the rest have defaults.
    """

    engram_id: str
    namespace: str  # scope of the memory; retrieval describes it as project/session scope
    memory_type: str  # fact | decision | code_ref | handoff
    content: str
    origin: str = Origin.EXPLICIT  # an Origin member (a str subclass)
    validity: str = Validity.ACTIVE  # a Validity member (a str subclass)
    confidence: float = 1.0  # presumably in the range 0.0-1.0 (confirm)
    tags: list[str] = field(default_factory=list)
    source_task: str = ""
    # ISO-8601 UTC timestamp taken when the record object is constructed.
    created_at: str = field(
        default_factory=lambda: datetime.now(
            timezone.utc
        ).isoformat()
    )
    expires_at: str = ""  # empty by default; presumably means "never expires" (confirm)

    @property
    def is_active(self) -> bool:
        """True while ``validity`` equals ``Validity.ACTIVE``."""
        return self.validity == Validity.ACTIVE

    def supersede(self) -> None:
        """Mark this record as superseded (in memory only; nothing is persisted here)."""
        self.validity = Validity.SUPERSEDED


================================================
FILE: maggy/maggy/engram/retrieval.py
================================================
"""Multi-path retrieval for Engram records."""

from __future__ import annotations

from .record import EngramRecord
from .store import EngramStore


class EngramRetrieval:
    """Read-side lookup helpers layered over an EngramStore.

    Every method delegates to ``store.query``. Namespace and type
    lookups are filtered by the store; keyword and tag lookups fetch up
    to 1000 records and filter them in Python.
    """

    def __init__(self, store: EngramStore):
        self._store = store

    def by_namespace(
        self, namespace: str, limit: int = 50,
    ) -> list[EngramRecord]:
        """Return up to ``limit`` records from one namespace."""
        return self._store.query(namespace=namespace, limit=limit)

    def by_type(
        self, memory_type: str, limit: int = 50,
    ) -> list[EngramRecord]:
        """Return up to ``limit`` records of one memory type."""
        return self._store.query(memory_type=memory_type, limit=limit)

    def by_keyword(
        self, keyword: str, namespace: str | None = None,
        limit: int = 50,
    ) -> list[EngramRecord]:
        """Case-insensitive substring search over record content.

        Only the (up to) 1000 records returned by the store are
        scanned; the first ``limit`` matches are returned.
        """
        candidates = self._store.query(namespace=namespace, limit=1000)
        hits: list[EngramRecord] = []
        for record in candidates:
            if keyword.lower() in record.content.lower():
                hits.append(record)
        return hits[:limit]

    def by_tag(
        self, tag: str, namespace: str | None = None,
        limit: int = 50,
    ) -> list[EngramRecord]:
        """Return records whose tag list contains ``tag`` exactly.

        Only the (up to) 1000 records returned by the store are
        scanned; the first ``limit`` matches are returned.
        """
        candidates = self._store.query(namespace=namespace, limit=1000)
        hits: list[EngramRecord] = []
        for record in candidates:
            if tag in record.tags:
                hits.append(record)
        return hits[:limit]

    def recent(self, limit: int = 20) -> list[EngramRecord]:
        """Return up to ``limit`` active records with no namespace filter."""
        return self._store.query(active_only=True, limit=limit)


================================================
FILE: maggy/maggy/engram/seed.py
================================================
"""Seed engrams on first boot for non-zero health."""

from __future__ import annotations

from .record import EngramRecord
from .store import EngramStore

# Bootstrap engrams as (engram_id, memory_type, content) tuples.
# seed_if_empty writes the entries whose memory_type is missing from the
# store, always under the "system" namespace and tagged "seed".
_SEEDS = [
    ("seed-fact-1", "fact", "Maggy uses blast-score routing "
     "to pick the optimal model per task."),
    ("seed-fact-2", "fact", "Quality gates: max 20 lines/fn, "
     "3 params, 2 nesting, 200 lines/file."),
    ("seed-decision-1", "decision", "TDD workflow: RED "
     "(failing tests) -> GREEN (pass) -> VALIDATE."),
    ("seed-decision-2", "decision", "Local Qwen3-Coder "
     "handles blast 0-5; Claude handles 5-10."),
    ("seed-coderef-1", "code_ref",
     "Routing tiers: process/model_router.py DEFAULT_TIERS"),
    ("seed-coderef-2", "code_ref",
     "Chat REPL: cli_chat.py _repl_loop"),
    ("seed-handoff-1", "handoff", "System initialized. "
     "Memory will grow as tasks are completed."),
]

# Memory types that seed_if_empty expects to find among the store's
# active records; any type not found gets its seeds written.
_REQUIRED_TYPES = {"fact", "decision", "code_ref", "handoff"}


def seed_if_empty(store: EngramStore) -> None:
    """Write seed engrams for every required type with no active record.

    Each type in ``_REQUIRED_TYPES`` is probed on its own with
    ``limit=1``. A single capped scan of the newest records would
    report a type as missing whenever its records are older than the
    cap, and the fixed-id seeds would then be rewritten (the store
    replaces rows with the same engram_id).

    Args:
        store: Store to inspect and, if needed, seed. Seeds are written
            under the "system" namespace with the "seed" tag.
    """
    missing = {
        mtype
        for mtype in _REQUIRED_TYPES
        if not store.query(
            memory_type=mtype, active_only=True, limit=1,
        )
    }
    if not missing:
        return
    for eid, mtype, content in _SEEDS:
        if mtype in missing:
            store.write(EngramRecord(
                engram_id=eid,
                namespace="system",
                memory_type=mtype,
                content=content,
                tags=["seed"],
            ))


================================================
FILE: maggy/maggy/engram/store.py
================================================
"""SQLite store for Engram records with namespace isolation."""

from __future__ import annotations

import json
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator

from .record import EngramRecord

# DDL run by EngramStore.__init__: creates the ``engrams`` table (keyed
# by engram_id) plus indexes on namespace, memory_type and validity.
# Every statement uses IF NOT EXISTS, so re-running it on an existing
# database changes nothing. ``tags`` is a TEXT column; EngramStore
# writes it with json.dumps and reads it back with json.loads.
SCHEMA = """
CREATE TABLE IF NOT EXISTS engrams (
    engram_id TEXT PRIMARY KEY,
    namespace TEXT NOT NULL,
    memory_type TEXT NOT NULL,
    content TEXT NOT NULL,
    origin TEXT NOT NULL DEFAULT 'explicit',
    validity TEXT NOT NULL DEFAULT 'active',
    confidence REAL NOT NULL DEFAULT 1.0,
    tags TEXT NOT NULL DEFAULT '[]',
    source_task TEXT NOT NULL DEFAULT '',
    created_at TEXT NOT NULL,
    expires_at TEXT NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS idx_engram_ns
    ON engrams(namespace);
CREATE INDEX IF NOT EXISTS idx_engram_type
    ON engrams(memory_type);
CREATE INDEX IF NOT EXISTS idx_engram_validity
    ON engrams(validity);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class EngramStore:
    """Persistence layer for EngramRecord objects in a SQLite file.

    Every operation opens its own short-lived connection through
    ``_connect``. The table and indexes described by ``SCHEMA`` are
    created when the store is constructed.
    """

    def __init__(self, db_path: Path):
        self._db_path = db_path
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)

    def write(self, record: EngramRecord) -> None:
        """Insert ``record``, replacing any row with the same engram_id.

        Values are bound positionally in the column order declared in
        ``SCHEMA``; ``tags`` is stored as a JSON string.
        """
        with _connect(self._db_path) as conn:
            values = (
                record.engram_id,
                record.namespace,
                record.memory_type,
                record.content,
                record.origin,
                record.validity,
                record.confidence,
                json.dumps(record.tags),
                record.source_task,
                record.created_at,
                record.expires_at,
            )
            conn.execute(
                "INSERT OR REPLACE INTO engrams "
                "VALUES (?,?,?,?,?,?,?,?,?,?,?)",
                values,
            )
            conn.commit()

    def get(
        self, engram_id: str,
    ) -> EngramRecord | None:
        """Return the record with this id, or None when there is none."""
        with _connect(self._db_path) as conn:
            cursor = conn.execute(
                "SELECT * FROM engrams "
                "WHERE engram_id=?",
                (engram_id,),
            )
            row = cursor.fetchone()
        if row:
            return self._row_to_record(row)
        return None

    def query(
        self,
        namespace: str | None = None,
        memory_type: str | None = None,
        active_only: bool = True,
        limit: int = 100,
    ) -> list[EngramRecord]:
        """Return up to ``limit`` records, newest ``created_at`` first.

        Args:
            namespace: Exact namespace to match; falsy values (None or
                "") apply no namespace filter.
            memory_type: Exact memory type to match; falsy values apply
                no type filter.
            active_only: When true, keep only rows whose validity is
                'active'.
            limit: Maximum number of rows returned.
        """
        filters: list[str] = []
        args: list = []
        if namespace:
            filters.append("namespace = ?")
            args.append(namespace)
        if memory_type:
            filters.append("memory_type = ?")
            args.append(memory_type)
        if active_only:
            filters.append("validity = 'active'")

        where = ""
        if filters:
            where = f"WHERE {' AND '.join(filters)}"

        with _connect(self._db_path) as conn:
            cursor = conn.execute(
                f"SELECT * FROM engrams {where} "
                f"ORDER BY created_at DESC LIMIT ?",
                args + [limit],
            )
            rows = cursor.fetchall()

        records: list[EngramRecord] = []
        for row in rows:
            records.append(self._row_to_record(row))
        return records

    def count(
        self, namespace: str | None = None,
    ) -> int:
        """Count all rows (any validity), optionally for one namespace."""
        with _connect(self._db_path) as conn:
            if not namespace:
                cursor = conn.execute(
                    "SELECT COUNT(*) FROM engrams",
                )
            else:
                cursor = conn.execute(
                    "SELECT COUNT(*) FROM engrams "
                    "WHERE namespace = ?",
                    (namespace,),
                )
            row = cursor.fetchone()
        return int(row[0])

    def _row_to_record(
        self, r: sqlite3.Row,
    ) -> EngramRecord:
        """Convert a database row into an EngramRecord.

        Columns map one-to-one onto fields, except ``tags``, which is
        decoded from its JSON text form.
        """
        passthrough = (
            "engram_id", "namespace", "memory_type", "content",
            "origin", "validity", "confidence",
            "source_task", "created_at", "expires_at",
        )
        values = {column: r[column] for column in passthrough}
        values["tags"] = json.loads(r["tags"])
        return EngramRecord(**values)


================================================
FILE: maggy/maggy/escalation/__init__.py
================================================


================================================
FILE: maggy/maggy/escalation/protocol.py
================================================
"""Human escalation packets with SQLite persistence."""

from __future__ import annotations

import json
import sqlite3
import uuid
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator

# DDL run by Escalator._init_db: creates the ``escalations`` table and
# an index on (resolved, created_at), the columns list_pending filters
# and orders by. ``context``, ``agent_state`` and ``suggested_actions``
# hold JSON text; ``resolved`` holds 0 or 1. IF NOT EXISTS makes the
# script safe to re-run.
SCHEMA = """
CREATE TABLE IF NOT EXISTS escalations (
    id TEXT PRIMARY KEY,
    session_id TEXT NOT NULL,
    reason TEXT NOT NULL,
    context TEXT NOT NULL,
    agent_state TEXT NOT NULL,
    suggested_actions TEXT NOT NULL,
    created_at TEXT NOT NULL,
    resolved INTEGER NOT NULL,
    resolution TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_escalations_pending
    ON escalations(resolved, created_at);
"""


@dataclass
class EscalationPacket:
    """One escalation, mirroring a row of the ``escalations`` table.

    Field order matches the table's column order, which the positional
    INSERT in ``Escalator.escalate`` depends on.
    """

    id: str  # UUID4 string assigned by _build_packet
    session_id: str
    reason: str
    context: dict[str, object]  # stored as JSON text
    agent_state: dict[str, object]  # taken from context["agent_state"] when it is a dict
    suggested_actions: list[str]  # string items of context["suggested_actions"]
    created_at: str  # UTC timestamp in ISO-8601 form
    resolved: bool  # stored as integer 0/1
    resolution: str  # guidance text; "" until resolved


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class Escalator:
    """Create, resolve and look up escalation packets in SQLite.

    Each method opens its own short-lived connection via ``_connect``.
    The schema is created when the instance is constructed.
    """

    def __init__(self, db_path: Path):
        self._db_path = db_path
        self._init_db()

    def escalate(
        self, session_id: str, reason: str, context: dict[str, object]
    ) -> EscalationPacket:
        """Persist a new, unresolved escalation and return its packet."""
        packet = _build_packet(session_id, reason, context)
        with _connect(self._db_path) as conn:
            row_values = _serialize(packet)
            conn.execute(
                "INSERT INTO escalations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                row_values,
            )
            conn.commit()
        return packet

    def resolve(self, escalation_id: str, guidance: str) -> EscalationPacket:
        """Mark an escalation resolved and store the guidance text.

        Returns:
            The packet as re-read from the database after the update.

        Raises:
            KeyError: No escalation with ``escalation_id`` exists.
        """
        with _connect(self._db_path) as conn:
            conn.execute(
                "UPDATE escalations SET resolved = 1, resolution = ? WHERE id = ?",
                (guidance, escalation_id),
            )
            conn.commit()
            cursor = conn.execute(
                "SELECT * FROM escalations WHERE id = ?",
                (escalation_id,),
            )
            row = cursor.fetchone()
        if row:
            return _from_row(row)
        raise KeyError(escalation_id)

    def list_pending(self) -> list[EscalationPacket]:
        """Return every unresolved escalation, ordered by created_at."""
        with _connect(self._db_path) as conn:
            cursor = conn.execute(
                "SELECT * FROM escalations WHERE resolved = 0 ORDER BY created_at",
            )
            rows = cursor.fetchall()
        pending: list[EscalationPacket] = []
        for row in rows:
            pending.append(_from_row(row))
        return pending

    def get(self, escalation_id: str) -> EscalationPacket | None:
        """Return the escalation with this id, or None if absent."""
        with _connect(self._db_path) as conn:
            cursor = conn.execute(
                "SELECT * FROM escalations WHERE id = ?",
                (escalation_id,),
            )
            row = cursor.fetchone()
        if row:
            return _from_row(row)
        return None

    def _init_db(self) -> None:
        """Create the table and index from ``SCHEMA`` if missing."""
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)


def _build_packet(
    session_id: str, reason: str, context: dict[str, object]
) -> EscalationPacket:
    """Assemble a fresh, unresolved EscalationPacket.

    The packet gets a new UUID4 id and the current UTC time as its
    creation timestamp. ``agent_state`` and ``suggested_actions`` are
    extracted from ``context`` (falling back to empty values when the
    entries are missing or of the wrong type); ``context`` itself is
    stored unchanged.
    """
    packet_id = str(uuid.uuid4())
    state = _dict_field(context, "agent_state")
    actions = _list_field(context, "suggested_actions")
    stamp = datetime.now(timezone.utc).isoformat()
    return EscalationPacket(
        id=packet_id,
        session_id=session_id,
        reason=reason,
        context=context,
        agent_state=state,
        suggested_actions=actions,
        created_at=stamp,
        resolved=False,
        resolution="",
    )


def _dict_field(context: dict[str, object], key: str) -> dict[str, object]:
    value = context.get(key, {})
    return value if isinstance(value, dict) else {}


def _list_field(context: dict[str, object], key: str) -> list[str]:
    value = context.get(key, [])
    return [item for item in value if isinstance(item, str)] if isinstance(value, list) else []


def _serialize(packet: EscalationPacket) -> tuple[object, ...]:
    return (
        packet.id,
        packet.session_id,
        packet.reason,
        json.dumps(packet.context),
        json.dumps(packet.agent_state),
        json.dumps(packet.suggested_actions),
        packet.created_at,
        int(packet.resolved),
        packet.resolution,
    )


def _safe_json(raw: str, fallback: object) -> object:
    try:
        return json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return fallback


def _from_row(row: sqlite3.Row) -> EscalationPacket:
    """Rebuild an EscalationPacket from an ``escalations`` row.

    The three JSON text columns are decoded with ``_safe_json`` (an
    empty dict or list is used when decoding fails) and the integer
    ``resolved`` column is converted to bool.
    """
    decoded = {
        "context": _safe_json(row["context"], {}),
        "agent_state": _safe_json(row["agent_state"], {}),
        "suggested_actions": _safe_json(row["suggested_actions"], []),
    }
    is_resolved = bool(row["resolved"])
    return EscalationPacket(
        id=row["id"],
        session_id=row["session_id"],
        reason=row["reason"],
        created_at=row["created_at"],
        resolved=is_resolved,
        resolution=row["resolution"],
        **decoded,
    )


================================================
FILE: maggy/maggy/event_spine/__init__.py
================================================
"""Event Spine — canonical event flow for end-to-end tracing."""

from .emitter import EventEmitter
from .header import EventHeader

__all__ = ["EventEmitter", "EventHeader"]


================================================
FILE: maggy/maggy/event_spine/emitter.py
================================================
"""Event emitter — write, query, and trace events."""

from __future__ import annotations

import logging
from dataclasses import asdict

from .header import EventHeader
from .store import EventStore

# Module-level logger; EventEmitter.emit logs each emitted event at DEBUG.
logger = logging.getLogger(__name__)


class EventEmitter:
    """Facade over an EventStore for emitting and querying events."""

    def __init__(self, store: EventStore):
        self._store = store

    def emit(self, event: object) -> str:
        """Persist ``event`` and return its event_id.

        The event must expose a ``header`` attribute that is an
        EventHeader; the whole event (header included) is converted
        with ``dataclasses.asdict`` and stored as the payload.

        Raises:
            ValueError: ``event`` has no EventHeader in ``header``.
        """
        header = getattr(event, "header", None)
        if isinstance(header, EventHeader):
            payload = asdict(event)
            self._store.write(header, payload)
            logger.debug(
                "Event %s emitted: %s",
                header.event_type, header.event_id,
            )
            return header.event_id
        raise ValueError("Event must have an EventHeader")

    def query(
        self,
        task_id: str | None = None,
        event_type: str | None = None,
        project_id: str | None = None,
        limit: int = 100,
    ) -> list[dict]:
        """Return stored event payloads matching the given filters.

        All filters are forwarded unchanged to the store.
        """
        filters = {
            "task_id": task_id,
            "event_type": event_type,
            "project_id": project_id,
            "limit": limit,
        }
        return self._store.query(**filters)

    def trace(self, task_id: str) -> list[dict]:
        """Return the events of one task (at most 10000 payloads)."""
        return self._store.query(task_id=task_id, limit=10000)

    def count(
        self,
        event_type: str | None = None,
        project_id: str | None = None,
    ) -> int:
        """Return the number of stored events matching the filters."""
        filters = {"event_type": event_type, "project_id": project_id}
        return self._store.count(**filters)


================================================
FILE: maggy/maggy/event_spine/events.py
================================================
"""Eight typed event dataclasses for the Event Spine."""

from __future__ import annotations

from dataclasses import dataclass, field

from .header import EventHeader


@dataclass
class IntentEvent:
    """Event for an iCPG ReasonNode decomposition.

    Holds the intent text, the id of the reason node and the list of
    decomposed steps. When no header is supplied, a new EventHeader
    with event_type "intent" is created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="intent"),
    )
    intent_text: str = ""
    reason_node_id: str = ""
    decomposed_steps: list[str] = field(default_factory=list)


@dataclass
class BindingEvent:
    """Event for a Lexon tool selection and its clarify mode.

    Holds the phrase, the selected tool, the candidate tools and the
    clarify mode. When no header is supplied, a new EventHeader with
    event_type "binding" is created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="binding"),
    )
    phrase: str = ""
    selected_tool: str = ""
    candidates: list[str] = field(default_factory=list)
    clarify_mode: str = ""  # self_clarify | user_clarify


@dataclass
class ExecutionEvent:
    """Event for one tool invocation.

    Holds the tool name, summaries of its input and output, the
    duration in milliseconds and a success flag. When no header is
    supplied, a new EventHeader with event_type "execution" is created
    per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="execution"),
    )
    tool_name: str = ""
    input_summary: str = ""
    output_summary: str = ""
    duration_ms: int = 0
    success: bool = True


@dataclass
class MemoryEvent:
    """Event for a Mnemos within-task memory write.

    Holds the memory type, the written content and a node id. When no
    header is supplied, a new EventHeader with event_type "memory" is
    created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="memory"),
    )
    memory_type: str = ""  # fact | decision | code_ref | handoff
    content: str = ""
    node_id: str = ""


@dataclass
class PersistenceEvent:
    """Event for an Engram cross-session promotion.

    Holds the engram id, its memory type and content, and the source
    and target namespaces. When no header is supplied, a new
    EventHeader with event_type "persistence" is created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="persistence"),
    )
    engram_id: str = ""
    memory_type: str = ""
    content: str = ""
    source_namespace: str = ""
    target_namespace: str = ""


@dataclass
class OutcomeEvent:
    """Event for a Process Intelligence outcome.

    Holds a success flag, a reward value and a free-form metrics dict.
    When no header is supplied, a new EventHeader with event_type
    "outcome" is created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="outcome"),
    )
    success: bool = True
    reward: float = 0.0
    metrics: dict = field(default_factory=dict)


@dataclass
class MutationEvent:
    """Event for an L2/L3/L4 self-modification.

    Holds the control level, the modified target, its old and new
    values and the reason. When no header is supplied, a new
    EventHeader with event_type "mutation" is created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="mutation"),
    )
    control_level: str = ""  # L2 | L3 | L4
    target: str = ""
    old_value: str = ""
    new_value: str = ""
    reason: str = ""


@dataclass
class MeshEvent:
    """Event for cross-machine sharing and quarantine status.

    Holds the peer's id and name, the action taken, the memory type
    and a content key. When no header is supplied, a new EventHeader
    with event_type "mesh" is created per instance.
    """

    header: EventHeader = field(
        default_factory=lambda: EventHeader(event_type="mesh"),
    )
    peer_id: str = ""
    peer_name: str = ""
    action: str = ""  # share | receive | quarantine | promote
    memory_type: str = ""
    content_key: str = ""


# Lookup from an event_type string to its event dataclass. Each key is
# the same string the class passes to EventHeader in its default header.
EVENT_TYPES = {
    "intent": IntentEvent,
    "binding": BindingEvent,
    "execution": ExecutionEvent,
    "memory": MemoryEvent,
    "persistence": PersistenceEvent,
    "outcome": OutcomeEvent,
    "mutation": MutationEvent,
    "mesh": MeshEvent,
}


================================================
FILE: maggy/maggy/event_spine/header.py
================================================
"""Common EventHeader shared by all typed events."""

from __future__ import annotations

import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone


def _uuid() -> str:
    return str(uuid.uuid4())


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


@dataclass
class EventHeader:
    """Standard fields for every event in the spine.

    Only ``event_type`` is required. ``event_id`` defaults to a fresh
    UUID4 string and ``timestamp`` to the current UTC time in ISO-8601
    form; both are generated per instance.
    """

    event_type: str
    event_id: str = field(default_factory=_uuid)  # unique per event
    task_id: str = ""
    project_id: str = ""
    agent_id: str = ""
    model_id: str = ""
    parent_event_id: str = ""  # "" when the event has no parent
    confidence: float = 1.0
    namespace: str = ""
    policy_version: str = ""
    reward_delta: float = 0.0
    timestamp: str = field(default_factory=_now)  # ISO-8601, UTC
    schema_version: int = 1


================================================
FILE: maggy/maggy/event_spine/store.py
================================================
"""SQLite event store — append-only with archive support."""

from __future__ import annotations

import gzip
import json
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator

from .header import EventHeader

# DDL run by EventStore._init_db: creates the ``events`` table with a
# unique event_id, a handful of header fields as filterable columns and
# the JSON-encoded event in ``payload``, plus indexes on task_id,
# event_type, project_id and timestamp. IF NOT EXISTS makes the script
# safe to re-run.
SCHEMA = """
CREATE TABLE IF NOT EXISTS events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    event_id TEXT UNIQUE NOT NULL,
    event_type TEXT NOT NULL,
    task_id TEXT NOT NULL DEFAULT '',
    project_id TEXT NOT NULL DEFAULT '',
    agent_id TEXT NOT NULL DEFAULT '',
    model_id TEXT NOT NULL DEFAULT '',
    parent_event_id TEXT NOT NULL DEFAULT '',
    timestamp TEXT NOT NULL,
    payload TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_events_task
    ON events(task_id);
CREATE INDEX IF NOT EXISTS idx_events_type
    ON events(event_type);
CREATE INDEX IF NOT EXISTS idx_events_project
    ON events(project_id);
CREATE INDEX IF NOT EXISTS idx_events_ts
    ON events(timestamp);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class EventStore:
    """SQLite-backed event log with filtering and archival.

    Rows are only ever inserted by ``write`` and removed by
    ``archive_old``. Each method opens its own short-lived connection
    through ``_connect``; the schema is created on construction.
    """

    def __init__(self, db_path: Path):
        self._db_path = db_path
        self._init_db()

    def _init_db(self) -> None:
        """Create the table and indexes from ``SCHEMA`` if missing."""
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)

    def write(
        self, header: EventHeader, payload: dict,
    ) -> None:
        """Append an event.

        Selected header fields are stored as columns for filtering and
        ``payload`` is stored as JSON text. An event whose ``event_id``
        already exists is silently ignored (INSERT OR IGNORE).
        """
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT OR IGNORE INTO events "
                "(event_id, event_type, task_id, "
                "project_id, agent_id, model_id, "
                "parent_event_id, timestamp, payload) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    header.event_id, header.event_type,
                    header.task_id, header.project_id,
                    header.agent_id, header.model_id,
                    header.parent_event_id,
                    header.timestamp,
                    json.dumps(payload),
                ),
            )
            conn.commit()

    def query(
        self,
        task_id: str | None = None,
        event_type: str | None = None,
        project_id: str | None = None,
        limit: int = 100,
    ) -> list[dict]:
        """Return decoded payloads of matching events, oldest first.

        Falsy filter values (None or "") apply no filter for that
        column. At most ``limit`` payloads are returned, ordered by
        ascending timestamp.
        """
        clauses: list[str] = []
        params: list = []
        if task_id:
            clauses.append("task_id = ?")
            params.append(task_id)
        if event_type:
            clauses.append("event_type = ?")
            params.append(event_type)
        if project_id:
            clauses.append("project_id = ?")
            params.append(project_id)

        where = (
            f"WHERE {' AND '.join(clauses)}"
            if clauses else ""
        )
        sql = (
            f"SELECT payload FROM events {where} "
            f"ORDER BY timestamp ASC LIMIT ?"
        )
        params.append(limit)

        with _connect(self._db_path) as conn:
            rows = conn.execute(sql, params).fetchall()
        return [json.loads(r["payload"]) for r in rows]

    def count(
        self,
        event_type: str | None = None,
        project_id: str | None = None,
    ) -> int:
        """Count events matching the filters (falsy values = no filter)."""
        clauses: list[str] = []
        params: list[str] = []
        if event_type:
            clauses.append("event_type = ?")
            params.append(event_type)
        if project_id:
            clauses.append("project_id = ?")
            params.append(project_id)

        where = (
            f"WHERE {' AND '.join(clauses)}"
            if clauses else ""
        )
        with _connect(self._db_path) as conn:
            row = conn.execute(
                f"SELECT COUNT(*) FROM events {where}",
                params,
            ).fetchone()
        return int(row[0])

    def archive_old(
        self,
        days: int = 90,
        archive_dir: Path | None = None,
    ) -> int:
        """Move events older than ``days`` days into a gzip JSONL file.

        Matching payloads are written one per line to
        ``events_<cutoff date>.jsonl.gz`` and then deleted from the
        database. The rows are deleted only after the archive file has
        been written and closed.

        Args:
            days: Age threshold; events whose timestamp string sorts
                before "now minus ``days``" (UTC, ISO-8601) are moved.
            archive_dir: Target directory; defaults to an
                ``events_archive`` folder next to the database file.

        Returns:
            The number of events archived (0 when none qualified, in
            which case no file is created).
        """
        from datetime import datetime, timedelta, timezone
        cutoff = (
            datetime.now(timezone.utc)
            - timedelta(days=days)
        ).isoformat()

        with _connect(self._db_path) as conn:
            rows = conn.execute(
                "SELECT payload FROM events "
                "WHERE timestamp < ?",
                (cutoff,),
            ).fetchall()

            if not rows:
                return 0

            out_dir = archive_dir or (
                self._db_path.parent / "events_archive"
            )
            out_dir.mkdir(parents=True, exist_ok=True)
            archive_file = (
                out_dir / f"events_{cutoff[:10]}.jsonl.gz"
            )

            # Append, never truncate: the file name only carries the
            # cutoff date, so a second run on the same day targets the
            # same file. Opening with "wt" would wipe events that were
            # archived (and already deleted from the DB) earlier that
            # day. Appending adds another gzip member, which readers
            # decode as one continuous stream.
            with gzip.open(
                archive_file, "at", encoding="utf-8",
            ) as f:
                for r in rows:
                    f.write(r["payload"] + "\n")

            conn.execute(
                "DELETE FROM events WHERE timestamp < ?",
                (cutoff,),
            )
            conn.commit()

        return len(rows)


================================================
FILE: maggy/maggy/fatigue.py
================================================
"""Model-normalized fatigue tracking for cross-model sessions.

Normalizes fatigue scores across models with different context windows
so that 0.6 means "approaching limit" regardless of model.
"""

from __future__ import annotations

from dataclasses import dataclass


@dataclass
class FatigueProfile:
    """Per-model fatigue state tracked over a session.

    Combines how much of the model's context window has been consumed
    with how many turns have elapsed.
    """

    model: str
    context_window: int
    tokens_used: int = 0
    turns: int = 0
    recovery_reads: int = 0

    @property
    def raw_utilization(self) -> float:
        """Fraction of the context window in use, capped at 1.0.

        Returns 0.0 when ``context_window`` is zero or negative.
        """
        if self.context_window > 0:
            return min(self.tokens_used / self.context_window, 1.0)
        return 0.0

    @property
    def fatigue_score(self) -> float:
        """Fatigue score, capped at 1.0; higher means more fatigued.

        Weighted sum of context utilization (0.7) and a turn factor
        (0.3) that reaches its maximum of 1.0 at 50 turns.
        """
        weighted_context = self.raw_utilization * 0.7
        weighted_turns = min(self.turns / 50.0, 1.0) * 0.3
        return min(weighted_context + weighted_turns, 1.0)

    def should_checkpoint(self, threshold: float = 0.6) -> bool:
        """True once the fatigue score has reached ``threshold``."""
        score = self.fatigue_score
        return score >= threshold


# Context window size per model key, in the same unit as
# FatigueProfile.tokens_used. create_profile looks keys up by exact
# match and falls back to 128_000 for anything not listed here.
MODEL_CONTEXT_WINDOWS: dict[str, int] = {
    "claude": 200_000,
    "gpt": 128_000,
    "kimi": 128_000,
    "deepseek": 128_000,
    "codex": 200_000,
    "local": 32_000,
}


def create_profile(model: str) -> FatigueProfile:
    """Return a fresh FatigueProfile for ``model``.

    The context window comes from ``MODEL_CONTEXT_WINDOWS`` (exact key
    match); models not listed there get a 128_000 window.
    """
    if model in MODEL_CONTEXT_WINDOWS:
        window = MODEL_CONTEXT_WINDOWS[model]
    else:
        window = 128_000
    return FatigueProfile(model=model, context_window=window)


def compare_fatigue(
    profiles: list[FatigueProfile],
) -> list[dict]:
    """Summarize profiles as dicts, most fatigued first.

    Each dict carries the model name, the fatigue score and raw
    utilization (both rounded to three decimals), the turn count and
    whether the profile should checkpoint at the default threshold.
    """
    ranked = sorted(
        profiles, key=lambda prof: prof.fatigue_score, reverse=True,
    )
    report: list[dict] = []
    for prof in ranked:
        report.append({
            "model": prof.model,
            "fatigue": round(prof.fatigue_score, 3),
            "utilization": round(prof.raw_utilization, 3),
            "turns": prof.turns,
            "should_checkpoint": prof.should_checkpoint(),
        })
    return report


================================================
FILE: maggy/maggy/forge/__init__.py
================================================
"""MCP Forge integration — bridge to mcp-forge pipeline."""


================================================
FILE: maggy/maggy/forge/connector.py
================================================
"""Bridge to mcp-forge — wraps registry, pipeline, codegen.

Connects Maggy to the MCP Forge at ~/Documents/protaige/mcp-forge/
without requiring it on PYTHONPATH. Uses subprocess for pipeline
invocation and file-based data exchange.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from pathlib import Path

from .detector import GapDetector
from .registry import ForgeRegistry

# Module-level logger for the Forge connector.
logger = logging.getLogger(__name__)

# Location ForgeConnector uses when no forge_path is given:
# <home>/Documents/protaige/mcp-forge.
DEFAULT_FORGE_PATH = Path.home() / "Documents" / "protaige" / "mcp-forge"


@dataclass
class ForgeStatus:
    """Current state of the Forge connector, as built by ForgeConnector.status."""

    available: bool  # whether the forge path existed when the connector was created
    forge_path: str  # the configured path, as a string
    registry_count: int  # value of the registry's ``count`` attribute
    pending_gaps: int  # number of distinct gaps recorded by the detector


class ForgeConnector:
    """Entry point tying Maggy to an MCP Forge checkout.

    Owns a ForgeRegistry (given the forge path only when that path
    exists) and a GapDetector, and exposes their data as plain dicts.
    """

    def __init__(
        self, forge_path: Path | None = None,
    ):
        self._path = forge_path if forge_path else DEFAULT_FORGE_PATH
        self._available = self._path.exists()
        registry_root = self._path if self._available else None
        self.registry = ForgeRegistry(registry_root)
        self.detector = GapDetector()

    @property
    def available(self) -> bool:
        """Whether the forge path existed at construction time."""
        return self._available

    def status(self) -> ForgeStatus:
        """Snapshot availability, path, registry size and gap count."""
        gap_total = len(self.detector.list_gaps())
        return ForgeStatus(
            available=self._available,
            forge_path=str(self._path),
            registry_count=self.registry.count,
            pending_gaps=gap_total,
        )

    def search_tools(self, query: str) -> list[dict]:
        """Search the registry and return the matches as dicts."""
        found: list[dict] = []
        for tool in self.registry.search(query):
            found.append({
                "slug": tool.slug,
                "mcp_url": tool.mcp_url,
                "has_mcp": tool.has_mcp,
                "auth_method": tool.auth_method,
            })
        return found

    def report_gap(self, capability: str) -> dict:
        """Record a capability gap and report whether it triggered.

        Returns a dict with the capability, the detector's trigger
        flag and a human-readable message.
        """
        triggered = self.detector.record_gap(capability)
        if triggered:
            message = f"Forge triggered for '{capability}'"
        else:
            message = f"Gap recorded ({capability})"
        return {
            "capability": capability,
            "triggered": triggered,
            "message": message,
        }

    def get_gaps(self) -> list[dict]:
        """Return the ten most frequent gaps as dicts."""
        summary: list[dict] = []
        for gap in self.detector.top_gaps(10):
            summary.append({
                "capability": gap.capability,
                "occurrences": gap.occurrences,
                "triggered": gap.triggered,
            })
        return summary


================================================
FILE: maggy/maggy/forge/detector.py
================================================
"""Capability gap detection — monitors unresolvable requests.

Tracks patterns of failed tool lookups and triggers Forge
after repeated occurrences of the same gap.
"""

from __future__ import annotations

from collections import Counter
from dataclasses import dataclass

# Default number of occurrences of the same gap at which
# GapDetector.record_gap reports a trigger.
TRIGGER_THRESHOLD = 3


@dataclass
class GapRecord:
    """A detected capability gap, as reported by GapDetector.list_gaps."""

    capability: str  # normalized (lower-cased, stripped) capability name
    occurrences: int = 0  # how many times the gap has been recorded
    triggered: bool = False  # whether the threshold trigger already fired


class GapDetector:
    """Counts reported capability gaps and flags the ones that repeat.

    Capability names are normalised (lower-cased and stripped) before
    counting, so ``" Foo "`` and ``"foo"`` are the same gap.
    """

    def __init__(self, threshold: int = TRIGGER_THRESHOLD):
        """Create an empty detector that triggers at ``threshold`` hits."""
        self._gaps: Counter = Counter()
        self._threshold = threshold
        self._triggered: set[str] = set()

    def record_gap(self, capability: str) -> bool:
        """Count one occurrence of ``capability``.

        Returns:
            True only on the call where the count first reaches the
            threshold; False before that and on every later call.
        """
        name = capability.lower().strip()
        self._gaps[name] += 1
        if name in self._triggered:
            return False
        if self._gaps[name] < self._threshold:
            return False
        self._triggered.add(name)
        return True

    def list_gaps(self) -> list[GapRecord]:
        """Return every recorded gap, most frequent first."""
        records: list[GapRecord] = []
        for name, seen in self._gaps.most_common():
            records.append(
                GapRecord(
                    capability=name,
                    occurrences=seen,
                    triggered=name in self._triggered,
                )
            )
        return records

    def top_gaps(self, n: int = 5) -> list[GapRecord]:
        """Return the first ``n`` entries of ``list_gaps()``."""
        everything = self.list_gaps()
        return everything[:n]

    def reset(self, capability: str) -> None:
        """Forget a gap: drop its counter and its triggered flag."""
        name = capability.lower().strip()
        self._gaps.pop(name, None)
        self._triggered.discard(name)


================================================
FILE: maggy/maggy/forge/registry.py
================================================
"""Tool registry — wraps mcp-forge's KNOWN_SERVERS.

Provides enable/disable per project and search capabilities
without requiring mcp-forge on PYTHONPATH.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ToolInfo:
    """A registered MCP tool."""

    # Registry key; ForgeRegistry stores and looks tools up by this value.
    slug: str
    # URL of the tool's MCP endpoint; empty string when not provided.
    mcp_url: str = ""
    # Free-form label copied from the registry entry ("Community" by default).
    has_mcp: str = "Community"
    # Authentication label copied from the registry entry.
    auth_method: str = "API Key"
    # Toggled through ForgeRegistry.set_enabled.
    enabled: bool = True


class ForgeRegistry:
    """Project-aware tool registry.

    Holds ``ToolInfo`` objects keyed by slug. When a Forge checkout path
    is supplied and contains ``src/mcp_registry.py``, the tools are
    parsed from that file; otherwise the registry starts empty.
    """

    def __init__(self, forge_path: Path | None = None):
        """Create the registry and load tools from ``forge_path`` if given."""
        self._tools: dict[str, ToolInfo] = {}
        self._forge_path = forge_path
        self._load_registry()

    def _load_registry(self) -> None:
        """Load from mcp-forge if available; leave the registry empty otherwise."""
        if not self._forge_path:
            return
        reg_file = self._forge_path / "src" / "mcp_registry.py"
        if not reg_file.exists():
            return
        # Parse KNOWN_SERVERS from the registry
        self._tools = _parse_registry(reg_file)

    def search(self, query: str) -> list[ToolInfo]:
        """Case-insensitive substring search over slug and MCP URL.

        Args:
            query: Text to look for. An empty string matches every tool.

        Returns:
            Matching tools in registry order.
        """
        q = query.lower()
        return [
            t for t in self._tools.values()
            # Lower-case both fields: the query is lower-cased, so a
            # mixed-case slug would otherwise never match.
            if q in t.slug.lower() or q in t.mcp_url.lower()
        ]

    def get(self, slug: str) -> ToolInfo | None:
        """Return the tool stored under exactly ``slug``, or None."""
        return self._tools.get(slug)

    def list_all(self) -> list[ToolInfo]:
        """Return all tools as a new list."""
        return list(self._tools.values())

    def set_enabled(self, slug: str, enabled: bool) -> bool:
        """Set a tool's ``enabled`` flag.

        Returns:
            True if the tool exists and was updated, False otherwise.
        """
        tool = self._tools.get(slug)
        if not tool:
            return False
        tool.enabled = enabled
        return True

    @property
    def count(self) -> int:
        """Number of tools currently in the registry."""
        return len(self._tools)


def _parse_registry(path: Path) -> dict[str, ToolInfo]:
    """Extract KNOWN_SERVERS entries from registry file."""
    tools: dict[str, ToolInfo] = {}
    content = path.read_text()
    # Find dict literals in KNOWN_SERVERS list
    import re
    pattern = r'\{[^}]+\}'
    for match in re.finditer(pattern, content):
        try:
            # Clean Python dict to JSON-compatible
            raw = match.group()
            raw = raw.replace("'", '"')
            data = json.loads(raw)
            slug = data.get("slug", "")
            if slug:
                tools[slug] = ToolInfo(
                    slug=slug,
                    mcp_url=data.get("mcp_url", ""),
                    has_mcp=data.get("has_mcp", "Community"),
                    auth_method=data.get("auth_method", ""),
                )
        except (json.JSONDecodeError, KeyError):
            continue
    return tools


================================================
FILE: maggy/maggy/heartbeat/__init__.py
================================================
"""Heartbeat — background scheduler for periodic jobs."""


================================================
FILE: maggy/maggy/heartbeat/jobs.py
================================================
"""Built-in heartbeat jobs — wire to existing services."""

from __future__ import annotations

import logging
from datetime import datetime, timezone

from maggy.engram.record import Validity

logger = logging.getLogger(__name__)


async def refresh_history(app) -> None:
    """Re-run the history analysis, if a history service is attached.

    Does nothing when ``app.state.history`` is missing or falsy. A
    failure in ``analyze()`` is logged as a warning and re-raised.
    """
    analyzer = getattr(app.state, "history", None)
    if analyzer:
        try:
            analyzer.analyze()
        except Exception as err:
            logger.warning("refresh_history failed: %s", err)
            raise


async def expire_engrams(app) -> None:
    """Flag active engrams whose TTL has elapsed as expired.

    Queries up to 500 active records from ``app.state.engram``, and for
    each one ``_is_expired`` accepts, sets ``validity`` to
    ``Validity.expired`` and writes the record back. Does nothing when
    no engram store is attached. Failures are logged and re-raised.
    """
    store = getattr(app.state, "engram", None)
    if not store:
        return
    try:
        active = store.query(active_only=True, limit=500)
        checked_at = datetime.now(timezone.utc)
        for record in active:
            if not _is_expired(record, checked_at):
                continue
            record.validity = Validity.expired
            store.write(record)
    except Exception as err:
        logger.warning("expire_engrams failed: %s", err)
        raise


def _is_expired(rec, now) -> bool:
    """Check if an engram's TTL has elapsed."""
    tags = getattr(rec, "tags", []) or []
    ttl_tag = next((t for t in tags if t.startswith("ttl:")), None)
    if not ttl_tag:
        return False
    try:
        ttl = int(ttl_tag.split(":")[1])
    except (IndexError, ValueError):
        return False
    created = rec.created_at
    if not created:
        return False
    created_dt = datetime.fromisoformat(created)
    return (now - created_dt).total_seconds() > ttl * 3600


async def self_improve(app) -> None:
    """Run the introspector's analysis, if one is attached.

    Does nothing when ``app.state.introspector`` is missing or falsy.
    A failure in ``analyze()`` is logged as a warning and re-raised.
    """
    engine = getattr(app.state, "introspector", None)
    if engine:
        try:
            engine.analyze()
        except Exception as err:
            logger.warning("self_improve failed: %s", err)
            raise


async def mesh_heartbeat(app) -> None:
    """Run mesh peer discovery and announcement.

    Requires both ``app.state.mesh`` and ``app.state.cfg``. The mesh is
    contacted only when a GitHub token is configured and
    ``cfg.mesh.git_discovery`` is truthy; it then awaits
    ``mesh.discover(token)`` followed by ``mesh.announce_all(token)``.
    Failures are logged as a warning and re-raised.
    """
    mesh = getattr(app.state, "mesh", None)
    cfg = getattr(app.state, "cfg", None) if mesh else None
    if not (mesh and cfg):
        return
    try:
        token = cfg.issue_tracker.github.token
        if not (token and cfg.mesh.git_discovery):
            return
        await mesh.discover(token)
        await mesh.announce_all(token)
    except Exception as err:
        logger.warning("mesh_heartbeat failed: %s", err)
        raise


async def collect_signals(app) -> None:
    """Record one ``heartbeat`` signal of 1.0 per configured codebase.

    Does nothing unless both ``app.state.observability`` and
    ``app.state.cfg`` are present. Failures are logged as a warning and
    re-raised.
    """
    recorder = getattr(app.state, "observability", None)
    settings = getattr(app.state, "cfg", None)
    if not (recorder and settings):
        return
    try:
        for codebase in settings.codebases:
            recorder.record_signal(codebase.key, "heartbeat", 1.0)
    except Exception as err:
        logger.warning("collect_signals failed: %s", err)
        raise


================================================
FILE: maggy/maggy/heartbeat/scheduler.py
================================================
"""Core heartbeat scheduler — register and run periodic jobs."""

from __future__ import annotations

import asyncio
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Awaitable, Callable

logger = logging.getLogger(__name__)

TICK_INTERVAL = 1.0  # seconds between scheduler ticks


@dataclass
class Job:
    """One scheduled job plus its run bookkeeping."""

    name: str
    fn: Callable[..., Awaitable[None]]
    interval_seconds: int
    last_run: str = ""  # ISO-8601 timestamp of the last run; "" if never run
    run_count: int = 0
    last_error: str = ""
    enabled: bool = True

    def is_due(self) -> bool:
        """Return True when the job has never run or its interval has elapsed."""
        if self.last_run:
            previous = datetime.fromisoformat(self.last_run)
            age = datetime.now(timezone.utc) - previous
            return age.total_seconds() >= self.interval_seconds
        return True


class HeartbeatScheduler:
    """In-process scheduler that runs registered async jobs when due.

    Jobs are stored by name. After ``start()``, a background asyncio
    task calls ``tick()`` and then sleeps ``TICK_INTERVAL`` seconds, in
    a loop. Each tick awaits the enabled, due jobs one after another.
    """

    def __init__(self) -> None:
        self._jobs: dict[str, Job] = {}
        self._task: asyncio.Task | None = None

    def register(
        self, name: str, fn: Callable, interval: int,
    ) -> None:
        """Add a job under ``name``.

        Args:
            name: Unique job name.
            fn: Callable awaited with no arguments on every run.
            interval: Minimum number of seconds between runs.

        Raises:
            ValueError: If a job with this name is already registered.
        """
        if name in self._jobs:
            raise ValueError(f"Job '{name}' already registered")
        self._jobs[name] = Job(
            name=name, fn=fn, interval_seconds=interval,
        )

    async def tick(self) -> None:
        """Run every enabled job that is currently due, sequentially."""
        # Iterate over a snapshot: a job (or another task running while
        # a job is awaited) may register new jobs, and mutating the dict
        # during iteration would raise RuntimeError.
        for job in list(self._jobs.values()):
            if not job.enabled or not job.is_due():
                continue
            await self._run_job(job)

    async def _run_job(self, job: Job) -> None:
        """Await one job and record the outcome on it.

        Exceptions are logged and stored in ``last_error`` rather than
        propagated; ``last_run`` and ``run_count`` are updated either way.
        """
        try:
            await job.fn()
            job.last_error = ""
        except Exception as exc:
            job.last_error = str(exc)
            logger.warning("Job %s failed: %s", job.name, exc)
        job.last_run = datetime.now(timezone.utc).isoformat()
        job.run_count += 1

    async def trigger(self, name: str) -> dict:
        """Run the named job immediately, whether or not it is enabled or due.

        Returns:
            ``{"ok": <True if the run left no error>, "name": name}``.

        Raises:
            KeyError: If no job with this name is registered.
        """
        if name not in self._jobs:
            raise KeyError(name)
        job = self._jobs[name]
        await self._run_job(job)
        return {"ok": not job.last_error, "name": name}

    async def start(self) -> None:
        """Start the background loop; a no-op if it is already running."""
        if self._task is not None and not self._task.done():
            # A second create_task would orphan the first loop and run
            # every job twice per interval.
            logger.warning("Heartbeat already running — start ignored")
            return
        self._task = asyncio.create_task(self._loop())
        logger.info("Heartbeat started — %d jobs", len(self._jobs))

    async def stop(self) -> None:
        """Cancel the background loop, if any, and wait for it to finish."""
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None
        logger.info("Heartbeat stopped")

    async def _loop(self) -> None:
        """Tick forever, sleeping ``TICK_INTERVAL`` seconds between ticks."""
        while True:
            try:
                await self.tick()
            except asyncio.CancelledError:
                raise
            except Exception:
                # Keep the heartbeat alive: an unexpected error in one
                # tick must not silently end all future scheduling.
                logger.exception("Heartbeat tick failed")
            await asyncio.sleep(TICK_INTERVAL)

    def status(self) -> list[dict]:
        """Return one bookkeeping dict per registered job."""
        return [
            {
                "name": j.name,
                "interval": j.interval_seconds,
                "last_run": j.last_run,
                "run_count": j.run_count,
                "last_error": j.last_error,
                "enabled": j.enabled,
            }
            for j in self._jobs.values()
        ]


================================================
FILE: maggy/maggy/history/__init__.py
================================================
"""Session history analyzer — reads Claude/Codex/Kimi local state."""


================================================
FILE: maggy/maggy/history/analyzer.py
================================================
"""Aggregation and pattern detection for session history."""

from __future__ import annotations

from collections import Counter, defaultdict
from datetime import datetime

from .models import (
    HistoryReport,
    ProjectActivity,
    ProviderUsage,
    SessionEntry,
    TimeDistribution,
    _now_iso,
)


def build_report(
    sessions: list[SessionEntry],
) -> HistoryReport:
    """Assemble a ``HistoryReport`` from parsed sessions.

    With no sessions the report carries only a timestamp and zero
    totals; otherwise every aggregate section is filled in.
    """
    if sessions:
        stamp = _now_iso()
        session_total = len(sessions)
        prompt_total = sum(entry.prompt_count for entry in sessions)
        return HistoryReport(
            generated_at=stamp,
            total_sessions=session_total,
            total_prompts=prompt_total,
            providers=aggregate_by_provider(sessions),
            projects=aggregate_by_project(sessions),
            time_distribution=compute_time_distribution(sessions),
            top_topics=extract_top_topics(sessions),
            patterns=detect_patterns(sessions),
        )
    return HistoryReport(
        generated_at=_now_iso(),
        total_sessions=0,
        total_prompts=0,
    )


def aggregate_by_provider(
    sessions: list[SessionEntry],
) -> list[ProviderUsage]:
    """Summarise sessions per provider, sorted by provider name.

    For each provider: session count, prompt total, summed duration in
    minutes (sessions without a duration count as 0) and the sorted set
    of models used.
    """
    grouped: dict[str, list[SessionEntry]] = {}
    for entry in sessions:
        grouped.setdefault(entry.provider, []).append(entry)

    usages: list[ProviderUsage] = []
    for name in sorted(grouped):
        members = grouped[name]
        total_minutes = sum(
            member.duration_minutes or 0 for member in members
        )
        model_names: set[str] = set()
        for member in members:
            model_names |= set(member.models_used)
        usages.append(ProviderUsage(
            provider=name,
            session_count=len(members),
            prompt_count=sum(m.prompt_count for m in members),
            total_minutes=total_minutes,
            models_used=sorted(model_names),
        ))
    return usages


def aggregate_by_project(
    sessions: list[SessionEntry],
) -> list[ProjectActivity]:
    """Summarise sessions per project, sorted by project name.

    For each project: session and prompt totals, the sorted providers
    seen, the smallest and largest non-empty ``started_at`` strings
    (``("", "")`` if none), and up to five merged topics.
    """
    grouped: dict[str, list[SessionEntry]] = {}
    for entry in sessions:
        grouped.setdefault(entry.project, []).append(entry)

    activities: list[ProjectActivity] = []
    for name in sorted(grouped):
        members = grouped[name]
        provider_names = sorted({m.provider for m in members})
        starts = [m.started_at for m in members if m.started_at]
        if starts:
            span = (min(starts), max(starts))
        else:
            span = ("", "")
        ranked_topics = _merge_topics(members)
        activities.append(ProjectActivity(
            project=name,
            total_sessions=len(members),
            total_prompts=sum(m.prompt_count for m in members),
            providers_used=provider_names,
            date_range=span,
            top_topics=ranked_topics[:5],
        ))
    return activities


def compute_time_distribution(
    sessions: list[SessionEntry],
) -> TimeDistribution:
    """Bucket sessions by start hour, weekday and calendar date.

    Hour and weekday buckets count sessions; the date bucket sums
    prompt counts. Sessions with an empty or unparseable ``started_at``
    are skipped.
    """
    hours: Counter[int] = Counter()
    weekdays: Counter[int] = Counter()
    dates: Counter[str] = Counter()

    for entry in sessions:
        stamp = entry.started_at
        if stamp:
            try:
                moment = datetime.fromisoformat(stamp)
            except ValueError:
                continue
            hours[moment.hour] += 1
            weekdays[moment.weekday()] += 1
            dates[moment.strftime("%Y-%m-%d")] += entry.prompt_count

    return TimeDistribution(
        by_hour=dict(hours),
        by_weekday=dict(weekdays),
        by_date=dict(dates),
    )


def extract_top_topics(
    sessions: list[SessionEntry],
) -> list[str]:
    """Return up to ten topics, most frequent across all sessions first."""
    tally: Counter[str] = Counter(
        topic
        for entry in sessions
        for topic in entry.topics
    )
    ranked = tally.most_common(10)
    return [topic for topic, _hits in ranked]


def detect_patterns(
    sessions: list[SessionEntry],
) -> list[str]:
    """Collect human-readable observations about the sessions.

    Runs the provider-dominance, session-stats and project-focus
    detectors in that order; returns an empty list for no sessions.
    """
    findings: list[str] = []
    if sessions:
        detectors = (
            _detect_provider_dominance,
            _detect_session_stats,
            _detect_project_focus,
        )
        for detector in detectors:
            detector(sessions, findings)
    return findings


def _detect_provider_dominance(
    sessions: list[SessionEntry],
    patterns: list[str],
) -> None:
    """Check if one provider dominates usage."""
    counts = Counter(s.provider for s in sessions)
    total = len(sessions)
    for prov, count in counts.most_common(1):
        pct = count * 100 // total
        if pct >= 70:
            patterns.append(
                f"{pct}% of sessions use {prov}"
            )


def _detect_session_stats(
    sessions: list[SessionEntry],
    patterns: list[str],
) -> None:
    """Compute average session statistics."""
    avg_prompts = (
        sum(s.prompt_count for s in sessions)
        // len(sessions)
    )
    durations = [
        s.duration_minutes for s in sessions
        if s.duration_minutes is not None
    ]
    if durations:
        avg_min = sum(durations) / len(durations)
        patterns.append(
            f"Average session: {avg_prompts} prompts, "
            f"{avg_min:.0f} minutes"
        )
    else:
        patterns.append(
            f"Average session: {avg_prompts} prompts"
        )


def _detect_project_focus(
    sessions: list[SessionEntry],
    patterns: list[str],
) -> None:
    """Detect high-activity projects."""
    by_proj = Counter(s.project for s in sessions)
    for proj, count in by_proj.most_common(1):
        if count >= 5:
            patterns.append(
                f"Project '{proj}' had {count} sessions"
                f" — high focus"
            )


def _merge_topics(
    sessions: list[SessionEntry],
) -> list[str]:
    """Merge topics across sessions by frequency."""
    counts: Counter[str] = Counter()
    for s in sessions:
        for t in s.topics:
            counts[t] += 1
    return [t for t, _ in counts.most_common(10)]


================================================
FILE: maggy/maggy/history/models.py
================================================
"""Data models for session history analysis."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone


@dataclass
class SessionEntry:
    """One parsed CLI session, independent of which CLI produced it."""

    session_id: str
    provider: str  # "claude" | "codex" | "kimi"
    project: str
    started_at: str
    ended_at: str
    prompt_count: int
    tool_use_count: int
    models_used: list[str] = field(default_factory=list)
    git_branch: str = ""
    topics: list[str] = field(default_factory=list)
    summary: str = ""

    @property
    def duration_minutes(self) -> float | None:
        """Minutes between ``started_at`` and ``ended_at``.

        Returns None when either timestamp is empty, cannot be parsed,
        or the two cannot be subtracted from each other.
        """
        if self.started_at and self.ended_at:
            try:
                begin = datetime.fromisoformat(self.started_at)
                finish = datetime.fromisoformat(self.ended_at)
                span = finish - begin
            except (ValueError, TypeError):
                return None
            return span.total_seconds() / 60
        return None


@dataclass
class ProjectActivity:
    """Aggregated activity for a project across CLIs."""

    # Project name the sessions were grouped by.
    project: str
    # Number of sessions in the group.
    total_sessions: int
    # Sum of prompt counts over the group's sessions.
    total_prompts: int
    # Providers that contributed at least one session.
    providers_used: list[str] = field(default_factory=list)
    # (earliest, latest) start timestamp strings; ("", "") when unknown.
    date_range: tuple[str, str] = ("", "")
    # Most frequent topics for this project.
    top_topics: list[str] = field(default_factory=list)


@dataclass
class ProviderUsage:
    """Usage statistics per provider."""

    # Provider name the sessions were grouped by.
    provider: str
    # Number of sessions for this provider.
    session_count: int
    # Sum of prompt counts over those sessions.
    prompt_count: int
    # Summed session duration in minutes.
    total_minutes: float
    # Models seen across those sessions.
    models_used: list[str] = field(default_factory=list)


@dataclass
class TimeDistribution:
    """Work distribution across time periods."""

    # Hour of day (0-23) -> count.
    by_hour: dict[int, int] = field(default_factory=dict)
    # Weekday as returned by datetime.weekday() (Monday == 0) -> count.
    by_weekday: dict[int, int] = field(default_factory=dict)
    # Date string (YYYY-MM-DD) -> count.
    by_date: dict[str, int] = field(default_factory=dict)


@dataclass
class HistoryReport:
    """Complete analysis report."""

    # ISO-8601 timestamp of when the report was built.
    generated_at: str
    # Number of sessions analysed.
    total_sessions: int
    # Sum of prompt counts over all sessions.
    total_prompts: int
    # Per-provider aggregates.
    providers: list[ProviderUsage] = field(
        default_factory=list
    )
    # Per-project aggregates.
    projects: list[ProjectActivity] = field(
        default_factory=list
    )
    # Hour/weekday/date buckets; None when not computed.
    time_distribution: TimeDistribution | None = None
    # Most frequent topics across all sessions.
    top_topics: list[str] = field(default_factory=list)
    # Human-readable observations.
    patterns: list[str] = field(default_factory=list)
    # Free-text summary; empty by default.
    summary: str = ""


def _now_iso() -> str:
    """Current UTC timestamp as ISO string."""
    return datetime.now(timezone.utc).isoformat()


================================================
FILE: maggy/maggy/history/parsers/__init__.py
================================================
"""History parsers for Claude Code, Codex CLI, and Kimi CLI."""

from .claude import ClaudeHistoryParser
from .codex import CodexHistoryParser
from .kimi import KimiHistoryParser

__all__ = [
    "ClaudeHistoryParser",
    "CodexHistoryParser",
    "KimiHistoryParser",
]


================================================
FILE: maggy/maggy/history/parsers/base.py
================================================
"""Abstract base for CLI history parsers."""

from __future__ import annotations

from abc import ABC, abstractmethod

from maggy.history.models import SessionEntry


class HistoryParser(ABC):
    """Base protocol for CLI history parsers.

    Each concrete parser reads one CLI's local state and converts it
    into ``SessionEntry`` objects.
    """

    # Short provider identifier; concrete parsers set it as a class attribute.
    provider: str

    @abstractmethod
    def is_available(self) -> bool:
        """Return True when this CLI's local history data is present."""
        ...

    @abstractmethod
    def parse_sessions(
        self, limit: int = 500,
    ) -> list[SessionEntry]:
        """Parse session history into a list of SessionEntry.

        Args:
            limit: Upper bound on how many sessions to process.
        """
        ...

    @abstractmethod
    def session_count(self) -> int:
        """Return total number of sessions available."""
        ...


================================================
FILE: maggy/maggy/history/parsers/claude.py
================================================
"""Claude Code history parser — reads ~/.claude/ local state."""

from __future__ import annotations

import json
import logging
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

from maggy.history.models import SessionEntry

from .base import HistoryParser

logger = logging.getLogger(__name__)


def _millis_to_iso(ms: int | float) -> str:
    """Convert Unix milliseconds to ISO-8601."""
    dt = datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
    return dt.isoformat()


def _read_jsonl(path: Path) -> list[dict]:
    """Read JSONL file, skip bad lines."""
    if not path.exists():
        return []
    results: list[dict] = []
    try:
        for line in path.read_text().splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                results.append(json.loads(line))
            except json.JSONDecodeError:
                continue
    except OSError:
        return []
    return results


def _extract_topics(prompts: list[str]) -> list[str]:
    """Extract keyword topics from prompt texts."""
    from collections import Counter
    words: list[str] = []
    for text in prompts:
        for w in text.lower().split():
            if len(w) > 3 and w.isalpha():
                words.append(w)
    counts = Counter(words)
    return [w for w, _ in counts.most_common(5)]


class ClaudeHistoryParser(HistoryParser):
    """Parse Claude Code session history.

    Reads ``history.jsonl`` under the Claude data directory (default
    ``~/.claude``), groups its entries by ``sessionId`` and, when a
    per-session transcript is found under ``projects/``, enriches each
    session with the models, tool-use count and git branch seen there.
    Malformed entries are skipped rather than aborting the parse.
    """

    provider = "claude"

    def __init__(self, claude_dir: Path | None = None):
        """Use ``claude_dir`` as the data root, defaulting to ``~/.claude``."""
        self._dir = claude_dir or (
            Path.home() / ".claude"
        )

    def is_available(self) -> bool:
        """Return True when ``history.jsonl`` exists in the data root."""
        history = self._dir / "history.jsonl"
        return history.exists()

    def session_count(self) -> int:
        """Count distinct sessions in ``history.jsonl``.

        Uses the same grouping as ``parse_sessions`` so both agree on
        which entries carry a usable session id.
        """
        entries = _read_jsonl(self._dir / "history.jsonl")
        return len(self._group_by_session(entries))

    def parse_sessions(
        self, limit: int = 500,
    ) -> list[SessionEntry]:
        """Build one ``SessionEntry`` per session id.

        Args:
            limit: Maximum number of sessions to build, taken in order
                of first appearance in ``history.jsonl``.
        """
        entries = _read_jsonl(self._dir / "history.jsonl")
        if not entries:
            return []

        grouped = self._group_by_session(entries)
        sessions: list[SessionEntry] = []
        for sid, items in list(grouped.items())[:limit]:
            session = self._build_entry(sid, items)
            sessions.append(session)
        return sessions

    def _group_by_session(
        self, entries: list[dict],
    ) -> dict[str, list[dict]]:
        """Group history entries by their non-empty string ``sessionId``."""
        grouped: dict[str, list[dict]] = defaultdict(list)
        for e in entries:
            if not isinstance(e, dict):
                continue
            sid = e.get("sessionId")
            # Only non-empty strings are usable as session ids; this
            # also keeps unhashable values out of the dict key.
            if isinstance(sid, str) and sid:
                grouped[sid].append(e)
        return dict(grouped)

    def _build_entry(
        self, sid: str, items: list[dict],
    ) -> SessionEntry:
        """Turn the history entries of one session into a ``SessionEntry``."""
        # Ignore non-numeric timestamps; they cannot be converted.
        timestamps = [
            i["timestamp"] for i in items
            if isinstance(i.get("timestamp"), (int, float))
        ]
        project = items[0].get("project", "")
        if not isinstance(project, str):
            project = ""
        # Keep only non-empty string prompts; topic extraction calls
        # string methods on each of them.
        prompts = [
            i["display"] for i in items
            if isinstance(i.get("display"), str) and i["display"]
        ]
        summary = prompts[0] if prompts else ""

        started = _millis_to_iso(min(timestamps)) if timestamps else ""
        ended = _millis_to_iso(max(timestamps)) if timestamps else ""

        # Try reading transcript for richer data
        extra = self._parse_transcript(sid, project)

        return SessionEntry(
            session_id=sid,
            provider="claude",
            project=self._slug(project),
            started_at=started,
            ended_at=ended,
            prompt_count=len(items),
            tool_use_count=extra.get("tool_uses", 0),
            models_used=extra.get("models", []),
            git_branch=extra.get("branch", ""),
            topics=_extract_topics(prompts),
            summary=summary,
        )

    def _slug(self, project_path: str) -> str:
        """Extract project name (last path component) from path."""
        if not project_path:
            return ""
        return Path(project_path).name

    def _find_transcript(
        self, sid: str, project: str,
    ) -> Path | None:
        """Locate transcript JSONL by session ID.

        First tries ``projects/<slug>/<sid>.jsonl`` where the slug is
        the project path with ``/`` replaced by ``-`` and leading
        dashes removed, then falls back to scanning every directory
        under ``projects/``.
        """
        projects_dir = self._dir / "projects"
        if not projects_dir.exists():
            return None
        slug = project.replace("/", "-").lstrip("-")
        direct = projects_dir / slug / f"{sid}.jsonl"
        if direct.exists():
            return direct
        # Search all project dirs for the session
        for d in projects_dir.iterdir():
            if not d.is_dir():
                continue
            f = d / f"{sid}.jsonl"
            if f.exists():
                return f
        return None

    def _parse_transcript(
        self, sid: str, project: str,
    ) -> dict:
        """Read session transcript for models/tools/branch.

        Returns:
            ``{}`` when there is no project or no transcript, otherwise
            a dict with ``models`` (sorted list), ``tool_uses`` (int)
            and ``branch`` (str).
        """
        if not project:
            return {}
        transcript = self._find_transcript(sid, project)
        if not transcript:
            return {}

        entries = _read_jsonl(transcript)
        models: set[str] = set()
        tool_uses = 0
        branch = ""

        for e in entries:
            if not isinstance(e, dict):
                continue
            etype = e.get("type", "")
            if etype == "assistant":
                message = e.get("message")
                if not isinstance(message, dict):
                    # "message" may be null or a plain string.
                    message = {}
                # NOTE(review): the original only read a top-level
                # "model"; current transcripts appear to carry it under
                # message.model — confirm against real data.
                m = e.get("model") or message.get("model")
                if isinstance(m, str) and m:
                    models.add(m)
                content = message.get("content", [])
                if isinstance(content, list):
                    tool_uses += sum(
                        1 for b in content
                        if isinstance(b, dict)
                        and b.get("type") == "tool_use"
                    )
            elif etype == "user" and not branch:
                found = e.get("gitBranch")
                branch = found if isinstance(found, str) else ""

        return {
            "models": sorted(models),
            "tool_uses": tool_uses,
            "branch": branch,
        }


================================================
FILE: maggy/maggy/history/parsers/codex.py
================================================
"""Codex CLI history parser — reads ~/.codex/ local state."""

from __future__ import annotations

import json
import logging
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

from maggy.history.models import SessionEntry

from .base import HistoryParser

logger = logging.getLogger(__name__)


def _seconds_to_iso(ts: int | float) -> str:
    """Convert Unix seconds to ISO-8601."""
    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
    return dt.isoformat()


def _read_jsonl(path: Path) -> list[dict]:
    """Read JSONL file, skip bad lines."""
    if not path.exists():
        return []
    results: list[dict] = []
    try:
        for line in path.read_text().splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                results.append(json.loads(line))
            except json.JSONDecodeError:
                continue
    except OSError:
        return []
    return results


def _extract_topics(texts: list[str]) -> list[str]:
    """Extract keyword topics from prompt texts."""
    from collections import Counter
    words: list[str] = []
    for text in texts:
        for w in text.lower().split():
            if len(w) > 3 and w.isalpha():
                words.append(w)
    counts = Counter(words)
    return [w for w, _ in counts.most_common(5)]


class CodexHistoryParser(HistoryParser):
    """Parse OpenAI Codex CLI session history.

    Sessions come from ``session_index.jsonl``; the prompts of each
    session come from ``history.jsonl``, matched by session id.
    """

    provider = "codex"

    def __init__(self, codex_dir: Path | None = None):
        """Use ``codex_dir`` as the data root, defaulting to ``~/.codex``."""
        if codex_dir:
            self._dir = codex_dir
        else:
            self._dir = Path.home() / ".codex"

    def is_available(self) -> bool:
        """Return True when ``session_index.jsonl`` exists."""
        return (self._dir / "session_index.jsonl").exists()

    def session_count(self) -> int:
        """Return the number of entries in ``session_index.jsonl``."""
        index_path = self._dir / "session_index.jsonl"
        return len(_read_jsonl(index_path))

    def parse_sessions(
        self, limit: int = 500,
    ) -> list[SessionEntry]:
        """Build session entries from the first ``limit`` index rows.

        Rows without an ``id`` are skipped (they still count towards
        ``limit``). Returns ``[]`` when the index is empty or missing.
        """
        index_rows = _read_jsonl(self._dir / "session_index.jsonl")
        if not index_rows:
            return []

        prompts_by_sid = self._group_prompts(
            _read_jsonl(self._dir / "history.jsonl")
        )
        return [
            self._build_entry(
                row, prompts_by_sid.get(row.get("id", ""), []),
            )
            for row in index_rows[:limit]
            if row.get("id", "")
        ]

    def _group_prompts(
        self, history: list[dict],
    ) -> dict[str, list[dict]]:
        """Group history rows by their non-empty ``session_id``."""
        buckets: dict[str, list[dict]] = {}
        for row in history:
            sid = row.get("session_id", "")
            if sid:
                buckets.setdefault(sid, []).append(row)
        return buckets

    def _build_entry(
        self, index_entry: dict, prompts: list[dict],
    ) -> SessionEntry:
        """Combine one index row with its prompts into a ``SessionEntry``.

        Start and end come from the smallest and largest prompt ``ts``;
        without any, both fall back to the row's ``updated_at``. The
        summary is the thread name, else the first 100 characters of
        the first prompt text, else empty.
        """
        stamps = [row["ts"] for row in prompts if "ts" in row]
        bodies = [row["text"] for row in prompts if row.get("text")]

        if stamps:
            first_seen = _seconds_to_iso(min(stamps))
            last_seen = _seconds_to_iso(max(stamps))
        else:
            first_seen = last_seen = index_entry.get("updated_at", "")

        title = index_entry.get("thread_name", "")
        if title:
            headline = title
        elif bodies:
            headline = bodies[0][:100]
        else:
            headline = ""

        return SessionEntry(
            session_id=index_entry.get("id", ""),
            provider="codex",
            project=title,
            started_at=first_seen,
            ended_at=last_seen,
            prompt_count=len(prompts),
            tool_use_count=0,
            models_used=[],
            topics=_extract_topics(bodies),
            summary=headline,
        )


================================================
FILE: maggy/maggy/history/parsers/kimi.py
================================================
"""Kimi CLI history parser — reads ~/.kimi/ local state."""

from __future__ import annotations

import json
import logging
from datetime import datetime, timezone
from pathlib import Path

from maggy.history.models import SessionEntry

from .base import HistoryParser

logger = logging.getLogger(__name__)


def _float_to_iso(ts: float) -> str:
    """Convert Unix float seconds to ISO-8601."""
    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
    return dt.isoformat()


def _read_jsonl(path: Path) -> list[dict]:
    """Read JSONL file, skip bad lines."""
    if not path.exists():
        return []
    results: list[dict] = []
    try:
        for line in path.read_text().splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                results.append(json.loads(line))
            except json.JSONDecodeError:
                continue
    except OSError:
        return []
    return results


def _extract_topics(texts: list[str]) -> list[str]:
    """Extract keyword topics from texts."""
    from collections import Counter
    words: list[str] = []
    for text in texts:
        for w in text.lower().split():
            if len(w) > 3 and w.isalpha():
                words.append(w)
    counts = Counter(words)
    return [w for w, _ in counts.most_common(5)]


class KimiHistoryParser(HistoryParser):
    """Parse Moonshot Kimi CLI session history.

    Sessions are looked up two directory levels below ``<kimi_dir>/sessions``;
    a directory counts as a session only when it contains ``context.jsonl``.
    An optional ``wire.jsonl`` in the same directory supplies timestamps and
    step counts.
    """

    provider = "kimi"

    def __init__(self, kimi_dir: Path | None = None):
        """Use ``kimi_dir`` as the state directory, defaulting to ``~/.kimi``."""
        self._dir = kimi_dir or (
            Path.home() / ".kimi"
        )

    def is_available(self) -> bool:
        """Return True when the ``sessions`` directory exists."""
        sessions = self._dir / "sessions"
        return sessions.exists() and sessions.is_dir()

    def session_count(self) -> int:
        """Return the number of session directories found on disk."""
        return len(self._find_session_dirs())

    def parse_sessions(
        self, limit: int = 500,
    ) -> list[SessionEntry]:
        """Parse up to ``limit`` session directories into entries.

        Directories whose context file yields no records are skipped, so the
        result may hold fewer than ``limit`` entries.
        """
        sessions: list[SessionEntry] = []
        for d in self._find_session_dirs()[:limit]:
            entry = self._parse_session_dir(d)
            if entry:
                sessions.append(entry)
        return sessions

    @staticmethod
    def _subdirs(parent: Path) -> list[Path]:
        """Return the sub-directories of ``parent`` sorted by path.

        An unreadable or vanished directory yields an empty list instead of
        aborting the whole scan.
        """
        try:
            return sorted(p for p in parent.iterdir() if p.is_dir())
        except OSError as exc:
            logger.debug("Cannot list %s: %s", parent, exc)
            return []

    def _find_session_dirs(self) -> list[Path]:
        """Find all session UUID directories.

        The result is sorted so that the ``limit`` applied by
        ``parse_sessions`` always selects the same directories.
        """
        sessions_root = self._dir / "sessions"
        if not sessions_root.exists():
            return []
        return [
            uuid_dir
            for hash_dir in self._subdirs(sessions_root)
            for uuid_dir in self._subdirs(hash_dir)
            if (uuid_dir / "context.jsonl").exists()
        ]

    def _parse_session_dir(
        self, session_dir: Path,
    ) -> SessionEntry | None:
        """Build a ``SessionEntry`` from one session directory.

        Returns None when ``context.jsonl`` contains no usable records.
        """
        # Ignore records that are not JSON objects; ``.get`` below needs dicts.
        context = [
            e for e in _read_jsonl(session_dir / "context.jsonl")
            if isinstance(e, dict)
        ]
        if not context:
            return None

        user_msgs = [
            e for e in context
            if e.get("role") == "user"
        ]
        prompts: list[str] = []
        for e in user_msgs:
            c = e.get("content", "")
            if isinstance(c, str):
                prompts.append(c)
            elif isinstance(c, list):
                # Multi-part content: only the first part is used, stringified.
                prompts.append(str(c[0]) if c else "")
        summary = prompts[0][:100] if prompts else ""

        wire = self._parse_wire(session_dir)

        return SessionEntry(
            session_id=session_dir.name,
            provider="kimi",
            project="",
            started_at=wire.get("started", ""),
            ended_at=wire.get("ended", ""),
            prompt_count=len(user_msgs),
            tool_use_count=wire.get("steps", 0),
            models_used=[],
            topics=_extract_topics(prompts),
            summary=summary,
        )

    def _parse_wire(self, session_dir: Path) -> dict:
        """Extract timestamps and step counts from ``wire.jsonl``.

        Returns:
            ``{}`` when the file is missing or empty; otherwise a dict with
            ``steps`` (number of records whose ``message`` mentions
            ``StepBegin``) and, when usable numeric timestamps exist,
            ``started`` / ``ended`` as ISO-8601 strings.
        """
        entries = _read_jsonl(
            session_dir / "wire.jsonl"
        )
        if not entries:
            return {}

        timestamps: list[float] = []
        steps = 0
        for e in entries:
            if not isinstance(e, dict):
                continue
            ts = e.get("timestamp")
            # bool is a subclass of int; True/False are not timestamps.
            if isinstance(ts, (int, float)) and not isinstance(ts, bool):
                timestamps.append(float(ts))
            msg_str = e.get("message", "")
            if "StepBegin" in str(msg_str):
                steps += 1

        result: dict = {"steps": steps}
        if timestamps:
            try:
                started = _float_to_iso(min(timestamps))
                ended = _float_to_iso(max(timestamps))
            except (OverflowError, OSError, ValueError):
                # Out-of-range or NaN values cannot be converted; keep the
                # step count and leave the timestamps unset.
                return result
            result["started"] = started
            result["ended"] = ended
        return result


================================================
FILE: maggy/maggy/history/service.py
================================================
"""History analysis service — orchestrates the full pipeline."""

from __future__ import annotations

import logging
from pathlib import Path

from .analyzer import build_report
from .models import HistoryReport
from .parsers.claude import ClaudeHistoryParser
from .parsers.codex import CodexHistoryParser
from .parsers.kimi import KimiHistoryParser
from .store import HistoryStore

logger = logging.getLogger(__name__)


class HistoryService:
    """Coordinates parsing, analysis and storage of CLI session history.

    One parser per supported CLI (claude, codex, kimi) is created up front;
    parsed sessions and reports are persisted through a ``HistoryStore``.
    """

    def __init__(
        self,
        db_path: Path | None = None,
        cli_dirs: dict[str, Path] | None = None,
    ):
        """Set up the store and the parsers.

        Args:
            db_path: SQLite file to use; defaults to ``~/.maggy/history.db``.
            cli_dirs: Optional per-provider directory overrides keyed by
                ``"claude"``, ``"codex"`` and ``"kimi"``.
        """
        if db_path:
            store_path = db_path
        else:
            store_path = Path.home() / ".maggy" / "history.db"
        self._store = HistoryStore(store_path)

        overrides = cli_dirs if cli_dirs else {}
        claude = ClaudeHistoryParser(overrides.get("claude"))
        codex = CodexHistoryParser(overrides.get("codex"))
        kimi = KimiHistoryParser(overrides.get("kimi"))
        self._parsers = [claude, codex, kimi]

    def analyze(self) -> HistoryReport:
        """Parse every available CLI, build a report and store both."""
        collected = self._collect_sessions()
        report = build_report(collected)

        # Sessions are written only when something was parsed; the report is
        # stored unconditionally.
        if collected:
            self._store.save_sessions(collected)
        self._store.save_report(report)

        logger.info(
            "History analysis: %d sessions, %d prompts, %d providers",
            report.total_sessions,
            report.total_prompts,
            len(report.providers),
        )
        return report

    def _collect_sessions(self) -> list:
        """Gather sessions from each available parser.

        A parser that raises is logged and skipped so the remaining parsers
        still contribute.
        """
        gathered = []
        for parser in self._parsers:
            if parser.is_available():
                try:
                    batch = parser.parse_sessions()
                    gathered.extend(batch)
                    logger.info(
                        "Parsed %d sessions from %s",
                        len(batch), parser.provider,
                    )
                except Exception:
                    logger.exception(
                        "Failed to parse %s history",
                        parser.provider,
                    )
            else:
                logger.debug(
                    "%s not available, skipping",
                    parser.provider,
                )
        return gathered

    def get_report(self) -> dict | None:
        """Return the most recently stored report, if any."""
        latest = self._store.load_latest_report()
        return latest

    def get_sessions(
        self, provider: str | None = None,
    ) -> list[dict]:
        """Return stored session records, optionally for one provider."""
        records = self._store.load_sessions(provider=provider)
        return records

    def available_providers(self) -> list[str]:
        """Name the providers whose parser reports itself available."""
        names = []
        for parser in self._parsers:
            if parser.is_available():
                names.append(parser.provider)
        return names


================================================
FILE: maggy/maggy/history/store.py
================================================
"""SQLite store for session history data."""

from __future__ import annotations

import json
import sqlite3
from contextlib import contextmanager
from dataclasses import asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator

from .models import HistoryReport, SessionEntry

# DDL executed by HistoryStore.__init__ via executescript(). Every statement
# uses IF NOT EXISTS, so running it against an existing database is harmless.
# - sessions: one row per saved session; models_used and topics hold
#   JSON-encoded lists (see HistoryStore.save_sessions / _row_to_dict).
# - history_reports: one row per saved report, payload is the JSON document.
SCHEMA = """
CREATE TABLE IF NOT EXISTS sessions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT NOT NULL,
    provider TEXT NOT NULL,
    project TEXT NOT NULL,
    started_at TEXT NOT NULL,
    ended_at TEXT NOT NULL DEFAULT '',
    prompt_count INTEGER NOT NULL DEFAULT 0,
    tool_use_count INTEGER NOT NULL DEFAULT 0,
    models_used TEXT NOT NULL DEFAULT '[]',
    git_branch TEXT NOT NULL DEFAULT '',
    topics TEXT NOT NULL DEFAULT '[]',
    summary TEXT NOT NULL DEFAULT '',
    ingested_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_hsess_provider
    ON sessions(provider);
CREATE INDEX IF NOT EXISTS idx_hsess_project
    ON sessions(project);

CREATE TABLE IF NOT EXISTS history_reports (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    generated_at TEXT NOT NULL,
    payload TEXT NOT NULL
);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class HistoryStore:
    """SQLite-backed session history storage.

    Each operation opens its own short-lived connection through ``_connect``.
    """

    def __init__(self, db_path: Path):
        """Remember ``db_path`` and make sure the schema exists."""
        self._db_path = db_path
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)

    def save_sessions(
        self, sessions: list[SessionEntry],
    ) -> None:
        """Save parsed session entries.

        A session that was stored before (same provider and session id) is
        replaced rather than inserted again, so re-ingesting the same history
        does not pile up duplicate rows. The delete and insert run in one
        transaction: either every entry is stored or none is.
        """
        sessions = list(sessions)  # iterated twice below
        now = datetime.now(timezone.utc).isoformat()
        keys = [(s.provider, s.session_id) for s in sessions]
        rows = [
            (
                s.session_id, s.provider,
                s.project, s.started_at,
                s.ended_at, s.prompt_count,
                s.tool_use_count,
                json.dumps(s.models_used),
                s.git_branch,
                json.dumps(s.topics),
                s.summary, now,
            )
            for s in sessions
        ]
        with _connect(self._db_path) as conn:
            # ``with conn`` commits on success and rolls back on error.
            with conn:
                conn.executemany(
                    "DELETE FROM sessions "
                    "WHERE provider = ? AND session_id = ?",
                    keys,
                )
                conn.executemany(
                    "INSERT INTO sessions "
                    "(session_id, provider, project, "
                    "started_at, ended_at, prompt_count, "
                    "tool_use_count, models_used, "
                    "git_branch, topics, summary, "
                    "ingested_at) "
                    "VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
                    rows,
                )

    def load_sessions(
        self,
        provider: str | None = None,
        limit: int = 500,
    ) -> list[dict]:
        """Load stored session records, newest ``started_at`` first.

        Args:
            provider: When given (and non-empty), only that provider's rows.
            limit: Maximum number of rows to return.
        """
        sql = "SELECT * FROM sessions "
        params: list = []
        if provider:
            sql += "WHERE provider = ? "
            params.append(provider)
        sql += "ORDER BY started_at DESC LIMIT ?"
        params.append(limit)
        with _connect(self._db_path) as conn:
            rows = conn.execute(sql, params).fetchall()
        return [self._row_to_dict(r) for r in rows]

    def save_report(self, report: HistoryReport) -> None:
        """Save an analysis report as a JSON payload."""
        payload = json.dumps(asdict(report))
        with _connect(self._db_path) as conn:
            with conn:
                conn.execute(
                    "INSERT INTO history_reports "
                    "(generated_at, payload) VALUES (?, ?)",
                    (report.generated_at, payload),
                )

    def load_latest_report(self) -> dict | None:
        """Load the most recently inserted report, or None if there is none."""
        with _connect(self._db_path) as conn:
            row = conn.execute(
                "SELECT payload FROM history_reports "
                "ORDER BY id DESC LIMIT 1",
            ).fetchone()
        if not row:
            return None
        return json.loads(row["payload"])

    def _row_to_dict(self, r: sqlite3.Row) -> dict:
        """Convert a session row to a dict, decoding the JSON list columns."""
        return {
            "session_id": r["session_id"],
            "provider": r["provider"],
            "project": r["project"],
            "started_at": r["started_at"],
            "ended_at": r["ended_at"],
            "prompt_count": r["prompt_count"],
            "tool_use_count": r["tool_use_count"],
            "models_used": json.loads(r["models_used"]),
            "git_branch": r["git_branch"],
            "topics": json.loads(r["topics"]),
            "summary": r["summary"],
        }


================================================
FILE: maggy/maggy/improve/__init__.py
================================================
"""Self-improvement — signal collection and analysis."""


================================================
FILE: maggy/maggy/improve/analyzer.py
================================================
"""Analyze collected signals and produce recommendations."""

from __future__ import annotations

from .models import Recommendation, SignalBundle

# Thresholds for the analyzers in this module.
LOW_REWARD = 0.4  # not referenced by the functions in this module
HIGH_FAILURE_RATE = 0.2  # analyze_failures reports at or above this rate
LOW_USAGE_RATE = 0.05  # analyze_usage flags providers below this share
LOW_HEALTH = 0.5  # analyze_memory warns below this health score
HIGH_UTILIZATION = 0.9  # analyze_cost reports at or above this utilization


def analyze_routing(signals: SignalBundle) -> list[Recommendation]:
    """Emit one warning per entry in ``signals.routing["underperformers"]``.

    Missing ``model`` / ``task_type`` keys are shown as ``?`` and a missing
    ``avg_reward`` as 0. The entry itself is attached as ``data``.
    """
    def _warning(entry) -> Recommendation:
        model = entry.get('model', '?')
        task_type = entry.get('task_type', '?')
        avg_reward = entry.get('avg_reward', 0)
        return Recommendation(
            category="routing",
            severity="warning",
            message=(
                f"Model {model} underperforms on "
                f"{task_type} "
                f"(avg reward {avg_reward:.2f})."
            ),
            suggestion="Consider routing to a different model.",
            data=entry,
        )

    return [
        _warning(entry)
        for entry in signals.routing.get("underperformers", [])
    ]


def analyze_failures(signals: SignalBundle) -> list[Recommendation]:
    """Report the execution failure rate once it reaches HIGH_FAILURE_RATE.

    Returns an empty list while ``signals.events["failure_rate"]`` (default 0)
    is below the threshold, otherwise a single "action" recommendation.
    """
    rate = signals.events.get("failure_rate", 0)
    below_threshold = rate < HIGH_FAILURE_RATE
    if below_threshold:
        return []
    alert = Recommendation(
        category="reliability",
        severity="action",
        message=f"Execution failure rate is {rate:.0%}.",
        suggestion="Check tool configuration and logs.",
        data=signals.events,
    )
    return [alert]


def analyze_usage(signals: SignalBundle) -> list[Recommendation]:
    """Flag providers whose share of sessions is below LOW_USAGE_RATE.

    Shares are computed from ``signals.history["by_provider"]`` counts divided
    by ``signals.history["sessions"]``; with zero sessions nothing is flagged.
    """
    by_provider = signals.history.get("by_provider", {})
    total = signals.history.get("sessions", 0)
    if total == 0:
        return []

    shares = (
        (provider, count / total)
        for provider, count in by_provider.items()
    )
    return [
        Recommendation(
            category="usage",
            severity="info",
            message=(
                f"{provider} used in only "
                f"{ratio:.0%} of sessions."
            ),
            suggestion="Consider removing or promoting it.",
            data={"provider": provider, "ratio": ratio},
        )
        for provider, ratio in shares
        if ratio < LOW_USAGE_RATE
    ]


def analyze_gaps(signals: SignalBundle) -> list[Recommendation]:
    """Turn every entry of ``signals.forge["gaps"]`` into an "action" item.

    A missing ``name`` is shown as ``?`` and a missing ``count`` as 0; the gap
    itself is attached as ``data``.
    """
    def _action(gap) -> Recommendation:
        name = gap.get('name', '?')
        times = gap.get('count', 0)
        return Recommendation(
            category="capability",
            severity="action",
            message=(
                f"Capability '{name}' "
                f"requested {times} times."
            ),
            suggestion="Consider building an MCP server.",
            data=gap,
        )

    return [_action(gap) for gap in signals.forge.get("gaps", [])]


def analyze_memory(signals: SignalBundle) -> list[Recommendation]:
    """Warn when the engram health score is below LOW_HEALTH.

    ``signals.engram["health_score"]`` defaults to 1.0, so an empty engram
    signal never produces a warning.
    """
    score = signals.engram.get("health_score", 1.0)
    healthy = score >= LOW_HEALTH
    if healthy:
        return []
    warning = Recommendation(
        category="memory",
        severity="warning",
        message=f"Memory health is {score:.2f}.",
        suggestion="Run engram cleanup or review superseded records.",
        data=signals.engram,
    )
    return [warning]


def analyze_cost(signals: SignalBundle) -> list[Recommendation]:
    """Report budget utilization once it reaches HIGH_UTILIZATION.

    Returns an empty list while ``signals.budget["utilization"]`` (default 0)
    is below the threshold, otherwise a single "action" recommendation.
    """
    util = signals.budget.get("utilization", 0)
    within_budget = util < HIGH_UTILIZATION
    if within_budget:
        return []
    alert = Recommendation(
        category="cost",
        severity="action",
        message=f"Budget utilization at {util:.0%}.",
        suggestion="Increase daily_limit_usd or optimize routing.",
        data=signals.budget,
    )
    return [alert]


def analyze_all(signals: SignalBundle) -> list[Recommendation]:
    """Run every analyzer on ``signals`` and concatenate their findings.

    Analyzers run in a fixed order: routing, failures, usage, gaps, memory,
    cost; the result keeps that order.
    """
    analyzers = (
        analyze_routing,
        analyze_failures,
        analyze_usage,
        analyze_gaps,
        analyze_memory,
        analyze_cost,
    )
    return [
        rec
        for analyzer in analyzers
        for rec in analyzer(signals)
    ]


================================================
FILE: maggy/maggy/improve/models.py
================================================
"""Data models for self-improvement analysis."""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class Recommendation:
    """One finding: what was observed, how severe it is and what to do."""

    category: str   # routing | reliability | usage | capability | memory | cost
    severity: str   # info | warning | action
    message: str  # human-readable description of the finding
    suggestion: str  # proposed follow-up
    data: dict = field(default_factory=dict)  # supporting payload for the finding


@dataclass
class SignalBundle:
    """Raw signals grouped by source; every group defaults to an empty dict."""

    routing: dict = field(default_factory=dict)
    events: dict = field(default_factory=dict)
    history: dict = field(default_factory=dict)
    forge: dict = field(default_factory=dict)
    engram: dict = field(default_factory=dict)
    budget: dict = field(default_factory=dict)
    collected_at: str = ""  # timestamp string; empty when not set


@dataclass
class ImprovementReport:
    """Result of one analysis cycle."""

    generated_at: str  # timestamp string of report creation
    total_signals: int  # number of signals counted for this report
    recommendations: list[Recommendation] = field(default_factory=list)
    health_summary: dict = field(default_factory=dict)  # per-area health values
    top_actions: list[str] = field(default_factory=list)  # highlighted messages


================================================
FILE: maggy/maggy/improve/service.py
================================================
"""Introspector — orchestrates signal collection and analysis."""

from __future__ import annotations

import logging
from datetime import datetime, timezone

from .analyzer import analyze_all
from .models import ImprovementReport, SignalBundle
from .signals import collect_all

logger = logging.getLogger(__name__)


class Introspector:
    """Collect signals, analyze, persist recommendations."""

    def __init__(self, app_state) -> None:
        """Keep a reference to the application state used for collection."""
        self._state = app_state
        self._last_report: ImprovementReport | None = None

    def analyze(self) -> ImprovementReport:
        """Run a full cycle: collect, analyze, build, persist, cache."""
        signals = collect_all(self._state)
        recs = analyze_all(signals)
        report = self._build_report(signals, recs)
        self._persist(report)
        self._last_report = report
        return report

    def get_report(self) -> ImprovementReport | None:
        """Return the report of the last ``analyze()`` call, if any."""
        return self._last_report

    def _build_report(self, signals, recs) -> ImprovementReport:
        """Assemble the report from collected signals and recommendations."""
        # total_signals counts the signal groups that are non-empty.
        total = sum(
            1 for v in (
                signals.routing, signals.events,
                signals.history, signals.forge,
                signals.engram, signals.budget,
            )
            if v
        )
        # Only the first three "action" messages are highlighted.
        actions = [
            r.message for r in recs if r.severity == "action"
        ][:3]
        health = self._health_summary(signals)
        return ImprovementReport(
            generated_at=datetime.now(timezone.utc).isoformat(),
            total_signals=total,
            recommendations=recs,
            health_summary=health,
            top_actions=actions,
        )

    @staticmethod
    def _unit_score(value: float) -> float:
        """Clamp ``value`` into [0, 1] and round it to two decimals."""
        return round(min(1.0, max(0.0, value)), 2)

    def _health_summary(self, s: SignalBundle) -> dict:
        """Derive per-area health values from the non-empty signal groups.

        ``reliability`` and ``cost`` are ``1 - ratio`` clamped to [0, 1], so a
        utilization above 100% reads as 0.0 instead of a negative score.
        """
        summary: dict = {}
        if s.routing:
            bad = len(s.routing.get("underperformers", []))
            summary["routing"] = 0.5 if bad else 1.0
        if s.engram:
            summary["memory"] = s.engram.get("health_score", 1.0)
        if s.events:
            rate = s.events.get("failure_rate", 0)
            summary["reliability"] = self._unit_score(1.0 - rate)
        if s.budget:
            util = s.budget.get("utilization", 0)
            summary["cost"] = self._unit_score(1.0 - util)
        return summary

    def _persist(self, report: ImprovementReport) -> None:
        """Write report as engram + emit mutation events.

        Each sink is optional: it is used only when the application state has
        a truthy ``engram`` / ``events`` attribute.
        """
        engram = getattr(self._state, "engram", None)
        if engram:
            self._write_engram(engram, report)
        events = getattr(self._state, "events", None)
        if events:
            self._emit_mutations(events, report)

    def _write_engram(self, engram, report) -> None:
        """Store a one-line summary record; failures are logged, not raised."""
        from maggy.engram.record import EngramRecord
        import uuid
        try:
            record = EngramRecord(
                engram_id=uuid.uuid4().hex[:12],
                namespace="self-improvement",
                memory_type="fact",
                content=f"Report: {len(report.recommendations)} recs",
                tags=["auto-improve"],
            )
            engram.write(record)
        except Exception as exc:
            logger.warning("Failed to write engram: %s", exc)

    def _emit_mutations(self, events, report) -> None:
        """Emit one advisory mutation event per "action" recommendation.

        A failure for one recommendation is logged and does not stop the rest.
        """
        from maggy.event_spine.events import MutationEvent
        from maggy.event_spine.header import EventHeader
        for rec in report.recommendations:
            if rec.severity != "action":
                continue
            try:
                evt = MutationEvent(
                    header=EventHeader(event_type="mutation"),
                    control_level="advisory",
                    target=rec.category,
                    old_value="",
                    new_value=rec.suggestion,
                    reason=rec.message,
                )
                events.emit(evt)
            except Exception as exc:
                logger.warning("Failed to emit: %s", exc)


================================================
FILE: maggy/maggy/improve/signals.py
================================================
"""Signal collectors — pull data from existing services."""

from __future__ import annotations

from datetime import datetime, timezone

from .models import SignalBundle

# Thresholds for the collectors in this module.
MIN_SAMPLES = 5  # collect_routing ignores heatmap entries with fewer samples
LOW_REWARD = 0.4  # collect_routing flags entries whose avg_reward is below this
HIGH_FAILURE_RATE = 0.2  # not referenced by the functions in this module
LOW_USAGE_RATE = 0.05  # not referenced by the functions in this module


def collect_routing(routing) -> dict:
    """Fetch the reward heatmap and single out underperforming entries.

    An entry underperforms when it has at least MIN_SAMPLES samples and an
    average reward below LOW_REWARD.

    Returns:
        ``{"heatmap": <all entries>, "underperformers": <flagged entries>}``.
    """
    heatmap = routing.get_heatmap()
    flagged = []
    for cell in heatmap:
        if cell.get("count", 0) >= MIN_SAMPLES:
            if cell.get("avg_reward", 1.0) < LOW_REWARD:
                flagged.append(cell)
    return {"heatmap": heatmap, "underperformers": flagged}


def collect_events(events) -> dict:
    """Summarize the latest outcome events (at most 200) as a failure rate.

    An outcome counts as a failure when its ``success`` value is falsy; a
    missing ``success`` key counts as success.

    Returns:
        ``total``, ``failures`` and ``failure_rate`` (rounded to 3 decimals,
        0.0 when there are no outcomes).
    """
    outcomes = events.query(event_type="outcome", limit=200)
    total = len(outcomes)
    failures = 0
    for outcome in outcomes:
        if not outcome.get("success", True):
            failures += 1
    if total:
        rate = failures / total
    else:
        rate = 0.0
    return {
        "total": total,
        "failures": failures,
        "failure_rate": round(rate, 3),
    }


def collect_history(history) -> dict:
    """Extract session statistics from the history service's cached report.

    Returns:
        ``sessions``, ``patterns`` and ``by_provider`` taken from the report
        (with 0 / [] / {} for missing keys). When there is no report, or it
        is empty, only ``sessions`` (0) and ``patterns`` ([]) are returned.
    """
    report = history.get_report()
    if report:
        return {
            "sessions": report.get("total_sessions", 0),
            "patterns": report.get("patterns", []),
            "by_provider": report.get("by_provider", {}),
        }
    return {"sessions": 0, "patterns": []}


def collect_forge(forge) -> dict:
    """Fetch capability gaps from ``forge`` together with their count.

    Returns:
        ``{"gaps": <result of forge.get_gaps()>, "count": <its length>}``.
    """
    found = forge.get_gaps()
    result = {"gaps": found}
    result["count"] = len(found)
    return result


def collect_engram(engram) -> dict:
    """Run the engram diagnostics and report the resulting counters.

    Returns:
        ``health_score``, ``total``, ``active`` and ``superseded`` copied from
        the profile returned by ``diagnose(engram)``.
    """
    from maggy.engram.diagnostics import diagnose

    profile = diagnose(engram)
    summary = {}
    summary["health_score"] = profile.health_score
    summary["total"] = profile.total_memories
    summary["active"] = profile.active_count
    summary["superseded"] = profile.superseded_count
    return summary


def collect_budget(budget) -> dict:
    """Return whatever ``budget.budget_status()`` reports, unchanged."""
    status = budget.budget_status()
    return status


def collect_all(app_state) -> SignalBundle:
    """Collect signals from all available services.

    A service is used only when ``app_state`` has a truthy attribute of that
    name; a missing attribute is treated the same as an unset one, so a
    partially populated state object does not raise ``AttributeError``.

    Args:
        app_state: Object exposing any of ``routing``, ``events``,
            ``history``, ``forge``, ``engram`` and ``budget``.

    Returns:
        A bundle stamped with the current UTC time; groups whose service is
        unavailable stay empty.
    """
    bundle = SignalBundle(
        collected_at=datetime.now(timezone.utc).isoformat(),
    )
    # (attribute on app_state and on the bundle, collector), in run order.
    collectors = (
        ("routing", collect_routing),
        ("events", collect_events),
        ("history", collect_history),
        ("forge", collect_forge),
        ("engram", collect_engram),
        ("budget", collect_budget),
    )
    for name, collect in collectors:
        service = getattr(app_state, name, None)
        if service:
            setattr(bundle, name, collect(service))
    return bundle


================================================
FILE: maggy/maggy/lexon/__init__.py
================================================
"""Lexon — intent parsing and tool disambiguation."""


================================================
FILE: maggy/maggy/lexon/disambiguate.py
================================================
"""Confidence-gated disambiguation for ambiguous intents."""

from __future__ import annotations

from dataclasses import dataclass

# Lowest confidence at which disambiguate() still resolves ("self_clarify").
SELF_CLARIFY_THRESHOLD = 0.5
# Lowest confidence at which disambiguate() asks the user ("user_clarify").
USER_CLARIFY_THRESHOLD = 0.3


@dataclass
class DisambiguationResult:
    """Outcome of disambiguation attempt."""

    resolved: bool  # True when a tool was chosen
    tool: str = ""  # chosen tool; empty when unresolved
    mode: str = ""  # self_clarify | user_clarify | none
    suggestions: list[str] | None = None  # candidate tools offered, if any


def disambiguate(
    confidence: float,
    candidates: list[str],
) -> DisambiguationResult:
    """Pick a disambiguation strategy from the confidence tier.

    With at least one candidate:
      >= 0.7      resolved to the first candidate, mode "none"
      >= 0.5      resolved to the first candidate, mode "self_clarify",
                  up to 3 suggestions
      >= 0.3      unresolved, mode "user_clarify", up to 5 suggestions
    Otherwise (lower confidence or no candidates): unresolved, mode "none",
    with up to 3 suggestions when candidates exist.
    """
    # (confidence floor, resolved?, mode, number of suggestions or None)
    tiers = (
        (0.7, True, "none", None),
        (SELF_CLARIFY_THRESHOLD, True, "self_clarify", 3),
        (USER_CLARIFY_THRESHOLD, False, "user_clarify", 5),
    )
    for floor, resolved, mode, keep in tiers:
        if confidence >= floor and candidates:
            return DisambiguationResult(
                resolved=resolved,
                tool=candidates[0] if resolved else "",
                mode=mode,
                suggestions=None if keep is None else candidates[:keep],
            )

    fallback = candidates[:3] if candidates else None
    return DisambiguationResult(
        resolved=False, mode="none", suggestions=fallback,
    )


================================================
FILE: maggy/maggy/lexon/personalization.py
================================================
"""Implicit learning — tracks 5 user behavior signals."""

from __future__ import annotations

from collections import Counter
from dataclasses import dataclass, field


@dataclass
class UserSignals:
    """Container for the five implicit personalization signals.

    Every field starts out as its own empty container.
    """

    # tool name -> number of recorded uses
    tool_frequency: Counter = field(default_factory=Counter)
    # (wrong, correct) pairs in the order they were recorded
    correction_pairs: list[tuple[str, str]] = field(default_factory=list)
    # lower-cased phrase -> preferred tool
    preferred_aliases: dict[str, str] = field(default_factory=dict)
    # tool name -> number of recorded rejections
    rejection_count: Counter = field(default_factory=Counter)
    confirmation_rate: dict[str, float] = field(default_factory=dict)


class PersonalizationEngine:
    """Accumulates implicit user-behavior signals for intent parsing."""

    def __init__(self):
        self._signals = UserSignals()

    def record_use(self, tool: str) -> None:
        """Signal 1: count one more use of ``tool``."""
        usage = self._signals.tool_frequency
        usage[tool] = usage[tool] + 1

    def record_correction(
        self, wrong: str, correct: str,
    ) -> None:
        """Signal 2: remember that ``wrong`` was corrected to ``correct``."""
        pair = (wrong, correct)
        self._signals.correction_pairs.append(pair)

    def record_alias(
        self, phrase: str, tool: str,
    ) -> None:
        """Signal 3: map ``phrase`` (case-insensitively) to ``tool``."""
        key = phrase.lower()
        self._signals.preferred_aliases[key] = tool

    def record_rejection(self, tool: str) -> None:
        """Signal 4: count one more rejection of ``tool``."""
        rejected = self._signals.rejection_count
        rejected[tool] = rejected[tool] + 1

    def get_preferred(self, phrase: str) -> str | None:
        """Return the tool recorded for ``phrase``, or None if unknown."""
        key = phrase.lower()
        return self._signals.preferred_aliases.get(key)

    def top_tools(self, n: int = 5) -> list[str]:
        """Return the names of the ``n`` most used tools, most used first."""
        ranked = self._signals.tool_frequency.most_common(n)
        return [name for name, _uses in ranked]

    @property
    def signals(self) -> UserSignals:
        """The underlying signal container (not a copy)."""
        return self._signals


================================================
FILE: maggy/maggy/lexon/record.py
================================================
"""LexonRecord — parsed intent with confidence."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone


@dataclass
class LexonRecord:
    """Result of routing one user phrase to a tool."""

    phrase: str
    resolved_tool: str = ""
    confidence: float = 0.0
    candidates: list[str] = field(default_factory=list)
    disambiguation_mode: str = ""  # "" | self_clarify | user_clarify
    # Creation time as an ISO-8601 UTC string, filled in per instance.
    created_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    @property
    def is_ambiguous(self) -> bool:
        """True while the confidence is below 0.7."""
        threshold = 0.7
        return self.confidence < threshold

    @property
    def needs_user_input(self) -> bool:
        """True when the record is in ``user_clarify`` mode."""
        mode = self.disambiguation_mode
        return mode == "user_clarify"


================================================
FILE: maggy/maggy/lexon/router.py
================================================
"""Two-tier Lexon router — fast keyword + fallback LLM."""

from __future__ import annotations

from .disambiguate import disambiguate
from .personalization import PersonalizationEngine
from .record import LexonRecord
from .terminology import TerminologyMap

# Keyword matches with a confidence below this are handed to the LLM stub.
CONFIDENCE_THRESHOLD = 0.82
# Keyword matches whose top-2 gap is below this are handed to the LLM stub.
TOP2_GAP = 0.15
# Canonical term -> candidate tool names; used when the router config has no
# usable "tool_manifest" entry.
DEFAULT_TOOL_MANIFEST = {
    "deploy": ["vercel_deploy", "docker_push"],
    "test": ["pytest", "vitest", "jest"],
    "fix": ["code_edit", "patch"],
    "create": ["file_create", "scaffold"],
    "delete": ["file_delete", "cleanup"],
    "update": ["code_edit", "config_update"],
    "search": ["grep", "glob", "find"],
    "review": ["code_review", "pr_review"],
}


class LexonRouter:
    """Maps user phrases to tools in two tiers.

    Tier 1: keyword lookup through the terminology map and tool manifest.
    Tier 2: LLM-based intent classification (currently a stub).
    A phrase the user has already confirmed bypasses both tiers.
    """

    def __init__(self, config: dict[str, object] | None = None):
        self._config = config or {}
        self._terms = TerminologyMap()
        self._personal = PersonalizationEngine()
        self._tool_map = self._load_tool_manifest()

    def route(self, phrase: str) -> LexonRecord:
        """Resolve ``phrase``: learned preference, then tier 1, then tier 2."""
        learned = self._personal.get_preferred(phrase)
        if learned:
            return LexonRecord(
                phrase=phrase,
                resolved_tool=learned,
                confidence=0.95,
                candidates=[learned],
            )
        keyword_hit = self._route_tier1(phrase)
        if keyword_hit:
            return keyword_hit
        return self._llm_classify(phrase)

    def learn(self, phrase: str, tool: str) -> None:
        """Record that ``tool`` was confirmed for ``phrase``."""
        self._personal.record_use(tool)
        self._personal.record_alias(phrase, tool)

    @property
    def terminology(self) -> TerminologyMap:
        """The terminology map used for keyword lookup."""
        return self._terms

    @property
    def personalization(self) -> PersonalizationEngine:
        """The engine holding learned user preferences."""
        return self._personal

    def _load_tool_manifest(self) -> dict[str, list[str]]:
        """Build the term -> tools map from config, else from the defaults.

        Only list-valued entries of a dict ``tool_manifest`` are kept (keys
        and items stringified); if nothing usable remains, a copy of
        DEFAULT_TOOL_MANIFEST is returned.
        """
        manifest = self._config.get("tool_manifest", DEFAULT_TOOL_MANIFEST)
        if isinstance(manifest, dict):
            usable = {
                str(term): [str(tool) for tool in tools]
                for term, tools in manifest.items()
                if isinstance(tools, list)
            }
            if usable:
                return usable
        return dict(DEFAULT_TOOL_MANIFEST)

    def _llm_classify(self, phrase: str) -> LexonRecord:
        """Tier 2 stub: an unresolved record with fixed confidence 0.55."""
        return LexonRecord(
            phrase=phrase,
            confidence=0.55,
            disambiguation_mode="llm",
        )

    def _route_tier1(self, phrase: str) -> LexonRecord | None:
        """Route on the first word whose canonical term is in the manifest.

        Returns None when no word of the phrase matches.
        """
        for token in phrase.lower().split():
            term = self._terms.resolve(token)
            if term and term in self._tool_map:
                tools = self._tool_map[term]
                return self._resolve_manifest_match(phrase, tools)
        return None

    def _resolve_manifest_match(
        self,
        phrase: str,
        candidates: list[str],
    ) -> LexonRecord:
        """Turn a manifest hit into a record, deferring weak hits to tier 2."""
        confidence = self._keyword_confidence(candidates)
        too_weak = (
            confidence < CONFIDENCE_THRESHOLD
            or self._top2_gap(candidates) < TOP2_GAP
        )
        if too_weak:
            return self._llm_classify(phrase)
        outcome = disambiguate(confidence, candidates)
        chosen = outcome.tool if outcome.resolved else ""
        return LexonRecord(
            phrase=phrase,
            resolved_tool=chosen,
            confidence=confidence,
            candidates=candidates,
            disambiguation_mode=outcome.mode,
        )

    def _keyword_confidence(self, candidates: list[str]) -> float:
        """Fixed confidence by candidate count: 1 -> 0.9, 2 -> 0.84, else 0.8."""
        by_count = {1: 0.9, 2: 0.84}
        return by_count.get(len(candidates), 0.8)

    def _top2_gap(self, candidates: list[str]) -> float:
        """Fixed top-2 gap by candidate count: <=1 -> 1.0, 2 -> 0.18, else 0.1."""
        count = len(candidates)
        if count <= 1:
            return 1.0
        return 0.18 if count == 2 else 0.1


================================================
FILE: maggy/maggy/lexon/terminology.py
================================================
"""3-level terminology map for intent normalization.

Level 1: Canonical terms (e.g., "deploy")
Level 2: Synonyms (e.g., "ship", "push", "release")
Level 3: Project-specific aliases (learned over time)
"""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class TermEntry:
    """One canonical term together with the words that map onto it."""

    # Level 1: the canonical term itself (e.g. "deploy").
    canonical: str
    # Level 2: synonyms that resolve to ``canonical``.
    synonyms: list[str] = field(default_factory=list)
    # Level 3: project-specific aliases; TerminologyMap.add_alias appends here.
    aliases: list[str] = field(default_factory=list)


# Built-in vocabulary: each entry is (canonical term, synonyms).
# NOTE(review): "check" is listed under both "test" and "review".
# TerminologyMap._build_index applies entries in list order, so the later
# "review" entry wins and "check" never resolves to "test" — confirm intended.
DEFAULT_TERMS: list[TermEntry] = [
    TermEntry("deploy", ["ship", "push", "release", "publish"]),
    TermEntry("test", ["check", "verify", "validate", "qa"]),
    TermEntry("fix", ["repair", "patch", "resolve", "debug"]),
    TermEntry("create", ["add", "build", "make", "generate"]),
    TermEntry("delete", ["remove", "drop", "destroy", "clean"]),
    TermEntry("update", ["modify", "change", "edit", "revise"]),
    TermEntry("search", ["find", "lookup", "query", "locate"]),
    TermEntry("review", ["inspect", "audit", "examine", "check"]),
]


class TerminologyMap:
    """Three-level terminology resolution.

    Lookups are case-insensitive: every canonical term, synonym and alias is
    indexed by its lowercase form, and ``resolve`` lowercases its input.
    """

    def __init__(
        self, terms: list[TermEntry] | None = None,
    ):
        """Build the map from ``terms`` or from a copy of ``DEFAULT_TERMS``.

        A caller-supplied list is stored as-is, so ``add_alias`` mutates the
        caller's entries. Only the module-level defaults are copied.
        """
        if terms is not None:
            self._terms = terms
        else:
            # Copy each default entry (and its lists) so add_alias never
            # mutates the module-level defaults.
            self._terms = [
                TermEntry(
                    t.canonical,
                    list(t.synonyms),
                    list(t.aliases),
                )
                for t in DEFAULT_TERMS
            ]
        self._index = self._build_index()

    def _build_index(self) -> dict[str, str]:
        """Map every known word, lowercased, to its canonical term.

        Entries are applied in list order, so when the same word appears
        under several terms the last one wins.
        """
        idx: dict[str, str] = {}
        for t in self._terms:
            # Keys are lowercased to match resolve(), which lowercases input.
            for word in (t.canonical, *t.synonyms, *t.aliases):
                idx[word.lower()] = t.canonical
        return idx

    def resolve(self, word: str) -> str | None:
        """Resolve a word to its canonical form, or ``None`` if unknown."""
        return self._index.get(word.lower())

    def add_alias(self, canonical: str, alias: str) -> bool:
        """Add a project-specific alias (Level 3).

        The alias is stored lowercased. Re-adding an existing alias is a
        no-op on the entry's alias list. Returns ``False`` when ``canonical``
        is not a known canonical term, ``True`` otherwise.
        """
        key = alias.lower()
        for t in self._terms:
            if t.canonical == canonical:
                if key not in t.aliases:
                    t.aliases.append(key)
                self._index[key] = canonical
                return True
        return False

    def list_terms(self) -> list[TermEntry]:
        """Return a shallow copy of the term list."""
        return list(self._terms)


================================================
FILE: maggy/maggy/main.py
================================================
"""Maggy FastAPI app entrypoint."""

from __future__ import annotations

import logging
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from starlette.middleware.base import (
    BaseHTTPMiddleware,
    RequestResponseEndpoint,
)
from starlette.requests import Request
from starlette.responses import Response

from maggy import config as config_mod
from maggy import providers
from maggy.api.routes import router as api_router
from maggy.api.routes_budget import router as budget_router
from maggy.api.routes_cikg import router as cikg_router
from maggy.api.routes_deploy import router as deploy_router
from maggy.api.routes_engram import router as engram_router
from maggy.api.routes_events import router as events_router
from maggy.api.routes_forge import router as forge_router
from maggy.api.routes_heartbeat import router as heartbeat_router
from maggy.api.routes_history import router as history_router
from maggy.api.routes_improve import router as improve_router
from maggy.api.routes_lexon import router as lexon_router
from maggy.api.routes_mesh import router as mesh_router
from maggy.api.routes_mesh_admin import router as mesh_admin_router
from maggy.api.routes_planning import router as planning_router
from maggy.api.routes_process import router as process_router
from maggy.api.routes_routing import router as routing_router
from maggy.api.routes_chat import router as chat_router
from maggy.api.routes_escalation import router as escalation_router
from maggy.api.routes_observability import router as observability_router
from maggy.api.routes_projects import router as projects_router
from maggy.api.routes_setup import router as setup_router
from maggy.api.routes_users import router as users_router
from maggy.mesh.ws_server import router as ws_mesh_router
from maggy.budget import BudgetManager
from maggy.event_spine.emitter import EventEmitter
from maggy.event_spine.store import EventStore
from maggy.history.service import HistoryService
from maggy.process.service import ProcessService
from maggy.routing import RoutingService
from maggy.services.competitor import CompetitorService
from maggy.services.executor import ExecutorService
from maggy.services.inbox import InboxService

# Configure the root logger at import time.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger("maggy")

# app.state attributes that create_app() resets to None when Maggy is not configured.
_TIER1_ATTRS = ("budget", "routing", "events", "cikg", "planning", "deploy", "forge", "engram", "lexon", "mesh", "activity", "registry", "escalator", "observability")
# app.state attributes that _set_mode() resets to None when no provider credentials are found.
_TIER2_ATTRS = ("provider", "inbox", "competitors", "executor", "process")


def _init_tier1(app: FastAPI, cfg) -> None:
    """Tier 1: local-only services.

    Instantiates each Tier 1 service and attaches it to ``app.state``.
    Most service modules are imported inside the function, right before
    they are used.

    Args:
        app: Application whose ``state`` receives the services.
        cfg: Loaded Maggy configuration.
    """
    # The *.db paths below are placed in the parent directory of cfg.storage.path.
    db_dir = Path(cfg.storage.path).expanduser().parent
    app.state.budget = BudgetManager(cfg)
    app.state.routing = RoutingService(cfg)
    app.state.events = EventEmitter(EventStore(db_dir / "events.db"))
    from maggy.cikg.graph import KnowledgeGraphService
    app.state.cikg = KnowledgeGraphService(db_dir / "cikg.db")
    from maggy.planning import PlanningService
    app.state.planning = PlanningService(cfg)
    from maggy.deploy import DeployService
    app.state.deploy = DeployService()
    from maggy.forge.connector import ForgeConnector
    app.state.forge = ForgeConnector()
    from maggy.engram.store import EngramStore
    app.state.engram = EngramStore(db_dir / "engram.db")
    from maggy.engram.seed import seed_if_empty
    seed_if_empty(app.state.engram)
    from maggy.lexon.router import LexonRouter
    app.state.lexon = LexonRouter()
    # Sets app.state.mesh to a MeshManager, or to None when mesh is not usable.
    _init_mesh(app, cfg)
    from maggy.services.activity import ActivityService
    app.state.activity = ActivityService()
    app.state.history = HistoryService(db_path=db_dir / "history.db")
    from maggy.improve.service import Introspector
    # The introspector is handed app.state itself, not a copy.
    app.state.introspector = Introspector(app.state)
    from maggy.services.chat import ChatManager
    app.state.chat = ChatManager(cfg)
    from maggy.registry import ProjectRegistry
    app.state.registry = ProjectRegistry(cfg)
    from maggy.escalation.protocol import Escalator
    app.state.escalator = Escalator(db_dir / "escalations.db")
    from maggy.observability.collector import ObservabilityCollector
    app.state.observability = ObservabilityCollector(db_dir / "observability.db")


def _init_mesh(app: FastAPI, cfg) -> None:
    """Wire MeshManager if enabled in config."""
    if not cfg.mesh.enabled or not cfg.mesh.org_key_secret:
        if cfg.mesh.enabled and not cfg.mesh.org_key_secret:
            logger.warning("Mesh disabled: MAGGY_MESH_SECRET not set")
        app.state.mesh = None
        return
    from maggy.mesh.manager import MeshManager
    from maggy.mesh.org_scanner import effective_orgs
    from maggy.mesh.store import MeshStore
    db_dir = Path(cfg.storage.path).expanduser().parent
    store = MeshStore(db_dir / "mesh.db")
    mgr = MeshManager(cfg.mesh, store)
    for org in effective_orgs(cfg.mesh.orgs, [], cfg.mesh.exclude_orgs):
        mgr.add_network(org)
    app.state.mesh = mgr


def _set_mode(app: FastAPI, cfg) -> None:
    """Initialize or skip Tier 2 based on credentials."""
    if config_mod._has_provider_credentials(cfg):
        app.state.provider = providers.build(cfg)
        app.state.inbox = InboxService(cfg, app.state.provider)
        app.state.competitors = CompetitorService(cfg)
        app.state.executor = ExecutorService(cfg, app.state.provider)
        app.state.process = ProcessService(cfg)
        app.state.mode = "full"
    else:
        for attr in _TIER2_ATTRS:
            setattr(app.state, attr, None)
        app.state.mode = "local"


async def _start_heartbeat(app: FastAPI) -> None:
    """Register and start the heartbeat scheduler."""
    cfg = app.state.cfg
    if not cfg.heartbeat.enabled or not app.state.configured:
        app.state.heartbeat = None
        return
    from maggy.heartbeat.scheduler import HeartbeatScheduler
    from maggy.heartbeat.jobs import refresh_history, expire_engrams, self_improve, mesh_heartbeat, collect_signals
    from functools import partial
    sched = HeartbeatScheduler()
    sched.register("refresh_history", partial(refresh_history, app), cfg.heartbeat.history_interval)
    sched.register("expire_engrams", partial(expire_engrams, app), cfg.heartbeat.engram_interval)
    sched.register("self_improve", partial(self_improve, app), cfg.heartbeat.improve_interval)
    sched.register("collect_signals", partial(collect_signals, app), cfg.heartbeat.improve_interval)
    if cfg.mesh.enabled:
        sched.register("mesh_heartbeat", partial(mesh_heartbeat, app), cfg.heartbeat.mesh_interval)
    await sched.start()
    app.state.heartbeat = sched
    logger.info("Heartbeat started — %d jobs", len(sched._jobs))


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup/shutdown lifecycle.

    Starts the heartbeat, runs the bootstrap, then yields while the app is
    serving. The heartbeat (if one was started) is stopped on the way out,
    including when bootstrap raises or an exception is thrown in at the
    ``yield``.
    """
    await _start_heartbeat(app)
    try:
        await _bootstrap(app)
        yield
    finally:
        # getattr guards against a heartbeat attribute that was never set.
        heartbeat = getattr(app.state, "heartbeat", None)
        if heartbeat:
            await heartbeat.stop()


async def _bootstrap(app: FastAPI) -> None:
    """Seed services with data on first startup."""
    history = getattr(app.state, "history", None)
    if history:
        try:
            history.analyze()
        except Exception as e:
            logger.warning("Bootstrap history failed: %s", e)

    introspector = getattr(app.state, "introspector", None)
    if introspector:
        try:
            introspector.analyze()
        except Exception as e:
            logger.warning("Bootstrap improve failed: %s", e)

    cikg = getattr(app.state, "cikg", None)
    cfg = getattr(app.state, "cfg", None)
    if cikg and cfg:
        try:
            _seed_cikg(cikg, cfg)
        except Exception as e:
            logger.warning("Bootstrap CIKG failed: %s", e)


def _seed_cikg(cikg, cfg) -> None:
    """Build initial knowledge graph from configured codebases."""
    from datetime import datetime, timezone

    from maggy.cikg.models import Node

    now = datetime.now(timezone.utc).isoformat()
    for cb in cfg.codebases:
        path = Path(cb.path).expanduser()
        if not path.exists():
            continue
        cikg.add_node(Node(
            id=f"codebase:{cb.key}", node_type="codebase",
            name=cb.key, description=str(path),
            metadata={"path": str(path)}, created_at=now,
        ))
        _add_language_nodes(cikg, cb.key, path, now)


def _add_language_nodes(cikg, codebase_key, path, now) -> None:
    """Detect languages in a codebase and add nodes + edges."""
    from maggy.cikg.models import Edge, Node

    ext_map = {
        ".py": "python", ".ts": "typescript",
        ".tsx": "typescript", ".js": "javascript",
        ".jsx": "javascript", ".go": "go",
        ".rs": "rust", ".java": "java",
        ".rb": "ruby", ".swift": "swift",
        ".kt": "kotlin", ".cs": "csharp",
    }
    skip_dirs = {
        "node_modules", ".git", "__pycache__", ".venv",
        "venv", "dist", "build", ".next", "target",
    }
    found: set[str] = set()
    # Only scan 2 levels deep to avoid slow recursive scan
    for child in path.iterdir():
        if child.name in skip_dirs:
            continue
        if child.is_file() and child.suffix in ext_map:
            found.add(ext_map[child.suffix])
        elif child.is_dir():
            try:
                for f in child.iterdir():
                    if f.is_file() and f.suffix in ext_map:
                        found.add(ext_map[f.suffix])
            except PermissionError:
                pass
        if len(found) >= 10:
            break
    for lang in found:
        node_id = f"lang:{lang}"
        cikg.add_node(Node(
            id=node_id, node_type="technology",
            name=lang, description=f"{lang} programming language",
            metadata={}, created_at=now,
        ))
        cikg.add_edge(Edge(
            source_id=f"codebase:{codebase_key}",
            target_id=node_id,
            edge_type="uses_technology",
        ))


class _NoCacheStatic(BaseHTTPMiddleware):
    """Middleware that marks responses under /static as ``no-store``."""

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint,
    ) -> Response:
        """Run the downstream handler, then tag /static responses."""
        outcome = await call_next(request)
        served_path = request.url.path
        if served_path.startswith("/static"):
            outcome.headers["Cache-Control"] = "no-store"
        return outcome


# Every router that create_app() includes, in inclusion order.
_ROUTERS = (
    api_router, budget_router, chat_router, cikg_router,
    deploy_router, engram_router, escalation_router,
    events_router, forge_router, heartbeat_router,
    history_router, improve_router, lexon_router,
    mesh_router, mesh_admin_router, observability_router,
    planning_router, process_router, projects_router,
    routing_router, setup_router, users_router,
    ws_mesh_router,
)


def create_app() -> FastAPI:
    """Assemble and return the FastAPI application.

    Loads the configuration, refuses ``auth_mode="local"`` on a non-loopback
    host, wires Tier 1 services (or placeholders when unconfigured), decides
    the Tier 2 mode, includes all routers and, when a ``static`` directory
    sits next to this module, mounts it and serves ``index.html`` at ``/``.

    Raises:
        RuntimeError: ``dashboard.auth_mode`` is ``"local"`` while the
            dashboard host is not a loopback address.
    """
    cfg = config_mod.load()
    loopback_hosts = ("127.0.0.1", "localhost", "::1")
    if cfg.dashboard.auth_mode == "local" and cfg.dashboard.host not in loopback_hosts:
        raise RuntimeError(
            f"dashboard.auth_mode=\"local\" is only safe on loopback. "
            f"You configured host={cfg.dashboard.host!r} — set auth_mode=\"token\" and MAGGY_API_KEY, "
            f"or bind to 127.0.0.1."
        )

    app = FastAPI(title="Maggy", version="0.1.0", lifespan=lifespan)
    app.add_middleware(_NoCacheStatic)
    state = app.state
    state.cfg = cfg
    state.configured = config_mod.is_configured()

    if not state.configured:
        # Unconfigured: blank every Tier 1 slot, then provide the few
        # services that are still created without configuration.
        for name in _TIER1_ATTRS:
            setattr(state, name, None)
        from maggy.services.activity import ActivityService
        state.activity = ActivityService()
        state.history = HistoryService()
        state.introspector = None
        from maggy.services.chat import ChatManager
        state.chat = ChatManager(cfg)
    else:
        _init_tier1(app, cfg)

    _set_mode(app, cfg)
    logger.info("Maggy ready (%s) — codebases=%d", state.mode, len(cfg.codebases))

    for router in _ROUTERS:
        app.include_router(router)

    static_root = Path(__file__).parent / "static"
    if static_root.exists():
        app.mount("/static", StaticFiles(directory=str(static_root)), name="static")

        @app.get("/")
        async def index():
            return FileResponse(
                str(static_root / "index.html"),
                headers={"Cache-Control": "no-store"},
            )

    return app


def reconfigure(app: FastAPI) -> None:
    """Re-read the configuration and rebuild services on ``app.state``.

    Tier 1 services are rebuilt only when the refreshed configuration
    counts as configured; the Tier 2 mode is always re-evaluated.
    """
    fresh_cfg = config_mod.load(refresh=True)
    state = app.state
    state.cfg = fresh_cfg
    state.configured = config_mod.is_configured()
    if state.configured:
        _init_tier1(app, fresh_cfg)
    _set_mode(app, fresh_cfg)
    logger.info("Reconfigured — mode=%s", state.mode)


# Module-level ASGI application; main() hands it to uvicorn as "maggy.main:app".
app = create_app()


def _print_banner(host: str, port: int) -> None:
    """Print startup banner with usage instructions."""
    url = f"http://{host}:{port}"
    print("\n\033[1;38;5;208m  Maggy\033[0m")
    print(f"  Dashboard: \033[4m{url}\033[0m")
    print()
    print(
        "  \033[33mKeep this terminal open\033[0m"
        " — Maggy runs here."
    )
    print(
        "  Use other terminals for Claude Code"
        " sessions."
    )
    print(
        "  Maggy Chat auto-connects to all"
        " active sessions."
    )
    print(
        "\n  Press Ctrl+C to stop.\n"
    )


def main() -> None:
    """Console entry point: print the banner, then serve with uvicorn.

    Host and port come from the ``dashboard`` section of the loaded
    configuration; auto-reload is off.
    """
    import uvicorn

    dashboard = config_mod.load().dashboard
    _print_banner(dashboard.host, dashboard.port)
    server_options = {
        "host": dashboard.host,
        "port": dashboard.port,
        "reload": False,
    }
    uvicorn.run("maggy.main:app", **server_options)


if __name__ == "__main__":
    main()


================================================
FILE: maggy/maggy/mesh/__init__.py
================================================
"""Maggy Mesh — P2P memory sharing between instances."""


================================================
FILE: maggy/maggy/mesh/discovery.py
================================================
"""Peer discovery — registry with optional SQLite backing."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class PeerInfo:
    """Connection details for one known mesh peer."""

    peer_id: str
    name: str
    address: str
    port: int = 8080
    org: str = ""
    # Defaults to the creation time of the record (UTC, ISO-8601).
    last_seen: str = field(default_factory=_utc_now_iso)
    manual: bool = False


class PeerRegistry:
    """In-memory peer table, mirrored to a store when one is configured.

    Persistence is active only when both ``store`` and ``org`` are truthy;
    otherwise the registry is purely in-memory.
    """

    def __init__(self, store=None, org: str = ""):
        self._store = store
        self._org = org
        self._peers: dict[str, PeerInfo] = {}
        if store and org:
            self._load_from_store()

    def _load_from_store(self) -> None:
        """Fill the in-memory table from the store's rows for this org."""
        for record in self._store.list_peers(self._org):
            pid = record["peer_id"]
            self._peers[pid] = PeerInfo(
                peer_id=pid,
                name=record["name"],
                address=record["address"],
                port=record["port"],
                org=record.get("org", self._org),
                last_seen=record.get("last_seen", ""),
                manual=bool(record.get("manual", 0)),
            )

    def _persist(self, peer: PeerInfo) -> None:
        """Upsert ``peer`` into the store when persistence is active."""
        if self._store and self._org:
            self._store.upsert_peer(
                peer.peer_id, peer.name,
                peer.address, peer.port, self._org,
            )

    def register(self, peer: PeerInfo) -> None:
        """Add or replace ``peer``, persisting it first when possible."""
        self._persist(peer)
        self._peers[peer.peer_id] = peer

    def unregister(self, peer_id: str) -> bool:
        """Remove a peer; return whether it was present in memory."""
        if self._store and self._org:
            self._store.remove_peer(peer_id, self._org)
        return self._peers.pop(peer_id, None) is not None

    def get(self, peer_id: str) -> PeerInfo | None:
        """Return the peer with ``peer_id``, or ``None``."""
        return self._peers.get(peer_id)

    def list_peers(self) -> list[PeerInfo]:
        """Return all known peers as a new list."""
        return [*self._peers.values()]

    def update_seen(self, peer_id: str) -> None:
        """Stamp a known peer with the current UTC time and persist it."""
        peer = self._peers.get(peer_id)
        if not peer:
            return
        peer.last_seen = datetime.now(timezone.utc).isoformat()
        self._persist(peer)

    @property
    def count(self) -> int:
        """Number of peers currently held in memory."""
        return len(self._peers)


================================================
FILE: maggy/maggy/mesh/git_discovery.py
================================================
"""Git-based peer discovery via GitHub Contents API."""

from __future__ import annotations

import base64
import json
import logging
from dataclasses import dataclass

import httpx

logger = logging.getLogger(__name__)

# Base URL for all GitHub REST calls made by this module.
GITHUB_API = "https://api.github.com"
# Name of the per-org repository that holds peer announcements.
REPO_NAME = "maggy-mesh"
# Passed as the ``timeout`` of every httpx.AsyncClient created here.
TIMEOUT = 15


@dataclass
class Announcement:
    """Peer data for git-based discovery."""

    # Also used as the announcement file name: peers/<peer_id>.json.
    peer_id: str
    name: str
    address: str
    port: int = 8080
    # NOTE(review): announce() writes its own ``org`` argument into the
    # file rather than this field — confirm the two are always equal.
    org: str = ""


def _headers(token: str) -> dict[str, str]:
    return {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }


async def ensure_mesh_repo(
    org: str, token: str, private: bool = True,
) -> bool:
    """Make sure ``{org}/maggy-mesh`` exists, creating it when the lookup fails.

    Returns ``True`` when the repository already exists or was created,
    ``False`` when creation was rejected (a warning is logged).
    """
    repo_url = f"{GITHUB_API}/repos/{org}/{REPO_NAME}"
    async with httpx.AsyncClient(
        timeout=TIMEOUT, headers=_headers(token),
    ) as github:
        lookup = await github.get(repo_url)
        if lookup.status_code == 200:
            return True

        # Any non-200 lookup is treated as "missing": try to create it.
        creation = await github.post(
            f"{GITHUB_API}/orgs/{org}/repos",
            json={
                "name": REPO_NAME,
                "private": private,
                "description": "Maggy mesh peer discovery",
                "auto_init": True,
            },
        )
        if creation.status_code not in (200, 201):
            logger.warning(
                "Failed to create %s/%s: %s",
                org, REPO_NAME, creation.status_code,
            )
            return False
        logger.info("Created %s/%s", org, REPO_NAME)
        return True


async def announce(
    org: str, ann: Announcement, token: str,
) -> bool:
    """Create or update ``peers/<peer_id>.json`` in ``{org}/maggy-mesh``.

    The file holds the announcement as indented JSON. When the file already
    exists its current ``sha`` is sent along so the write is an update.
    Returns whether the write answered 200 or 201; failures are logged.
    """
    document = json.dumps({
        "peer_id": ann.peer_id,
        "name": ann.name,
        "address": ann.address,
        "port": ann.port,
        "org": org,
    }, indent=2)
    payload = base64.b64encode(document.encode()).decode()
    path = f"peers/{ann.peer_id}.json"

    async with httpx.AsyncClient(
        timeout=TIMEOUT, headers=_headers(token),
    ) as github:
        current = await github.get(
            f"{GITHUB_API}/repos/{org}/{REPO_NAME}"
            f"/contents/{path}",
        )
        known_sha = current.json().get("sha", "") if current.status_code == 200 else ""

        body: dict = {
            "message": f"announce {ann.peer_id}",
            "content": payload,
        }
        if known_sha:
            body["sha"] = known_sha

        written = await github.put(
            f"{GITHUB_API}/repos/{org}/{REPO_NAME}"
            f"/contents/{path}",
            json=body,
        )
        succeeded = written.status_code in (200, 201)
        if not succeeded:
            logger.warning(
                "Announce %s to %s failed: %s",
                ann.peer_id, org, written.status_code,
            )
        return succeeded


async def read_peers(
    org: str, token: str,
) -> list[dict]:
    """Read all peer announcements from {org}/maggy-mesh.

    The ``peers`` directory is listed first. GitHub's directory listing
    carries only metadata for each entry, not the file content, so every
    ``*.json`` entry without inline content is fetched individually before
    it is decoded. Entries that cannot be fetched or decoded are skipped.

    Returns:
        The decoded announcements as dicts; an empty list when the
        directory is missing or the listing has an unexpected shape.
    """
    peers_url = f"{GITHUB_API}/repos/{org}/{REPO_NAME}/contents/peers"
    async with httpx.AsyncClient(
        timeout=TIMEOUT, headers=_headers(token),
    ) as client:
        resp = await client.get(peers_url)
        if resp.status_code != 200:
            return []
        items = resp.json()
        if not isinstance(items, list):
            return []
        peers: list[dict] = []
        for item in items:
            if not isinstance(item, dict):
                continue
            name = item.get("name", "")
            if not isinstance(name, str) or not name.endswith(".json"):
                continue
            if not item.get("content"):
                # Listing entries have no content: fetch the file itself.
                file_resp = await client.get(f"{peers_url}/{name}")
                if file_resp.status_code != 200:
                    continue
                item = file_resp.json()
                if not isinstance(item, dict):
                    continue
            peer = _decode_peer(item)
            # Callers treat each peer as a mapping; drop anything else.
            if isinstance(peer, dict):
                peers.append(peer)
        return peers


def _decode_peer(item: dict) -> dict | None:
    """Decode peer from directory listing content."""
    raw_content = item.get("content")
    if not raw_content:
        return None
    try:
        return json.loads(base64.b64decode(raw_content))
    except (json.JSONDecodeError, Exception):
        return None


async def remove_announcement(
    org: str, peer_id: str, token: str,
) -> bool:
    """Remove peer file on shutdown (best-effort).

    Looks up the file's current ``sha`` and then deletes it.

    Returns:
        ``True`` when the delete answered 200; ``False`` when the file
        could not be looked up or the delete was rejected.
    """
    path = f"peers/{peer_id}.json"
    file_url = f"{GITHUB_API}/repos/{org}/{REPO_NAME}/contents/{path}"
    async with httpx.AsyncClient(
        timeout=TIMEOUT, headers=_headers(token),
    ) as client:
        resp = await client.get(file_url)
        if resp.status_code != 200:
            return False
        sha = resp.json().get("sha", "")
        # httpx's delete() helper accepts no request body, so the JSON
        # payload has to be sent through the generic request() method.
        resp = await client.request(
            "DELETE",
            file_url,
            json={
                "message": f"remove {peer_id}",
                "sha": sha,
            },
        )
        return resp.status_code == 200


================================================
FILE: maggy/maggy/mesh/manager.py
================================================
"""MeshManager — orchestrates multiple org networks."""

from __future__ import annotations

import logging
import platform

from .discovery import PeerInfo
from .git_discovery import (
    Announcement,
    announce,
    ensure_mesh_repo,
    read_peers,
)
from .network import Network, build_network
from .store import MeshStore

logger = logging.getLogger(__name__)


class MeshManager:
    """Manages all org-scoped mesh networks.

    Networks are kept in a dict keyed by org name and share one store.
    """

    def __init__(self, cfg, store: MeshStore) -> None:
        self._cfg = cfg
        self._store = store
        self._networks: dict[str, Network] = {}

    def add_network(self, org: str) -> Network:
        """Build the network for ``org`` and register it, replacing any old one."""
        net = build_network(
            org, self._cfg.org_key_secret, self._store,
        )
        self._networks[org] = net
        return net

    def get_network(self, org: str) -> Network | None:
        """Return the network for ``org``, or ``None`` when unknown."""
        return self._networks.get(org)

    def list_networks(self) -> list[dict]:
        """Return the status dict of every managed network."""
        return [n.status() for n in self._networks.values()]

    @property
    def total_peers(self) -> int:
        """Sum of the peer counts of all managed networks."""
        return sum(
            n.peers.count for n in self._networks.values()
        )

    async def discover(self, token: str) -> dict:
        """Read peers from git for all networks.

        Announcements that are not mappings, that lack a ``peer_id``, or
        that describe this instance are not registered.

        Returns:
            ``{org: number of announcements read}``; empty when git
            discovery is disabled in the config.
        """
        result: dict[str, int] = {}
        # The flag does not depend on the org, so check it once.
        if not self._cfg.git_discovery:
            return result
        for org, net in self._networks.items():
            peers = await read_peers(org, token)
            for p in peers:
                if not isinstance(p, dict):
                    continue
                pid = p.get("peer_id", "")
                # Skip malformed entries and our own announcement.
                if not pid or pid == self._cfg.peer_id:
                    continue
                net.peers.register(PeerInfo(
                    peer_id=pid,
                    name=p.get("name", ""),
                    address=p.get("address", ""),
                    port=p.get("port", 8080),
                    org=org,
                ))
            result[org] = len(peers)
        return result

    async def announce_all(self, token: str) -> dict:
        """Announce self to all org mesh repos; returns ``{org: success}``."""
        address = self._resolve_address()
        result: dict[str, bool] = {}
        for org in self._networks:
            ann = Announcement(
                peer_id=self._cfg.peer_id,
                name=platform.node(),
                address=address,
                port=self._cfg.port,
                org=org,
            )
            ok = await announce(org, ann, token)
            result[org] = ok
        return result

    async def setup_repos(self, token: str) -> dict:
        """Create mesh repos for all networks; returns ``{org: success}``."""
        result: dict[str, bool] = {}
        for org in self._networks:
            ok = await ensure_mesh_repo(org, token)
            result[org] = ok
        return result

    def _resolve_address(self) -> str:
        """Return the configured tunnel URL, else a loopback ws:// address."""
        if self._cfg.tunnel_url:
            return self._cfg.tunnel_url
        return f"ws://127.0.0.1:{self._cfg.port}"


================================================
FILE: maggy/maggy/mesh/memory.py
================================================
"""Typed memory categories for Mesh sharing."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum


class MemoryType(str, Enum):
    """Categories of shared memory.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    SCORE = "score"
    PATTERN = "pattern"
    POLICY = "policy"
    GAP = "gap"


def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class SharedMemory:
    """One keyed piece of memory exchanged over the Mesh."""

    key: str
    memory_type: str
    content: dict = field(default_factory=dict)
    source_peer: str = ""
    confidence: float = 1.0
    # Defaults to the creation time of the record (UTC, ISO-8601).
    created_at: str = field(default_factory=_utc_timestamp)

    @property
    def is_trusted(self) -> bool:
        """Whether ``confidence`` is at least 0.5."""
        trust_floor = 0.5
        return self.confidence >= trust_floor


================================================
FILE: maggy/maggy/mesh/network.py
================================================
"""Network — one isolated mesh per GitHub org."""

from __future__ import annotations

import logging
from dataclasses import dataclass

from .discovery import PeerRegistry
from .quarantine import QuarantineStore
from .store import MeshStore
from .sync import SyncEngine
from .transport import derive_org_key

logger = logging.getLogger(__name__)


@dataclass
class Network:
    """The components that make up one org's mesh network."""

    org: str
    org_key: str
    peers: PeerRegistry
    sync: SyncEngine
    quarantine: QuarantineStore

    def status(self) -> dict:
        """Summarise the network: org name plus peer, memory and quarantine counts."""
        summary = dict(
            org=self.org,
            peers=self.peers.count,
            memories=self.sync.local_count,
            quarantined=self.quarantine.count,
        )
        return summary


def build_network(
    org: str, secret: str, store: MeshStore,
) -> Network:
    """Assemble the network for ``org`` on top of a shared ``store``.

    The org key is derived from ``org`` and ``secret``. The quarantine
    store is created once and shared with the sync engine.
    """
    derived_key = derive_org_key(org, secret)
    quarantine = QuarantineStore(store, org)
    registry = PeerRegistry(store, org)
    engine = SyncEngine(quarantine, store, org)
    return Network(
        org=org,
        org_key=derived_key,
        peers=registry,
        sync=engine,
        quarantine=quarantine,
    )


================================================
FILE: maggy/maggy/mesh/org_scanner.py
================================================
"""Scan local repos for unique GitHub org names."""

from __future__ import annotations

from pathlib import Path

from maggy.discovery import discover_repos, infer_github_org


def scan_orgs(home: Path | None = None) -> list[str]:
    """Collect the GitHub org names inferred from locally discovered repos.

    Repos for which no org can be inferred are ignored. The result is
    de-duplicated and sorted.
    """
    inferred = (
        infer_github_org(Path(repo["path"]))
        for repo in discover_repos(home)
    )
    # filter(None, ...) drops repos with no (or an empty) inferred org.
    return sorted(set(filter(None, inferred)))


def effective_orgs(
    scanned: list[str],
    manual: list[str],
    excluded: list[str],
) -> list[str]:
    """Return the sorted union of ``scanned`` and ``manual`` minus ``excluded``."""
    wanted = {*scanned, *manual}
    return sorted(wanted.difference(excluded))


================================================
FILE: maggy/maggy/mesh/protocol.py
================================================
"""Message types and serialization for Mesh protocol."""

from __future__ import annotations

import json
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum


class MessageType(str, Enum):
    """Kinds of Mesh protocol messages.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    HELLO = "hello"
    SHARE = "share"
    REQUEST = "request"
    RESPONSE = "response"
    QUARANTINE = "quarantine"
    PROMOTE = "promote"
    HEARTBEAT = "heartbeat"


def _utc_stamp() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class MeshMessage:
    """Envelope for one Mesh protocol message."""

    msg_type: str
    sender_id: str
    payload: dict = field(default_factory=dict)
    # Defaults to the creation time of the message (UTC, ISO-8601).
    timestamp: str = field(default_factory=_utc_stamp)

    def serialize(self) -> str:
        """Encode the message as a JSON object with one key per field."""
        as_mapping = asdict(self)
        return json.dumps(as_mapping)

    @classmethod
    def deserialize(cls, data: str) -> MeshMessage:
        """Rebuild a message from ``serialize`` output.

        The decoded JSON object is passed to the constructor as keyword
        arguments, so unknown or missing keys raise ``TypeError``.
        """
        return cls(**json.loads(data))


def create_hello(peer_id: str, name: str) -> MeshMessage:
    """Build a HELLO message from ``peer_id`` whose payload carries ``name``."""
    greeting = {"name": name}
    return MeshMessage(
        msg_type=MessageType.HELLO, sender_id=peer_id, payload=greeting,
    )


def create_share(
    peer_id: str, key: str, content: dict,
) -> MeshMessage:
    """Build a SHARE message for ``content`` stored under ``key``.

    The payload holds the key, the content itself and the content's
    ``memory_type`` entry (empty string when absent).
    """
    body = {
        "key": key,
        "memory_type": content.get("memory_type", ""),
        "content": content,
    }
    return MeshMessage(
        msg_type=MessageType.SHARE, sender_id=peer_id, payload=body,
    )


================================================
FILE: maggy/maggy/mesh/provenance.py
================================================
"""Provenance tracking with confidence decay."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone

# Confidence lost for every hop away from the origin peer.
DECAY_PER_HOP = 0.1
# Floor that the effective confidence never drops below.
MIN_CONFIDENCE = 0.1


def _received_now() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class Provenance:
    """Where a piece of shared data came from and how far it has travelled."""

    origin_peer: str
    hops: int = 0
    base_confidence: float = 1.0
    # Defaults to the creation time of the record (UTC, ISO-8601).
    received_at: str = field(default_factory=_received_now)

    @property
    def effective_confidence(self) -> float:
        """Base confidence minus the per-hop decay, floored at ``MIN_CONFIDENCE``."""
        penalty = self.hops * DECAY_PER_HOP
        remaining = self.base_confidence - penalty
        return max(remaining, MIN_CONFIDENCE)

    def add_hop(self) -> Provenance:
        """Return a new provenance one hop further from the origin.

        Origin and base confidence are carried over; ``received_at`` is
        not, so the new record gets a fresh timestamp.
        """
        return Provenance(
            origin_peer=self.origin_peer,
            base_confidence=self.base_confidence,
            hops=self.hops + 1,
        )


================================================
FILE: maggy/maggy/mesh/publisher.py
================================================
"""Collect local data and build shareable memories."""

from __future__ import annotations

from .memory import SharedMemory


def collect_scores(routing, peer_id: str) -> list[SharedMemory]:
    """Turn routing heatmap rows into shareable "score" memories.

    Returns an empty list when ``routing`` is falsy. Rows whose ``count``
    is below 5 are skipped; confidence is ``count / 20`` capped at 1.0.
    """
    if not routing:
        return []
    collected: list[SharedMemory] = []
    for row in routing.get_heatmap():
        samples = row.get("count", 0)
        if samples < 5:
            continue
        model = row.get("model", "")
        task_type = row.get("task_type", "")
        memory = SharedMemory(
            key=f"score:{model}:{task_type}",
            memory_type="score",
            content=row,
            source_peer=peer_id,
            confidence=min(samples / 20, 1.0),
        )
        collected.append(memory)
    return collected


def collect_gaps(forge, peer_id: str) -> list[SharedMemory]:
    """Turn every capability gap reported by ``forge`` into a "gap" memory.

    Returns an empty list when ``forge`` is falsy.
    """
    if not forge:
        return []
    return [
        SharedMemory(
            key=f"gap:{gap.get('name', '')}",
            memory_type="gap",
            content=gap,
            source_peer=peer_id,
        )
        for gap in forge.get_gaps()
    ]


def collect_policies(introspector, peer_id: str) -> list[SharedMemory]:
    """Turn "action"-severity recommendations into "policy" memories.

    Returns an empty list when ``introspector`` is falsy or its report
    is falsy.
    """
    report = introspector.get_report() if introspector else None
    if not report:
        return []
    return [
        SharedMemory(
            key=f"policy:{rec.category}",
            memory_type="policy",
            content={"message": rec.message, "suggestion": rec.suggestion},
            source_peer=peer_id,
        )
        for rec in report.recommendations
        if rec.severity == "action"
    ]


def collect_all_shares(app_state, peer_id: str) -> list[SharedMemory]:
    """Gather score, gap, and policy memories from ``app_state`` services.

    Each service is looked up by attribute name and may be missing, in
    which case ``None`` is handed to its collector.
    """
    sources = (
        ("routing", collect_scores),
        ("forge", collect_gaps),
        ("introspector", collect_policies),
    )
    gathered: list[SharedMemory] = []
    for attr_name, collector in sources:
        service = getattr(app_state, attr_name, None)
        gathered.extend(collector(service, peer_id))
    return gathered


================================================
FILE: maggy/maggy/mesh/quarantine.py
================================================
"""Quarantine system for untrusted mesh data."""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone


@dataclass
class QuarantineEntry:
    """A quarantined memory item.

    Attributes:
        key: Memory key the item was received under.
        source_peer: Identifier of the peer the item came from.
        reason: Why the item was quarantined.
        content: The quarantined content; empty dict by default.
        memory_type: Memory type label; empty string by default.
        quarantined_at: ISO-8601 UTC timestamp, set at creation when omitted.
    """

    key: str
    source_peer: str
    reason: str
    content: dict = field(default_factory=dict)
    memory_type: str = ""
    quarantined_at: str = field(
        default_factory=lambda: datetime.now(
            timezone.utc
        ).isoformat()
    )


class QuarantineStore:
    """Manages quarantined data from mesh peers.

    Entries are held in memory keyed by memory key. When both ``store``
    and ``org`` are supplied, entries are loaded from the store at
    construction time and every change is mirrored to it.
    """

    def __init__(self, store=None, org: str = ""):
        self._entries: dict[str, QuarantineEntry] = {}
        self._store = store
        self._org = org
        if store and org:
            self._load_from_store()

    def _load_from_store(self) -> None:
        """Populate the in-memory entries from the backing store."""
        for row in self._store.list_quarantined(self._org):
            entry = QuarantineEntry(
                key=row["key"],
                source_peer=row["source_peer"],
                reason=row["reason"],
                content=row.get("content", {}),
                memory_type=row.get("memory_type", ""),
            )
            # Keep the persisted quarantine time instead of stamping the
            # entry with the moment it happened to be reloaded.
            stored_at = row.get("quarantined_at")
            if stored_at:
                entry.quarantined_at = stored_at
            self._entries[row["key"]] = entry

    def quarantine(
        self, key: str, source: str,
        reason: str, content: dict,
        memory_type: str = "",
    ) -> QuarantineEntry:
        """Record an item as quarantined and return the new entry.

        An existing entry under the same key is replaced. ``memory_type``
        is kept on the in-memory entry only; it is not forwarded to the
        store's ``quarantine_item`` call.
        """
        entry = QuarantineEntry(
            key=key, source_peer=source,
            reason=reason, content=content,
            memory_type=memory_type,
        )
        self._entries[key] = entry
        if self._store and self._org:
            self._store.quarantine_item(
                self._org, key, source, reason, content,
            )
        return entry

    def get(self, key: str) -> QuarantineEntry | None:
        """Return the entry for ``key``, or None when it is not quarantined."""
        return self._entries.get(key)

    def list_all(self) -> list[QuarantineEntry]:
        """Return all quarantined entries as a new list."""
        return list(self._entries.values())

    def promote(self, key: str) -> QuarantineEntry | None:
        """Remove from quarantine and return entry for acceptance.

        Returns None when ``key`` has no in-memory entry; the store row,
        if any, is deleted either way.
        """
        entry = self._entries.pop(key, None)
        if self._store and self._org:
            self._store.promote_item(self._org, key)
        return entry

    def reject(self, key: str) -> bool:
        """Permanently reject quarantined item.

        Returns True only when something was actually removed, either
        from memory or from the backing store.
        """
        removed = self._entries.pop(key, None) is not None
        if self._store and self._org:
            # The store exposes a single "delete quarantine row" call,
            # promote_item; always run it so memory and store stay in sync.
            deleted = bool(self._store.promote_item(self._org, key))
            removed = removed or deleted
        return removed

    @property
    def count(self) -> int:
        """Number of entries currently quarantined in memory."""
        return len(self._entries)


================================================
FILE: maggy/maggy/mesh/store.py
================================================
"""SQLite backing for mesh peers, memories, and quarantine."""

from __future__ import annotations

import json
import sqlite3
import threading
from datetime import datetime, timezone
from pathlib import Path

SCHEMA = """
CREATE TABLE IF NOT EXISTS peers (
    peer_id TEXT NOT NULL,
    name TEXT NOT NULL,
    address TEXT NOT NULL,
    port INTEGER NOT NULL DEFAULT 8080,
    org TEXT NOT NULL,
    last_seen TEXT NOT NULL,
    manual INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (peer_id, org)
);
CREATE TABLE IF NOT EXISTS shared_memories (
    key TEXT NOT NULL,
    org TEXT NOT NULL,
    memory_type TEXT NOT NULL,
    content TEXT NOT NULL,
    source_peer TEXT NOT NULL,
    confidence REAL NOT NULL DEFAULT 1.0,
    created_at TEXT NOT NULL,
    PRIMARY KEY (key, org)
);
CREATE TABLE IF NOT EXISTS quarantine (
    key TEXT NOT NULL,
    org TEXT NOT NULL,
    source_peer TEXT NOT NULL,
    reason TEXT NOT NULL,
    content TEXT NOT NULL,
    quarantined_at TEXT NOT NULL,
    PRIMARY KEY (key, org)
);
"""


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


class MeshStore:
    """SQLite-backed mesh storage with connection reuse."""

    def __init__(self, db_path: Path) -> None:
        self._db = db_path
        self._lock = threading.Lock()
        db_path.parent.mkdir(parents=True, exist_ok=True)
        self._conn = sqlite3.connect(
            str(db_path), timeout=30.0,
            check_same_thread=False,
        )
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.execute("PRAGMA busy_timeout=30000")
        self._conn.row_factory = sqlite3.Row
        self._conn.executescript(SCHEMA)

    # ── Peers ──────────────────────────────────────────

    def upsert_peer(
        self, peer_id: str, name: str,
        address: str, port: int, org: str,
    ) -> None:
        with self._lock:
            self._conn.execute(
                "INSERT OR REPLACE INTO peers "
                "VALUES (?,?,?,?,?,?,?)",
                (peer_id, name, address, port,
                 org, _now(), 0),
            )
            self._conn.commit()

    def get_peer(
        self, peer_id: str, org: str,
    ) -> dict | None:
        with self._lock:
            row = self._conn.execute(
                "SELECT * FROM peers "
                "WHERE peer_id=? AND org=?",
                (peer_id, org),
            ).fetchone()
        return dict(row) if row else None

    def list_peers(
        self, org: str | None = None,
    ) -> list[dict]:
        with self._lock:
            if org:
                rows = self._conn.execute(
                    "SELECT * FROM peers WHERE org=?",
                    (org,),
                ).fetchall()
            else:
                rows = self._conn.execute(
                    "SELECT * FROM peers",
                ).fetchall()
        return [dict(r) for r in rows]

    def remove_peer(
        self, peer_id: str, org: str,
    ) -> bool:
        with self._lock:
            cur = self._conn.execute(
                "DELETE FROM peers "
                "WHERE peer_id=? AND org=?",
                (peer_id, org),
            )
            self._conn.commit()
        return cur.rowcount > 0

    # ── Memories ───────────────────────────────────────

    def write_memory(
        self, org: str, key: str, memory_type: str,
        content: dict, source_peer: str,
        confidence: float = 1.0,
    ) -> None:
        with self._lock:
            self._conn.execute(
                "INSERT OR REPLACE INTO shared_memories "
                "VALUES (?,?,?,?,?,?,?)",
                (key, org, memory_type,
                 json.dumps(content),
                 source_peer, confidence, _now()),
            )
            self._conn.commit()

    def list_memories(self, org: str) -> list[dict]:
        with self._lock:
            rows = self._conn.execute(
                "SELECT * FROM shared_memories WHERE org=?",
                (org,),
            ).fetchall()
        return [
            {**dict(r), "content": json.loads(r["content"])}
            for r in rows
        ]

    # ── Quarantine ─────────────────────────────────────

    def quarantine_item(
        self, org: str, key: str,
        source: str, reason: str, content: dict,
    ) -> None:
        with self._lock:
            self._conn.execute(
                "INSERT OR REPLACE INTO quarantine "
                "VALUES (?,?,?,?,?,?)",
                (key, org, source, reason,
                 json.dumps(content), _now()),
            )
            self._conn.commit()

    def promote_item(
        self, org: str, key: str,
    ) -> bool:
        with self._lock:
            cur = self._conn.execute(
                "DELETE FROM quarantine "
                "WHERE key=? AND org=?",
                (key, org),
            )
            self._conn.commit()
        return cur.rowcount > 0

    def list_quarantined(self, org: str) -> list[dict]:
        with self._lock:
            rows = self._conn.execute(
                "SELECT * FROM quarantine WHERE org=?",
                (org,),
            ).fetchall()
        return [
            {**dict(r), "content": json.loads(r["content"])}
            for r in rows
        ]

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()


================================================
FILE: maggy/maggy/mesh/sync.py
================================================
"""Sync engine — merges shared memories across peers."""

from __future__ import annotations

import logging
from dataclasses import dataclass

from .memory import SharedMemory
from .quarantine import QuarantineStore

logger = logging.getLogger(__name__)

# Incoming memories with confidence at or above this value are accepted;
# anything lower is sent to quarantine.
CONFIDENCE_THRESHOLD = 0.5


@dataclass
class SyncResult:
    """Tally of what one ``sync_incoming`` call did with its input.

    Attributes:
        accepted: Memories stored locally.
        quarantined: Memories handed to the quarantine store.
        rejected: Memories dropped because their confidence was not a number.
    """

    accepted: int = 0
    quarantined: int = 0
    rejected: int = 0


class SyncEngine:
    """Merges incoming memories with local store.

    Accepted memories are held in memory keyed by memory key. When both
    ``store`` and ``org`` are supplied, existing memories are loaded at
    construction time and every accepted memory is written through.
    """

    def __init__(
        self, quarantine: QuarantineStore,
        store=None, org: str = "",
    ):
        self._local: dict[str, SharedMemory] = {}
        self._quarantine = quarantine
        self._store = store
        self._org = org
        if store and org:
            self._load_from_store()

    def _load_from_store(self) -> None:
        """Populate the local cache from the backing store."""
        for row in self._store.list_memories(self._org):
            self._local[row["key"]] = SharedMemory(
                key=row["key"],
                memory_type=row["memory_type"],
                content=row["content"],
                source_peer=row["source_peer"],
                confidence=row["confidence"],
            )

    def sync_incoming(
        self, memories: list[SharedMemory],
    ) -> SyncResult:
        """Process incoming memories from a peer.

        A memory is accepted when its confidence is at least
        CONFIDENCE_THRESHOLD and quarantined when it is lower. A memory
        whose confidence is not an int or float is dropped and counted
        as rejected instead of raising part-way through the batch.
        """
        result = SyncResult()
        for mem in memories:
            confidence = mem.confidence
            if not isinstance(confidence, (int, float)):
                # Peer-supplied value that cannot be compared numerically.
                result.rejected += 1
                continue
            if confidence >= CONFIDENCE_THRESHOLD:
                self._accept(mem)
                result.accepted += 1
            else:
                self._quarantine.quarantine(
                    key=mem.key,
                    source=mem.source_peer,
                    reason="low confidence",
                    content=mem.content,
                    memory_type=mem.memory_type,
                )
                result.quarantined += 1
        return result

    def _accept(self, mem: SharedMemory) -> None:
        """Store ``mem`` locally, replacing any memory under the same key."""
        self._local[mem.key] = mem
        if self._store and self._org:
            self._store.write_memory(
                self._org, mem.key, mem.memory_type,
                mem.content, mem.source_peer, mem.confidence,
            )

    def promote_from_quarantine(self, key: str) -> bool:
        """Accept a quarantined item into shared memories.

        Returns False when nothing is quarantined under ``key``. A
        promoted item is stored with confidence 1.0.
        """
        entry = self._quarantine.promote(key)
        if not entry:
            return False
        mem = SharedMemory(
            key=entry.key,
            memory_type=entry.memory_type,
            content=entry.content,
            source_peer=entry.source_peer,
            confidence=1.0,
        )
        self._accept(mem)
        return True

    def get_local(self, key: str) -> SharedMemory | None:
        """Return the locally held memory for ``key``, or None."""
        return self._local.get(key)

    def list_local(self) -> list[SharedMemory]:
        """Return all locally held memories as a new list."""
        return list(self._local.values())

    @property
    def local_count(self) -> int:
        """Number of memories held locally."""
        return len(self._local)


================================================
FILE: maggy/maggy/mesh/transport.py
================================================
"""Transport layer — HMAC auth and org key derivation."""

from __future__ import annotations

import hashlib
import hmac
import json
import logging
import time

from .protocol import MeshMessage

logger = logging.getLogger(__name__)

MAX_AGE_SECONDS = 300  # 5-minute replay window


def derive_org_key(org: str, secret: str) -> str:
    """Derive the per-org key: HMAC-SHA256 of ``org`` keyed by ``secret``.

    Returns the digest as a hex string.
    """
    mac = hmac.new(
        secret.encode(),
        msg=org.encode(),
        digestmod=hashlib.sha256,
    )
    return mac.hexdigest()


def compute_hmac(payload: str, key: str) -> str:
    """Return the hex HMAC-SHA256 of ``payload`` under ``key``."""
    key_bytes = key.encode()
    payload_bytes = payload.encode()
    raw_digest = hmac.digest(key_bytes, payload_bytes, hashlib.sha256)
    return raw_digest.hex()


def verify_hmac(
    payload: str, key: str, signature: str,
) -> bool:
    """Check ``signature`` against the HMAC of ``payload`` under ``key``.

    The comparison uses ``hmac.compare_digest``.
    """
    recomputed = compute_hmac(payload, key)
    signatures_match = hmac.compare_digest(recomputed, signature)
    return signatures_match


def sign_message(msg: MeshMessage, org_key: str) -> str:
    """Wrap ``msg`` in a signed JSON envelope.

    The envelope holds the serialized message, the current Unix time,
    and an HMAC over ``"<payload>:<ts>"`` so the timestamp is covered by
    the signature.
    """
    body = msg.serialize()
    issued_at = time.time()
    signature = compute_hmac(f"{body}:{issued_at}", org_key)
    envelope = {"payload": body, "sig": signature, "ts": issued_at}
    return json.dumps(envelope)


def verify_message(
    raw: str, org_key: str,
) -> MeshMessage | None:
    """Verify signature and timestamp, then deserialize.

    ``raw`` is untrusted input. Every malformed shape yields None rather
    than an exception: undecodable JSON, a non-object envelope, missing
    or wrongly typed ``payload``/``sig``/``ts`` fields, a timestamp
    outside the MAX_AGE_SECONDS window, a signature mismatch, or a
    payload that does not decode into a message.

    Returns:
        The decoded message, or None when verification fails.
    """
    try:
        envelope = json.loads(raw)
    except (TypeError, ValueError):
        return None
    if not isinstance(envelope, dict):
        return None
    payload = envelope.get("payload")
    sig = envelope.get("sig")
    ts = envelope.get("ts", 0)
    if not isinstance(payload, str) or not isinstance(sig, str):
        return None
    # bool is an int subclass; it is never a legitimate timestamp.
    if isinstance(ts, bool) or not isinstance(ts, (int, float)):
        return None
    try:
        age = abs(time.time() - ts)
    except OverflowError:
        # JSON integers are unbounded and may not fit in a float.
        return None
    # Written as "not <=" so a NaN timestamp is rejected as well.
    if not age <= MAX_AGE_SECONDS:
        logger.debug("Rejected stale message (age=%.0fs)", age)
        return None
    try:
        authentic = verify_hmac(f"{payload}:{ts}", org_key, sig)
    except (TypeError, ValueError):
        # compare_digest rejects non-ASCII strings; encoding can fail on
        # lone surrogates.
        return None
    if not authentic:
        return None
    try:
        return MeshMessage.deserialize(payload)
    except (TypeError, ValueError):
        logger.debug("Rejected signed message with malformed payload")
        return None


================================================
FILE: maggy/maggy/mesh/ws_client.py
================================================
"""Async WebSocket client for mesh peer connections."""

from __future__ import annotations

import asyncio
import logging

from .discovery import PeerInfo
from .protocol import MeshMessage, create_hello
from .transport import sign_message, verify_message

logger = logging.getLogger(__name__)

# NOTE(review): presumably the pause in seconds between reconnect
# attempts; nothing in this module references it — confirm where it is used.
RECONNECT_DELAY = 10.0


class MeshClient:
    """Maintains WebSocket connections to known peers.

    Open connections are tracked by peer id. A connection that fails its
    handshake, fails a send, or is replaced by a newer one is closed
    rather than left open.
    """

    def __init__(self, peer_id: str) -> None:
        self._peer_id = peer_id
        self._connections: dict[str, object] = {}
        self._tasks: dict[str, asyncio.Task] = {}

    @staticmethod
    async def _close_quietly(ws) -> None:
        """Close ``ws`` on a best-effort basis, logging any failure."""
        try:
            await ws.close()
        except Exception as exc:
            logger.debug("Closing mesh connection failed: %s", exc)

    async def connect(
        self, peer: PeerInfo, org: str, org_key: str,
    ) -> bool:
        """Connect to a peer and send HELLO.

        Returns True once the peer's signed reply has been verified and
        the connection registered; False on any failure, in which case
        the socket (if it was opened) is closed.
        """
        ws = None
        try:
            import websockets
            # NOTE(review): only peer.address is used to build the URL —
            # confirm it already carries the scheme and port.
            url = f"{peer.address}/ws/mesh"
            ws = await websockets.connect(url)
            hello = create_hello(self._peer_id, "client")
            hello.payload["org"] = org
            signed = sign_message(hello, org_key)
            await ws.send(signed)
            reply_raw = await ws.recv()
            reply = verify_message(reply_raw, org_key)
        except Exception as exc:
            logger.debug("Connect to %s failed: %s", peer.peer_id, exc)
            if ws is not None:
                # The socket was opened before the failure; do not leak it.
                await self._close_quietly(ws)
            return False
        if not reply:
            await self._close_quietly(ws)
            return False
        previous = self._connections.pop(peer.peer_id, None)
        if previous is not None and previous is not ws:
            # Reconnecting: release the connection being replaced.
            await self._close_quietly(previous)
        self._connections[peer.peer_id] = ws
        logger.info("Connected to peer %s", peer.peer_id)
        return True

    async def send(
        self, peer_id: str, msg: MeshMessage, org_key: str,
    ) -> bool:
        """Send message to a connected peer.

        Returns False when the peer is not connected or the send fails.
        A failed send drops and closes that peer's connection.
        """
        ws = self._connections.get(peer_id)
        if not ws:
            return False
        try:
            signed = sign_message(msg, org_key)
            await ws.send(signed)
            return True
        except Exception as exc:
            logger.debug("Send to %s failed: %s", peer_id, exc)
            self._connections.pop(peer_id, None)
            await self._close_quietly(ws)
            return False

    async def broadcast(
        self, peers: list[str], msg: MeshMessage, org_key: str,
    ) -> int:
        """Send to all specified peers. Returns success count."""
        sent = 0
        for pid in peers:
            if await self.send(pid, msg, org_key):
                sent += 1
        return sent

    async def close_all(self) -> None:
        """Close all connections and cancel all tracked tasks."""
        # Snapshot the values: closing awaits, and the dict is cleared below.
        for ws in list(self._connections.values()):
            await self._close_quietly(ws)
        self._connections.clear()
        for task in self._tasks.values():
            task.cancel()
        self._tasks.clear()

    @property
    def connected_count(self) -> int:
        """Number of peers with a registered connection."""
        return len(self._connections)

    def is_connected(self, peer_id: str) -> bool:
        """Return True when ``peer_id`` has a registered connection."""
        return peer_id in self._connections


================================================
FILE: maggy/maggy/mesh/ws_server.py
================================================
"""WebSocket server endpoint for mesh communication."""

from __future__ import annotations

import asyncio
import json
import logging

from fastapi import APIRouter, WebSocket, WebSocketDisconnect

from .protocol import MessageType, MeshMessage, create_hello
from .transport import sign_message, verify_message

logger = logging.getLogger(__name__)

router = APIRouter()

# Seconds to wait for the first (HELLO) frame after accepting a socket.
HELLO_TIMEOUT = 10.0
# Seconds to wait for each later frame inside the message loop.
MSG_TIMEOUT = 300.0
# Consecutive unverifiable frames tolerated before the message loop stops.
MAX_INVALID = 5


@router.websocket("/ws/mesh")
async def mesh_ws(websocket: WebSocket) -> None:
    """WebSocket endpoint for mesh peers.

    The socket is accepted first. If the application has no ``mesh``
    manager on its state, it is closed with code 1008. Otherwise the
    connection is handled, and disconnects, timeouts, and other errors
    are logged rather than propagated.
    """
    await websocket.accept()
    mesh_manager = getattr(websocket.app.state, "mesh", None)
    if mesh_manager:
        try:
            await _handle_connection(websocket, mesh_manager)
        except WebSocketDisconnect:
            logger.debug("Mesh peer disconnected")
        except asyncio.TimeoutError:
            logger.debug("Mesh peer timed out")
        except Exception as exc:
            logger.warning("Mesh WS error: %s", exc)
        return
    await websocket.close(code=1008, reason="Mesh not enabled")


async def _handle_connection(websocket, manager) -> None:
    """Authenticate the peer's HELLO, reply, then run the message loop.

    The first frame must arrive within HELLO_TIMEOUT seconds. On failed
    authentication or an unknown org the socket is closed with code
    1008. Otherwise a signed HELLO listing the org's known peers is sent
    back before the receive loop starts.
    """
    first_frame = await asyncio.wait_for(
        websocket.receive_text(), timeout=HELLO_TIMEOUT,
    )
    org, hello = _authenticate(first_frame, manager)
    if not (hello and org):
        await websocket.close(code=1008, reason="Auth failed")
        return
    network = manager.get_network(org)
    if not network:
        await websocket.close(code=1008, reason="Unknown org")
        return
    known_peers = []
    for peer in network.peers.list_peers():
        known_peers.append({
            "peer_id": peer.peer_id,
            "address": peer.address,
            "port": peer.port,
        })
    reply = create_hello(manager._cfg.peer_id, "server")
    reply.payload["peers"] = known_peers
    await websocket.send_text(sign_message(reply, network.org_key))
    await _message_loop(websocket, network)


async def _message_loop(websocket, net) -> None:
    """Receive, verify, and dispatch frames until the peer misbehaves.

    Each frame must arrive within MSG_TIMEOUT seconds. Frames that fail
    verification are skipped; the loop ends after MAX_INVALID of them in
    a row. A verified frame resets that counter.
    """
    strikes = 0
    while True:
        frame = await asyncio.wait_for(
            websocket.receive_text(), timeout=MSG_TIMEOUT,
        )
        message = verify_message(frame, net.org_key)
        if message:
            strikes = 0
            await _dispatch(message, net)
            continue
        strikes += 1
        if strikes >= MAX_INVALID:
            logger.warning("Too many invalid messages")
            return


def _authenticate(
    raw: str, manager,
) -> tuple[str | None, MeshMessage | None]:
    """Try to authenticate a HELLO message."""
    try:
        envelope = json.loads(raw)
        payload_str = envelope.get("payload", "")
        msg = MeshMessage.deserialize(payload_str)
        org = msg.payload.get("org", "")
    except (json.JSONDecodeError, KeyError, TypeError):
        return None, None
    if msg.msg_type != MessageType.HELLO:
        return None, None
    net = manager.get_network(org)
    if not net:
        return None, None
    verified = verify_message(raw, net.org_key)
    if not verified:
        return None, None
    return org, verified


async def _dispatch(msg: MeshMessage, net) -> None:
    """Route a verified message to the handler for its type.

    SHARE messages become a SharedMemory passed to the network's sync
    engine; HEARTBEAT messages refresh the sender's last-seen record.
    Other types are ignored.
    """
    if msg.msg_type == MessageType.SHARE:
        from .memory import SharedMemory
        body = msg.payload
        shared = SharedMemory(
            key=body.get("key", ""),
            memory_type=body.get("memory_type", ""),
            content=body.get("content", {}),
            source_peer=msg.sender_id,
            confidence=body.get("confidence", 1.0),
        )
        net.sync.sync_incoming([shared])
        return
    if msg.msg_type == MessageType.HEARTBEAT:
        net.peers.update_seen(msg.sender_id)


================================================
FILE: maggy/maggy/mnemos/__init__.py
================================================
"""Mnemos helpers for fatigue and signal tracking."""

from .fatigue import FatigueTracker
from .signals import SignalLog

__all__ = ["FatigueTracker", "SignalLog"]


================================================
FILE: maggy/maggy/mnemos/fatigue.py
================================================
"""Cross-model fatigue tracking for Mnemos."""

from __future__ import annotations

# The only dimension names FatigueTracker.record() accepts.
VALID_DIMENSIONS = frozenset({
    "context_load",
    "turn_pressure",
    "reread_ratio",
    "handoff_risk",
})


class FatigueTracker:
    """Holds one score per fatigue dimension and summarises them.

    Every dimension starts at 0.0 and is kept within [0.0, 1.0].
    """

    def __init__(self, context_window: int = 200_000):
        self.context_window = context_window
        self.dimensions: dict[str, float] = dict.fromkeys(
            VALID_DIMENSIONS, 0.0,
        )

    def record(self, dimension: str, value: float) -> None:
        """Set ``dimension`` to ``value`` clamped into [0.0, 1.0].

        Raises:
            ValueError: If ``dimension`` is not in VALID_DIMENSIONS.
        """
        if dimension in VALID_DIMENSIONS:
            clamped = max(0.0, min(value, 1.0))
            self.dimensions[dimension] = clamped
            return
        raise ValueError(
            f"Unknown dimension {dimension!r}. "
            f"Valid: {sorted(VALID_DIMENSIONS)}"
        )

    def on_model_switch(self, new_context_window: int) -> None:
        """Adopt a new context window and add 0.15 to ``reread_ratio``."""
        self.context_window = new_context_window
        bumped = self.dimensions["reread_ratio"] + 0.15
        self.record("reread_ratio", bumped)

    def composite(self) -> float:
        """Return the arithmetic mean of all dimension scores."""
        scores = self.dimensions.values()
        return sum(scores) / len(self.dimensions)

    def state(self) -> str:
        """Classify the composite score.

        Returns "critical" at 0.8 or above, "compress" at 0.45 or above,
        and "ok" otherwise.
        """
        score = self.composite()
        for floor, label in ((0.8, "critical"), (0.45, "compress")):
            if score >= floor:
                return label
        return "ok"


================================================
FILE: maggy/maggy/mnemos/signals.py
================================================
"""JSONL-backed signal logging for Mnemos."""

from __future__ import annotations

import json
from pathlib import Path


class SignalLog:
    """Append and read Mnemos signal history.

    Signals are stored one JSON document per line in the file at ``path``.
    """

    def __init__(self, path: Path):
        self._path = path

    def append(self, signal: dict) -> None:
        """Append ``signal`` as one JSON line, creating parent dirs if needed."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        with self._path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(signal) + "\n")

    def recent(self, n: int) -> list[dict]:
        """Return up to the last ``n`` signals, oldest first.

        Returns an empty list when ``n`` is not positive or the file
        does not exist. Blank lines are skipped, so stray newlines in
        the file neither raise nor use up one of the ``n`` slots.
        """
        if n <= 0 or not self._path.exists():
            return []
        from collections import deque
        with self._path.open(encoding="utf-8") as handle:
            # A bounded deque keeps only the tail while streaming the file.
            tail = deque(
                (line for line in handle if line.strip()), maxlen=n,
            )
        return [json.loads(line) for line in tail]


================================================
FILE: maggy/maggy/models/__init__.py
================================================
"""Maggy data models."""


================================================
FILE: maggy/maggy/models/plan.py
================================================
"""Plan and PlanDiff models for dual-model planning."""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class PlanStep:
    """A single step in a plan.

    Attributes:
        description: Text describing the step.
        files: Paths associated with the step; empty by default.
        blast_estimate: Integer blast estimate for this step; defaults to 0.
    """

    description: str
    files: list[str] = field(default_factory=list)
    blast_estimate: int = 0


@dataclass
class Plan:
    """A generated implementation plan.

    Attributes:
        task: The task the plan addresses.
        model: Name of the model the plan is attributed to.
        steps: The plan's PlanStep entries; empty by default.
        risks: Risk notes; empty by default.
        total_blast: Overall blast figure for the plan; defaults to 0.
    """

    task: str
    model: str
    steps: list[PlanStep] = field(default_factory=list)
    risks: list[str] = field(default_factory=list)
    total_blast: int = 0

    @property
    def step_count(self) -> int:
        """Number of steps in the plan."""
        return len(self.steps)


@dataclass
class PlanDiff:
    """Outcome of comparing a primary plan with a counter plan.

    Attributes:
        agreed: Step descriptions present in both plans.
        conflicts: Dicts pairing a "primary" step with a "counter" step.
        primary_only: Step descriptions found only in the primary plan.
        counter_only: Step descriptions found only in the counter plan.
    """

    agreed: list[str] = field(default_factory=list)
    conflicts: list[dict] = field(default_factory=list)
    primary_only: list[str] = field(default_factory=list)
    counter_only: list[str] = field(default_factory=list)

    @property
    def conflict_count(self) -> int:
        """Number of conflicting step pairs."""
        return len(self.conflicts)

    @property
    def agreement_ratio(self) -> float:
        """Share of all diff entries that are agreed; 1.0 for an empty diff."""
        buckets = (
            self.agreed, self.conflicts,
            self.primary_only, self.counter_only,
        )
        total = sum(len(bucket) for bucket in buckets)
        if not total:
            return 1.0
        return len(self.agreed) / total


================================================
FILE: maggy/maggy/observability/__init__.py
================================================
"""Observability exports."""

from .collector import ObservabilityCollector

__all__ = ["ObservabilityCollector"]


================================================
FILE: maggy/maggy/observability/collector.py
================================================
"""SQLite-backed observability signal storage."""

from __future__ import annotations

import sqlite3
from contextlib import contextmanager
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator

SCHEMA = """
CREATE TABLE IF NOT EXISTS signals (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project TEXT NOT NULL,
    signal_type TEXT NOT NULL,
    value REAL NOT NULL,
    created_at TEXT NOT NULL
);
"""


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class ObservabilityCollector:
    def __init__(self, db_path: Path):
        self._db_path = db_path
        self._init_db()

    def record_signal(
        self, project: str, signal_type: str, value: float,
    ) -> None:
        now = datetime.now(timezone.utc).isoformat()
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT INTO signals (project, signal_type, value, created_at) "
                "VALUES (?, ?, ?, ?)",
                (project, signal_type, value, now),
            )
            conn.commit()

    def recent_signals(
        self, project: str, limit: int = 20,
    ) -> list[dict]:
        with _connect(self._db_path) as conn:
            rows = conn.execute(
                "SELECT project, signal_type, value, created_at "
                "FROM signals WHERE project = ? "
                "ORDER BY id DESC LIMIT ?",
                (project, limit),
            ).fetchall()
        return [dict(row) for row in rows]

    def _init_db(self) -> None:
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)


================================================
FILE: maggy/maggy/planning.py
================================================
"""Dual-model planning orchestrator.

Generates plan with primary model, counter-checks with secondary,
merges into a diff showing agreements and conflicts.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass

from maggy.config import MaggyConfig
from maggy.models.plan import Plan, PlanDiff, PlanStep

logger = logging.getLogger(__name__)

# Blast score at or above which a second (counter) plan is generated.
DUAL_PLAN_THRESHOLD = 4


@dataclass
class PlanRequest:
    """Input for plan generation.

    Attributes:
        task: Description of the task to plan.
        blast_score: Score compared against DUAL_PLAN_THRESHOLD to choose
            between single and dual planning; defaults to 0.
        file_context: Optional file paths handed to plan generation.
    """

    task: str
    blast_score: int = 0
    file_context: list[str] | None = None


class PlanningService:
    """Dual-plan orchestrator.

    Produces a primary plan for a task and, when the blast score is high
    enough, a counter plan plus a diff between the two.
    """

    def __init__(self, cfg: MaggyConfig):
        self.cfg = cfg

    def should_dual_plan(self, blast_score: int) -> bool:
        """Only dual-plan for tasks at or above DUAL_PLAN_THRESHOLD."""
        return blast_score >= DUAL_PLAN_THRESHOLD

    def generate_plan(
        self, task: str, model: str,
        files: list[str] | None = None,
    ) -> Plan:
        """Generate a plan (stub — real impl calls LLM).

        The stub always returns three steps: analyze, implement, test.
        The first two steps each get their own copy of ``files`` so that
        editing one step's list affects neither the other step nor the
        caller's list.
        """
        steps = [
            PlanStep(
                description=f"Analyze {task}",
                files=list(files) if files else [],
                blast_estimate=1,
            ),
            PlanStep(
                description=f"Implement {task}",
                files=list(files) if files else [],
                blast_estimate=2,
            ),
            PlanStep(
                description=f"Test {task}",
                blast_estimate=1,
            ),
        ]
        return Plan(
            task=task, model=model, steps=steps,
            total_blast=sum(s.blast_estimate for s in steps),
        )

    def diff_plans(
        self, primary: Plan, counter: Plan,
    ) -> PlanDiff:
        """Compare two plans and produce a diff.

        Steps are matched by exact description. Unmatched steps from
        each side that ``_similar`` considers alike are paired as
        conflicts and removed from the one-sided lists. All result lists
        follow plan step order, so the output is the same on every run.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order.
        p_descs = list(dict.fromkeys(s.description for s in primary.steps))
        c_descs = list(dict.fromkeys(s.description for s in counter.steps))
        p_set = set(p_descs)
        c_set = set(c_descs)

        agreed = [d for d in p_descs if d in c_set]
        primary_only = [d for d in p_descs if d not in c_set]
        counter_only = [d for d in c_descs if d not in p_set]

        conflicts = [
            {"primary": po, "counter": co}
            for po in primary_only
            for co in counter_only
            if _similar(po, co)
        ]
        conflicted_primary = {c["primary"] for c in conflicts}
        conflicted_counter = {c["counter"] for c in conflicts}

        return PlanDiff(
            agreed=agreed,
            conflicts=conflicts,
            primary_only=[
                p for p in primary_only
                if p not in conflicted_primary
            ],
            counter_only=[
                c for c in counter_only
                if c not in conflicted_counter
            ],
        )

    def plan_task(self, req: PlanRequest) -> dict:
        """Full planning flow for a task.

        Returns a dict with "mode", "plan", and "diff" keys. In dual
        mode it also carries "counter_plan" and "diff" is a PlanDiff; in
        single mode "diff" is None.
        """
        primary = self.generate_plan(
            req.task, "claude", req.file_context,
        )
        if not self.should_dual_plan(req.blast_score):
            return {
                "mode": "single",
                "plan": primary,
                "diff": None,
            }

        counter = self.generate_plan(
            req.task, "codex", req.file_context,
        )
        diff = self.diff_plans(primary, counter)
        return {
            "mode": "dual",
            "plan": primary,
            "counter_plan": counter,
            "diff": diff,
        }


def _similar(a: str, b: str) -> bool:
    """Simple word-overlap similarity check."""
    a_words = set(a.lower().split())
    b_words = set(b.lower().split())
    if not a_words or not b_words:
        return False
    overlap = len(a_words & b_words)
    return overlap / min(len(a_words), len(b_words)) > 0.5


================================================
FILE: maggy/maggy/process/__init__.py
================================================
"""Process Intelligence — learns from PRs, reviews, CI to improve engineering."""


================================================
FILE: maggy/maggy/process/discovery.py
================================================
"""Environment auto-discovery — detects CI/CD, review tools, etc."""

from __future__ import annotations

import logging
from pathlib import Path

import httpx

logger = logging.getLogger(__name__)

GITHUB_API = "https://api.github.com"


def discover_local(project_path: Path) -> dict:
    """Discover tools from local filesystem markers.

    Only checks whether well-known files or directories exist under
    ``project_path``. The one file that is opened is
    ``pyproject.toml``, which is scanned for the substrings "ruff"
    and "mypy".

    Args:
        project_path: Root directory of the project to inspect.

    Returns:
        Dict with the keys "ci", "quality", "review" and "deps", each
        mapping to a list of detected tool names (possibly empty).
    """
    result: dict[str, list[str]] = {
        "ci": [], "quality": [], "review": [], "deps": [],
    }

    # CI/CD
    gh_workflows = project_path / ".github" / "workflows"
    if gh_workflows.exists():
        result["ci"].append("github_actions")

    if (project_path / "Jenkinsfile").exists():
        result["ci"].append("jenkins")

    if (project_path / ".circleci").exists():
        result["ci"].append("circleci")

    if (project_path / ".gitlab-ci.yml").exists():
        result["ci"].append("gitlab_ci")

    # Code quality
    if (project_path / ".eslintrc.json").exists() or \
       (project_path / ".eslintrc.js").exists():
        result["quality"].append("eslint")

    pyproject = project_path / "pyproject.toml"
    if pyproject.is_file():
        # TOML is UTF-8 by specification, so decode it as such rather
        # than with the locale default. Undecodable bytes are replaced
        # and read errors ignored: an odd or unreadable pyproject must
        # not abort discovery of everything else.
        try:
            content = pyproject.read_text(
                encoding="utf-8", errors="replace",
            )
        except OSError:
            content = ""
        if "ruff" in content:
            result["quality"].append("ruff")
        if "mypy" in content:
            result["quality"].append("mypy")

    if (project_path / ".pre-commit-config.yaml").exists():
        result["quality"].append("pre-commit")

    # Review tools
    if (project_path / "CODEOWNERS").exists() or \
       (project_path / ".github" / "CODEOWNERS").exists():
        result["review"].append("codeowners")

    # Dependency management
    dependabot = project_path / ".github" / "dependabot.yml"
    if dependabot.exists():
        result["deps"].append("dependabot")

    renovate = project_path / "renovate.json"
    if renovate.exists():
        result["deps"].append("renovate")

    return result


async def discover_github(
    repo: str, token: str, branch: str = "main",
) -> dict:
    """Discover integrations via GitHub API.

    Two independent, best-effort probes are made:

    1. Fetch ``branch`` and record "branch_protection" when the
       response marks it as protected.
    2. Fetch up to 5 recent pull requests (any state), read up to 10
       review comments on each of the first 3, and record a bot when a
       commenter's login contains its name.

    Args:
        repo: Repository path inserted after ``/repos/`` in the URL.
        token: Token sent as a Bearer ``Authorization`` header.
        branch: Branch whose protection flag is checked.

    Returns:
        ``{"bots": [...], "protection": [...]}``. ``bots`` is sorted
        and free of duplicates.

    An ``httpx.HTTPError`` raised by a probe is logged at debug level
    and ends that probe only. Whatever was found so far is returned.
    """
    result: dict[str, list[str]] = {
        "bots": [], "protection": [],
    }
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v3+json",
    }
    known_bots = ("coderabbit", "dependabot")
    # Collected in a set and published after the probes, so an HTTP
    # error halfway through the comment scan can no longer leave
    # duplicates in the returned list.
    found_bots: set[str] = set()

    async with httpx.AsyncClient(
        timeout=10.0, headers=headers,
    ) as client:
        # Check branch protection
        try:
            resp = await client.get(
                f"{GITHUB_API}/repos/{repo}/branches/{branch}"
            )
            if resp.status_code == 200:
                if resp.json().get("protected"):
                    result["protection"].append(
                        "branch_protection"
                    )
        except httpx.HTTPError as exc:
            logger.debug(
                "Branch protection probe failed for %s: %s",
                repo, exc,
            )

        # Check recent PR comments for bots
        try:
            resp = await client.get(
                f"{GITHUB_API}/repos/{repo}/pulls",
                params={"state": "all", "per_page": "5"},
            )
            if resp.status_code == 200:
                for pr in resp.json()[:3]:
                    cr = await client.get(
                        f"{GITHUB_API}/repos/{repo}"
                        f"/pulls/{pr['number']}/comments",
                        params={"per_page": "10"},
                    )
                    if cr.status_code != 200:
                        continue
                    for c in cr.json():
                        # "user" or "login" may be null in the payload.
                        login = (
                            (c.get("user") or {}).get("login") or ""
                        ).lower()
                        found_bots.update(
                            bot for bot in known_bots if bot in login
                        )
        except httpx.HTTPError as exc:
            logger.debug(
                "Bot discovery probe failed for %s: %s", repo, exc,
            )

    # sorted() gives a stable order; list(set(...)) does not.
    result["bots"] = sorted(found_bots)
    return result


================================================
FILE: maggy/maggy/process/github_prs.py
================================================
"""GitHub PR fetcher — reads PRs, reviews, and CI checks.

Reuses patterns from providers/github_issues.py (httpx async,
headers, error handling). Fetches up to 200 PRs per repo.
"""

from __future__ import annotations

import logging

import httpx

from .models import CheckRecord, PRRecord, ReviewRecord

logger = logging.getLogger(__name__)

GITHUB_API = "https://api.github.com"
DEFAULT_TIMEOUT = 15


def _headers(token: str) -> dict[str, str]:
    return {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }


async def fetch_prs(
    repo: str,
    token: str,
    limit: int = 200,
) -> list[PRRecord]:
    """Fetch pull requests together with their reviews, checks and files.

    The listing comes from ``_fetch_pr_list``, which asks for PRs in
    every state, not only merged ones. Each listed PR is then enriched
    one at a time: detail, reviews, check runs (only when a head SHA is
    known) and changed files.

    Args:
        repo: Repository path inserted after ``/repos/`` in the URL.
        token: GitHub token used for every request.
        limit: Maximum number of PRs to return.
    """
    listing = await _fetch_pr_list(repo, token, limit)
    enriched: list[PRRecord] = []

    client_options = {
        "timeout": DEFAULT_TIMEOUT,
        "headers": _headers(token),
    }
    async with httpx.AsyncClient(**client_options) as client:
        for summary in listing:
            full = await _fetch_pr_detail(
                client, repo, summary["number"]
            )
            # Use the list entry when the detail request did not succeed.
            record = _parse_pr(full if full else summary)

            record.reviews = await _fetch_reviews(
                client, repo, record.number
            )
            if record.head_sha:
                record.checks = await _fetch_checks(
                    client, repo, record.head_sha
                )
            record.files = await _fetch_files(
                client, repo, record.number
            )
            enriched.append(record)

    return enriched


async def _fetch_pr_list(
    repo: str,
    token: str,
    limit: int,
) -> list[dict]:
    """Paginate through /pulls endpoint."""
    results: list[dict] = []
    page = 1
    per_page = min(limit, 100)

    async with httpx.AsyncClient(
        timeout=DEFAULT_TIMEOUT, headers=_headers(token)
    ) as client:
        while len(results) < limit:
            resp = await client.get(
                f"{GITHUB_API}/repos/{repo}/pulls",
                params={
                    "state": "all",
                    "sort": "updated",
                    "direction": "desc",
                    "per_page": str(per_page),
                    "page": str(page),
                },
            )
            if resp.status_code != 200:
                _log_error(repo, "pulls", resp)
                break
            batch = resp.json()
            if not batch:
                break
            results.extend(batch)
            page += 1

    return results[:limit]


async def _fetch_pr_detail(
    client: httpx.AsyncClient,
    repo: str,
    pr_number: int,
) -> dict | None:
    """Fetch single PR detail (has additions/deletions)."""
    resp = await client.get(
        f"{GITHUB_API}/repos/{repo}/pulls/{pr_number}"
    )
    if resp.status_code != 200:
        return None
    return resp.json()


def _parse_pr(data: dict) -> PRRecord:
    """Convert raw GitHub PR JSON to PRRecord."""
    return PRRecord(
        number=data.get("number", 0),
        title=data.get("title", ""),
        author=(data.get("user") or {}).get("login", ""),
        state=_pr_state(data),
        created_at=data.get("created_at", ""),
        merged_at=data.get("merged_at"),
        additions=data.get("additions", 0),
        deletions=data.get("deletions", 0),
        changed_files=data.get("changed_files", 0),
        head_sha=(data.get("head") or {}).get("sha", ""),
        base_branch=(data.get("base") or {}).get("ref", ""),
    )


def _pr_state(data: dict) -> str:
    if data.get("merged_at"):
        return "merged"
    return data.get("state", "open")


async def _fetch_reviews(
    client: httpx.AsyncClient,
    repo: str,
    pr_number: int,
) -> list[ReviewRecord]:
    """Fetch all reviews for a PR."""
    resp = await client.get(
        f"{GITHUB_API}/repos/{repo}/pulls/{pr_number}/reviews"
    )
    if resp.status_code != 200:
        return []
    return [
        ReviewRecord(
            reviewer=(r.get("user") or {}).get("login", ""),
            state=r.get("state", ""),
            body=r.get("body") or "",
            submitted_at=r.get("submitted_at", ""),
        )
        for r in resp.json()
    ]


async def _fetch_checks(
    client: httpx.AsyncClient,
    repo: str,
    sha: str,
) -> list[CheckRecord]:
    """Fetch CI check runs for a commit."""
    resp = await client.get(
        f"{GITHUB_API}/repos/{repo}/commits/{sha}/check-runs"
    )
    if resp.status_code != 200:
        return []
    return [
        CheckRecord(
            name=c.get("name", ""),
            conclusion=c.get("conclusion") or "pending",
            started_at=c.get("started_at", ""),
            completed_at=c.get("completed_at") or "",
        )
        for c in resp.json().get("check_runs", [])
    ]


async def _fetch_files(
    client: httpx.AsyncClient,
    repo: str,
    pr_number: int,
) -> list[str]:
    """Fetch file paths changed in a PR."""
    resp = await client.get(
        f"{GITHUB_API}/repos/{repo}/pulls/{pr_number}/files",
        params={"per_page": "100"},
    )
    if resp.status_code != 200:
        return []
    return [
        f.get("filename", "")
        for f in resp.json()
        if f.get("filename")
    ]


def _log_error(
    repo: str, endpoint: str, resp: httpx.Response
) -> None:
    body = (resp.text or "")[:200].replace("\n", " ")
    logger.warning(
        "GitHub /repos/%s/%s returned %s: %s",
        repo, endpoint, resp.status_code, body,
    )


================================================
FILE: maggy/maggy/process/model_router.py
================================================
"""Dynamic model routing — routes tasks to models by complexity.

Not just fallback chains: intelligent routing based on task complexity,
security sensitivity, and task type. Simple tasks go to cheap models,
complex tasks to premium, security-critical get dual validation.
"""

from __future__ import annotations

from dataclasses import dataclass, field

from .models import ModelTier


# Built-in tier table; route_task uses it when the caller passes no
# custom tiers.
#
# The complexity ranges overlap on purpose: among all tiers whose range
# contains the score, _select_primary picks the one with the lowest
# cost_rank (1 = cheapest).
#
# No entry sets `role`, so every tier takes ModelTier's default of
# "primary". route_task therefore finds no "validator" tier here and
# returns validator=None whenever these defaults are used.
DEFAULT_TIERS: list[ModelTier] = [
    ModelTier(
        name="local",
        provider="ollama",
        model="qwen3-coder:30b-a3b-q8_0",
        cost_rank=1,
        complexity_min=0,
        complexity_max=5,
        strengths=["formatting", "simple_edits", "crud", "feature"],
    ),
    ModelTier(
        name="kimi",
        provider="moonshot",
        model="kimi-k2",
        cost_rank=2,
        complexity_min=0,
        complexity_max=5,
        strengths=["documentation", "simple_tasks"],
    ),
    ModelTier(
        name="codex",
        provider="openai",
        model="codex",
        cost_rank=3,
        complexity_min=4,
        complexity_max=10,
        strengths=["code_generation", "api_design", "review"],
    ),
    ModelTier(
        name="claude",
        provider="anthropic",
        model="claude-sonnet-4",
        cost_rank=4,
        complexity_min=5,
        complexity_max=10,
        strengths=["complex_reasoning", "security", "architecture"],
    ),
]


@dataclass
class RoutingDecision:
    """Result of dynamic model routing, as returned by route_task."""

    # Tier chosen to carry out the task.
    primary: ModelTier
    # Second tier that validates the work; None when route_task found
    # no validator tier or the task did not call for one.
    validator: ModelTier | None = None
    # Human-readable explanation of the routing.
    reason: str = ""
    # Names of primary tiers costlier than `primary`, cheapest first.
    fallback_chain: list[str] = field(default_factory=list)


def route_task(
    complexity_score: int,
    task_type: str = "general",
    security_sensitive: bool = False,
    tiers: list[ModelTier] | None = None,
    stakes: str = "low",
) -> RoutingDecision:
    """Choose the model tier (and optional validator) for a task.

    Args:
        complexity_score: 0-10 from polyphony scoring
        task_type: "bug", "feature", "refactor", "test", etc.
        security_sensitive: True for auth/billing/PII tasks
        tiers: Custom tiers; DEFAULT_TIERS is used when this is None
            or empty
        stakes: "high" makes the selection skip the cheapest tiers and
            asks for a validator; any other value has no effect

    Returns:
        A RoutingDecision holding the primary tier, the validator (or
        None), a readable reason and the fallback chain.
    """
    pool = tiers or DEFAULT_TIERS

    # Split the pool by role; tiers with any other role are ignored.
    primaries: list[ModelTier] = []
    validators: list[ModelTier] = []
    for tier in pool:
        if tier.role == "primary":
            primaries.append(tier)
        elif tier.role == "validator":
            validators.append(tier)

    chosen = _select_primary(
        complexity_score, task_type, primaries, stakes,
    )
    checker = _select_validator(
        complexity_score, security_sensitive, validators, stakes,
    )
    chain = _build_fallback(chosen, primaries)
    explanation = _build_reason(
        chosen, complexity_score, task_type, security_sensitive
    )

    return RoutingDecision(
        primary=chosen,
        validator=checker,
        reason=explanation,
        fallback_chain=chain,
    )


def _select_primary(
    score: int,
    task_type: str,
    tiers: list[ModelTier],
    stakes: str = "low",
) -> ModelTier:
    """Pick the cheapest tier that handles the complexity."""
    candidates = [
        t for t in tiers
        if t.complexity_min <= score <= t.complexity_max
    ]
    if not candidates:
        return tiers[-1]  # Fallback to most capable

    candidates.sort(key=lambda t: t.cost_rank)

    # High stakes or security: skip cheapest tiers
    high_risk = (
        stakes == "high"
        or task_type in ("security", "auth", "billing")
    )
    if high_risk:
        capable = [
            c for c in candidates if c.cost_rank >= 3
        ]
        if capable:
            return capable[0]

    return candidates[0]


def _select_validator(
    score: int,
    security_sensitive: bool,
    validators: list[ModelTier],
    stakes: str = "low",
) -> ModelTier | None:
    """Add validation for high-risk tasks."""
    if not validators:
        return None
    if score >= 8 or security_sensitive or stakes == "high":
        return validators[0]
    return None


def _build_fallback(
    primary: ModelTier,
    tiers: list[ModelTier],
) -> list[str]:
    """Build fallback chain: next tier up, then next."""
    above = [
        t for t in tiers
        if t.cost_rank > primary.cost_rank
    ]
    above.sort(key=lambda t: t.cost_rank)
    return [t.name for t in above]


def _build_reason(
    primary: ModelTier,
    score: int,
    task_type: str,
    security_sensitive: bool,
) -> str:
    """Human-readable routing explanation."""
    parts = [f"complexity={score}/10"]
    if task_type != "general":
        parts.append(f"type={task_type}")
    if security_sensitive:
        parts.append("security-sensitive")
    parts.append(f"routed to {primary.name}")
    return ", ".join(parts)


================================================
FILE: maggy/maggy/process/models.py
================================================
"""Dataclasses for Process Intelligence — PR records, reviews, CI checks."""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class ReviewRecord:
    """A single PR review event.

    github_prs._fetch_reviews builds one record per entry of the PR's
    reviews response.
    """

    reviewer: str  # login of the reviewing user; "" when not present
    state: str  # APPROVED, CHANGES_REQUESTED, COMMENTED
    body: str  # review text; "" when the review carries no body
    submitted_at: str  # timestamp string copied from the API payload


@dataclass
class CheckRecord:
    """A single CI check run result.

    github_prs._fetch_checks builds one record per check run of a
    commit.
    """

    name: str  # check run name; "" when not present
    # success, failure, neutral, skipped; the fetcher stores "pending"
    # when the run has no conclusion yet.
    conclusion: str
    started_at: str  # timestamp string copied from the API payload
    completed_at: str  # "" when the run has not completed


@dataclass
class PRRecord:
    """One pull request plus metrics derived from its reviews and checks."""

    number: int
    title: str
    author: str
    state: str  # open, closed, merged
    created_at: str
    merged_at: str | None
    additions: int
    deletions: int
    changed_files: int
    head_sha: str
    base_branch: str
    reviews: list[ReviewRecord] = field(default_factory=list)
    checks: list[CheckRecord] = field(default_factory=list)
    files: list[str] = field(default_factory=list)

    @property
    def total_lines(self) -> int:
        """Lines touched: additions plus deletions."""
        return self.additions + self.deletions

    @property
    def review_rounds(self) -> int:
        """Number of reviews whose state is CHANGES_REQUESTED."""
        change_requests = [
            review for review in self.reviews
            if review.state == "CHANGES_REQUESTED"
        ]
        return len(change_requests)

    @property
    def time_to_merge_hours(self) -> float | None:
        """Hours between creation and merge.

        Returns None when either timestamp is missing or does not
        match the ``YYYY-MM-DDTHH:MM:SSZ`` layout.
        """
        from datetime import datetime, timezone

        if not (self.merged_at and self.created_at):
            return None

        def as_utc(stamp: str) -> datetime:
            parsed = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")
            return parsed.replace(tzinfo=timezone.utc)

        try:
            elapsed = as_utc(self.merged_at) - as_utc(self.created_at)
        except (ValueError, TypeError):
            return None
        return elapsed.total_seconds() / 3600

    @property
    def ci_passed(self) -> bool:
        """True when no check has a failing or unfinished conclusion.

        A PR without any recorded checks counts as passed.
        """
        if not self.checks:
            return True
        acceptable = ("success", "neutral", "skipped")
        for check in self.checks:
            if check.conclusion not in acceptable:
                return False
        return True


@dataclass
class ReviewSignal:
    """Recurring theme from a reviewer."""

    reviewer: str  # login of the reviewer who raises the theme
    # Theme identifier; consumers render it with "_" replaced by " ".
    # Presumably one of the REVIEW_THEMES keys in signals.py - confirm.
    theme: str
    count: int  # how often the reviewer flagged the theme
    # PR numbers illustrating the theme.
    example_prs: list[int] = field(default_factory=list)


@dataclass
class CISignal:
    """Failure statistics for one CI check."""

    check_name: str
    failure_count: int
    total_runs: int
    correlated_files: list[str] = field(default_factory=list)

    @property
    def failure_rate(self) -> float:
        """Share of runs that failed; 0.0 when there were no runs."""
        runs = self.total_runs
        return 0.0 if runs == 0 else self.failure_count / runs


@dataclass
class VelocitySignal:
    """PR velocity metrics.

    Read by patterns.identify_bottlenecks and by the report
    formatters in report.py.
    """

    avg_time_to_merge_hours: float  # mean time to merge, in hours
    median_time_to_merge_hours: float  # median time to merge, in hours
    avg_review_rounds: float  # mean review rounds per PR
    avg_pr_size: float  # mean PR size; reports label it as "lines"
    total_prs_analyzed: int  # number of PRs behind these averages


@dataclass
class ProcessReport:
    """The 5-minute analysis report.

    Assembled by ProcessService.analyze and persisted through the
    process store.
    """

    project_key: str  # project the report was generated for
    generated_at: str  # ISO-8601 timestamp in UTC
    total_prs: int  # number of PRs that were fetched and analyzed
    velocity: VelocitySignal | None = None
    review_signals: list[ReviewSignal] = field(default_factory=list)
    ci_signals: list[CISignal] = field(default_factory=list)
    # Dicts with the keys "pattern", "model", "validation" and
    # "reason", as produced by patterns.generate_routing_recs.
    routing_recommendations: list[dict] = field(
        default_factory=list
    )
    preemptive_fixes: list[str] = field(default_factory=list)
    # Markdown text produced by report.generate_summary; the service
    # fills it in after the report object has been built.
    summary: str = ""


@dataclass
class ModelTier:
    """A model tier for dynamic routing.

    The router matches a task's complexity score against the inclusive
    range [complexity_min, complexity_max] and prefers the lowest
    cost_rank among the matching tiers.
    """

    name: str  # short tier name; used in reasons and fallback chains
    provider: str  # provider identifier, e.g. "ollama" or "openai"
    model: str  # model identifier at that provider
    cost_rank: int  # 1=cheapest, 5=most expensive
    complexity_min: int  # Min complexity score
    complexity_max: int  # Max complexity score
    strengths: list[str] = field(default_factory=list)
    # Only "primary" tiers are candidates for doing the task; only
    # "validator" tiers are candidates for validating it.
    role: str = "primary"  # "primary" | "validator"


================================================
FILE: maggy/maggy/process/patterns.py
================================================
"""Pattern engine — correlates signals into actionable insights.

Takes raw signals from signals.py and produces:
- Preemptive fix recommendations
- Routing recommendations per task type
- Bottleneck identification
"""

from __future__ import annotations

from .models import (
    CISignal,
    PRRecord,
    ReviewSignal,
    VelocitySignal,
)


def identify_bottlenecks(
    velocity: VelocitySignal | None,
    prs: list[PRRecord],
) -> list[str]:
    """Explain, in readable sentences, what slows PRs down.

    Checks the average merge time (over 48h), review churn (over 1.5
    rounds) and PR size (over 500 lines). It also compares the merge
    time of large PRs (over 500 lines) with that of small ones (200
    lines or fewer).

    Returns:
        One message per finding. A single "insufficient data" message
        is returned when there is no velocity signal, and a single
        "no major bottlenecks" message when nothing was found.
    """
    if not velocity:
        return ["Insufficient data — no merged PRs found"]

    findings: list[str] = []
    report = findings.append

    merge_hours = velocity.avg_time_to_merge_hours
    if merge_hours > 48:
        report(f"Slow merge: avg {merge_hours:.0f}h (target: <24h)")

    rounds = velocity.avg_review_rounds
    if rounds > 1.5:
        report(
            f"High review churn: avg {rounds:.1f} rounds (target: <1.5)"
        )

    size = velocity.avg_pr_size
    if size > 500:
        report(f"Large PRs: avg {size:.0f} lines (target: <300)")

    # Size-velocity correlation, over PRs with a known merge time.
    big: list[PRRecord] = []
    little: list[PRRecord] = []
    for pr in prs:
        if pr.total_lines > 500 and pr.time_to_merge_hours is not None:
            big.append(pr)
        if pr.total_lines <= 200 and pr.time_to_merge_hours is not None:
            little.append(pr)

    if big and little:
        big_avg = _avg_merge_time(big)
        little_avg = _avg_merge_time(little)
        if big_avg and little_avg and big_avg > little_avg * 2:
            report(
                f"Large PRs take {big_avg / little_avg:.1f}x "
                f"longer to merge"
            )

    if not findings:
        report("No major bottlenecks detected")

    return findings


def generate_preemptive_fixes(
    review_signals: list[ReviewSignal],
    ci_signals: list[CISignal],
) -> list[str]:
    """Turn signals into things to do before opening a PR.

    Produces one suggestion for each of the first five review signals.
    For each of the first three CI signals it produces one only when
    the failure rate is above 20%, naming up to three correlated
    files when there are any.
    """
    suggestions: list[str] = [
        f"Add {signal.theme.replace('_', ' ')} before PR "
        f"— reviewer {signal.reviewer} flags this "
        f"{signal.count}x"
        for signal in review_signals[:5]
    ]

    for signal in ci_signals[:3]:
        if not signal.failure_rate > 0.2:
            continue
        suggestion = (
            f"Run {signal.check_name} locally before push "
            f"— fails {signal.failure_rate:.0%} of the time"
        )
        related = ", ".join(signal.correlated_files[:3])
        if related:
            suggestion += f" (correlated with: {related})"
        suggestions.append(suggestion)

    return suggestions


def generate_routing_recs(
    prs: list[PRRecord],
) -> list[dict]:
    """Suggest which model should handle which kind of change.

    The PR history is scanned for four patterns, in this order:
    security/auth changes, test-only changes, documentation changes
    and changes touching 10 or more files. A recommendation is emitted
    for each pattern seen at least once.

    Returns:
        Dicts with the keys "pattern", "model", "validation" and
        "reason".
    """
    recs: list[dict] = []

    def tally(matches) -> int:
        return sum(1 for pr in prs if matches(pr))

    def recommend(pattern, model, validation, reason) -> None:
        recs.append({
            "pattern": pattern,
            "model": model,
            "validation": validation,
            "reason": reason,
        })

    # Security-related PRs
    security_count = tally(_is_security_related)
    if security_count:
        recommend(
            "Security/auth changes", "claude", "codex",
            f"{security_count} security PRs found — "
            f"route to Claude + Codex validation",
        )

    # Test-only PRs
    test_count = tally(_is_test_only)
    if test_count:
        recommend(
            "Test-only changes", "kimi", None,
            f"{test_count} test-only PRs — "
            f"route to Kimi (cheaper)",
        )

    # Doc changes
    doc_count = tally(_is_docs)
    if doc_count:
        recommend(
            "Documentation changes", "kimi", None,
            f"{doc_count} doc PRs — "
            f"route to Kimi",
        )

    # Complex multi-file changes
    complex_count = tally(lambda pr: pr.changed_files >= 10)
    if complex_count:
        recommend(
            "Multi-file refactors (10+ files)", "claude", "codex",
            f"{complex_count} complex PRs — "
            f"route to Claude",
        )

    return recs


def _avg_merge_time(prs: list[PRRecord]) -> float | None:
    times = [
        p.time_to_merge_hours
        for p in prs
        if p.time_to_merge_hours is not None
    ]
    if not times:
        return None
    return sum(times) / len(times)


def _is_security_related(pr: PRRecord) -> bool:
    keywords = {"auth", "security", "token", "session"}
    title = pr.title.lower()
    return any(k in title for k in keywords) or any(
        "auth" in f or "security" in f for f in pr.files
    )


def _is_test_only(pr: PRRecord) -> bool:
    if not pr.files:
        return False
    return all(
        "test" in f.lower() or "spec" in f.lower()
        for f in pr.files
    )


def _is_docs(pr: PRRecord) -> bool:
    if not pr.files:
        return False
    return all(
        f.endswith(".md") or "doc" in f.lower()
        for f in pr.files
    )


================================================
FILE: maggy/maggy/process/report.py
================================================
"""Report generator — produces the 5-minute process analysis.

Answers:
1. Why are your PRs slow?
2. What do reviewers always flag?
3. Which model should handle which task?
4. What will Maggy change before the next PR?
"""

from __future__ import annotations

from .models import (
    CISignal,
    ProcessReport,
    ReviewSignal,
    VelocitySignal,
)


def generate_summary(report: ProcessReport) -> str:
    """Render a report as Markdown text.

    The heading and the PR count are always present. The velocity,
    review-theme, CI-failure, routing and pre-emptive-fix sections
    appear only when the report has data for them. Review themes and
    CI patterns are limited to the first five entries each.
    """
    out: list[str] = [
        f"## Process Report: {report.project_key}",
        f"Analyzed {report.total_prs} PRs",
        "",
    ]

    # Velocity
    velocity = report.velocity
    if velocity:
        out += [
            "### PR Velocity",
            f"- Avg time to merge: "
            f"{velocity.avg_time_to_merge_hours:.1f}h",
            f"- Median time to merge: "
            f"{velocity.median_time_to_merge_hours:.1f}h",
            f"- Avg review rounds: {velocity.avg_review_rounds:.1f}",
            f"- Avg PR size: {velocity.avg_pr_size:.0f} lines",
            "",
        ]

    # Review patterns
    if report.review_signals:
        out.append("### Recurring Review Themes")
        out.extend(
            f"- **{signal.reviewer}** flags "
            f"*{signal.theme.replace('_', ' ')}* "
            f"({signal.count}x)"
            for signal in report.review_signals[:5]
        )
        out.append("")

    # CI failures
    if report.ci_signals:
        out.append("### CI Failure Patterns")
        for signal in report.ci_signals[:5]:
            out.append(
                f"- **{signal.check_name}**: fails "
                f"{signal.failure_rate:.0%} of runs"
            )
            if signal.correlated_files:
                related = ", ".join(signal.correlated_files[:3])
                out.append("  Correlated with: " + related)
        out.append("")

    # Routing
    if report.routing_recommendations:
        out.append("### Model Routing Recommendations")
        for rec in report.routing_recommendations:
            target = rec.get("model", "?")
            label = rec.get("pattern", "?")
            out.append(f"- {label} -> **{target}**")
            validator = rec.get("validation")
            if validator:
                out.append(f"  + validation by **{validator}**")
        out.append("")

    # Fixes
    if report.preemptive_fixes:
        out.append("### Pre-emptive Fixes")
        out.extend(f"- {fix}" for fix in report.preemptive_fixes)
        out.append("")

    return "\n".join(out)


def format_health_metrics(
    velocity: VelocitySignal | None,
    ci_signals: list[CISignal],
    review_signals: list[ReviewSignal],
) -> dict:
    """Build the data behind a structured health dashboard.

    Returns:
        Dict with "status", "ci_pass_rate" and "top_review_themes"
        (first five signals), plus "velocity" when a velocity signal
        is given. "status" stays "unknown" unless both a velocity
        signal and a CI pass rate are available. Otherwise it is
        "healthy" (at most 1.5 review rounds and at least 90% CI
        passes), "moderate" (at most 2.5 rounds and at least 70%) or
        "needs_attention".
    """
    health: dict = {"status": "unknown"}

    if velocity:
        health["velocity"] = dict(
            avg_merge_hours=velocity.avg_time_to_merge_hours,
            median_merge_hours=velocity.median_time_to_merge_hours,
            avg_review_rounds=velocity.avg_review_rounds,
            avg_pr_size=velocity.avg_pr_size,
            prs_analyzed=velocity.total_prs_analyzed,
        )

    pass_rate = _ci_pass_rate(ci_signals)
    health["ci_pass_rate"] = pass_rate
    health["top_review_themes"] = [
        dict(reviewer=sig.reviewer, theme=sig.theme, count=sig.count)
        for sig in review_signals[:5]
    ]

    # Overall status needs both inputs; otherwise it stays "unknown".
    if not velocity or pass_rate is None:
        return health

    rounds = velocity.avg_review_rounds
    if rounds <= 1.5 and pass_rate >= 0.9:
        status = "healthy"
    elif rounds <= 2.5 and pass_rate >= 0.7:
        status = "moderate"
    else:
        status = "needs_attention"
    health["status"] = status

    return health


def _ci_pass_rate(
    ci_signals: list[CISignal],
) -> float | None:
    """Overall CI pass rate across all checks."""
    total_runs = sum(s.total_runs for s in ci_signals)
    total_fails = sum(s.failure_count for s in ci_signals)
    if total_runs == 0:
        return None
    return 1.0 - (total_fails / total_runs)


================================================
FILE: maggy/maggy/process/service.py
================================================
"""Process Intelligence service — orchestrates the full pipeline.

Pipeline: fetch PRs -> extract signals -> find patterns -> generate report.
"""

from __future__ import annotations

import logging
from datetime import datetime, timezone
from pathlib import Path

from maggy.config import MaggyConfig

from . import github_prs
from .models import ProcessReport
from .patterns import (
    generate_preemptive_fixes,
    generate_routing_recs,
    identify_bottlenecks,
)
from .report import generate_summary
from .signals import (
    extract_ci_signals,
    extract_review_signals,
    extract_velocity_signals,
)
from .store import ProcessStore

logger = logging.getLogger(__name__)


class ProcessService:
    """Orchestrates process intelligence analysis.

    Ties together the PR fetcher, the signal extractors, the pattern
    engine, the report generator and the process store.
    """

    def __init__(self, cfg: MaggyConfig):
        """Keep the config and open the process store.

        The store is a ``process.db`` file in the parent directory of
        ``cfg.storage.path`` (with ``~`` expanded).
        """
        self.cfg = cfg
        db_path = (
            Path(cfg.storage.path).expanduser().parent
            / "process.db"
        )
        self.store = ProcessStore(db_path)

    async def analyze(
        self, project_key: str
    ) -> ProcessReport:
        """Run full analysis pipeline for a project.

        Fetches up to 200 PRs, extracts review, CI and velocity
        signals, derives bottlenecks, pre-emptive fixes and routing
        recommendations, then builds, summarises and stores the report.

        Args:
            project_key: Key resolved to a GitHub repository through
                ``_resolve_repo``.

        Returns:
            The stored ProcessReport.

        Raises:
            ValueError: No GitHub token is configured, or no
                repository matches ``project_key``.
        """
        repo = self._resolve_repo(project_key)
        token = self.cfg.issue_tracker.github.token

        if not token:
            raise ValueError("GITHUB_TOKEN not configured")
        if not repo:
            raise ValueError(
                f"No repo found for project '{project_key}'"
            )

        logger.info(
            "Analyzing %s — fetching PRs from %s",
            project_key, repo,
        )

        # 1. Fetch PRs
        prs = await github_prs.fetch_prs(
            repo=repo, token=token, limit=200
        )
        logger.info("Fetched %d PRs from %s", len(prs), repo)

        # 2. Extract signals
        review_signals = extract_review_signals(prs)
        ci_signals = extract_ci_signals(prs)
        velocity = extract_velocity_signals(prs)

        # 3. Find patterns
        # ProcessReport has no field for bottlenecks, so log them
        # instead of computing them and throwing the result away.
        bottlenecks = identify_bottlenecks(velocity, prs)
        for bottleneck in bottlenecks:
            logger.info(
                "Bottleneck for %s: %s", project_key, bottleneck,
            )
        fixes = generate_preemptive_fixes(
            review_signals, ci_signals
        )
        routing = generate_routing_recs(prs)

        # 4. Build report
        now = datetime.now(timezone.utc).isoformat()
        report = ProcessReport(
            project_key=project_key,
            generated_at=now,
            total_prs=len(prs),
            velocity=velocity,
            review_signals=review_signals,
            ci_signals=ci_signals,
            routing_recommendations=routing,
            preemptive_fixes=fixes,
        )
        report.summary = generate_summary(report)

        # 5. Persist
        self.store.save_report(report)
        logger.info(
            "Process report saved for %s: %d PRs, "
            "%d review signals, %d CI signals",
            project_key, len(prs),
            len(review_signals), len(ci_signals),
        )

        return report

    def get_report(self, project_key: str) -> dict | None:
        """Get latest cached report."""
        return self.store.load_latest_report(project_key)

    def get_health(self, project_key: str) -> dict | None:
        """Get health metrics from latest report.

        Returns the stored report as loaded, or None when the store
        has nothing (or something empty) for the project.
        """
        # NOTE(review): this hands back the whole stored report and
        # does not call report.format_health_metrics - confirm whether
        # callers expect the derived health dict instead.
        raw = self.store.load_latest_report(project_key)
        if not raw:
            return None
        return raw

    def _resolve_repo(
        self, project_key: str
    ) -> str | None:
        """Map project_key to GitHub org/repo.

        A configured repo wins when the part after its last "/"
        equals the key. Otherwise a codebase with that key is mapped
        to ``<org>/<directory name of its path>``, provided an org is
        configured. Returns None when neither lookup matches.
        """
        gh = self.cfg.issue_tracker.github
        for repo in gh.repos:
            slug = repo.split("/")[-1]
            if slug == project_key:
                return repo
        # Try matching against codebase keys
        for cb in self.cfg.codebases:
            if cb.key == project_key:
                slug = Path(cb.path).name
                if gh.org:
                    return f"{gh.org}/{slug}"
        return None


================================================
FILE: maggy/maggy/process/signals.py
================================================
"""Signal extraction — derives patterns from raw PR data.

Three signal types:
- Review signals: what do reviewers always flag?
- CI signals: which checks fail and why?
- Velocity signals: how fast do PRs merge?
"""

from __future__ import annotations

from collections import Counter

from .models import (
    CISignal,
    PRRecord,
    ReviewSignal,
    VelocitySignal,
)

# Keywords that indicate common review themes.
# Maps theme name -> list of lowercase keywords/phrases. A review body is
# lowercased and counted towards a theme when ANY of the theme's keywords
# occurs in it as a plain substring (see _matches_theme), so short keywords
# such as "try" or "type" also match inside longer words.
REVIEW_THEMES: dict[str, list[str]] = {
    "error_handling": [
        "error", "exception", "try", "catch", "handle",
        "edge case", "null", "undefined",
    ],
    "testing": [
        "test", "coverage", "assert", "mock", "spec",
        "unit test", "missing test",
    ],
    "naming": [
        "naming", "rename", "variable name", "unclear",
        "confusing name", "readability",
    ],
    "types": [
        "type", "typing", "annotation", "any type",
        "type hint", "interface",
    ],
    "security": [
        "security", "auth", "sanitize", "inject",
        "xss", "csrf", "vulnerability",
    ],
    "performance": [
        "performance", "slow", "optimize", "n+1",
        "cache", "memory", "complexity",
    ],
    "documentation": [
        "document", "comment", "docstring", "readme",
        "jsdoc", "explain",
    ],
    "style": [
        "style", "format", "indent", "lint", "spacing",
        "consistent",
    ],
}


def extract_review_signals(
    prs: list[PRRecord],
) -> list[ReviewSignal]:
    """Detect themes a reviewer raises on more than one PR.

    Each non-empty review body is lowercased and checked against every
    theme in ``REVIEW_THEMES``. A PR is counted at most once per
    (reviewer, theme) pair, and only pairs seen on two or more PRs
    become signals.

    Returns:
        Signals ordered by descending ``count``; ``example_prs`` holds
        up to the first five PR numbers in the order encountered.
    """
    # reviewer -> theme -> PR numbers, in first-seen order
    by_reviewer: dict[str, dict[str, list[int]]] = {}

    for pr in prs:
        for review in pr.reviews:
            if not review.body:
                continue
            reviewer_themes = by_reviewer.setdefault(
                review.reviewer, {}
            )
            lowered = review.body.lower()
            for theme, keywords in REVIEW_THEMES.items():
                if not _matches_theme(lowered, keywords):
                    continue
                numbers = reviewer_themes.setdefault(theme, [])
                if pr.number not in numbers:
                    numbers.append(pr.number)

    recurring = [
        ReviewSignal(
            reviewer=reviewer,
            theme=theme,
            count=len(numbers),
            example_prs=numbers[:5],
        )
        for reviewer, themes in by_reviewer.items()
        for theme, numbers in themes.items()
        if len(numbers) >= 2
    ]
    return sorted(recurring, key=lambda s: s.count, reverse=True)


def extract_ci_signals(
    prs: list[PRRecord],
) -> list[CISignal]:
    """Summarise which CI checks fail and which files accompany failures.

    For every check name the total runs and failures are tallied across
    all PRs. Each failing run also credits every file of its PR, so the
    files most often present when the check fails can be reported.

    Returns:
        One signal per check with at least one failure, ordered by
        descending ``failure_rate``.
    """
    run_counts: Counter = Counter()
    fail_counts: Counter = Counter()
    # check name -> how often each file was part of a failing run
    failing_files: dict[str, Counter] = {}

    for pr in prs:
        for check in pr.checks:
            run_counts[check.name] += 1
            if check.conclusion != "failure":
                continue
            fail_counts[check.name] += 1
            bucket = failing_files.setdefault(check.name, Counter())
            for path in pr.files:
                bucket[path] += 1

    found: list[CISignal] = []
    for name, total in run_counts.items():
        failed = fail_counts[name]
        if not failed:
            continue
        # Among the five most frequent files, keep those seen in at least
        # half of the failures (integer division), never fewer than two.
        cutoff = max(2, failed // 2)
        correlated = [
            path
            for path, seen in failing_files[name].most_common(5)
            if seen >= cutoff
        ]
        found.append(CISignal(
            check_name=name,
            failure_count=failed,
            total_runs=total,
            correlated_files=correlated,
        ))

    return sorted(
        found, key=lambda s: s.failure_rate, reverse=True
    )


def extract_velocity_signals(
    prs: list[PRRecord],
) -> VelocitySignal | None:
    """Compute PR velocity metrics over merged PRs.

    Only PRs whose ``state`` is ``"merged"`` are considered. Time
    statistics use the merged PRs that have a known
    ``time_to_merge_hours``; review-round and size averages use every
    merged PR.

    The median is the true statistical median: for an even number of
    samples it is the mean of the two middle values rather than the
    upper one.

    Returns:
        A ``VelocitySignal`` with rounded metrics, or ``None`` when no
        PR is merged or none of the merged PRs has a merge time.
    """
    from statistics import median

    merged = [p for p in prs if p.state == "merged"]
    if not merged:
        return None

    merge_times = [
        p.time_to_merge_hours
        for p in merged
        if p.time_to_merge_hours is not None
    ]
    if not merge_times:
        return None

    avg_time = sum(merge_times) / len(merge_times)
    median_time = median(merge_times)

    # ``merged`` is non-empty here, so these divisions are safe.
    avg_rounds = sum(p.review_rounds for p in merged) / len(merged)
    avg_size = sum(p.total_lines for p in merged) / len(merged)

    return VelocitySignal(
        avg_time_to_merge_hours=round(avg_time, 1),
        median_time_to_merge_hours=round(median_time, 1),
        avg_review_rounds=round(avg_rounds, 2),
        avg_pr_size=round(avg_size, 1),
        total_prs_analyzed=len(merged),
    )


def _matches_theme(
    text: str, keywords: list[str]
) -> bool:
    """Check if text matches any keyword in theme."""
    return any(kw in text for kw in keywords)


================================================
FILE: maggy/maggy/process/store.py
================================================
"""SQLite persistence for process intelligence data.

Stores PR records, signals, and reports. Follows the WAL +
busy_timeout pattern from maggy/services/inbox.py.
"""

from __future__ import annotations

import json
import logging
import sqlite3
from datetime import datetime, timezone
from pathlib import Path

from .models import ProcessReport

logger = logging.getLogger(__name__)


def _connect(path: Path) -> sqlite3.Connection:
    """Open SQLite with WAL mode for concurrency."""
    db = sqlite3.connect(path, timeout=30.0)
    db.execute("PRAGMA journal_mode=WAL")
    db.execute("PRAGMA foreign_keys=ON")
    db.execute("PRAGMA busy_timeout=30000")
    return db


class ProcessStore:
    """SQLite store for process intelligence.

    Two tables hold JSON payloads keyed by ``project_key``:

    - ``pr_data``: raw PR data; saving replaces the project's previous rows.
    - ``reports``: generated reports; every save appends a new row.

    Each operation opens its own connection through ``_connect`` and
    closes it when finished. Using a ``sqlite3.Connection`` as a context
    manager only commits or rolls back — it never closes the connection —
    so the close is done explicitly to avoid leaking a handle per call.
    """

    def __init__(self, db_path: Path):
        """Ensure the parent directory and the tables exist."""
        self.db_path = db_path
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_tables()

    def _execute_all(
        self, statements: list[tuple[str, tuple]]
    ) -> None:
        """Run ``statements`` in one transaction, then close the connection."""
        db = _connect(self.db_path)
        try:
            # Commits on success, rolls back if any statement raises.
            with db:
                for sql, params in statements:
                    db.execute(sql, params)
        finally:
            db.close()

    def _latest_payload(self, sql: str, project_key: str):
        """Run a single-row query and decode its JSON payload column.

        Returns ``None`` when the query yields no row.
        """
        db = _connect(self.db_path)
        try:
            row = db.execute(sql, (project_key,)).fetchone()
        finally:
            db.close()
        if not row:
            return None
        return json.loads(row[0])

    @staticmethod
    def _report_payload(report: ProcessReport) -> dict:
        """Build the JSON-serialisable dict stored for ``report``.

        ``velocity``, ``review_signals`` and ``ci_signals`` are included
        only when present/non-empty; signal lists are capped at ten items.
        """
        payload = {
            "project_key": report.project_key,
            "generated_at": report.generated_at,
            "total_prs": report.total_prs,
            "summary": report.summary,
            "preemptive_fixes": report.preemptive_fixes,
            "routing_recommendations": (
                report.routing_recommendations
            ),
        }
        if report.velocity:
            payload["velocity"] = {
                "avg_time_to_merge_hours": (
                    report.velocity.avg_time_to_merge_hours
                ),
                "median_time_to_merge_hours": (
                    report.velocity.median_time_to_merge_hours
                ),
                "avg_review_rounds": (
                    report.velocity.avg_review_rounds
                ),
                "avg_pr_size": report.velocity.avg_pr_size,
                "total_prs_analyzed": (
                    report.velocity.total_prs_analyzed
                ),
            }
        if report.review_signals:
            payload["review_signals"] = [
                {
                    "reviewer": s.reviewer,
                    "theme": s.theme,
                    "count": s.count,
                }
                for s in report.review_signals[:10]
            ]
        if report.ci_signals:
            payload["ci_signals"] = [
                {
                    "check_name": s.check_name,
                    "failure_rate": round(s.failure_rate, 3),
                    "failure_count": s.failure_count,
                }
                for s in report.ci_signals[:10]
            ]
        return payload

    def _init_tables(self) -> None:
        """Create both tables and their project_key indexes if missing."""
        self._execute_all([
            (
                """
                CREATE TABLE IF NOT EXISTS pr_data (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    project_key TEXT NOT NULL,
                    fetched_at TEXT NOT NULL,
                    payload TEXT NOT NULL
                )
                """,
                (),
            ),
            (
                """
                CREATE TABLE IF NOT EXISTS reports (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    project_key TEXT NOT NULL,
                    generated_at TEXT NOT NULL,
                    payload TEXT NOT NULL
                )
                """,
                (),
            ),
            (
                "CREATE INDEX IF NOT EXISTS idx_pr_project "
                "ON pr_data(project_key)",
                (),
            ),
            (
                "CREATE INDEX IF NOT EXISTS idx_report_project "
                "ON reports(project_key)",
                (),
            ),
        ])

    def save_pr_data(
        self, project_key: str, data: list[dict]
    ) -> None:
        """Store raw PR data as JSON, replacing the project's old rows.

        The delete and insert share one transaction, so a failed insert
        leaves the previous data in place.
        """
        now = datetime.now(timezone.utc).isoformat()
        self._execute_all([
            (
                "DELETE FROM pr_data WHERE project_key = ?",
                (project_key,),
            ),
            (
                "INSERT INTO pr_data "
                "(project_key, fetched_at, payload) "
                "VALUES (?, ?, ?)",
                (project_key, now, json.dumps(data)),
            ),
        ])

    def load_pr_data(
        self, project_key: str
    ) -> list[dict] | None:
        """Load cached PR data. Returns None if none."""
        return self._latest_payload(
            "SELECT payload FROM pr_data "
            "WHERE project_key = ? "
            "ORDER BY id DESC LIMIT 1",
            project_key,
        )

    def save_report(self, report: ProcessReport) -> None:
        """Append a generated report to the ``reports`` table."""
        payload = self._report_payload(report)
        self._execute_all([
            (
                "INSERT INTO reports "
                "(project_key, generated_at, payload) "
                "VALUES (?, ?, ?)",
                (
                    report.project_key,
                    report.generated_at,
                    json.dumps(payload),
                ),
            ),
        ])

    def load_latest_report(
        self, project_key: str
    ) -> dict | None:
        """Load the most recent report for a project, or None."""
        return self._latest_payload(
            "SELECT payload FROM reports "
            "WHERE project_key = ? "
            "ORDER BY id DESC LIMIT 1",
            project_key,
        )


================================================
FILE: maggy/maggy/providers/__init__.py
================================================
"""Issue tracker provider abstractions."""

from .asana import AsanaProvider
from .base import Comment, IssueTrackerProvider, Task
from .github_issues import GitHubIssuesProvider

# Names re-exported by this package: the two concrete providers plus the
# shared Task/Comment dataclasses and the provider Protocol from .base.
__all__ = [
    "AsanaProvider",
    "Comment",
    "GitHubIssuesProvider",
    "IssueTrackerProvider",
    "Task",
]


def build(cfg) -> IssueTrackerProvider:
    """Create the issue-tracker provider selected in ``cfg``.

    ``cfg.issue_tracker.provider`` picks the implementation:

    - ``'github'`` -> ``GitHubIssuesProvider`` from ``cfg.issue_tracker.github``
    - ``'asana'``  -> ``AsanaProvider`` from ``cfg.issue_tracker.asana``

    ``'linear'`` is a documented stub. config.is_configured() refuses it,
    so reaching this function with that value is unexpected; it raises
    with a message naming the supported alternatives.

    Raises:
        NotImplementedError: provider is ``'linear'``.
        ValueError: provider is any other unrecognised value.
    """
    tracker = cfg.issue_tracker
    kind = tracker.provider

    if kind == "github":
        settings = tracker.github
        return GitHubIssuesProvider(
            org=settings.org,
            repos=settings.repos,
            token=settings.token,
            labels=settings.labels,
        )
    elif kind == "asana":
        settings = tracker.asana
        return AsanaProvider(
            workspace_id=settings.workspace_id,
            boards=settings.boards,
            token=settings.token,
        )
    elif kind == "linear":
        raise NotImplementedError(
            "Linear provider is a stub — not yet implemented. "
            "Use 'github' or 'asana' for now."
        )
    else:
        raise ValueError(f"Unknown issue tracker provider: {kind!r}")


================================================
FILE: maggy/maggy/providers/asana.py
================================================
"""Asana provider — compatibility shim for teams migrating from the zenloop prototype."""

from __future__ import annotations

import httpx

from .base import Comment, Task

# Root URL of the Asana REST API (version 1.0); endpoint paths are appended to it.
ASANA_BASE = "https://app.asana.com/api/1.0"


class AsanaProvider:
    """Asana-backed implementation of the IssueTrackerProvider interface.

    Unlike the zenloop prototype there are no hardcoded USER_GIDS:
    `list_followed` resolves the authenticated user's GID through
    /users/me (cached after the first successful lookup).

    Every public call opens a short-lived httpx client with a 15 second
    timeout. Non-success HTTP responses are not raised; they yield an
    empty result (``[]``, ``None`` or ``False`` depending on the method).
    """

    def __init__(self, workspace_id: str, boards: dict[str, str], token: str):
        self.workspace_id = workspace_id
        # Board name -> Asana project gid, e.g. {"dev": "project_gid", "bugs": "other_gid"}
        self.boards = boards
        self.token = token
        # Filled lazily by _get_my_gid.
        self._my_gid: str = ""

    def provider_name(self) -> str:
        """Identifier of this provider."""
        return "asana"

    def _headers(self) -> dict[str, str]:
        """Bearer-token authorization header used on every request."""
        bearer = f"Bearer {self.token}"
        return {"Authorization": bearer}

    def _to_task(self, t: dict) -> Task:
        """Convert one Asana task payload into a provider-agnostic Task.

        Missing or null ``assignee``/``projects``/``tags`` are tolerated;
        the board is the name of the first listed project, if any.
        """
        owner = t.get("assignee") or {}
        project_list = t.get("projects") or []
        tag_list = t.get("tags") or []
        if project_list:
            board_name = project_list[0].get("name", "")
        else:
            board_name = ""
        return Task(
            id=t.get("gid", ""),
            title=t.get("name", ""),
            description=t.get("notes", "") or "",
            status="closed" if t.get("completed") else "open",
            assignee=owner.get("name", ""),
            url=t.get("permalink_url", ""),
            labels=[tag.get("name", "") for tag in tag_list],
            board=board_name,
            created_at=t.get("created_at", ""),
            updated_at=t.get("modified_at", ""),
            raw=t,
        )

    async def _get_my_gid(self, client: httpx.AsyncClient) -> str:
        """Return the authenticated user's GID, fetching it once.

        An unsuccessful lookup leaves the cache empty, so the next call
        tries again.
        """
        if not self._my_gid:
            resp = await client.get(f"{ASANA_BASE}/users/me", headers=self._headers())
            if resp.status_code == 200:
                self._my_gid = resp.json().get("data", {}).get("gid", "")
        return self._my_gid

    async def list_tasks(self, board: str | None = None, state: str = "open", limit: int = 50) -> list[Task]:
        """List tasks from one configured board, or from all of them.

        An unknown or omitted ``board`` queries every configured board.
        ``state`` of "open"/"closed" filters on completion; any other
        value keeps both. Results are sorted by ``updated_at`` descending
        and truncated to ``limit``. Boards that respond with a non-200
        status are skipped.
        """
        if not self.boards:
            return []

        known_board = bool(board) and board in self.boards
        project_gids: list[str] = (
            [self.boards[board]] if known_board else list(self.boards.values())
        )

        want_open = state == "open"
        want_closed = state == "closed"
        collected: list[Task] = []

        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            for project_gid in project_gids:
                query = {
                    "opt_fields": "name,notes,completed,assignee.name,projects.name,modified_at,created_at,permalink_url,tags.name",
                    "limit": str(min(limit, 100)),
                }
                # `completed_since=now` makes Asana drop tasks completed
                # before this instant (open + just-now-completed remain).
                # It is omitted entirely when completed tasks are wanted,
                # because Asana's validator rejects an empty string.
                if want_open:
                    query["completed_since"] = "now"
                resp = await client.get(f"{ASANA_BASE}/projects/{project_gid}/tasks", params=query)
                if resp.status_code != 200:
                    continue
                for raw in resp.json().get("data", []):
                    # completed_since is time-based, so the requested
                    # state still has to be enforced here.
                    done = raw.get("completed")
                    if (want_open and done) or (want_closed and not done):
                        continue
                    collected.append(self._to_task(raw))

        collected.sort(key=lambda task: task.updated_at, reverse=True)
        return collected[:limit]

    async def get_task(self, task_id: str) -> Task | None:
        """Fetch a single task by gid; ``None`` on any non-200 response."""
        fields = "name,notes,completed,assignee.name,projects.name,modified_at,created_at,permalink_url,tags.name"
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.get(
                f"{ASANA_BASE}/tasks/{task_id}",
                params={"opt_fields": fields},
            )
            if resp.status_code == 200:
                return self._to_task(resp.json().get("data", {}))
            return None

    async def get_comments(self, task_id: str) -> list[Comment]:
        """Return the task's comments; ``[]`` on any non-200 response.

        Only stories whose ``resource_subtype`` is ``comment_added`` are
        kept.
        """
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.get(
                f"{ASANA_BASE}/tasks/{task_id}/stories",
                params={"opt_fields": "type,text,created_at,created_by.name,resource_subtype"},
            )
            if resp.status_code != 200:
                return []
            return [
                Comment(
                    id=story.get("gid", ""),
                    author=(story.get("created_by") or {}).get("name", ""),
                    text=story.get("text", ""),
                    created_at=story.get("created_at", ""),
                )
                for story in resp.json().get("data", [])
                if story.get("resource_subtype") == "comment_added"
            ]

    async def add_comment(self, task_id: str, text: str) -> Comment | None:
        """Post a comment on the task.

        Returns the created Comment, or ``None`` unless the response
        status is 200 or 201. If the response omits the text, the
        submitted ``text`` is used.
        """
        request_headers = {**self._headers(), "Content-Type": "application/json"}
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.post(
                f"{ASANA_BASE}/tasks/{task_id}/stories",
                headers=request_headers,
                json={"data": {"text": text}},
            )
            if resp.status_code not in (200, 201):
                return None
            created = resp.json().get("data", {})
            return Comment(
                id=created.get("gid", ""),
                author=(created.get("created_by") or {}).get("name", ""),
                text=created.get("text", text),
                created_at=created.get("created_at", ""),
            )

    async def update_status(self, task_id: str, status: str) -> bool:
        """Set the task's ``completed`` flag from a free-form status.

        "done", "closed", "complete", "completed" and "resolved"
        (case-insensitive, surrounding whitespace ignored) mark the task
        completed; anything else marks it not completed. Returns True
        only on a 200 response.
        """
        normalized = status.lower().strip()
        is_done = normalized in ("done", "closed", "complete", "completed", "resolved")
        request_headers = {**self._headers(), "Content-Type": "application/json"}
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.put(
                f"{ASANA_BASE}/tasks/{task_id}",
                headers=request_headers,
                json={"data": {"completed": is_done}},
            )
            return resp.status_code == 200

    async def list_followed(self, user_id: str | None = None, limit: int = 50) -> list[Task]:
        """Incomplete tasks followed by ``user_id`` (default: the token's user).

        Returns ``[]`` when no workspace is configured, the user cannot
        be resolved, or the search responds with a non-200 status.
        """
        if not self.workspace_id:
            return []
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            follower = user_id or await self._get_my_gid(client)
            if not follower:
                return []
            search = {
                "followers.any": follower,
                "completed": "false",
                "sort_by": "modified_at",
                "opt_fields": "name,notes,assignee.name,projects.name,modified_at,permalink_url",
                "limit": str(min(limit, 100)),
            }
            resp = await client.get(
                f"{ASANA_BASE}/workspaces/{self.workspace_id}/tasks/search",
                params=search,
            )
            if resp.status_code != 200:
                return []
            return [self._to_task(item) for item in resp.json().get("data", [])]

    async def search_tasks(self, query: str, limit: int = 20) -> list[Task]:
        """Text-search tasks in the configured workspace.

        Returns ``[]`` when no workspace is configured or the search
        responds with a non-200 status.
        """
        if not self.workspace_id:
            return []
        search = {
            "text": query,
            "opt_fields": "name,notes,completed,assignee.name,projects.name,modified_at,permalink_url",
            "limit": str(min(limit, 100)),
        }
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.get(
                f"{ASANA_BASE}/workspaces/{self.workspace_id}/tasks/search",
                params=search,
            )
            if resp.status_code != 200:
                return []
            return [self._to_task(item) for item in resp.json().get("data", [])]


================================================
FILE: maggy/maggy/providers/base.py
================================================
"""IssueTrackerProvider Protocol — all trackers (GitHub, Asana, Linear) implement this.

Services call provider.list_tasks() and work with Task/Comment dataclasses. They
don't care which tracker is underneath. Swap providers without touching services.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Protocol


@dataclass
class Task:
    """Provider-agnostic task representation.

    Fields that don't apply to a given provider are left empty — never None for strings
    so downstream formatters don't need null checks.

    Only ``id`` and ``title`` are required; every other field defaults to an
    empty string, list or dict. Mutable defaults use ``default_factory`` so
    instances never share a list or dict.
    """
    id: str                        # Provider-native ID ("123" for GH, "1213..." for Asana)
    title: str                     # Short one-line summary of the task
    description: str = ""          # Full body/notes
    status: str = ""               # "open", "closed", "in progress", etc.
    assignee: str = ""             # Display name
    author: str = ""               # Who created it
    url: str = ""                  # Permalink
    labels: list[str] = field(default_factory=list)  # Label/tag names
    board: str = ""                # Project/repo name
    created_at: str = ""           # ISO 8601
    updated_at: str = ""           # ISO 8601
    raw: dict = field(default_factory=dict)  # Original provider payload for escape hatches


@dataclass
class Comment:
    """Provider-agnostic comment attached to a task."""
    id: str                # Provider-native comment ID
    author: str            # Name/login of whoever wrote the comment
    text: str              # Comment body
    created_at: str = ""   # Creation timestamp as reported by the provider


class IssueTrackerProvider(Protocol):
    """Common interface across GitHub Issues, Asana, Linear, etc.

    This is a structural ``Protocol``: implementations only need matching
    method signatures, they do not have to inherit from this class.
    """

    async def list_tasks(self, board: str | None = None, state: str = "open", limit: int = 50) -> list[Task]:
        """List tasks. `board` filters to a specific project/repo if provider supports it."""
        ...

    async def get_task(self, task_id: str) -> Task | None:
        """Fetch one task by its provider-native ID, or None if it cannot be fetched."""
        ...

    async def get_comments(self, task_id: str) -> list[Comment]:
        """Return the comments on a task."""
        ...

    async def add_comment(self, task_id: str, text: str) -> Comment | None:
        """Post `text` as a comment on the task; returns the created Comment, or None on failure."""
        ...

    async def update_status(self, task_id: str, status: str) -> bool:
        """Update status. For providers that use labels (GitHub), this maps intelligently."""
        ...

    async def list_followed(self, user_id: str | None = None, limit: int = 50) -> list[Task]:
        """Tasks the user is watching/following/assigned to — powers the 'Latest' tab."""
        ...

    async def search_tasks(self, query: str, limit: int = 20) -> list[Task]:
        """Search tasks matching the free-text `query`, returning at most `limit` results."""
        ...

    def provider_name(self) -> str:
        """Return 'github' | 'asana' | 'linear' — for UI display."""
        ...


================================================
FILE: maggy/maggy/providers/github_issues.py
================================================
"""GitHub Issues provider — talks to GitHub REST API across multiple repos."""

from __future__ import annotations

import logging

import httpx

from .base import Comment, Task

logger = logging.getLogger(__name__)

# Root URL of the GitHub REST API; endpoint paths are appended to it.
GITHUB_API = "https://api.github.com"


class GitHubIssuesProvider:
    """IssueTrackerProvider implementation for GitHub Issues.

    Handles multiple repos transparently — list_tasks() aggregates across all
    configured repos. Task IDs are encoded as "repo/number" (e.g. "api/123") so
    we can round-trip back to the right repo.

    Every public call opens a short-lived httpx client with a 15 second
    timeout. Non-success HTTP responses are not raised; they yield an
    empty result (``[]``, ``None`` or ``False`` depending on the method).
    """

    def __init__(self, org: str, repos: list[str], token: str, labels: list[str] | None = None):
        self.org = org
        self.repos = repos  # Full names: ["org/api", "org/web"]
        self.token = token
        self.label_filter = labels or []

    def provider_name(self) -> str:
        """Identifier of this provider."""
        return "github"

    def _headers(self) -> dict[str, str]:
        """Auth, media-type and API-version headers sent on every request."""
        return {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    def _encode_id(self, repo: str, number: int) -> str:
        """Build the 'slug/number' task ID for an issue in ``repo``."""
        # Store repo slug (without org prefix for compactness) + issue number
        slug = repo.split("/")[-1]
        return f"{slug}/{number}"

    def _decode_id(self, task_id: str) -> tuple[str, int] | None:
        """Parse 'slug/number' IDs. Returns None for malformed input.

        Returning None (instead of raising) lets the caller translate to a
        404/None response instead of a 500 to the client.

        Malformed means: no "/", an empty slug, or a number part that is
        not made of ASCII digits only. The ASCII check matters because
        ``str.isdigit()`` alone also accepts characters such as "²" that
        ``int()`` rejects with ValueError.
        """
        if not task_id or "/" not in task_id:
            return None
        slug, _, num_str = task_id.partition("/")
        if not slug:
            return None
        if not (num_str.isascii() and num_str.isdigit()):
            return None
        number = int(num_str)
        for repo in self.repos:
            if repo.endswith("/" + slug):
                return repo, number
        # Fallback: assume org prefix (for repos not in the configured list)
        if self.org:
            return f"{self.org}/{slug}", number
        return None

    def _to_task(self, repo: str, issue: dict) -> Task:
        """Convert one GitHub issue payload into a provider-agnostic Task.

        Null ``assignee``/``user``/``labels`` values are tolerated; label
        entries that are not dicts are ignored.
        """
        return Task(
            id=self._encode_id(repo, issue["number"]),
            title=issue.get("title", ""),
            description=issue.get("body") or "",
            status=issue.get("state", "open"),
            assignee=(issue.get("assignee") or {}).get("login", ""),
            author=(issue.get("user") or {}).get("login", ""),
            url=issue.get("html_url", ""),
            labels=[lbl["name"] for lbl in (issue.get("labels") or []) if isinstance(lbl, dict)],
            board=repo.split("/")[-1],
            created_at=issue.get("created_at", ""),
            updated_at=issue.get("updated_at", ""),
            raw=issue,
        )

    def _tasks_from_search(self, payload: dict) -> list[Task]:
        """Convert a /search/issues response body into Tasks, dropping PRs."""
        tasks: list[Task] = []
        for issue in payload.get("items", []):
            if "pull_request" in issue:
                continue
            # Derive "org/repo" from the last two segments of repository_url
            repo_url = issue.get("repository_url", "")
            repo = "/".join(repo_url.rstrip("/").split("/")[-2:])
            tasks.append(self._to_task(repo, issue))
        return tasks

    async def list_tasks(self, board: str | None = None, state: str = "open", limit: int = 50) -> list[Task]:
        """List issues across repos (or one repo if `board` given). Excludes PRs.

        ``limit`` is split evenly across the selected repos (at least one
        per repo). Results are sorted by ``updated_at`` descending and
        truncated to ``limit``. A ``board`` matching no configured repo
        yields ``[]``.
        """
        repos = [r for r in self.repos if not board or r.endswith("/" + board)]
        if not repos:
            return []

        per_repo = max(1, limit // len(repos))
        tasks: list[Task] = []

        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            for repo in repos:
                params: dict[str, str] = {"state": state, "per_page": str(per_repo), "sort": "updated"}
                if self.label_filter:
                    params["labels"] = ",".join(self.label_filter)
                resp = await client.get(f"{GITHUB_API}/repos/{repo}/issues", params=params)
                if resp.status_code != 200:
                    # Log at WARNING so misconfiguration (bad token, repo renamed,
                    # missing read scope) is visible instead of silently returning
                    # an empty inbox. Include the status code + first 200 chars
                    # of the response body to make diagnostics easy.
                    body_excerpt = (resp.text or "")[:200].replace("\n", " ")
                    logger.warning(
                        "GitHub /repos/%s/issues returned %s: %s",
                        repo, resp.status_code, body_excerpt,
                    )
                    continue
                for issue in resp.json():
                    # GitHub returns PRs in /issues — filter them out
                    if "pull_request" in issue:
                        continue
                    tasks.append(self._to_task(repo, issue))

        tasks.sort(key=lambda t: t.updated_at, reverse=True)
        return tasks[:limit]

    async def get_task(self, task_id: str) -> Task | None:
        """Fetch one issue; None for a malformed ID or a non-200 response."""
        decoded = self._decode_id(task_id)
        if decoded is None:
            return None
        repo, number = decoded
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.get(f"{GITHUB_API}/repos/{repo}/issues/{number}")
            if resp.status_code != 200:
                return None
            return self._to_task(repo, resp.json())

    async def get_comments(self, task_id: str) -> list[Comment]:
        """Return an issue's comments; [] for a malformed ID or a non-200 response."""
        decoded = self._decode_id(task_id)
        if decoded is None:
            return []
        repo, number = decoded
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.get(f"{GITHUB_API}/repos/{repo}/issues/{number}/comments")
            if resp.status_code != 200:
                return []
            return [
                Comment(
                    id=str(c["id"]),
                    author=(c.get("user") or {}).get("login", ""),
                    text=c.get("body", ""),
                    created_at=c.get("created_at", ""),
                )
                for c in resp.json()
            ]

    async def add_comment(self, task_id: str, text: str) -> Comment | None:
        """Post a comment; None for a malformed ID or a status other than 200/201."""
        decoded = self._decode_id(task_id)
        if decoded is None:
            return None
        repo, number = decoded
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.post(
                f"{GITHUB_API}/repos/{repo}/issues/{number}/comments",
                json={"body": text},
            )
            if resp.status_code not in (200, 201):
                return None
            c = resp.json()
            return Comment(
                id=str(c["id"]),
                author=(c.get("user") or {}).get("login", ""),
                text=c.get("body", ""),
                created_at=c.get("created_at", ""),
            )

    async def update_status(self, task_id: str, status: str) -> bool:
        """GitHub issues only have open/closed — map any "done-like" status to closed.

        Any status that is not done-like reopens the issue. Returns True
        only on a 200 response; False for a malformed ID.
        """
        decoded = self._decode_id(task_id)
        if decoded is None:
            return False
        repo, number = decoded
        normalized = status.lower().strip()
        new_state = "closed" if normalized in ("done", "closed", "complete", "completed", "resolved") else "open"
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            resp = await client.patch(
                f"{GITHUB_API}/repos/{repo}/issues/{number}",
                json={"state": new_state},
            )
            return resp.status_code == 200

    async def list_followed(self, user_id: str | None = None, limit: int = 50) -> list[Task]:
        """Issues assigned to or mentioning the authenticated user across configured repos.

        Refuses to run without repos — otherwise the GitHub search query would
        have no repo filter and hit every public issue on the site.

        Returns ``[]`` when the user cannot be resolved or the search
        responds with a non-200 status.
        """
        if not self.repos:
            return []
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            # Figure out the user if not provided
            if not user_id:
                me = await client.get(f"{GITHUB_API}/user")
                if me.status_code == 200:
                    user_id = me.json().get("login", "")
                else:
                    return []

            # Use search API: is:open + assignee/mentions + repo filter
            repo_qual = " ".join(f"repo:{r}" for r in self.repos)
            query = f"is:issue is:open ({repo_qual}) (assignee:{user_id} OR mentions:{user_id})"
            resp = await client.get(
                f"{GITHUB_API}/search/issues",
                params={"q": query, "sort": "updated", "per_page": str(limit)},
            )
            if resp.status_code != 200:
                return []
            return self._tasks_from_search(resp.json())

    async def search_tasks(self, query: str, limit: int = 20) -> list[Task]:
        """Search issues in the configured repos; [] on a non-200 response."""
        # Same guard as list_followed — without repos, the query would search
        # all public GitHub issues, which is never what we want.
        if not self.repos:
            return []
        async with httpx.AsyncClient(timeout=15, headers=self._headers()) as client:
            repo_qual = " ".join(f"repo:{r}" for r in self.repos)
            q = f"is:issue {query} {repo_qual}"
            resp = await client.get(
                f"{GITHUB_API}/search/issues",
                params={"q": q, "per_page": str(limit)},
            )
            if resp.status_code != 200:
                return []
            return self._tasks_from_search(resp.json())


================================================
FILE: maggy/maggy/providers/monday.py
================================================
"""Monday.com provider — IssueTrackerProvider implementation."""

from __future__ import annotations

import httpx

from .base import Comment, Task

MONDAY_API = "https://api.monday.com/v2"


class MondayProvider:
    """IssueTrackerProvider for Monday.com boards.

    Sends GraphQL documents to ``MONDAY_API`` using the given API token.
    Query helpers degrade to an empty result (``{}``, ``[]`` or ``None``)
    instead of raising when the API answers with a non-200 status or
    without a usable ``data`` payload.
    """

    def __init__(self, api_token: str, board_id: str):
        self.api_token = api_token
        self.board_id = board_id

    def provider_name(self) -> str:
        """Return the provider key, ``"monday"``."""
        return "monday"

    def _headers(self) -> dict[str, str]:
        """Return the HTTP headers sent with every request."""
        return {
            "Authorization": self.api_token,
            "Content-Type": "application/json",
        }

    def _to_task(self, item: dict) -> Task:
        """Convert one Monday item dict into a ``Task``.

        Status and assignee are read from the columns whose ids are
        ``"status"`` and ``"person"``; the description is always empty.
        """
        cols = item.get("column_values", [])
        status = _col_value(cols, "status")
        assignee = _col_value(cols, "person")
        return Task(
            id=item.get("id", ""),
            title=item.get("name", ""),
            description="",
            status=status,
            assignee=assignee,
            url=item.get("url", ""),
            created_at=item.get("created_at", ""),
            updated_at=item.get("updated_at", ""),
            raw=item,
        )

    async def _query(self, q: str) -> dict:
        """POST a GraphQL document and return its ``data`` object.

        Returns ``{}`` on a non-200 response or when ``data`` is missing
        or null.
        """
        async with httpx.AsyncClient(
            timeout=15, headers=self._headers(),
        ) as client:
            resp = await client.post(
                MONDAY_API, json={"query": q},
            )
            if resp.status_code != 200:
                return {}
            # A GraphQL error response can carry ``"data": null``; normalise
            # it to a dict so callers can keep calling ``.get`` on it.
            return resp.json().get("data") or {}

    async def list_tasks(self, board=None, state="open", limit=50) -> list[Task]:
        """List up to *limit* items of *board* (default: the configured board).

        ``state`` is accepted for interface compatibility but not used.
        """
        bid = board or self.board_id
        q = _items_query(bid, limit)
        data = await self._query(q)
        boards = data.get("boards") or []
        if not boards:
            return []
        items = (boards[0].get("items_page") or {}).get("items") or []
        return [self._to_task(i) for i in items]

    async def get_task(self, task_id: str) -> Task | None:
        """Fetch a single item by id, or ``None`` when nothing is returned."""
        q = f'{{ items(ids: [{task_id}]) {{ id name column_values {{ id text }} url created_at updated_at }} }}'
        data = await self._query(q)
        items = data.get("items") or []
        if not items:
            return None
        return self._to_task(items[0])

    async def get_comments(self, task_id: str) -> list[Comment]:
        """Return the updates of an item as ``Comment`` objects."""
        q = f'{{ items(ids: [{task_id}]) {{ updates {{ id body created_at creator {{ name }} }} }} }}'
        data = await self._query(q)
        items = data.get("items") or []
        if not items:
            return []
        updates = items[0].get("updates") or []
        return [
            Comment(
                id=u.get("id", ""),
                # ``creator`` may be null; treat that as an unknown author.
                author=(u.get("creator") or {}).get("name", ""),
                text=u.get("body", ""),
                created_at=u.get("created_at", ""),
            )
            for u in updates
        ]

    async def add_comment(self, task_id: str, text: str) -> Comment | None:
        """Post *text* as an update on an item.

        Returns the created ``Comment`` (author left empty), or ``None``
        when the API returns no ``create_update`` payload.
        """
        import json  # local import: this module's header does not import json

        # Encode the body as a quoted string literal. Escaping only '"'
        # left backslashes and newlines raw, which produced an invalid (or
        # attacker-shaped) GraphQL document. ensure_ascii=False keeps
        # non-ASCII characters literal instead of \uXXXX escapes.
        body = json.dumps(text, ensure_ascii=False)
        q = f'mutation {{ create_update(item_id: {task_id}, body: {body}) {{ id body }} }}'
        data = await self._query(q)
        update = data.get("create_update") or {}
        if not update:
            return None
        return Comment(
            id=update.get("id", ""),
            author="", text=update.get("body", text),
        )

    async def update_status(self, task_id: str, status: str) -> bool:
        """Not implemented; always returns ``False``."""
        return False  # Requires board-specific column ID

    async def list_followed(self, user_id=None, limit=50) -> list[Task]:
        """Return the board's items; ``user_id`` is not used for filtering."""
        return await self.list_tasks(limit=limit)

    async def search_tasks(self, query: str, limit=20) -> list[Task]:
        """Return the board's items; ``query`` is not used for filtering."""
        return await self.list_tasks(limit=limit)


def _col_value(cols: list[dict], col_id: str) -> str:
    for c in cols:
        if c.get("id") == col_id:
            return c.get("text", "")
    return ""


def _items_query(board_id: str, limit: int) -> str:
    return (
        f'{{ boards(ids: [{board_id}]) {{ items_page(limit: {limit}) '
        f'{{ items {{ id name column_values {{ id text }} url created_at updated_at }} }} }} }}'
    )


================================================
FILE: maggy/maggy/recovery/__init__.py
================================================


================================================
FILE: maggy/maggy/recovery/rollback.py
================================================
"""Git-backed rollback savepoints for Maggy sessions."""

from __future__ import annotations

import asyncio
import re

_SAFE_ID = re.compile(r"^[a-zA-Z0-9_\-]+$")


def _validate_session_id(session_id: str) -> None:
    if not _SAFE_ID.match(session_id):
        raise ValueError(f"Invalid session_id: {session_id!r}")


class RollbackManager:
    """Create, restore, list and delete git tags used as session savepoints."""

    async def create_savepoint(self, session_id: str, working_dir: str) -> str:
        """Tag the current commit of *working_dir* as the session's savepoint.

        Returns the tag name.

        Raises:
            ValueError: if *session_id* fails validation.
            RuntimeError: if ``git tag`` exits non-zero; the message is
                git's output when there is any.
        """
        _validate_session_id(session_id)
        tag = _tag_name(session_id)
        code, output = await _run_git(working_dir, "tag", tag)
        if code != 0:
            raise RuntimeError(output or f"failed to create {tag}")
        return tag

    async def rollback(self, session_id: str, working_dir: str) -> bool:
        """Run ``git reset --hard`` to the session's savepoint tag.

        Returns ``True`` when git exits 0.

        Raises:
            ValueError: if *session_id* fails validation.
        """
        _validate_session_id(session_id)
        code, _ = await _run_git(working_dir, "reset", "--hard", _tag_name(session_id))
        return code == 0

    async def list_savepoints(self, working_dir: str) -> list[str]:
        """Return the tags matching ``maggy-save-*``; ``[]`` on error or none."""
        code, output = await _run_git(working_dir, "tag", "--list", "maggy-save-*")
        if code != 0 or not output:
            return []
        return output.splitlines()

    async def delete_savepoint(self, session_id: str, working_dir: str) -> bool:
        """Delete the session's savepoint tag; return ``True`` when git exits 0.

        An id that fails validation yields ``False`` without invoking git.
        That applies the same check as ``create_savepoint`` and ``rollback``
        while keeping this method's "bool, never raises for a bad id"
        contract.
        """
        try:
            _validate_session_id(session_id)
        except ValueError:
            return False
        code, _ = await _run_git(working_dir, "tag", "-d", _tag_name(session_id))
        return code == 0


async def _run_git(working_dir: str, *args: str) -> tuple[int, str]:
    """Run ``git <args>`` in *working_dir* and return ``(exit_code, output)``.

    stderr is merged into stdout. The combined output is decoded as UTF-8
    with undecodable bytes replaced, then stripped of surrounding whitespace.
    """
    command = ("git", *args)
    process = await asyncio.create_subprocess_exec(
        *command,
        cwd=working_dir,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
    )
    captured, _ = await process.communicate()
    output = (captured or b"").decode("utf-8", errors="replace").strip()
    exit_code = process.returncode or 0
    return exit_code, output


def _tag_name(session_id: str) -> str:
    return f"maggy-save-{session_id}"


================================================
FILE: maggy/maggy/registry.py
================================================
"""Project registry backed by Maggy config."""

from __future__ import annotations

from maggy.config import MaggyConfig, ProjectConfig


class ProjectRegistry:
    """In-memory index of the configured projects, keyed by project name."""

    def __init__(self, cfg: MaggyConfig):
        self._projects = {}
        for project in cfg.projects:
            self._projects[project.name] = project

    def list(self) -> list[ProjectConfig]:
        """Return every registered project."""
        return [*self._projects.values()]

    def get(self, name: str) -> ProjectConfig | None:
        """Return the project registered under *name*, or ``None``."""
        try:
            return self._projects[name]
        except KeyError:
            return None

    def add(self, project: ProjectConfig) -> None:
        """Register *project*; raise ``ValueError`` if its name is taken."""
        name_taken = project.name in self._projects
        if name_taken:
            raise ValueError(f"Project {project.name!r} already exists")
        self._projects[project.name] = project

    def remove(self, name: str) -> bool:
        """Unregister *name*; return whether a project was removed."""
        try:
            del self._projects[name]
        except KeyError:
            return False
        return True


================================================
FILE: maggy/maggy/routing.py
================================================
"""Blast-to-model routing with iCPG integration and reward learning.

Routes tasks to the optimal model based on complexity score.
High-blast tasks go to premium models, low-blast to cheap ones.
Learns from reward scores over time.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

from maggy.calibration.tracker import CalibrationTracker
from maggy.config import MaggyConfig
from maggy.process.model_router import (
    DEFAULT_TIERS,
    RoutingDecision,
    route_task,
)
from maggy.routing_rules import apply_override
from maggy.routing_rules_io import load as load_rules
from maggy.routing_rules import record_outcome as rules_record
from maggy.scores import RewardTable

# Accuracy threshold used by RoutingService._is_calibrated: a model whose
# calibration accuracy is non-zero and below this value is not trusted.
MIN_CALIBRATION_ACCURACY = 0.5


@dataclass
class RoutingContext:
    """Input context for a routing decision.

    Every field has a default, so callers set only what they know.
    """

    # Complexity score. RoutingService buckets it as <= 3 "low", <= 6
    # "medium", otherwise "high"; a score >= 8 adds a validator to rule
    # overrides.
    blast_score: int = 0
    # Task category; matched against task-type overrides and reward history.
    task_type: str = "general"
    # Forwarded to route_task; also adds a validator to rule overrides.
    security_sensitive: bool = False
    # Not read by RoutingService in this module.
    project_key: str = ""
    # Pipeline phase name; matched against phase overrides when non-empty.
    pipeline_phase: str = ""
    # Forwarded to route_task; "high" adds a validator to rule overrides.
    stakes: str = "low"


class RoutingService:
    """Blast-score aware routing with rule overrides.

    ``route`` tries, in order: an explicit rule override, the model learned
    from recorded rewards, and finally the default blast-score routing.
    """

    def __init__(self, cfg: MaggyConfig):
        self.cfg = cfg
        self.rewards = RewardTable(cfg)
        # calibration.db is kept in the directory of the configured storage path.
        db_dir = Path(cfg.storage.path).expanduser().parent
        self.calibration = CalibrationTracker(
            db_dir / "calibration.db",
        )
        self.rules = load_rules()

    def route(self, ctx: RoutingContext) -> RoutingDecision:
        """Pick the best model for this task context."""
        forced = apply_override(
            self.rules, ctx.task_type, ctx.pipeline_phase,
        )
        if forced:
            return self._forced_decision(forced, ctx)

        learned = self.rewards.best_model(
            ctx.task_type, self._blast_tier(ctx.blast_score),
        )
        # best_model yields a model *name*, while every other path stores a
        # tier object in ``primary`` (see _forced_decision and
        # _penalize_uncalibrated, which reads ``primary.name``). Resolve the
        # name first; an unknown name falls through to default routing, the
        # same way _forced_decision handles an unknown override.
        learned_tier = _find_tier(learned) if learned else None
        if learned_tier is not None and self._is_calibrated(learned):
            return RoutingDecision(
                primary=learned_tier,
                validator=None,
                fallback_chain=[],
                reason=(
                    f"Learned: best for {ctx.task_type} "
                    f"at blast {ctx.blast_score}"
                ),
            )

        decision = route_task(
            ctx.blast_score,
            ctx.task_type,
            ctx.security_sensitive,
            stakes=ctx.stakes,
        )
        return self._penalize_uncalibrated(decision)

    def record_outcome(
        self,
        model: str,
        task_type: str,
        blast_score: int,
        reward: float,
    ) -> None:
        """Record task outcome for learning.

        Feeds the reward table, the calibration tracker and the rules'
        per-model performance record. A reward above 0.0 counts as success.
        """
        tier = self._blast_tier(blast_score)
        self.rewards.record(model, task_type, tier, reward)
        # NOTE(review): ``reward`` is passed for both trailing arguments;
        # confirm against CalibrationTracker.record that this is intended.
        self.calibration.record(model, task_type, reward, reward)
        success = reward > 0.0
        rules_record(self.rules, model, task_type, success)

    def reload_rules(self) -> None:
        """Reload rules from disk (after Maggy self-update)."""
        self.rules = load_rules()

    def get_heatmap(self) -> list[dict]:
        """Return reward heatmap data for dashboard."""
        return self.rewards.heatmap()

    def _blast_tier(self, score: int) -> str:
        """Bucket a blast score: <= 3 "low", <= 6 "medium", else "high"."""
        if score <= 3:
            return "low"
        if score <= 6:
            return "medium"
        return "high"

    def _is_calibrated(self, model: str) -> bool:
        """Return whether *model* may be routed to.

        An accuracy of exactly 0.0 passes, as does anything at or above
        ``MIN_CALIBRATION_ACCURACY``.
        """
        acc = self.calibration.accuracy(model)
        return acc == 0.0 or acc >= MIN_CALIBRATION_ACCURACY

    def _forced_decision(
        self, model_name: str, ctx: RoutingContext,
    ) -> RoutingDecision:
        """Build decision from a rules override.

        Falls back to default routing when *model_name* is not a known
        tier. A validator (the "codex" tier) is attached for blast >= 8,
        security-sensitive or high-stakes tasks.
        """
        tier = _find_tier(model_name)
        if tier is None:
            return route_task(
                ctx.blast_score,
                ctx.task_type,
                ctx.security_sensitive,
                stakes=ctx.stakes,
            )
        validator = None
        if ctx.blast_score >= 8 or ctx.security_sensitive or ctx.stakes == "high":
            validator = _find_tier("codex")
        return RoutingDecision(
            primary=tier,
            validator=validator,
            fallback_chain=[],
            reason=f"Rule override: {ctx.task_type}"
                   f"{f'/{ctx.pipeline_phase}' if ctx.pipeline_phase else ''}"
                   f" → {model_name}",
        )

    def _penalize_uncalibrated(
        self, decision: RoutingDecision,
    ) -> RoutingDecision:
        """Promote the first fallback when the primary is not calibrated.

        The decision is returned unchanged when the primary is calibrated
        or when there is no fallback to promote.
        """
        if not self._is_calibrated(decision.primary.name):
            chain = decision.fallback_chain
            if chain:
                return RoutingDecision(
                    primary=chain[0],
                    validator=decision.validator,
                    fallback_chain=chain[1:],
                    reason="Calibration penalty",
                )
        return decision


def _find_tier(name: str):
    """Look up a ModelTier by name from defaults."""
    for t in DEFAULT_TIERS:
        if t.name == name:
            return t
    return None


================================================
FILE: maggy/maggy/routing_rules.py
================================================
"""Routing rules — task-type, pipeline-phase, stakes, cascade config.

Loaded from ~/.maggy/routing-rules.yaml. Maggy can self-update
this file when benchmark or outcome data provides evidence for
better routing decisions. Manual edits are preserved.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path

# Overrides whose confidence is below this value are ignored (see _trusted).
MIN_CONFIDENCE = 0.6


@dataclass
class ModelOverride:
    """Force a specific model for a task type or phase."""

    # Name of the model to route to. For pipeline phases, apply_override
    # skips the value "auto".
    model: str
    # Free-text justification for the override.
    reason: str = ""
    # Trust level; apply_override ignores overrides below MIN_CONFIDENCE.
    confidence: float = 1.0
    # Origin label; learn_override writes "learned".
    source: str = "rule"


@dataclass
class PerformanceRecord:
    """Tracked model performance from outcomes.

    Updated in place by ``_update_perf`` for every recorded task outcome.
    """

    # Task types with at least one recorded success.
    strengths: list[str] = field(default_factory=list)
    # Task types with at least one recorded failure.
    weaknesses: list[str] = field(default_factory=list)
    # Number of outcomes recorded so far.
    tasks_completed: int = 0
    # Running share of successful outcomes, rounded to 3 decimals.
    success_rate: float = 0.0


@dataclass
class Convention:
    """A team convention injected into prompts."""

    # The convention itself; rendered as one bullet by conventions_for.
    text: str
    # Task types this applies to; the entry "all" matches every task type.
    applies_to: list[str] = field(default_factory=list)
    # Origin label for the convention.
    source: str = "manual"


@dataclass
class StakesLevel:
    """Patterns for a single stakes level.

    NOTE(review): the matching logic that consumes these lists is not in
    this module; the field meanings are taken from their names.
    """

    file_patterns: list[str] = field(default_factory=list)
    task_types: list[str] = field(default_factory=list)
    keywords: list[str] = field(default_factory=list)


@dataclass
class StakesPatterns:
    """Stakes classification config — high/medium/low.

    Each level defaults to an empty ``StakesLevel``.
    """

    high: StakesLevel = field(default_factory=StakesLevel)
    medium: StakesLevel = field(default_factory=StakesLevel)
    low: StakesLevel = field(default_factory=StakesLevel)


@dataclass
class CascadePolicy:
    """Cascade execution policy.

    NOTE(review): the cascade executor that reads these fields is not in
    this module; confirm units and semantics against it.
    """

    enabled: bool = True
    min_blast: int = 5
    min_stakes: str = "medium"
    max_attempts: int = 3
    quality_threshold: int = 3


@dataclass
class RoutingRules:
    """All routing rules Maggy uses for orchestration."""

    version: int = 1
    # ISO timestamp of the last change; set by record_outcome/learn_override.
    updated_at: str = ""
    # Task type -> forced model.
    task_type_overrides: dict[str, ModelOverride] = field(
        default_factory=dict,
    )
    # Pipeline phase -> forced model; checked before task-type overrides.
    pipeline_phases: dict[str, ModelOverride] = field(
        default_factory=dict,
    )
    # Model name -> running performance record.
    model_performance: dict[str, PerformanceRecord] = field(
        default_factory=dict,
    )
    # Conventions that apply to every project.
    conventions: list[Convention] = field(default_factory=list)
    # Project key -> conventions added on top of the global ones.
    project_conventions: dict[str, list[Convention]] = field(
        default_factory=dict,
    )
    stakes: StakesPatterns = field(default_factory=StakesPatterns)
    cascade: CascadePolicy = field(default_factory=CascadePolicy)


def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


def apply_override(
    rules: RoutingRules, task_type: str,
    phase: str | None = None,
) -> str | None:
    """Return model name if rules override routing.

    A trusted pipeline-phase override wins over a task-type override. An
    override whose model is ``"auto"`` means "no forced model" and is
    skipped at both levels, as is any override below the confidence
    threshold. Returns ``None`` when nothing applies.
    """
    if phase and phase in rules.pipeline_phases:
        override = rules.pipeline_phases[phase]
        if override.model != "auto" and _trusted(override):
            return override.model
    if task_type in rules.task_type_overrides:
        override = rules.task_type_overrides[task_type]
        # "auto" is a sentinel, not a model name; previously it was only
        # filtered for phases and leaked out here as if it were a model.
        if override.model != "auto" and _trusted(override):
            return override.model
    return None


def record_outcome(
    rules: RoutingRules, model: str,
    task_type: str, success: bool,
    path: Path | None = None,
) -> None:
    """Fold one task outcome into *model*'s performance record and save.

    A record is created on first use. ``rules.updated_at`` is refreshed and
    the rules are written out through ``routing_rules_io.save``.
    """
    from maggy.routing_rules_io import save

    stats = rules.model_performance.get(model)
    if stats is None:
        stats = rules.model_performance[model] = PerformanceRecord()
    _update_perf(stats, task_type, success)
    rules.updated_at = _now_iso()
    save(rules, path)


def learn_override(
    rules: RoutingRules, task_type: str,
    model: str, reason: str,
    confidence: float = 0.7,
    path: Path | None = None,
) -> None:
    """Store a learned task-type override and save the rules.

    Any existing override for *task_type* is replaced; the new one is
    tagged with source ``"learned"``.
    """
    from maggy.routing_rules_io import save

    learned = ModelOverride(
        model=model,
        reason=reason,
        confidence=confidence,
        source="learned",
    )
    rules.task_type_overrides[task_type] = learned
    rules.updated_at = _now_iso()
    save(rules, path)


def conventions_for(
    rules: RoutingRules, task_type: str,
    project_key: str | None = None,
) -> str:
    """Render the conventions that apply to *task_type* as a markdown section.

    Global conventions come first, followed by those of *project_key* when
    it is known. A convention applies when its ``applies_to`` contains
    ``"all"`` or the task type. Returns ``""`` when nothing applies.
    """
    applicable = list(rules.conventions)
    if project_key and project_key in rules.project_conventions:
        applicable += rules.project_conventions[project_key]

    bullets = []
    for conv in applicable:
        if "all" in conv.applies_to or task_type in conv.applies_to:
            bullets.append(f"- {conv.text}")

    if bullets:
        return "## Team Conventions\n" + "\n".join(bullets)
    return ""


def _trusted(override: ModelOverride) -> bool:
    return override.confidence >= MIN_CONFIDENCE


def _update_perf(
    perf: PerformanceRecord, task_type: str, success: bool,
) -> None:
    total = perf.tasks_completed
    rate = perf.success_rate
    new_total = total + 1
    perf.tasks_completed = new_total
    perf.success_rate = round(
        (rate * total + (1.0 if success else 0.0)) / new_total, 3,
    )
    if success and task_type not in perf.strengths:
        perf.strengths.append(task_type)
    if not success and task_type not in perf.weaknesses:
        perf.weaknesses.append(task_type)


================================================
FILE: maggy/maggy/routing_rules_defaults.py
================================================
"""Default routing rules — seed data for first-run initialization."""

from __future__ import annotations

from maggy.routing_rules import (
    CascadePolicy,
    Convention,
    ModelOverride,
    PerformanceRecord,
    RoutingRules,
    StakesLevel,
    StakesPatterns,
    _now_iso,
)

# (convention text, task types it applies to) pairs; "all" applies to every
# task type. Turned into Convention objects by default_conventions().
_CONV_DATA = [
    ("mWP: Ship minimum wowable product, not MVP. "
     "Target 5-7 on the 11-star scale.", ["all"]),
    ("TDD: RED (failing tests) -> GREEN (minimal code) "
     "-> VALIDATE (lint, types, coverage >= 80%).",
     ["feature", "bug", "refactor"]),
    ("No secrets in code. Parameterized SQL only. "
     "Validate at API boundaries.", ["all"]),
    ("Quality gates: max 20 lines/function, 3 params, "
     "2 nesting levels, 200 lines/file.", ["all"]),
    ("Use existing patterns. Read codebase before "
     "changing. Keep changes minimal.", ["all"]),
]

# task type -> (model, reason, confidence, source); unpacked positionally
# into ModelOverride by default_rules().
_OVERRIDES = {
    "docs": ("claude", "Not prose-optimized", 0.9, "benchmark"),
    "security": ("claude", "Deep reasoning needed", 1.0, "rule"),
    "architecture": ("claude", "Cross-context awareness", 0.8, "rule"),
    "tests": ("claude", "Test generation", 0.9, "benchmark"),
    "planning": ("claude", "Structured reasoning", 0.8, "rule"),
}

# pipeline phase -> (model, reason, confidence, source); unpacked
# positionally into ModelOverride by default_rules().
_PHASES = {
    "spec": ("claude", "Comprehensive docs", 1.0, "rule"),
    "tdd_red": ("claude", "Test design expertise", 0.9, "rule"),
    "tdd_green": ("auto", "Blast-score routing", 1.0, "rule"),
    "review": ("claude", "Security+arch depth", 1.0, "rule"),
}

# model -> (strengths, weaknesses, tasks_completed, success_rate); unpacked
# positionally into PerformanceRecord by default_rules().
_PERF = {
    "claude": (["security", "tests", "docs", "architecture"], ["cost"], 6, 1.0),
    "codex": (["code_generation", "api_design", "bug", "feature"], ["docs"], 5, 1.0),
    "kimi": (["schema", "simple_tasks", "docs"], ["complex_reasoning"], 1, 1.0),
    "local": (["code_formatting", "simple_edits", "feature"], ["docs", "prose"], 1, 1.0),
}


def default_conventions() -> list[Convention]:
    """Build the seed conventions, each tagged with source "claude-bootstrap"."""
    seeded = []
    for text, applies_to in _CONV_DATA:
        seeded.append(Convention(text, applies_to, "claude-bootstrap"))
    return seeded


def default_stakes() -> StakesPatterns:
    """Return the seed stakes patterns for the high, medium and low levels."""
    high_level = StakesLevel(
        file_patterns=[
            "auth", "billing", "payment", "migration",
            "security", "deploy", "infra", ".env",
        ],
        task_types=["security", "auth", "billing", "migration"],
        keywords=["production", "customer data", "breaking change"],
    )
    medium_level = StakesLevel(
        file_patterns=["api", "routes", "models", "schema", "database"],
        task_types=["feature", "refactor"],
    )
    low_level = StakesLevel(
        file_patterns=[],
        task_types=["docs", "formatting", "tests"],
    )
    return StakesPatterns(high=high_level, medium=medium_level, low=low_level)


def default_rules() -> RoutingRules:
    """Assemble the first-run rules from the seed tables in this module."""
    task_overrides = {}
    for task_type, spec in _OVERRIDES.items():
        task_overrides[task_type] = ModelOverride(*spec)

    phase_overrides = {}
    for phase, spec in _PHASES.items():
        phase_overrides[phase] = ModelOverride(*spec)

    performance = {}
    for model, spec in _PERF.items():
        performance[model] = PerformanceRecord(*spec)

    return RoutingRules(
        version=1,
        updated_at=_now_iso(),
        conventions=default_conventions(),
        stakes=default_stakes(),
        cascade=CascadePolicy(),
        task_type_overrides=task_overrides,
        pipeline_phases=phase_overrides,
        model_performance=performance,
    )


================================================
FILE: maggy/maggy/routing_rules_io.py
================================================
"""Routing rules YAML I/O — load, save, serialize, deserialize."""

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

import yaml

from maggy.config import CONFIG_DIR

if TYPE_CHECKING:
    from maggy.routing_rules import (
        CascadePolicy,
        ModelOverride,
        PerformanceRecord,
        RoutingRules,
        StakesLevel,
        StakesPatterns,
    )

RULES_PATH = CONFIG_DIR / "routing-rules.yaml"


def save(rules: RoutingRules, path: Path | None = None) -> None:
    """Serialize *rules* to YAML at *path* (default: ``RULES_PATH``).

    Missing parent directories are created. Keys are written in the order
    produced by ``to_dict``.
    """
    destination = path or RULES_PATH
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = yaml.safe_dump(to_dict(rules), sort_keys=False)
    destination.write_text(serialized)


def load(path: Path | None = None) -> RoutingRules:
    """Load rules from *path* (default: ``RULES_PATH``).

    A missing file is seeded with the default rules. An existing file with
    no conventions gets the default conventions. In both cases the result
    is written back before it is returned.
    """
    from maggy.routing_rules_defaults import default_conventions, default_rules

    target = path or RULES_PATH
    if target.exists():
        rules = from_yaml(target)
        if rules.conventions:
            return rules
        rules.conventions = default_conventions()
    else:
        rules = default_rules()
    save(rules, target)
    return rules


def to_dict(rules: RoutingRules) -> dict:
    """Convert *rules* into the plain-dict layout written to YAML.

    Key order is part of the output because ``save`` dumps without sorting.
    """

    def _convention(conv) -> dict:
        return {
            "text": conv.text,
            "applies_to": conv.applies_to,
            "source": conv.source,
        }

    payload = {"version": rules.version, "updated_at": rules.updated_at}
    payload["stakes_patterns"] = _stakes_to_dict(rules.stakes)
    payload["cascade_policy"] = _cascade_to_dict(rules.cascade)
    payload["conventions"] = [_convention(c) for c in rules.conventions]
    payload["project_conventions"] = {
        project: [_convention(c) for c in convs]
        for project, convs in rules.project_conventions.items()
    }
    payload["task_type_overrides"] = {
        task_type: _override_to_dict(override)
        for task_type, override in rules.task_type_overrides.items()
    }
    payload["pipeline_phases"] = {
        phase: _override_to_dict(override)
        for phase, override in rules.pipeline_phases.items()
    }
    payload["model_performance"] = {
        model: _perf_to_dict(record)
        for model, record in rules.model_performance.items()
    }
    return payload


def from_yaml(path: Path) -> RoutingRules:
    """Deserialize RoutingRules from a YAML file.

    Missing or null sections fall back to empty containers; missing or
    empty stakes patterns fall back to ``_stakes_from_dict``'s defaults and
    a missing cascade policy to ``CascadePolicy()``. Entries are passed to
    the dataclass constructors as keyword arguments, so an unknown key in
    the file raises ``TypeError``.
    """
    from maggy.routing_rules import (
        CascadePolicy as CP,
        Convention,
        ModelOverride as MO,
        PerformanceRecord as PR,
        RoutingRules as RR,
    )

    # Decode explicitly as UTF-8: the file is meant to be hand-edited, and
    # the platform default encoding would misread non-ASCII text on systems
    # whose locale is not UTF-8.
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    overrides = {
        k: MO(**v)
        for k, v in (data.get("task_type_overrides") or {}).items()
    }
    phases = {
        k: MO(**v)
        for k, v in (data.get("pipeline_phases") or {}).items()
    }
    perf = {
        k: PR(**v)
        for k, v in (data.get("model_performance") or {}).items()
    }
    convs = [
        Convention(**c) for c in (data.get("conventions") or [])
    ]
    proj_convs: dict[str, list] = {}
    for pk, cv_list in (data.get("project_conventions") or {}).items():
        # A project key with no entries parses as None; treat it as an
        # empty list, like the other null-guarded sections above.
        proj_convs[pk] = [Convention(**c) for c in (cv_list or [])]
    stakes = _stakes_from_dict(data.get("stakes_patterns") or {})
    cascade_raw = data.get("cascade_policy") or {}
    cascade = CP(**cascade_raw) if cascade_raw else CP()
    return RR(
        version=data.get("version", 1),
        updated_at=data.get("updated_at", ""),
        task_type_overrides=overrides,
        pipeline_phases=phases,
        model_performance=perf,
        conventions=convs,
        project_conventions=proj_convs,
        stakes=stakes,
        cascade=cascade,
    )


def _stakes_to_dict(stakes: StakesPatterns) -> dict:
    return {
        "high": _level_to_dict(stakes.high),
        "medium": _level_to_dict(stakes.medium),
        "low": _level_to_dict(stakes.low),
    }


def _level_to_dict(level: StakesLevel) -> dict:
    return {
        "file_patterns": level.file_patterns,
        "task_types": level.task_types,
        "keywords": level.keywords,
    }


def _cascade_to_dict(cascade: CascadePolicy) -> dict:
    return {
        "enabled": cascade.enabled,
        "min_blast": cascade.min_blast,
        "min_stakes": cascade.min_stakes,
        "max_attempts": cascade.max_attempts,
        "quality_threshold": cascade.quality_threshold,
    }


def _override_to_dict(v: ModelOverride) -> dict:
    return {
        "model": v.model, "reason": v.reason,
        "confidence": v.confidence, "source": v.source,
    }


def _perf_to_dict(v: PerformanceRecord) -> dict:
    return {
        "strengths": v.strengths, "weaknesses": v.weaknesses,
        "tasks_completed": v.tasks_completed,
        "success_rate": v.success_rate,
    }


def _stakes_from_dict(raw: dict) -> StakesPatterns:
    """Rebuild stakes patterns from their serialized dict.

    An empty *raw* yields the default stakes. Otherwise each of the
    "high", "medium" and "low" sections is read, with missing sections and
    missing lists treated as empty.
    """
    from maggy.routing_rules import StakesLevel as SL
    from maggy.routing_rules import StakesPatterns as SP

    if not raw:
        from maggy.routing_rules_defaults import default_stakes
        return default_stakes()

    levels = {}
    for tier in ("high", "medium", "low"):
        section = raw.get(tier, {})
        levels[tier] = SL(
            file_patterns=section.get("file_patterns", []),
            task_types=section.get("task_types", []),
            keywords=section.get("keywords", []),
        )
    return SP(**levels)


================================================
FILE: maggy/maggy/scores.py
================================================
"""Reward table — tracks model performance per task type and blast tier.

SQLite-backed with decay so old data ages out naturally.
"""

from __future__ import annotations

import sqlite3
from contextlib import contextmanager
from datetime import date, datetime, timezone
from pathlib import Path
from typing import Iterator

from maggy.config import MaggyConfig

# One row per reward observation, plus an index on the
# (model, task_type, blast_tier) columns. Applied idempotently by
# RewardTable._init_db.
SCHEMA = """
CREATE TABLE IF NOT EXISTS rewards (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    model TEXT NOT NULL,
    task_type TEXT NOT NULL,
    blast_tier TEXT NOT NULL,
    reward REAL NOT NULL,
    recorded_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_rewards_lookup
    ON rewards(model, task_type, blast_tier);
"""

# A model needs at least this many observations before best_model ranks it.
MIN_SAMPLES = 5
# Per-day multiplier applied to a reward's weight (weight = DECAY_RATE ** days).
DECAY_RATE = 0.95


@contextmanager
def _connect(path: Path) -> Iterator[sqlite3.Connection]:
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(path), timeout=30.0)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA busy_timeout=30000")
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()


class RewardTable:
    """SQLite-backed reward table with time decay.

    Observations are stored in ``model_scores.db`` in the directory of the
    configured storage path.
    """

    def __init__(self, cfg: MaggyConfig):
        db_dir = Path(cfg.storage.path).expanduser().parent
        self._db_path = db_dir / "model_scores.db"
        self._init_db()

    def _init_db(self) -> None:
        """Create the rewards table and its index if they do not exist."""
        with _connect(self._db_path) as conn:
            conn.executescript(SCHEMA)

    def record(
        self, model: str, task_type: str,
        blast_tier: str, reward: float,
    ) -> None:
        """Record a reward observation, timestamped with the current UTC time."""
        now = datetime.now(timezone.utc).isoformat()
        with _connect(self._db_path) as conn:
            conn.execute(
                "INSERT INTO rewards "
                "(model, task_type, blast_tier, "
                "reward, recorded_at) "
                "VALUES (?, ?, ?, ?, ?)",
                (model, task_type, blast_tier, reward, now),
            )
            conn.commit()

    def best_model(
        self, task_type: str, blast_tier: str,
    ) -> str | None:
        """Return best model, or None if insufficient data.

        Each observation is weighted by ``DECAY_RATE ** age_in_days``. A
        model's score is the sum of its weighted rewards divided by its
        number of observations, and only models with at least
        ``MIN_SAMPLES`` observations are ranked.
        """
        with _connect(self._db_path) as conn:
            rows = conn.execute(
                "SELECT model, reward, recorded_at "
                "FROM rewards "
                "WHERE task_type = ? AND blast_tier = ?",
                (task_type, blast_tier),
            ).fetchall()

        if not rows:
            return None

        scores: dict[str, tuple[float, int]] = {}
        # Timestamps are written in UTC by ``record``, so age must be
        # measured against the UTC date. The local date can lag behind and
        # give a negative age, which would inflate the weight above 1.
        today = datetime.now(timezone.utc).date()
        for r in rows:
            model = r["model"]
            rec_date = datetime.fromisoformat(
                r["recorded_at"],
            ).date()
            # Clamp so a timestamp from the future (clock skew) counts as
            # fresh instead of being over-weighted.
            days = max((today - rec_date).days, 0)
            weight = DECAY_RATE ** days
            weighted = r["reward"] * weight
            total, count = scores.get(model, (0.0, 0))
            scores[model] = (total + weighted, count + 1)

        candidates = {
            m: total / count
            for m, (total, count) in scores.items()
            if count >= MIN_SAMPLES
        }
        if not candidates:
            return None

        return max(candidates, key=candidates.get)

    def heatmap(self) -> list[dict]:
        """Return reward averages for dashboard.

        One dict per (model, task_type, blast_tier) group, holding the
        plain (undecayed) average reward rounded to 3 decimals and the
        sample count.
        """
        with _connect(self._db_path) as conn:
            rows = conn.execute(
                "SELECT model, task_type, blast_tier, "
                "AVG(reward) as avg_reward, "
                "COUNT(*) as n "
                "FROM rewards "
                "GROUP BY model, task_type, blast_tier",
            ).fetchall()
        return [
            {
                "model": r["model"],
                "task_type": r["task_type"],
                "blast_tier": r["blast_tier"],
                "avg_reward": round(r["avg_reward"], 3),
                "samples": r["n"],
            }
            for r in rows
        ]


================================================
FILE: maggy/maggy/services/__init__.py
================================================


================================================
FILE: maggy/maggy/services/account_guide.py
================================================
"""Account switching guidance — detect profiles, suggest re-auth."""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

from rich.console import Console

console = Console()

# Directory name under the home dir -> (provider, re-auth command).
# detect_accounts reports a profile for each directory that exists.
_PROVIDERS = {
    ".claude": ("anthropic", "claude auth login"),
    ".codex": ("openai", "codex auth login"),
}


@dataclass
class AccountProfile:
    """Represents a CLI auth profile."""

    # Profile directory name without its leading dot (e.g. "claude").
    name: str
    # Provider key taken from _PROVIDERS (e.g. "anthropic").
    provider: str
    # Shell command that re-authenticates this profile.
    auth_command: str
    # Not set by detect_accounts; stays False unless a caller sets it.
    is_active: bool = False


def detect_accounts(home: Path | None = None) -> list[AccountProfile]:
    """Return a profile for every known CLI directory found under *home*.

    *home* defaults to the current user's home directory. Profiles are
    returned in ``_PROVIDERS`` order.
    """
    base = home or Path.home()
    return [
        AccountProfile(
            name=dirname.lstrip("."),
            provider=provider,
            auth_command=command,
        )
        for dirname, (provider, command) in _PROVIDERS.items()
        if (base / dirname).exists()
    ]


def suggest_switch(provider: str) -> str:
    """Build the plain-text instructions for switching accounts.

    Args:
        provider: Provider identifier; "anthropic" and "openai" get
            a concrete login command, anything else a generic hint.

    Returns:
        A multi-line message for known providers, otherwise a
        single-line re-authentication prompt.
    """
    known = (
        ("anthropic", "Claude", "claude"),
        ("openai", "OpenAI/Codex", "codex"),
    )
    for key, label, binary in known:
        if provider == key:
            return (
                f"{label} quota hit. Switch account:\n"
                f"  {binary} auth login\n"
                "Then restart your session."
            )
    return f"Quota hit for {provider}. Re-authenticate."


def render_switch_guide(provider: str) -> None:
    """Print the switch instructions for ``provider`` in yellow."""
    console.print("[yellow]" + suggest_switch(provider) + "[/yellow]")


================================================
FILE: maggy/maggy/services/activity.py
================================================
"""CLI activity scanner — detects running sessions and recent prompts."""

from __future__ import annotations

import json
import logging
import re
import subprocess
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path

logger = logging.getLogger(__name__)


@dataclass
class ActiveSession:
    """A currently running CLI session.

    Built by ``_parse_claude_processes`` from one ``ps aux`` line.
    """

    # Which CLI the process belongs to (the parser sets "claude").
    cli: str
    # Session identifier; the process parser leaves it empty.
    session_id: str
    # Final path component of the process working directory.
    project: str
    # Working directory of the process ("" when it is unknown).
    project_path: str
    status: str  # "running" | "agent"
    # Last prompt text; the process parser leaves it empty.
    last_prompt: str
    # Value of --agent-name on the command line, or "".
    agent_name: str
    # Value of --team-name on the command line, or "".
    team_name: str
    # Process id taken from the second column of the ps line.
    pid: int


@dataclass
class RecentPrompt:
    """A recent user prompt from CLI history."""

    # Source CLI: "claude", "codex" or "kimi".
    cli: str
    # Prompt text; the history readers truncate it to 200 chars.
    text: str
    # Project name ("" when the history does not record one).
    project: str
    # ISO-8601 timestamp string, or "" when unavailable;
    # _recent_prompts sorts on this field.
    timestamp: str
    # Session identifier as recorded by the originating CLI.
    session_id: str


class ActivityService:
    """Facade over the process scanner and the history readers."""

    def get_activity(self) -> dict:
        """Return running sessions and recent prompts as plain dicts.

        Returns:
            ``{"sessions": [...], "recent": [...]}`` where every item
            is the ``asdict`` form of the underlying dataclass.
        """
        running = _scan_processes()
        history = _recent_prompts()
        return {
            "sessions": list(map(asdict, running)),
            "recent": list(map(asdict, history)),
        }


# ── Process scanning ──────────────────────────────


def _scan_processes() -> list[ActiveSession]:
    """Find running Claude CLI processes via ``ps aux``.

    Only lines containing "claude" (case-insensitive) are handed to
    the parser; codex/kimi processes are not detected here.

    Returns:
        Parsed sessions, or an empty list when ``ps`` cannot be run
        or does not finish within the 5 second timeout.
    """
    try:
        result = subprocess.run(
            ["ps", "aux"], capture_output=True,
            text=True, timeout=5,
        )
        lines = result.stdout.splitlines()
    except (subprocess.SubprocessError, OSError):
        # Best effort: no process listing means no sessions.
        return []
    return _parse_claude_processes(
        [line for line in lines if "claude" in line.lower()],
    )


def _parse_claude_processes(
    lines: list[str],
) -> list[ActiveSession]:
    """Turn ``ps aux`` lines into ``ActiveSession`` records.

    Lines that are not real CLI invocations, or whose PID cannot
    be read, are dropped.
    """
    found: list[ActiveSession] = []
    for ps_line in lines:
        if _is_cli_process(ps_line):
            proc_id = _extract_pid(ps_line)
            if proc_id:
                workdir = _get_cwd(proc_id)
                agent_name = _extract_flag(ps_line, "--agent-name")
                team_name = _extract_flag(ps_line, "--team-name")
                found.append(ActiveSession(
                    cli="claude", session_id="",
                    project=Path(workdir).name if workdir else "",
                    project_path=workdir,
                    # A process launched with --agent-name is a team agent.
                    status="agent" if agent_name else "running",
                    last_prompt="",
                    agent_name=agent_name, team_name=team_name,
                    pid=proc_id,
                ))
    return found


def _is_cli_process(line: str) -> bool:
    """Filter real CLI processes from app helpers."""
    lower = line.lower()
    if "claude.app" in lower:
        return False
    if "grep" in lower:
        return False
    if "claude helper" in lower:
        return False
    return bool(re.search(
        r'(?:^|/|\s)claude\s+--', line,
    ))


def _extract_pid(line: str) -> int:
    """Extract PID from ps aux line."""
    parts = line.split()
    if len(parts) >= 2:
        try:
            return int(parts[1])
        except ValueError:
            pass
    return 0


def _extract_flag(line: str, flag: str) -> str:
    """Extract --flag value from command line."""
    idx = line.find(flag)
    if idx < 0:
        return ""
    rest = line[idx + len(flag):].strip()
    if not rest:
        return ""
    return rest.split()[0] if rest else ""


def _get_cwd(pid: int) -> str:
    """Get working directory of a process (macOS)."""
    try:
        result = subprocess.run(
            ["lsof", "-p", str(pid), "-Fn"],
            capture_output=True, text=True, timeout=3,
        )
        for line in result.stdout.splitlines():
            if line.startswith("n") and "/" in line:
                path = line[1:]
                if Path(path).is_dir():
                    return path
    except (subprocess.SubprocessError, OSError):
        pass
    return ""


# ── History scanning ──────────────────────────────


def _recent_prompts(
    claude_dir: Path | None = None,
    codex_dir: Path | None = None,
    kimi_dir: Path | None = None,
    limit: int = 15,
) -> list[RecentPrompt]:
    """Collect the newest prompts across all CLI histories.

    Args:
        claude_dir: Claude config dir; defaults to ``~/.claude``.
        codex_dir: Codex config dir; defaults to ``~/.codex``.
        kimi_dir: Kimi config dir; defaults to ``~/.kimi``.
        limit: Maximum number of prompts returned.

    Returns:
        Prompts ordered by timestamp string, newest first.
    """
    home = Path.home()
    collected: list[RecentPrompt] = [
        *_read_claude_history(claude_dir or home / ".claude"),
        *_read_codex_history(codex_dir or home / ".codex"),
        *_read_kimi_history(kimi_dir or home / ".kimi"),
    ]
    newest_first = sorted(
        collected, key=lambda item: item.timestamp, reverse=True,
    )
    return newest_first[:limit]


def _read_claude_history(
    claude_dir: Path,
) -> list[RecentPrompt]:
    """Read the last 50 entries of ``<claude_dir>/history.jsonl``.

    Lines that are not valid JSON or carry no ``display`` text are
    skipped. Timestamps are treated as epoch milliseconds.
    """
    history_file = claude_dir / "history.jsonl"
    if not history_file.exists():
        return []
    found: list[RecentPrompt] = []
    try:
        for raw in _tail_lines(history_file, 50):
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            display = record.get("display", "")
            if display:
                stamp = record.get("timestamp", 0)
                project_path = record.get("project", "")
                found.append(RecentPrompt(
                    cli="claude", text=display[:200],
                    project=(
                        Path(project_path).name if project_path else ""
                    ),
                    timestamp=_ms_to_iso(stamp),
                    session_id=record.get("sessionId", ""),
                ))
    except OSError:
        pass
    return found


def _read_codex_history(
    codex_dir: Path,
) -> list[RecentPrompt]:
    """Read the last 50 entries of ``<codex_dir>/history.jsonl``.

    Lines that are not valid JSON or carry no ``text`` are skipped.
    The ``ts`` field is treated as epoch seconds.
    """
    history_file = codex_dir / "history.jsonl"
    if not history_file.exists():
        return []
    found: list[RecentPrompt] = []
    try:
        for raw in _tail_lines(history_file, 50):
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            body = record.get("text", "")
            if body:
                stamp = record.get("ts", 0)
                found.append(RecentPrompt(
                    cli="codex", text=body[:200],
                    project="",
                    timestamp=_s_to_iso(stamp),
                    session_id=record.get("session_id", ""),
                ))
    except OSError:
        pass
    return found


def _read_kimi_history(
    kimi_dir: Path,
) -> list[RecentPrompt]:
    """Read prompts from ``<kimi_dir>/user-history/*.jsonl``.

    Only the three most recently modified files are read, ten
    lines each. Every prompt from a file gets that file's mtime
    (UTC) as timestamp and the file stem as session id.
    """
    hist_dir = kimi_dir / "user-history"
    if not hist_dir.is_dir():
        return []
    found: list[RecentPrompt] = []
    try:
        newest_files = sorted(
            hist_dir.glob("*.jsonl"),
            key=lambda p: p.stat().st_mtime,
            reverse=True,
        )[:3]
        for hist_file in newest_files:
            modified = datetime.fromtimestamp(
                hist_file.stat().st_mtime, tz=timezone.utc,
            ).isoformat()
            for raw in _tail_lines(hist_file, 10):
                try:
                    record = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                body = record.get("content", "")
                if not body:
                    continue
                found.append(RecentPrompt(
                    cli="kimi", text=body[:200],
                    project="", timestamp=modified,
                    session_id=hist_file.stem,
                ))
    except OSError:
        pass
    return found


# ── Helpers ───────────────────────────────────────


def _tail_lines(path: Path, n: int) -> list[str]:
    """Read last N non-empty lines from a file."""
    try:
        lines = path.read_text().splitlines()
        return [line for line in lines if line.strip()][-n:]
    except OSError:
        return []


def _ms_to_iso(ms: int | float) -> str:
    """Convert milliseconds epoch to ISO string."""
    if not ms:
        return ""
    try:
        dt = datetime.fromtimestamp(
            ms / 1000, tz=timezone.utc,
        )
        return dt.isoformat()
    except (ValueError, OSError):
        return ""


def _s_to_iso(s: int | float) -> str:
    """Convert seconds epoch to ISO string."""
    if not s:
        return ""
    try:
        dt = datetime.fromtimestamp(s, tz=timezone.utc)
        return dt.isoformat()
    except (ValueError, OSError):
        return ""


================================================
FILE: maggy/maggy/services/ai_client.py
================================================
"""AI client — uses API key or falls back to CLI subscription."""

from __future__ import annotations

import asyncio
import logging
import shutil

logger = logging.getLogger(__name__)


async def ai_complete(
    prompt: str, cfg, model: str = "",
) -> str | None:
    """Get an AI completion, preferring the API over local CLIs.

    Args:
        prompt: Text sent to the model.
        cfg: Config object; ``cfg.ai.api_key`` and ``cfg.ai.model``
            are read.
        model: Model override for the API path; empty means use
            ``cfg.ai.model``.

    Returns:
        The completion text, or None when no backend is available
        or the chosen backend fails.
    """
    target_model = model or cfg.ai.model
    api_key = cfg.ai.api_key
    if api_key:
        return await _api_complete(prompt, api_key, target_model)
    # No key configured: use the first CLI found on PATH.
    for cli_name in ("claude", "codex"):
        if shutil.which(cli_name):
            return await _cli_complete(prompt, cli_name)
    return None


async def _api_complete(
    prompt: str, api_key: str, model: str,
) -> str | None:
    """Send a single-turn request through the ``anthropic`` SDK.

    The SDK is imported lazily, so a missing package is reported
    like any other failure.

    Returns:
        Text of the first content block, or None on any error
        (which is logged as a warning).
    """
    try:
        import anthropic
        response = await anthropic.AsyncAnthropic(
            api_key=api_key,
        ).messages.create(
            model=model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}],
        )
        first_block = response.content[0]
        return first_block.text
    except Exception as exc:
        logger.warning("API completion failed: %s", exc)
        return None


async def _cli_complete(
    prompt: str, cli: str,
) -> str | None:
    """Call AI via CLI subscription (claude/codex).

    Runs ``<cli> -p <prompt> --output-format text`` and waits up
    to 120 seconds for it to finish.

    Args:
        prompt: Text passed to the CLI.
        cli: Executable name to launch.

    Returns:
        Stripped stdout on exit code 0; None on non-zero exit,
        timeout or when the executable cannot be started (each
        case is logged as a warning).
    """
    process = None
    try:
        process = await asyncio.create_subprocess_exec(
            cli, "-p", prompt, "--output-format", "text",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            process.communicate(), timeout=120,
        )
        if process.returncode == 0:
            # Replace undecodable bytes rather than raising.
            return stdout.decode(errors="replace").strip()
        logger.warning(
            "%s CLI failed (rc=%d): %s",
            cli, process.returncode,
            stderr.decode(errors="replace")[:200],
        )
    except asyncio.TimeoutError:
        logger.warning("%s CLI timed out", cli)
    except OSError as e:
        logger.warning("%s CLI not available: %s", cli, e)
    finally:
        # wait_for only cancels communicate(); the child itself keeps
        # running unless it is killed and reaped here.
        if process is not None and process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass
            await process.wait()
    return None


================================================
FILE: maggy/maggy/services/cascade.py
================================================
"""Cascade execution — quality-gate-based model escalation.

Try cheapest model first, evaluate output quality, escalate
to next tier if quality gate fails. Max 3 attempts.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Callable

if TYPE_CHECKING:
    from maggy.adapters.pi import PiAdapter

logger = logging.getLogger(__name__)


@dataclass
class CascadeAttempt:
    """Record of a single cascade attempt."""

    # Model identifier that was tried.
    model: str
    # Whether the model call itself succeeded (result.success).
    success: bool
    # Quality-gate score of the output; stays 0 for failed calls.
    score: int = 0
    # Model output; stays "" for failed calls.
    output: str = ""
    # Cost reported by the call result, 0.0 when not reported.
    cost_usd: float = 0.0


@dataclass
class CascadeResult:
    """Result of cascade execution."""

    # Model whose output was selected.
    model: str
    # The selected output text.
    output: str
    # Every attempt made, in the order it was tried.
    attempts: list[CascadeAttempt] = field(default_factory=list)
    # True when more than the first model in the chain was tried.
    escalated: bool = False
    # Cost of the selected attempt only (not summed over attempts).
    cost_usd: float = 0.0


async def cascade_execute(
    pi: PiAdapter,
    chain: list[str],
    prompt: str,
    wd: str,
    quality_gate: Callable[[str], int],
) -> CascadeResult:
    """Try the cheapest model first, escalating on quality-gate failure.

    At most the first three models of ``chain`` are tried. The first
    output scoring 3 or more is returned immediately; otherwise the
    best-scoring successful attempt is returned.

    Args:
        pi: Adapter exposing ``await send_prompt(model, prompt, wd)``.
        chain: Model identifiers ordered from cheapest upwards.
        prompt: Prompt sent to each model.
        wd: Working directory forwarded to the adapter.
        quality_gate: Scores an output; may be a plain function or a
            coroutine function (an awaitable result is awaited).

    Returns:
        A ``CascadeResult``. When no attempt succeeded, ``model`` and
        ``output`` are empty strings.
    """
    import inspect

    attempts: list[CascadeAttempt] = []
    best: CascadeAttempt | None = None

    for index, model in enumerate(chain[:3]):
        result = await pi.send_prompt(model, prompt, wd)
        cost = getattr(result, "cost_usd", 0.0)
        if not result.success:
            attempts.append(CascadeAttempt(model, False))
            logger.info("Cascade: %s failed, escalating", model)
            continue
        score = quality_gate(result.output)
        if inspect.isawaitable(score):
            # Async gates are supported alongside synchronous ones.
            score = await score
        attempt = CascadeAttempt(model, True, score, result.output, cost)
        attempts.append(attempt)
        # The first success always counts, even with a score of 0, so
        # a usable output is never replaced by an empty placeholder.
        if best is None or score > best.score:
            best = attempt
        if score >= 3:
            return CascadeResult(
                model, result.output, attempts,
                escalated=index > 0, cost_usd=cost,
            )
        logger.info(
            "Cascade: %s scored %d, escalating", model, score,
        )

    if best is None:
        best = CascadeAttempt("", False)
    return CascadeResult(
        best.model, best.output, attempts,
        escalated=len(attempts) > 1,
        cost_usd=best.cost_usd,
    )


================================================
FILE: maggy/maggy/services/chat.py
================================================
"""ChatManager — interactive Claude Code sessions with message queue."""

from __future__ import annotations

import asyncio
import logging
import uuid
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import AsyncGenerator

from maggy.config import MaggyConfig
from maggy.services.chat_stream import stream_message

logger = logging.getLogger(__name__)

# Maximum number of messages that may wait in one session's queue;
# enqueue_msg rejects further messages once this many are pending.
MAX_QUEUE = 5


@dataclass
class ChatMessage:
    """A single message in a chat session."""

    role: str  # "user" | "assistant"
    # Message text.
    content: str
    # Creation time as an ISO-8601 string in UTC, set at construction.
    timestamp: str = field(
        default_factory=lambda: datetime.now(
            timezone.utc
        ).isoformat()
    )


@dataclass
class ChatSession:
    """An interactive Claude Code session."""

    # Identifier assigned by ChatManager (10 hex characters).
    id: str
    # Session id reported by the Claude CLI; "" until one is known.
    claude_session_id: str
    # Project key this session belongs to.
    project_key: str
    # Directory the CLI subprocess runs in.
    working_dir: str
    # Conversation transcript in chronological order.
    messages: list[ChatMessage] = field(default_factory=list)
    # Defaults to "idle"; other values are assigned outside this class.
    status: str = "idle"
    # Creation time as an ISO-8601 string in UTC.
    created_at: str = field(
        default_factory=lambda: datetime.now(
            timezone.utc
        ).isoformat()
    )
    # PID of the most recent CLI subprocess; 0 when none was started.
    pid: int = 0
    # Free-form history context; not written anywhere in this module.
    history_context: str = ""
    # Messages received while a response is streaming. enqueue_msg
    # refuses to append once MAX_QUEUE entries are pending, so the
    # deque's own maxlen eviction is not triggered through it.
    pending_queue: deque = field(
        default_factory=lambda: deque(maxlen=MAX_QUEUE),
    )


def enqueue_msg(session: ChatSession, message: str) -> int:
    """Queue ``message`` on the session unless the queue is full.

    Returns:
        The 1-based position of the message in the queue, or -1
        when ``MAX_QUEUE`` messages are already waiting.
    """
    queue = session.pending_queue
    if len(queue) < MAX_QUEUE:
        queue.append(message)
        return len(queue)
    return -1


class ChatManager:
    """Registry and dispatcher for interactive Claude Code sessions.

    Sessions live in memory only, keyed by session id. Each session
    has an ``asyncio.Lock`` so only one message streams at a time;
    messages arriving meanwhile are queued on the session.
    """

    def __init__(self, cfg: MaggyConfig):
        self.cfg = cfg
        self._sessions: dict[str, ChatSession] = {}
        self._locks: dict[str, asyncio.Lock] = {}

    def create_session(
        self, project_key: str, project_path: str | None = None,
    ) -> ChatSession:
        """Register a new session and return it.

        Args:
            project_key: Configured codebase key. With an explicit
                ``project_path`` it may be empty, in which case the
                directory name is used as key.
            project_path: Optional directory; must lie inside a
                configured codebase.

        Raises:
            ValueError: Unknown project key or disallowed path.
        """
        if project_path:
            working_dir = self._validate_path(project_path)
            resolved_key = project_key or Path(working_dir).name
        else:
            working_dir = self._resolve_project(project_key)
            resolved_key = project_key
        created = ChatSession(
            id=uuid.uuid4().hex[:10],
            claude_session_id="",
            project_key=resolved_key,
            working_dir=working_dir,
        )
        self._sessions[created.id] = created
        self._locks[created.id] = asyncio.Lock()
        return created

    def find_by_project(self, key: str) -> ChatSession | None:
        """Return the first session whose project key equals ``key``."""
        return next(
            (
                candidate
                for candidate in self._sessions.values()
                if candidate.project_key == key
            ),
            None,
        )

    def auto_connect(
        self, active_sessions: list[dict],
    ) -> list[ChatSession]:
        """Ensure one session exists per active project.

        Entries lacking ``project`` or ``project_path`` are ignored,
        as are projects whose path is rejected by validation.
        Existing sessions are reused.

        Returns:
            One session per distinct project, in first-seen order.
        """
        by_project: dict[str, ChatSession] = {}
        for info in active_sessions:
            name = info.get("project", "")
            location = info.get("project_path", "")
            if not (name and location) or name in by_project:
                continue
            session = self.find_by_project(name)
            if not session:
                try:
                    session = self.create_session(name, location)
                except ValueError:
                    continue
            by_project[name] = session
        return list(by_project.values())

    def get_session(self, sid: str) -> ChatSession | None:
        """Look up a session by id; None when unknown."""
        return self._sessions.get(sid)

    def list_sessions(self) -> list[ChatSession]:
        """Return all registered sessions as a new list."""
        return list(self._sessions.values())

    def delete_session(self, session_id: str) -> bool:
        """Forget a session and its lock; True if it existed."""
        if session_id not in self._sessions:
            return False
        del self._sessions[session_id]
        self._locks.pop(session_id, None)
        return True

    async def send(
        self, session_id: str, message: str,
    ) -> AsyncGenerator[dict, None]:
        """Stream the reply to ``message``, or queue it when busy.

        While another message is streaming for the session, this
        yields a single ``queued`` (or ``error`` when the queue is
        full) chunk and stops. Otherwise it streams the reply and
        then every message queued in the meantime.

        Raises:
            ValueError: ``session_id`` is not registered.
        """
        session = self._sessions.get(session_id)
        if not session:
            raise ValueError(f"Session {session_id} not found")
        lock = self._locks.setdefault(
            session_id, asyncio.Lock(),
        )
        if not lock.locked():
            async with lock:
                async for chunk in stream_message(session, message):
                    yield chunk
                async for chunk in self._drain_queue(session):
                    yield chunk
            return
        position = enqueue_msg(session, message)
        if position < 0:
            yield {"type": "error", "content": "Queue full."}
        else:
            yield {"type": "queued", "position": position}

    async def _drain_queue(
        self, session: ChatSession,
    ) -> AsyncGenerator[dict, None]:
        """Stream queued messages one by one, oldest first.

        Each message is announced with a ``queue_next`` chunk that
        carries its first 80 characters.
        """
        queue = session.pending_queue
        while queue:
            queued = queue.popleft()
            yield {
                "type": "queue_next",
                "content": queued[:80],
            }
            async for chunk in stream_message(session, queued):
                yield chunk

    def _validate_path(self, path: str) -> str:
        """Return the resolved path if it lies inside a codebase.

        Raises:
            ValueError: The path is outside every configured root.
        """
        target = Path(path).expanduser().resolve()
        roots = [
            Path(codebase.path).expanduser().resolve()
            for codebase in self.cfg.codebases
        ]
        for base in roots:
            try:
                target.relative_to(base)
            except ValueError:
                continue
            return str(target)
        raise ValueError(
            f"Path {path!r} is not inside any configured "
            f"codebase. Allowed: {[str(r) for r in roots]}"
        )

    def _resolve_project(self, project_key: str) -> str:
        """Return the resolved directory of the codebase ``project_key``.

        Raises:
            ValueError: No codebase has that key.
        """
        for codebase in self.cfg.codebases:
            if codebase.key != project_key:
                continue
            return str(Path(codebase.path).expanduser().resolve())
        raise ValueError(
            f"Project '{project_key}' not found in codebases"
        )


================================================
FILE: maggy/maggy/services/chat_context.py
================================================
"""Chat context builder — resolves history and session IDs.

Handles the three context gaps:
1. Path-based history matching (not just project name)
2. Recent prompt injection from activity data
3. Claude session_id lookup for true --resume
"""

from __future__ import annotations

import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def build_project_context(
    history, working_dir: str,
    project_key: str, recent_prompts: list[dict],
) -> str:
    """Assemble the context text for a project.

    Combines the matched history summary and the recent-prompt
    list, separated by a blank line; empty sections are omitted.
    """
    sections = [
        _match_history(history, working_dir, project_key),
        _format_recent_prompts(recent_prompts, project_key),
    ]
    return "\n\n".join(section for section in sections if section)


def _match_history(
    history, working_dir: str, project_key: str,
) -> str:
    """Summarise history for the project from ``history.get_report()``.

    Returns "" when there is no history object or no report.
    """
    if not history:
        return ""
    report = history.get_report()
    if not report:
        return ""
    return _match_from_report(report, working_dir, project_key)


def _match_from_report(
    report: dict, working_dir: str, project_key: str,
) -> str:
    """Render the report entries that belong to this project.

    An entry belongs when its ``project`` value is one of the names
    derived from the project key and the working-directory path.

    Returns:
        A header plus one bullet per matching entry, or "" when
        nothing matches.
    """
    entries = report.get("projects", [])
    if not entries:
        return ""
    names = _path_candidates(working_dir, project_key)
    hits = [
        entry for entry in entries
        if entry.get("project", "") in names
    ]
    if not hits:
        return ""

    def _bullet(entry: dict) -> str:
        # "- N sessions, M prompts (providers), topics: a, b"
        session_count = entry.get("total_sessions", 0)
        prompt_count = entry.get("total_prompts", 0)
        providers = ", ".join(entry.get("providers_used", []))
        topics = ", ".join(entry.get("top_topics", [])[:5])
        text = f"- {session_count} sessions, {prompt_count} prompts"
        if providers:
            text += f" ({providers})"
        if topics:
            text += f", topics: {topics}"
        return text

    bullets = [_bullet(entry) for entry in hits]
    return (
        f"Project history ({len(hits)} entries):\n"
        + "\n".join(bullets)
    )


# Path components that _path_candidates never treats as a project
# name (generic system and home directory names).
_SKIP_DIRS = {
    "Users", "home", "Documents", "var", "tmp", "opt",
    "usr", "Library", "Applications",
}


def _path_candidates(
    working_dir: str, project_key: str,
) -> set[str]:
    """Collect names under which the project may appear in history.

    Always contains ``project_key``; additionally every component
    of ``working_dir`` longer than two characters that is not in
    ``_SKIP_DIRS``.
    """
    names = {project_key}
    if working_dir:
        names.update(
            segment
            for segment in Path(working_dir).parts
            if segment
            and segment != "/"
            and len(segment) > 2
            and segment not in _SKIP_DIRS
        )
    return names


def _format_recent_prompts(
    recent_prompts: list[dict], project_key: str,
) -> str:
    """Format recent prompts for this project."""
    matched = [
        p for p in recent_prompts
        if p.get("project", "") == project_key
    ][:5]
    if not matched:
        return ""
    lines = []
    for p in matched:
        text = p.get("text", "")[:120]
        ts = p.get("timestamp", "")[:10]
        lines.append(f"- [{ts}] {text}")
    return "Recent prompts:\n" + "\n".join(lines)


def resolve_claude_session_id(
    working_dir: str,
) -> str:
    """Find the latest Claude session_id for a project.

    Reads ~/.claude/history.jsonl from the end and returns the
    ``sessionId`` of the most recent entry whose ``project`` equals
    ``working_dir`` (trailing slashes ignored).

    The file is decoded as UTF-8 with undecodable bytes replaced,
    and lines that are not JSON objects or that carry a non-string
    ``project`` are skipped, so one malformed line cannot make the
    lookup fail.

    Args:
        working_dir: Project directory to look up.

    Returns:
        The session id, or "" when none is recorded or the history
        file is missing or unreadable.
    """
    history_path = Path.home() / ".claude" / "history.jsonl"
    if not history_path.exists():
        return ""
    try:
        lines = history_path.read_text(
            encoding="utf-8", errors="replace",
        ).splitlines()
    except OSError:
        return ""
    target = working_dir.rstrip("/")
    for line in reversed(lines):
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except (json.JSONDecodeError, ValueError):
            continue
        if not isinstance(entry, dict):
            # Valid JSON but not an object (list, string, number).
            continue
        project = entry.get("project", "")
        if not isinstance(project, str) or not project:
            continue
        if project.rstrip("/") == target:
            sid = entry.get("sessionId", "")
            if sid and isinstance(sid, str):
                return sid
    return ""


================================================
FILE: maggy/maggy/services/chat_router.py
================================================
"""Routed chat — blast-score routing for interactive messages.

Estimates complexity from message keywords, routes to the optimal
model via RoutingService, and builds CLI commands for any model.
"""

from __future__ import annotations

import re
from dataclasses import dataclass

from maggy.routing import RoutingContext

# Words that mark a high-impact request; counted as the ``high``
# tier by estimate_blast.
HIGH_KEYWORDS = frozenset({
    "security", "auth", "authentication", "authorization",
    "oauth", "encrypt", "vulnerability", "architecture",
    "refactor", "redesign", "migrate", "migration",
    "database", "schema", "performance", "optimize",
    "deploy", "infrastructure", "cicd", "pipeline",
})
# Words that mark ordinary feature work (the ``mid`` tier).
MID_KEYWORDS = frozenset({
    "feature", "implement", "build", "create", "api",
    "endpoint", "component", "service", "integration",
    "pagination", "filter", "search", "cache",
})
# Words that mark small, low-risk chores (the ``low`` tier).
LOW_KEYWORDS = frozenset({
    "fix", "typo", "rename", "move", "style", "format",
    "lint", "comment", "readme", "docs", "log", "print",
    "bump", "version", "config", "env", "update",
})
# Task type -> vocabulary. estimate_type picks the type with the
# most matching words; on a tie the earlier entry wins.
TYPE_KEYWORDS: dict[str, frozenset[str]] = {
    "security": frozenset({
        "auth", "authentication", "authorization",
        "security", "permission", "token",
        "encrypt", "vulnerability", "oauth", "csrf",
    }),
    "search": frozenset({
        "find", "search", "grep", "where", "locate",
        "which", "look", "scan", "show", "list", "read",
    }),
    "docs": frozenset({
        "document", "documentation", "readme", "docs",
        "docstring", "comment", "spec", "jsdoc", "write",
    }),
    "tests": frozenset({
        "test", "spec", "coverage", "mock", "fixture",
        "assert", "pytest", "jest", "vitest",
    }),
    "frontend": frozenset({
        "component", "css", "style", "ui", "layout",
        "responsive", "tailwind", "react", "vue",
    }),
}
# Blast score returned for an empty or whitespace-only message.
DEFAULT_BLAST = 5
# Verbs that indicate a read-only (retrieval) request.
_RETRIEVAL = re.compile(
    r"\b(find|get|show|check|where|list|read|look|grab|pick)\b",
    re.IGNORECASE,
)
# Verbs that indicate the request changes code or infrastructure.
_MUTATION = re.compile(
    r"\b(create|add|build|implement|write|refactor|migrate"
    r"|redesign|overhaul|deploy)\b",
    re.IGNORECASE,
)


def estimate_blast(message: str) -> int:
    """Estimate a blast score (1-10) from the words of a message.

    Blank messages get ``DEFAULT_BLAST``. Messages of at most three
    distinct words without any tier keyword score 1. Everything
    else is scored by keyword tier and then capped by intent.
    """
    if not message.strip():
        return DEFAULT_BLAST
    tokens = set(re.findall(r"[a-zA-Z]+", message.lower()))
    high_hits = tokens & HIGH_KEYWORDS
    mid_hits = tokens & MID_KEYWORDS
    low_hits = tokens & LOW_KEYWORDS
    if len(tokens) <= 3 and not (high_hits or mid_hits or low_hits):
        return 1
    base_score = _keyword_score(
        len(high_hits), len(mid_hits), len(low_hits),
    )
    return _apply_intent(message, base_score)


def _keyword_score(high: int, mid: int, low: int) -> int:
    """Score based on keyword tier counts."""
    if high >= 2:
        return min(9, 7 + high - 2)
    if high == 1:
        return 7
    if low >= 2 and mid == 0:
        return 2
    if low >= 1 and mid == 0:
        return 3
    if mid >= 2:
        return 6
    if mid >= 1:
        return 5
    return 1


def _apply_intent(message: str, score: int) -> int:
    """Cap the score at 3 for read-only requests.

    The cap applies only when the message contains a retrieval
    verb, no mutation verb, and the score is below 7.
    """
    retrieval_only = (
        _RETRIEVAL.search(message) is not None
        and _MUTATION.search(message) is None
    )
    if retrieval_only and score < 7:
        return min(score, 3)
    return score


def estimate_type(message: str) -> str:
    """Guess the task type from the words of a message.

    Returns:
        The ``TYPE_KEYWORDS`` key with the most matching words (the
        earliest key on a tie), or "general" when nothing matches.
    """
    tokens = set(re.findall(r"[a-zA-Z]+", message.lower()))
    hit_counts = {
        name: len(tokens & vocabulary)
        for name, vocabulary in TYPE_KEYWORDS.items()
    }
    # max() returns the first maximal key, matching dict order.
    leader = max(hit_counts, key=hit_counts.get, default="general")
    return leader if hit_counts.get(leader, 0) > 0 else "general"


@dataclass
class RouteDecision:
    """Result of routing a chat message."""

    # Name of the model chosen by the routing service.
    model: str
    # Reason string supplied by the routing service.
    reason: str
    # Blast score used for routing (override or estimate).
    blast: int
    # Task type used for routing (override or estimate).
    task_type: str


class RoutedChat:
    """Chooses a model for a chat message via the routing service."""

    def __init__(self, routing, budget):
        self._routing = routing
        self._budget = budget

    def decide(
        self,
        message: str,
        blast_override: int | None = None,
        type_override: str | None = None,
    ) -> RouteDecision:
        """Route ``message`` and describe the outcome.

        Args:
            message: User message to classify.
            blast_override: Blast score to use instead of the
                estimate; a falsy value means "estimate".
            type_override: Task type to use instead of the
                estimate; a falsy value means "estimate".

        Returns:
            The chosen model name together with the inputs used.
        """
        blast = (
            blast_override if blast_override
            else estimate_blast(message)
        )
        task_type = (
            type_override if type_override
            else estimate_type(message)
        )
        routed = self._routing.route(
            RoutingContext(blast_score=blast, task_type=task_type),
        )
        return RouteDecision(
            model=self._model_name(routed.primary),
            reason=routed.reason,
            blast=blast,
            task_type=task_type,
        )

    def _model_name(self, primary) -> str:
        """Return ``primary`` as a string, preferring its ``name``."""
        if not isinstance(primary, str):
            return str(getattr(primary, "name", primary))
        return primary


================================================
FILE: maggy/maggy/services/chat_stream.py
================================================
"""Chat streaming — subprocess execution and JSON parsing.

Extracted from ChatManager for quality-gate compliance.
Handles claude CLI subprocess, stream-json parsing, and
assistant message extraction.
"""

from __future__ import annotations

import asyncio
import json
import logging
import os
from typing import TYPE_CHECKING, AsyncGenerator

if TYPE_CHECKING:
    from maggy.services.chat import ChatSession

logger = logging.getLogger(__name__)

# Executable name of the Claude CLI; build_cmd uses it as argv[0].
CLAUDE_BIN = "claude"


def build_cmd(session: ChatSession, message: str) -> list[str]:
    """Assemble the argv for one Claude CLI invocation.

    The message is sent in print mode with stream-json output.
    ``--resume`` is appended when the session already knows its
    Claude session id.
    """
    resume_args = (
        ["--resume", session.claude_session_id]
        if session.claude_session_id
        else []
    )
    return [
        CLAUDE_BIN, "-p", message,
        "--output-format", "stream-json",
        "--verbose",
        "--dangerously-skip-permissions",
        *resume_args,
    ]


def parse_chunk(
    text: str, session: ChatSession,
) -> dict | None:
    """Parse a stream-json line from Claude.

    Lines that are not JSON, or that are JSON but not an object,
    are passed through as plain text chunks. The first
    ``session_id`` seen is stored on the session.

    Args:
        text: One stripped output line of the CLI.
        session: Session whose ``claude_session_id`` may be filled.

    Returns:
        A ``text`` chunk (raw line or assistant message), a
        ``result`` chunk (with optional ``cost_usd`` and token
        counts), or None for event types that are not surfaced.
    """
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return {"type": "text", "content": text}
    if not isinstance(data, dict):
        # A bare number, string or list is valid JSON but not an
        # event; treat it like any other raw output line.
        return {"type": "text", "content": text}
    if "session_id" in data and not session.claude_session_id:
        session.claude_session_id = data["session_id"]
    msg_type = data.get("type", "")
    if msg_type == "assistant":
        return _extract_assistant(data)
    if msg_type == "result":
        content = data.get("result", "")
        chunk: dict = {"type": "result", "content": content}
        cost = data.get("cost_usd")
        if cost is not None:
            chunk["cost_usd"] = float(cost)
        usage = data.get("usage")
        if usage is not None:
            chunk["input_tokens"] = int(usage.get("input_tokens") or 0)
            chunk["output_tokens"] = int(usage.get("output_tokens") or 0)
        return chunk
    return None


def _extract_assistant(data: dict) -> dict:
    """Extract text from assistant message."""
    content = data.get("message", {}).get("content", "")
    if isinstance(content, list):
        parts = [
            b.get("text", "")
            for b in content
            if b.get("type") == "text"
        ]
        return {"type": "text", "content": "".join(parts)}
    return {"type": "text", "content": str(content)}


def check_context_pressure(session: ChatSession) -> dict | None:
    """Return a warning chunk once the transcript grows large.

    The token count comes from ``estimate_tokens`` applied to the
    session's message contents; above 24,000 a ``warning`` chunk
    is returned, otherwise None.
    """
    from maggy.services.context_compactor import estimate_tokens
    token_count = estimate_tokens(
        [{"content": entry.content} for entry in session.messages],
    )
    if token_count > 24_000:
        return {
            "type": "warning",
            "content": f"Context: ~{token_count} tokens",
        }
    return None


async def stream_message(
    session: ChatSession, message: str,
) -> AsyncGenerator[dict, None]:
    """Run a single message through the Claude CLI and stream parsed chunks.

    The user message is appended to ``session.messages``, the CLI is spawned
    in ``session.working_dir`` with stderr merged into stdout, and every
    non-empty output line is passed through ``parse_chunk``. When the run
    ends (normally or via a handled error) the collected assistant text is
    appended to ``session.messages``.

    Args:
        session: Chat session; ``messages``, ``status`` and ``pid`` are
            updated in place.
        message: The user prompt to send.

    Yields:
        An optional context-pressure warning first, then each parsed chunk.
        Failures are reported as ``{"type": "error", ...}`` chunks rather
        than raised.
    """
    from maggy.services.chat import ChatMessage

    session.messages.append(
        ChatMessage(role="user", content=message),
    )
    session.status = "streaming"
    pressure = check_context_pressure(session)
    if pressure:
        yield pressure
    cmd = build_cmd(session, message)
    streamed_text = ""
    result_text = ""
    proc = None
    try:
        # Drop CLAUDECODE from the child's environment.
        env = {
            k: v for k, v in os.environ.items()
            if k != "CLAUDECODE"
        }
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=session.working_dir,
            env=env,
            # The default StreamReader limit is 64 KiB per line; a single
            # stream-json line can be far longer, which would abort the
            # iteration below with ValueError.
            limit=16 * 1024 * 1024,
        )
        session.pid = proc.pid or 0
        async for line in proc.stdout:
            text = line.decode("utf-8", errors="replace").strip()
            if not text:
                continue
            chunk = parse_chunk(text, session)
            if chunk:
                piece = chunk.get("content") or ""
                if not isinstance(piece, str):
                    piece = str(piece)
                # NOTE(review): the final ``result`` message appears to
                # repeat text already streamed as assistant chunks, so it is
                # kept separately and only used as a fallback — confirm
                # against the CLI output reference.
                if chunk.get("type") == "result":
                    result_text = piece
                else:
                    streamed_text += piece
                yield chunk
        await proc.wait()
        session.status = "idle"
    except FileNotFoundError:
        session.status = "error"
        yield {"type": "error", "content": "claude CLI not found"}
    except Exception as e:
        session.status = "error"
        yield {"type": "error", "content": str(e)}
    finally:
        # Reached on normal completion, on errors, and when the consumer
        # closes or cancels the generator mid-stream. Never leave the child
        # process running or the session stuck in "streaming".
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # already exited between the check and the kill
            await proc.wait()
        if session.status == "streaming":
            session.status = "idle"
    response_text = streamed_text or result_text
    if response_text:
        session.messages.append(
            ChatMessage(role="assistant", content=response_text),
        )


================================================
FILE: maggy/maggy/services/checkpoint.py
================================================
"""Cross-model checkpoint serializer.

Produces model-agnostic checkpoints that can be injected into
any model on switch, preserving task understanding.
"""

from __future__ import annotations

import json
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone


@dataclass
class Checkpoint:
    """Model-agnostic task checkpoint.

    Attributes:
        goal: What the task is trying to achieve.
        constraints: Constraints to carry over to the next model.
        progress: Steps completed so far.
        working_state: Free-form description of the current state.
        file_context: Relevant file paths (``to_prompt`` lists at most 10).
        source_model: Name of the model that produced the checkpoint.
        created_at: ISO-8601 UTC timestamp, filled in lazily by
            ``serialize`` when empty.
    """

    goal: str = ""
    constraints: list[str] = field(default_factory=list)
    progress: list[str] = field(default_factory=list)
    working_state: str = ""
    file_context: list[str] = field(default_factory=list)
    source_model: str = ""
    created_at: str = ""

    def serialize(self) -> str:
        """Serialize to indented JSON for storage/transfer.

        Side effect: stamps ``created_at`` with the current UTC time the
        first time it is called on an instance without a timestamp.
        """
        if not self.created_at:
            self.created_at = datetime.now(
                timezone.utc
            ).isoformat()
        return json.dumps(asdict(self), indent=2)

    @classmethod
    def deserialize(cls, data: str) -> Checkpoint:
        """Reconstruct a checkpoint from JSON produced by ``serialize``.

        Keys that are not fields of this dataclass are ignored, so a
        checkpoint carrying extra fields still loads instead of failing
        with ``TypeError``.

        Raises:
            json.JSONDecodeError: ``data`` is not valid JSON.
            TypeError: the JSON document is not an object.
        """
        from dataclasses import fields

        d = json.loads(data)
        if not isinstance(d, dict):
            raise TypeError(
                f"checkpoint JSON must be an object, got {type(d).__name__}"
            )
        known = {f.name for f in fields(cls)}
        return cls(**{k: v for k, v in d.items() if k in known})

    def to_prompt(self) -> str:
        """Format as a structured prompt for the new model.

        Empty sections are omitted; the goal line and the closing
        confirmation request are always present.
        """
        parts = [
            "## Task Checkpoint (from previous model session)",
            f"**Goal:** {self.goal}",
        ]
        if self.constraints:
            parts.append("**Constraints:**")
            parts.extend(f"  - {c}" for c in self.constraints)
        if self.progress:
            parts.append("**Progress so far:**")
            parts.extend(f"  - {p}" for p in self.progress)
        if self.working_state:
            parts.append(
                f"**Current state:** {self.working_state}"
            )
        if self.file_context:
            parts.append("**Key files:**")
            # Cap the listing at ten entries.
            parts.extend(f"  - {path}" for path in self.file_context[:10])
        parts.append(
            "\nPlease confirm you understand this context "
            "before proceeding."
        )
        return "\n".join(parts)


def create_checkpoint(
    goal: str,
    progress: list[str],
    model: str,
    working_state: str = "",
    files: list[str] | None = None,
    constraints: list[str] | None = None,
) -> Checkpoint:
    """Build a ``Checkpoint`` from the pieces of the current session.

    ``files`` and ``constraints`` fall back to empty lists when omitted or
    empty; ``model`` is recorded as the checkpoint's ``source_model``.
    """
    checkpoint = Checkpoint(
        goal=goal,
        progress=progress,
        working_state=working_state,
        source_model=model,
    )
    checkpoint.constraints = constraints if constraints else []
    checkpoint.file_context = files if files else []
    return checkpoint


================================================
FILE: maggy/maggy/services/competitor.py
================================================
"""Generic competitor intelligence — AI discovery + RSS/news monitoring + daily briefing.

Stores competitors in ~/.maggy/competitors.json. Monitored feeds stored in SQLite.
Works for ANY domain — CX, fintech, devtools, healthcare, etc. Domain comes from config.
"""

from __future__ import annotations

import hashlib
import ipaddress
import json
import logging
import socket
import sqlite3
from datetime import date, datetime, timezone
from pathlib import Path
from urllib.parse import quote, urlparse

import feedparser

from maggy.services.ai_client import ai_complete
import httpx

from maggy.config import MaggyConfig

logger = logging.getLogger(__name__)


def _connect_sqlite(path: Path) -> sqlite3.Connection:
    """Open a SQLite connection with WAL + foreign_keys + busy_timeout.

    Same defaults as InboxService — safe for concurrent FastAPI handlers
    plus the heartbeat worker writing from another thread.
    """
    db = sqlite3.connect(path, timeout=30.0)
    db.execute("PRAGMA journal_mode=WAL")
    db.execute("PRAGMA foreign_keys=ON")
    db.execute("PRAGMA busy_timeout=30000")
    return db


def _parse_feed_date(raw: str) -> datetime | None:
    """Parse RFC 822 / ISO 8601 date strings from RSS/Atom feeds.

    feedparser returns `published` as RFC 822 ("Mon, 15 Jan 2024 10:30:00 GMT").
    Comparing those lexicographically is wrong because day names cycle weekly.
    Returns a timezone-aware UTC datetime, or None if parsing fails.
    """
    if not raw:
        return None
    # feedparser exposes parsed tuple when it can
    try:
        from email.utils import parsedate_to_datetime
        dt = parsedate_to_datetime(raw)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc)
    except (TypeError, ValueError):
        pass
    # Fall through: try ISO 8601 (atom feeds, Google News sometimes)
    try:
        dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc)
    except (TypeError, ValueError):
        return None


def _is_safe_feed_url(url: str) -> bool:
    """Reject RSS URLs that would let an attacker hit internal services.

    Blocks non-HTTP(S), bare hostnames without scheme, and any host whose
    resolved IPs include loopback, link-local, private, or multicast ranges.
    Prevents SSRF via AI-discovered or user-edited competitor registry.
    """
    try:
        parsed = urlparse(url)
    except Exception:
        return False
    if parsed.scheme not in ("http", "https"):
        return False
    host = (parsed.hostname or "").strip().lower()
    if not host or host in ("localhost",):
        return False
    # Block bare IP strings that are themselves private
    try:
        ip = ipaddress.ip_address(host)
        return not (ip.is_loopback or ip.is_private or ip.is_link_local
                    or ip.is_multicast or ip.is_reserved or ip.is_unspecified)
    except ValueError:
        pass
    # Hostname: resolve and check every returned address
    try:
        infos = socket.getaddrinfo(host, None)
    except socket.gaierror:
        return False
    for info in infos:
        addr = info[4][0]
        try:
            ip = ipaddress.ip_address(addr.split("%")[0])  # strip scope id on v6
        except ValueError:
            return False
        if (ip.is_loopback or ip.is_private or ip.is_link_local
                or ip.is_multicast or ip.is_reserved or ip.is_unspecified):
            return False
    return True


class CompetitorService:
    """Competitor registry, RSS/Google News monitoring, and daily briefing.

    State lives in two places derived from ``cfg.storage.path``: a
    ``competitors.json`` registry in the same directory as that path, and
    three SQLite tables (``competitor_news``, ``briefing_cache``,
    ``feed_cursors``) in the database at that path.
    """

    def __init__(self, cfg: MaggyConfig):
        """Resolve storage paths from ``cfg`` and create tables if missing."""
        self.cfg = cfg
        self.competitors_path = Path(cfg.storage.path).expanduser().parent / "competitors.json"
        self.db_path = Path(cfg.storage.path).expanduser()
        self._init_db()

    def _open_db(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``sqlite3.Connection`` used directly in a ``with`` statement only
        commits or rolls back; it does not close the connection. Use as
        ``with self._open_db() as db, db:`` to get both the transaction
        handling and a guaranteed close.
        """
        from contextlib import closing

        return closing(_connect_sqlite(self.db_path))

    def _init_db(self) -> None:
        """Create the news, briefing-cache and cursor tables if absent."""
        with self._open_db() as db, db:
            db.execute("""
                CREATE TABLE IF NOT EXISTS competitor_news (
                    id TEXT PRIMARY KEY,
                    competitor_id TEXT NOT NULL,
                    competitor_name TEXT NOT NULL,
                    event_type TEXT NOT NULL,
                    title TEXT NOT NULL,
                    url TEXT,
                    source TEXT,
                    created_at TEXT NOT NULL
                )
            """)
            db.execute("CREATE INDEX IF NOT EXISTS idx_news_created ON competitor_news(created_at DESC)")
            db.execute("""
                CREATE TABLE IF NOT EXISTS briefing_cache (
                    date TEXT PRIMARY KEY,
                    summary TEXT NOT NULL,
                    signal_count INTEGER NOT NULL,
                    generated_at TEXT NOT NULL
                )
            """)
            db.execute("""
                CREATE TABLE IF NOT EXISTS feed_cursors (
                    feed_key TEXT PRIMARY KEY,
                    cursor TEXT NOT NULL
                )
            """)

    # ── Registry ─────────────────────────────────────────────────────────

    def load_registry(self) -> dict[str, dict]:
        """Load the competitor registry; ``{}`` if missing, unreadable or not an object."""
        if not self.competitors_path.exists():
            return {}
        try:
            data = json.loads(self.competitors_path.read_text())
        except Exception as e:
            logger.warning("Could not read competitor registry %s: %s", self.competitors_path, e)
            return {}
        # Callers iterate with ``.items()`` / ``.values()``; a hand-edited
        # file holding a list or scalar must not crash them.
        return data if isinstance(data, dict) else {}

    def save_registry(self, registry: dict[str, dict]) -> None:
        """Write the registry as indented JSON, creating the directory if needed."""
        self.competitors_path.parent.mkdir(parents=True, exist_ok=True)
        # Write to a sibling temp file and swap it in so an interrupted
        # write cannot leave a truncated registry behind.
        tmp_path = self.competitors_path.with_suffix(".json.tmp")
        tmp_path.write_text(json.dumps(registry, indent=2))
        tmp_path.replace(self.competitors_path)

    # ── Discovery ────────────────────────────────────────────────────────

    async def discover(self) -> dict:
        """Ask the AI provider to identify competitors in the configured categories.

        Results are merged into the registry file: new ids are added;
        for ids already present only a missing ``social.blog_rss`` is
        filled in, so manual edits are preserved.

        Returns:
            ``{"total": <registry size>, "added": <new entries>}`` on
            success, or ``{"error": <message>, "added": 0}`` when no
            categories are configured, no provider answered, or the
            response could not be parsed.
        """
        if not self.cfg.competitors.categories:
            return {"error": "No competitor categories configured", "added": 0}

        registry = self.load_registry()
        before = len(registry)

        categories = self.cfg.competitors.categories
        seed = self.cfg.competitors.seed
        org_name = self.cfg.org.name

        prompt = f"""Identify competitors for {org_name}, operating in these categories: {', '.join(categories)}.
{f"User already mentioned: {', '.join(seed)}. Include these and add more." if seed else ""}

Return 12-18 competitors as JSON. Include a mix of:
- Established market leaders
- AI-first challengers / next-gen disruptors
- Vertical-specific specialists

Format (STRICT JSON):
{{"competitors": [
  {{
    "id": "lowercase-slug",
    "name": "Display Name",
    "category": "One of: {' | '.join(categories)}",
    "website": "example.com",
    "description": "One-sentence positioning",
    "strengths": ["str1", "str2", "str3"],
    "weaknesses": ["w1", "w2"],
    "tags": ["tag1", "tag2"],
    "blog_rss": "optional RSS URL or null"
  }}
]}}"""

        try:
            text = await ai_complete(prompt, self.cfg)
            if not text:
                return {"error": "No AI provider available", "added": 0}
            # Tolerate prose around the JSON: take the outermost braces.
            start = text.find("{")
            end = text.rfind("}")
            data = json.loads(text[start:end + 1])
        except Exception as e:
            logger.error("Discovery failed: %s", e)
            return {"error": str(e), "added": 0}

        # The model's output is untrusted: anything that is not the expected
        # shape is skipped rather than allowed to raise.
        competitors = data.get("competitors", []) if isinstance(data, dict) else []
        if not isinstance(competitors, list):
            competitors = []

        for comp in competitors:
            if not isinstance(comp, dict):
                continue
            cid = str(comp.get("id") or "").lower()
            if not cid:
                continue
            # Preserve blog_rss inside a social sub-dict for monitoring
            rss = comp.pop("blog_rss", None)
            if not isinstance(rss, str):
                rss = None
            if rss:
                comp["social"] = {"blog_rss": rss}
            # Merge (don't overwrite existing manual edits)
            existing = registry.get(cid)
            if isinstance(existing, dict):
                social = existing.get("social")
                if not isinstance(social, dict):
                    # Covers both a missing key and an explicit null.
                    social = {}
                    existing["social"] = social
                if rss and not social.get("blog_rss"):
                    social["blog_rss"] = rss
            else:
                registry[cid] = comp

        self.save_registry(registry)
        return {"total": len(registry), "added": len(registry) - before}

    def list_all(self) -> list[dict]:
        """Return every registry entry as a list."""
        return list(self.load_registry().values())

    # ── Monitoring ───────────────────────────────────────────────────────

    async def monitor_all(self) -> dict:
        """Scan RSS + Google News for all competitors. Called by heartbeat or on-demand.

        Failures for one competitor are logged at debug level and do not
        stop the scan.

        Returns:
            ``{"rss": n, "news": n, "total_competitors": n}`` where the
            first two count newly stored events.
        """
        registry = self.load_registry()
        rss_new = 0
        news_new = 0
        for cid, comp in registry.items():
            try:
                rss_new += await self._check_rss(cid, comp)
            except Exception as e:
                logger.debug("RSS %s: %s", cid, e)
            try:
                news_new += await self._check_google_news(cid, comp)
            except Exception as e:
                logger.debug("News %s: %s", cid, e)
        return {"rss": rss_new, "news": news_new, "total_competitors": len(registry)}

    def _get_cursor(self, key: str) -> str:
        """Return the stored cursor for ``key``, or ``""`` if none."""
        with self._open_db() as db, db:
            row = db.execute("SELECT cursor FROM feed_cursors WHERE feed_key = ?", (key,)).fetchone()
        return row[0] if row else ""

    def _set_cursor(self, key: str, cursor: str) -> None:
        """Insert or update the cursor for ``key``."""
        with self._open_db() as db, db:
            db.execute(
                "INSERT INTO feed_cursors (feed_key, cursor) VALUES (?, ?) "
                "ON CONFLICT(feed_key) DO UPDATE SET cursor = excluded.cursor",
                (key, cursor),
            )

    def _classify(self, title: str) -> str:
        """Map a headline to an event type by keyword; first matching group wins."""
        t = title.lower()
        if any(w in t for w in ["launch", "release", "introduces", "announces new", "ships"]):
            return "feature_launch"
        if any(w in t for w in ["pricing", "price", "cost", "free tier"]):
            return "pricing_change"
        if any(w in t for w in ["funding", "raises", "series", "valuation", "investment"]):
            return "funding"
        if any(w in t for w in ["acquir", "acquisition", "merge", "bought"]):
            return "acquisition"
        if any(w in t for w in ["partner", "integration with", "teams up"]):
            return "partnership"
        return "news"

    def _log_event(self, competitor_id: str, competitor_name: str, event_type: str, title: str, url: str, source: str) -> bool:
        """Store one news event.

        Returns:
            True if a row was inserted, False if an identical event (same
            competitor, source and url-or-title) was already stored.
        """
        # Deterministic ID so the same article logged twice (cursor reset,
        # overlapping scans) becomes a no-op instead of a duplicate row.
        id_seed = f"{competitor_id}|{source}|{url or title}"
        event_id = hashlib.sha256(id_seed.encode("utf-8")).hexdigest()[:32]
        with self._open_db() as db, db:
            cur = db.execute(
                "INSERT OR IGNORE INTO competitor_news "
                "(id, competitor_id, competitor_name, event_type, title, url, source, created_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (event_id, competitor_id, competitor_name, event_type, title, url, source,
                 datetime.now(timezone.utc).isoformat()),
            )
            return cur.rowcount > 0

    async def _check_rss(self, cid: str, comp: dict) -> int:
        """Fetch the competitor's blog RSS feed and store unseen entries.

        Only the first 10 feed entries are considered. Returns the number
        of events actually inserted; 0 when there is no feed URL, the URL
        fails the safety check, or the fetch fails.
        """
        rss_url = (comp.get("social") or {}).get("blog_rss")
        if not rss_url:
            return 0
        if not _is_safe_feed_url(rss_url):
            logger.warning("Skipping unsafe RSS URL for %s: %s", cid, rss_url)
            return 0
        cursor_key = f"rss:{cid}"
        last_cursor = self._get_cursor(cursor_key)

        try:
            async with httpx.AsyncClient(timeout=15) as client:
                resp = await client.get(rss_url)
                if resp.status_code >= 400:
                    return 0
                feed = feedparser.parse(resp.text)
        except Exception as e:
            logger.debug("RSS fetch failed for %s: %s", cid, e)
            return 0

        # Cursor is stored as an ISO-8601 UTC string so comparisons are
        # valid lexicographically AND survive round-trips through SQLite.
        last_cursor_dt = _parse_feed_date(last_cursor) if last_cursor else None
        new_items = 0
        latest_dt = last_cursor_dt
        for entry in feed.entries[:10]:
            pub_raw = entry.get("published", entry.get("updated", ""))
            pub_dt = _parse_feed_date(pub_raw)
            # Skip entries already seen (we have a cursor AND the entry's parsed date is ≤ cursor).
            # Entries without a parseable date are always processed — INSERT OR IGNORE dedupes.
            if pub_dt and last_cursor_dt and pub_dt <= last_cursor_dt:
                continue
            title = entry.get("title", "")
            link = entry.get("link", "")
            if pub_dt and (latest_dt is None or pub_dt > latest_dt):
                latest_dt = pub_dt
            # Count only rows that were really inserted, so undated entries
            # re-seen on every scan are not reported as new each time.
            if self._log_event(cid, comp.get("name", cid), "blog_post", f"{comp.get('name','')}: {title}", link, "rss"):
                new_items += 1

        if latest_dt and latest_dt != last_cursor_dt:
            self._set_cursor(cursor_key, latest_dt.isoformat())
        return new_items

    async def _check_google_news(self, cid: str, comp: dict) -> int:
        """Query the Google News RSS search for the competitor and store unseen entries.

        Only the first 5 results are considered; each is classified with
        ``_classify``. Returns the number of events actually inserted.
        """
        name = comp.get("name", "")
        if not name:
            return 0
        cursor_key = f"news:{cid}"
        last_cursor = self._get_cursor(cursor_key)

        # Use domain + category for better relevance — e.g. "Sprinklr CX" not "Sprinklr software"
        category = (comp.get("category") or "").replace("_", " ").split("/")[0]
        search_term = f"{name} {category}" if category else f"{name} {self.cfg.org.domain}"
        url = f"https://news.google.com/rss/search?q={quote(search_term)}&hl=en-US&gl=US&ceid=US:en"

        try:
            async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
                resp = await client.get(url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"})
                if resp.status_code >= 400:
                    return 0
                feed = feedparser.parse(resp.text)
        except Exception as e:
            logger.debug("Google News fetch failed for %s: %s", cid, e)
            return 0

        last_cursor_dt = _parse_feed_date(last_cursor) if last_cursor else None
        new_items = 0
        latest_dt = last_cursor_dt
        for entry in feed.entries[:5]:
            pub_dt = _parse_feed_date(entry.get("published", ""))
            if pub_dt and last_cursor_dt and pub_dt <= last_cursor_dt:
                continue
            title = entry.get("title", "")
            link = entry.get("link", "")
            if pub_dt and (latest_dt is None or pub_dt > latest_dt):
                latest_dt = pub_dt
            if self._log_event(cid, name, self._classify(title), f"{name}: {title}", link, "google_news"):
                new_items += 1

        if latest_dt and latest_dt != last_cursor_dt:
            self._set_cursor(cursor_key, latest_dt.isoformat())
        return new_items

    # ── News query ───────────────────────────────────────────────────────

    def get_news(self, limit: int = 100) -> list[dict]:
        """Return up to ``limit`` stored events, newest first, as plain dicts."""
        with self._open_db() as db, db:
            db.row_factory = sqlite3.Row
            rows = db.execute(
                "SELECT * FROM competitor_news ORDER BY created_at DESC LIMIT ?",
                (limit,),
            ).fetchall()
            # Materialise while the connection is still open.
            return [dict(r) for r in rows]

    # ── Daily briefing (cached per day) ──────────────────────────────────

    async def get_daily_briefing(self, refresh: bool = False) -> dict:
        """Return today's briefing, generating and caching it if needed.

        Args:
            refresh: When True, skip the cache and regenerate.

        Returns:
            A dict with ``date``, ``summary`` and ``total_signals``;
            ``generated_at`` is included for cached or freshly generated
            briefings. Failure and "no news" responses are returned in the
            same shape and are not cached.
        """
        today = date.today().isoformat()

        if not refresh:
            with self._open_db() as db, db:
                row = db.execute(
                    "SELECT summary, signal_count, generated_at FROM briefing_cache WHERE date = ?",
                    (today,),
                ).fetchone()
            if row:
                return {"date": today, "summary": row[0], "total_signals": row[1], "generated_at": row[2]}

        # Regenerate
        news = self.get_news(limit=80)
        if not news:
            return {"date": today, "summary": "No competitor news yet. Run a scan first.", "total_signals": 0}
        digest = [f"[{n['event_type']}] {n['competitor_name']}: {n['title']}" for n in news[:50]]
        domain = self.cfg.org.domain or "our domain"

        prompt = f"""You are the competitive intelligence analyst for {self.cfg.org.name} in the {domain} space.

Write a daily competitive landscape briefing for {today}. Structure:

1. **Top Signals Today** — 3-5 most important moves (acquisitions, launches, partnerships) with specific competitor names
2. **Market Trends** — patterns across multiple signals (AI adoption, consolidation, pricing shifts)
3. **Implications for {self.cfg.org.name}** — 2-3 specific, actionable takeaways

Be specific with competitor names and facts. No generic advice. Under 250 words.

Signals ({len(digest)} total):
{chr(10).join(digest)}"""

        try:
            summary = await ai_complete(prompt, self.cfg)
            if not summary:
                return {"date": today, "summary": "No AI provider available for briefing.", "total_signals": len(news)}
        except Exception as e:
            return {"date": today, "summary": f"Failed to generate briefing: {e}", "total_signals": len(news)}

        generated_at = datetime.now(timezone.utc).isoformat()
        with self._open_db() as db, db:
            db.execute(
                "INSERT INTO briefing_cache (date, summary, signal_count, generated_at) VALUES (?, ?, ?, ?) "
                "ON CONFLICT(date) DO UPDATE SET summary = excluded.summary, signal_count = excluded.signal_count, generated_at = excluded.generated_at",
                (today, summary, len(news), generated_at),
            )

        return {"date": today, "summary": summary, "total_signals": len(news), "generated_at": generated_at}


================================================
FILE: maggy/maggy/services/context_compactor.py
================================================
"""Context compactor — summarize old messages to fit context window.

When conversation length exceeds 80% of the model's context window,
old messages are summarized into a single system message while keeping
the most recent messages intact.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Awaitable, Callable

logger = logging.getLogger(__name__)

# Fraction of the context window above which should_compact() returns True.
COMPACT_THRESHOLD = 0.80
# Divisor used by estimate_tokens(): characters per estimated token.
CHARS_PER_TOKEN = 4

# Async callable that receives the formatted old-message text and returns its summary.
SummarizerFn = Callable[[str], Awaitable[str]]


@dataclass
class CompactionResult:
    """Outcome of a ``compact`` call."""

    # Message list to use going forward (the input list when nothing was compacted).
    messages: list[dict]
    # Estimated tokens removed by replacing old messages with the summary.
    tokens_saved: int = 0
    # Summary text that replaced the old messages; empty when nothing was compacted.
    summary: str = ""


def estimate_tokens(messages: list[dict]) -> int:
    """Rough token estimate: total content characters // ``CHARS_PER_TOKEN``.

    Messages without ``content`` (missing or ``None``) contribute nothing.
    Non-string content is measured by the length of its ``str()`` form, so
    a list of blocks is counted by size rather than by number of blocks.
    """
    total = 0
    for m in messages:
        content = m.get("content")
        if content is None:
            continue
        total += len(content) if isinstance(content, str) else len(str(content))
    return total // CHARS_PER_TOKEN


def should_compact(messages: list[dict], context_window: int) -> bool:
    """Tell whether the estimated size of ``messages`` is above the compaction limit.

    The limit is ``context_window * COMPACT_THRESHOLD`` truncated to an int;
    the comparison is strict.
    """
    used = estimate_tokens(messages)
    limit = int(context_window * COMPACT_THRESHOLD)
    return used > limit


async def compact(
    messages: list[dict],
    keep_recent: int = 6,
    summarizer: SummarizerFn | None = None,
) -> CompactionResult:
    """Replace all but the newest ``keep_recent`` messages with a summary.

    Args:
        messages: Conversation, oldest first.
        keep_recent: Number of trailing messages kept verbatim. ``0`` (or a
            negative value, treated as ``0``) summarizes everything.
        summarizer: Async callable producing the summary text. Without one,
            nothing is compacted.

    Returns:
        A ``CompactionResult``. The input list is returned unchanged when
        there is nothing to compact, no summarizer is given, the summarizer
        raises, or it returns an empty summary.
    """
    keep_recent = max(0, keep_recent)
    if len(messages) <= keep_recent:
        return CompactionResult(messages=messages)
    if summarizer is None:
        return CompactionResult(messages=messages)
    # Split by index: ``messages[:-keep_recent]`` is empty for
    # ``keep_recent == 0``, which would prepend a summary of nothing while
    # keeping every message.
    split = len(messages) - keep_recent
    old = messages[:split]
    recent = messages[split:]
    old_text = _format_for_summary(old)
    old_tokens = estimate_tokens(old)
    try:
        summary = await summarizer(old_text)
    except Exception as exc:
        logger.debug("Compaction failed: %s", exc)
        return CompactionResult(messages=messages)
    if not summary:
        # An empty summary would throw the old messages away for nothing.
        return CompactionResult(messages=messages)
    summary_msg = {"role": "system", "content": summary}
    new_tokens = estimate_tokens([summary_msg])
    return CompactionResult(
        messages=[summary_msg, *recent],
        tokens_saved=max(0, old_tokens - new_tokens),
        summary=summary,
    )


def _format_for_summary(messages: list[dict]) -> str:
    """Format messages into text for summarization."""
    parts: list[str] = []
    for m in messages:
        role = m.get("role", "unknown")
        content = m.get("content", "")[:500]
        parts.append(f"{role}: {content}")
    return "\n".join(parts)


================================================
FILE: maggy/maggy/services/convention_inferrer.py
================================================
"""LLM-based dynamic convention inference from project fingerprint.

Collects filesystem signals (file tree, config snippets, git log)
and sends them to a cheap/local model to infer project-specific
conventions that the static rule table doesn't cover.
"""

from __future__ import annotations

import logging
import re
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from maggy.adapters.pi import PiAdapter
    from maggy.routing_rules import Convention, RoutingRules

logger = logging.getLogger(__name__)

# Upper bound on the number of conventions parsed from one LLM response.
MAX_CONVENTIONS = 10
# Maximum length, in characters, of the fingerprint sent to the model.
MAX_FINGERPRINT = 4000
# Model names tried in order until one returns a usable answer.
FALLBACK_MODELS = ["local", "kimi"]

# Entry names left out of the file-tree listing.
# NOTE(review): "egg-info" is a bare name, while build directories are
# usually called "<package>.egg-info" — confirm the tree builder covers the
# suffixed form.
SKIP_DIRS = frozenset({
    ".git", "node_modules", "__pycache__", ".venv",
    "venv", "dist", "build", ".next", ".cache",
    ".tox", ".mypy_cache", ".ruff_cache", "egg-info",
})

# Well-known config files whose leading characters are included in the fingerprint.
CONFIG_FILES = [
    "pyproject.toml", "package.json", "Makefile",
    "docker-compose.yml", "Dockerfile", "tsconfig.json",
    ".env.example", "Cargo.toml", "go.mod", "Gemfile",
    "mix.exs", "build.gradle", "pom.xml",
]

# Prompt sent to the model; ``{fingerprint}`` is filled via ``str.format``.
# The "- " line format requested here is what the response parser looks for.
PROMPT_TEMPLATE = (
    "Analyze this project and list its development conventions.\n"
    "Each convention must be one line starting with '- '.\n"
    "Focus on: build tools, test runners, deployment, migrations,\n"
    "package managers, CI/CD, linting, coding patterns.\n"
    "Be specific — mention exact commands and tool names.\n"
    "Max 10 conventions. No explanations, just the list.\n\n"
    "{fingerprint}"
)


def collect_fingerprint(working_dir: str) -> str:
    """Assemble the project fingerprint handed to the model.

    Joins, in order, the file tree, the config snippets and the recent git
    log (skipping any section that came back empty) with newlines, then
    truncates the result to ``MAX_FINGERPRINT`` characters.
    """
    root = Path(working_dir)
    sections = []
    for section in (_file_tree(root), _config_snippets(root), _git_log(root)):
        if section:
            sections.append(section)
    combined = "\n".join(sections)
    return combined[:MAX_FINGERPRINT]


def parse_conventions(text: str) -> list[Convention]:
    """Pull bullet lines out of a model response as ``Convention`` objects.

    A line qualifies when, after stripping, it is ``-`` followed by
    whitespace and 5 to 200 further characters. Each hit becomes a
    convention that applies to ``["all"]`` with source ``"llm-inferred"``.
    Scanning stops once ``MAX_CONVENTIONS`` have been collected.
    """
    from maggy.routing_rules import Convention as Conv

    bullet = re.compile(r"^-\s+(.{5,200})$")
    collected: list[Conv] = []
    for raw_line in text.splitlines():
        hit = bullet.match(raw_line.strip())
        if hit is not None:
            collected.append(Conv(hit.group(1).strip(), ["all"], "llm-inferred"))
        if len(collected) >= MAX_CONVENTIONS:
            break
    return collected


async def infer_conventions(
    pi: PiAdapter, working_dir: str,
) -> list[Convention]:
    """Send the project fingerprint to a model and parse the conventions.

    Models in ``FALLBACK_MODELS`` are tried in order; the first one that
    reports success with non-blank output wins. A model that raises is
    logged and skipped so the remaining fallbacks still get their turn.

    Returns:
        The parsed conventions, or ``[]`` when the fingerprint is too short
        (under 20 non-blank characters) or no model produced usable output.
    """
    fp = collect_fingerprint(working_dir)
    if len(fp.strip()) < 20:
        return []
    prompt = PROMPT_TEMPLATE.format(fingerprint=fp)
    for model in FALLBACK_MODELS:
        try:
            result = await pi.send_prompt(model, prompt, working_dir, max_turns=1, timeout=60)
        except Exception as exc:
            # Without this, a failure on the first model would abort the
            # loop and the fallback would never be tried.
            logger.warning("Inference raised on %s: %s", model, exc)
            continue
        if result.success and result.output.strip():
            return parse_conventions(result.output)
        logger.debug("Inference failed on %s: %s", model, result.error)
    return []


async def ensure_inferred(
    rules: RoutingRules, project_key: str,
    working_dir: str, pi: PiAdapter,
) -> None:
    """Populate ``rules`` with LLM-inferred conventions for a project, once.

    Does nothing when ``project_key`` is empty or the project already holds
    a convention whose source is ``"llm-inferred"``. Inference errors are
    logged as warnings and swallowed. Only conventions whose text is not
    already present for the project are appended.
    """
    if not project_key:
        return
    current = rules.project_conventions.get(project_key, [])
    for conv in current:
        if conv.source == "llm-inferred":
            return
    try:
        inferred = await infer_conventions(pi, working_dir)
    except Exception as exc:
        logger.warning("Convention inference failed: %s", exc)
        return
    if not inferred:
        return
    known = {conv.text for conv in current}
    additions = [conv for conv in inferred if conv.text not in known]
    bucket = rules.project_conventions.setdefault(project_key, [])
    bucket.extend(additions)


def _file_tree(root: Path) -> str:
    """List files/dirs to depth 2, excluding noise."""
    lines = ["## Project Files"]
    try:
        for p in sorted(root.iterdir()):
            if p.name in SKIP_DIRS or p.name.startswith("."):
                continue
            lines.append(p.name + ("/" if p.is_dir() else ""))
            if p.is_dir():
                for child in sorted(p.iterdir()):
                    if child.name in SKIP_DIRS:
                        continue
                    lines.append(f"  {child.name}")
    except OSError:
        pass
    return "\n".join(lines[:80])


def _config_snippets(root: Path) -> str:
    """Collect the opening 300 characters of each known config file.

    Every name in ``CONFIG_FILES`` that exists as a regular file under
    ``root`` contributes a ``## <name>`` heading followed by its snippet.
    Undecodable bytes are dropped and unreadable files are skipped.
    """
    parts: list[str] = []
    for name in CONFIG_FILES:
        candidate = root / name
        if not candidate.is_file():
            continue
        try:
            with candidate.open(errors="ignore") as handle:
                text = handle.read(300)
        except OSError:
            continue
        parts.append(f"## {name}\n{text}")
    return "\n".join(parts)


def _git_log(root: Path) -> str:
    """Recent commit messages via git log --oneline -10."""
    if not (root / ".git").is_dir():
        return ""
    try:
        r = subprocess.run(
            ["git", "log", "--oneline", "-10"],
            cwd=root, capture_output=True, text=True, timeout=5,
        )
        if r.returncode == 0 and r.stdout.strip():
            return f"## Recent Commits\n{r.stdout.strip()}"
    except (OSError, subprocess.TimeoutExpired):
        pass
    return ""


================================================
FILE: maggy/maggy/services/convention_scanner.py
================================================
"""Declarative filesystem scanner for project-specific conventions.

Scans a project directory for config files, lock files, and directory
structures to auto-detect tooling conventions (e.g. supabase vs alembic,
npm vs pnpm, pytest vs jest).
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from maggy.routing_rules import Convention, RoutingRules


@dataclass
class ScanRule:
    """A filesystem marker that implies a convention."""

    # Path, relative to the project root, whose presence triggers the rule.
    marker: str
    # Convention text reported when the rule matches.
    convention: str
    # Labels copied onto the resulting convention; defaults to ["all"].
    applies_to: list[str] = field(default_factory=lambda: ["all"])
    # Optional regex that must be found in the marker file's content.
    content_match: str = ""
    # When True the marker must be a directory rather than a regular file.
    is_dir: bool = False


# Declarative detection table, evaluated top to bottom by scan_project().
# Several rules may carry the same convention text (e.g. pytest detected via
# pyproject.toml or pytest.ini); scan_project() reports each text only once.
RULES: list[ScanRule] = [
    # Database migrations
    ScanRule(
        "supabase/migrations", is_dir=True,
        convention="Use `supabase db push` for migrations. RLS policies required.",
    ),
    ScanRule(
        "alembic.ini",
        convention="Use `alembic revision --autogenerate` for schema changes.",
    ),
    # JavaScript package managers, identified by their lock files
    ScanRule(
        "package-lock.json",
        convention="Package manager: npm. Use `npm install`, not yarn/pnpm.",
    ),
    ScanRule(
        "pnpm-lock.yaml",
        convention="Package manager: pnpm. Use `pnpm install`, not npm/yarn.",
    ),
    ScanRule(
        "yarn.lock",
        convention="Package manager: yarn. Use `yarn add`, not npm/pnpm.",
    ),
    # Python tooling, identified by sections inside pyproject.toml
    ScanRule(
        "pyproject.toml", content_match=r"\[tool\.ruff\]",
        convention="Linter: ruff. Run `ruff check .` before committing.",
    ),
    ScanRule(
        "pyproject.toml", content_match=r"\[tool\.pytest",
        convention="Testing: pytest. Run `pytest` for tests.",
        applies_to=["feature", "bug", "all"],
    ),
    ScanRule(
        "pytest.ini",
        convention="Testing: pytest. Run `pytest` for tests.",
        applies_to=["feature", "bug", "all"],
    ),
    # Infrastructure and CI
    ScanRule(
        "docker-compose.yml",
        convention="Use Docker Compose for local services. `docker compose up -d`.",
    ),
    ScanRule(
        ".github/workflows", is_dir=True,
        convention="CI: GitHub Actions. Check workflow status before merging.",
    ),
    ScanRule(
        "Makefile",
        convention="Project uses Make. Check `make help` for available targets.",
    ),
    # Styling
    ScanRule(
        "tailwind.config.js",
        convention="Styling: Tailwind CSS. Use utility classes, not custom CSS.",
        applies_to=["feature"],
    ),
    ScanRule(
        "tailwind.config.ts",
        convention="Styling: Tailwind CSS. Use utility classes, not custom CSS.",
        applies_to=["feature"],
    ),
]


def scan_project(working_dir: str) -> list[Convention]:
    """Detect project conventions by probing ``working_dir`` for marker files.

    Every entry of ``RULES`` is tested in order; each distinct convention
    text is reported once, tagged with the source ``"auto-detected"``.
    """
    from maggy.routing_rules import Convention as Conv

    base = Path(working_dir)
    emitted: set[str] = set()
    detected: list[Conv] = []
    for rule in RULES:
        # Only the first matching rule for a given convention text is kept.
        if _matches(base, rule) and rule.convention not in emitted:
            emitted.add(rule.convention)
            detected.append(
                Conv(rule.convention, list(rule.applies_to), "auto-detected"),
            )
    return detected


def ensure_scanned(
    rules: RoutingRules, project_key: str, working_dir: str,
) -> None:
    """Populate ``rules.project_conventions[project_key]`` on first use.

    A project that already has an entry is left untouched, so the
    directory scan runs at most once per key.
    """
    cache = rules.project_conventions
    if project_key not in cache:
        cache[project_key] = scan_project(working_dir)


def _matches(root: Path, rule: ScanRule) -> bool:
    """Check if a scan rule matches the project directory."""
    target = root / rule.marker
    if rule.is_dir:
        return target.is_dir()
    if not target.is_file():
        return False
    if not rule.content_match:
        return True
    try:
        text = target.read_text(errors="ignore")[:4096]
        return bool(re.search(rule.content_match, text))
    except OSError:
        return False


================================================
FILE: maggy/maggy/services/executor.py
================================================
from __future__ import annotations

import asyncio
import logging
import uuid
from datetime import datetime, timezone
from pathlib import Path

from maggy.adapters.pi import PiAdapter, RunResult
from maggy.budget import BudgetManager
from maggy.checkpoint import CheckpointManager
from maggy.config import MaggyConfig
from maggy.coordination.lock_manager import LockManager
from maggy.escalation.protocol import Escalator
from maggy.mnemos.fatigue import FatigueTracker
from maggy.mnemos.signals import SignalLog
from maggy.providers.base import IssueTrackerProvider
from maggy.recovery.rollback import RollbackManager
from maggy.routing import RoutingService
from maggy.services import executor_helpers as H
from maggy.services import executor_prompts as P
from maggy.services.executor_types import SessionCtx, StepSpec
from maggy.services.planner import DualPlanner

logger = logging.getLogger(__name__)


class ExecutorService:
    """Runs tracker tasks through a plan-only or TDD pipeline as background asyncio tasks.

    Session state lives in an in-memory dict keyed by session id; each
    session is a plain dict (id, task_id, mode, working_dir, status, output, ...).
    """

    def __init__(self, cfg: MaggyConfig, provider: IssueTrackerProvider, status_cb=None):
        """Wire up the model adapter, routing, budget and the persistence helpers.

        Args:
            cfg: Application configuration.
            provider: Issue tracker used to fetch tasks and post plan comments.
            status_cb: Optional callable receiving agent-status dicts
                (see ``_emit_status``).
        """
        self.cfg, self.provider = cfg, provider
        self._pi = PiAdapter()
        self._routing = RoutingService(cfg)
        self._budget = BudgetManager(cfg)
        self._sessions: dict[str, dict] = {}
        self._bg_tasks: set[asyncio.Task] = set()
        # Signals, locks, checkpoints and escalations are stored next to the
        # configured storage file (its parent directory).
        db = Path(cfg.storage.path).expanduser().parent
        self._fatigue = FatigueTracker()
        self._signals = SignalLog(db / "signals.jsonl")
        self._locks = LockManager(db / "locks.db")
        self._rollback = RollbackManager()
        self._checkpoint = CheckpointManager(db / "checkpoints")
        self._escalator = Escalator(db / "escalations.db")
        self._planner, self._status_cb = DualPlanner(self._pi), status_cb
    async def start(self, task_id: str, mode: str = "tdd",
                    working_dir: str | None = None) -> str:
        """Register a new session for ``task_id`` and launch it in the background.

        Args:
            task_id: Identifier understood by the issue-tracker provider.
            mode: ``"tdd"`` (full pipeline) or ``"plan"`` (plan only).
            working_dir: Optional directory; resolved by
                ``executor_helpers.resolve_working_dir``.

        Returns:
            The new session id (first 10 hex characters of a UUID4).

        Raises:
            ValueError: Unknown mode or task not found. ``resolve_working_dir``
                may raise ``ValueError`` as well.
        """
        if mode not in ("tdd", "plan"):
            raise ValueError(f"Unknown mode {mode!r}")
        task = await self.provider.get_task(task_id)
        if not task:
            raise ValueError(f"Task {task_id} not found")
        wd = H.resolve_working_dir(self.cfg, working_dir, task)
        sid = uuid.uuid4().hex[:10]
        self._sessions[sid] = dict(
            id=sid, task_id=task_id, task_title=task.title, mode=mode,
            working_dir=wd, status="running",
            started_at=datetime.now(timezone.utc).isoformat(), output="")
        # NOTE(review): the result of acquire() is not inspected here — confirm
        # whether it signals contention by return value or by raising.
        self._locks.acquire(wd, sid)
        ctx = SessionCtx(self._sessions[sid], task, wd)
        bg = asyncio.create_task(self._run(ctx, mode))
        # Hold a reference to the task until it finishes, then drop it.
        self._bg_tasks.add(bg)
        bg.add_done_callback(self._bg_tasks.discard)
        return sid

    # Look up one session dict by id; None when the id is unknown.
    def get_session(self, sid: str) -> dict | None: return self._sessions.get(sid)
    # Snapshot list of all session dicts known to this service instance.
    def list_sessions(self) -> list[dict]: return list(self._sessions.values())
    async def _run(self, ctx: SessionCtx, mode: str) -> None:
        """Background body of a session.

        Scans/infers project conventions, builds the iCPG context, then runs
        the plan or TDD pipeline. Any exception marks the session ``failed``
        with the exception text. Locks held by the session are always
        released and the task checkpoint deleted afterwards.
        """
        try:
            from maggy.services.convention_inferrer import ensure_inferred
            from maggy.services.convention_scanner import ensure_scanned
            # NOTE(review): executor_helpers guards task.raw with
            # isinstance(..., dict); here .get() is called directly — confirm
            # raw is always a dict, otherwise this fails the session.
            pk = str(ctx.task.raw.get("project_key", ""))
            ensure_scanned(self._routing.rules, pk, ctx.wd)
            await ensure_inferred(self._routing.rules, pk, ctx.wd, self._pi)
            ctx.icpg = await H.build_icpg_context(self.cfg, ctx.task)
            await (self._run_plan(ctx) if mode == "plan" else self._run_tdd(ctx))
        except Exception as e:
            logger.exception("Execution failed")
            ctx.session["status"], ctx.session["error"] = "failed", str(e)
        finally:
            self._locks.release_all(ctx.session["id"])
            # Checkpoint keys use the task id with "/" replaced by "-",
            # matching executor_helpers.write_checkpoint.
            self._checkpoint.delete(ctx.task.id.replace("/", "-"))

    async def _run_plan(self, ctx: SessionCtx) -> None:
        """Plan-only mode: one model run (5 turns), result stored on the session.

        On success with non-empty output the plan is posted to the tracker
        via ``executor_helpers.post_plan``; on failure the first 500
        characters of the output become the session error.
        """
        result = await self._run_model(ctx, P.plan_prompt(ctx.task, ctx.icpg, self._routing), 5)
        ctx.session["output"] = result.output[:10000]
        ctx.session["status"] = "completed" if result.success else "failed"
        if not result.success:
            ctx.session["error"] = result.output[:500]
        elif result.output:
            await H.post_plan(self.provider, ctx.task.id, result.output)

    async def _run_tdd(self, ctx: SessionCtx) -> None:
        """TDD mode: ANALYZE -> WRITE TESTS -> RED check -> IMPLEMENT -> GREEN check.

        Each model step goes through the review gate. A savepoint is taken
        before IMPLEMENT; a failed implementation or GREEN check triggers a
        rollback attempt. The session is marked ``completed`` only when every
        stage passes.
        """
        # Tasks with a blast score of 7 or more get a dual plan first.
        if H.blast_score(ctx.task) >= 7:
            await self._dual_plan(ctx)
        prompt = P.analysis_prompt(ctx.task, ctx.icpg, self._routing)
        ok, analysis = await self._reviewed_step(ctx, StepSpec("ANALYZE", prompt, 5))
        if not ok:
            return
        prompt = P.tests_prompt(ctx.task, ctx.icpg, analysis, self._routing)
        ok, _ = await self._reviewed_step(ctx, StepSpec("WRITE TESTS", prompt, 15))
        if not ok:
            return
        if not await self._verify_red(ctx):
            return
        await H.save_rollback(self._rollback, ctx.session["id"], ctx.wd)
        prompt = P.impl_prompt(ctx.task, ctx.icpg, self._routing)
        ok, _ = await self._reviewed_step(ctx, StepSpec("IMPLEMENT", prompt, 25))
        if not ok:
            await H.try_rollback(self._rollback, ctx.session["id"], ctx.wd)
            H.maybe_escalate(self._escalator, ctx.session, ctx.task)
            return
        if not await self._verify_green(ctx):
            await H.try_rollback(self._rollback, ctx.session["id"], ctx.wd)
            return
        ctx.session["status"] = "completed"
        ctx.session["completed_at"] = datetime.now(timezone.utc).isoformat()

    async def _reviewed_step(self, ctx: SessionCtx, step: StepSpec) -> tuple[bool, str]:
        """Run ``step`` and pass its output through the review gate, at most twice.

        Returns:
            ``(ok, output)``. A failed model run returns immediately without
            retry. If the review rejects both attempts the session is marked
            failed and ``(False, last_output)`` is returned.
        """
        for attempt in range(2):
            ok, output = await self._run_step(ctx, step)
            if not ok:
                return ok, output
            if await self._review_step(ctx, step, output):
                return True, output
            if attempt == 0:
                ctx.session["output"] += f"\n--- RETRY {step.label} ---\n"
        ctx.session.update(status="failed", error=f"Review gate failed for {step.label}")
        return False, output

    async def _run_step(self, ctx: SessionCtx, step: StepSpec) -> tuple[bool, str]:
        """Execute one step's prompt, log it, and record a signal.

        Appends up to 2000 characters of model output to the session output
        and sets the session status to ``failed`` if the run did not succeed.
        """
        result = await self._run_model(ctx, step.prompt, step.max_turns)
        ctx.session["output"] += f"\n=== {step.label} ===\n{result.output[:2000]}\n"
        H.log_signal(self._signals, ctx.session["id"], step.label, result)
        if not result.success:
            ctx.session["status"] = "failed"
        return result.success, result.output

    async def _review_step(self, ctx: SessionCtx, step: StepSpec, output: str) -> bool:
        """Score ``output`` with the output reviewer; a score of 3 or more passes."""
        from maggy.services.output_reviewer import review_output
        review = await review_output(self._pi, step.label, output, ctx.wd)
        ctx.session["output"] += f"\n--- REVIEW {step.label}: {review.score}/5 ---\n"
        return review.score >= 3

    async def _run_model(self, ctx: SessionCtx, prompt: str, turns: int) -> RunResult:
        """Route the task to a model, run ``prompt`` and do the bookkeeping.

        Bookkeeping: checkpoint write, running/done status events, fatigue
        tracking and budget recording.
        NOTE(review): ``turns`` is accepted but not forwarded to ``_send`` in
        this method — confirm whether the turn limit is meant to be applied.
        """
        decision = H.route_model(ctx.task, self._routing)
        name = H.model_name(decision.primary)
        H.write_checkpoint(self._checkpoint, ctx.task, name)
        self._emit_status(name, "running")
        result = await self._send(decision, name, prompt, ctx)
        self._emit_status(name, "done")
        # A different model answered than the one requested: tell the
        # fatigue tracker about the switch using that model's context window.
        if result.model != name and (e := self._pi.get_model(result.model)):
            self._fatigue.on_model_switch(e.context_window)
        H.track_fatigue(self._fatigue, result)
        # Skip budget recording for runs that report neither cost nor input tokens.
        if result.cost_usd > 0 or result.input_tokens > 0:
            self._budget.record_spend(
                decision.primary.provider, result.model, result.cost_usd,
                result.input_tokens, result.output_tokens)
        return result

    async def _send(self, decision, name, prompt, ctx):
        """Send ``prompt`` either directly (with fallback) or through the cascade.

        The cascade is used only when it is enabled in the routing rules and
        the task's blast score reaches ``cascade.min_blast``. In that case
        the primary model plus ``decision.fallback_chain`` are tried, gated
        by the output reviewer's score, and the outcome is wrapped in a
        ``RunResult`` whose ``success`` is simply "output is non-empty".
        """
        cascade = self._routing.rules.cascade
        if not cascade.enabled or H.blast_score(ctx.task) < cascade.min_blast:
            return await self._pi.send_with_fallback(name, prompt, ctx.wd)
        from maggy.services.cascade import cascade_execute
        from maggy.services.output_reviewer import review_output
        chain = [name] + decision.fallback_chain

        async def gate(output: str) -> int:
            return (await review_output(self._pi, "CASCADE", output, ctx.wd)).score
        cr = await cascade_execute(self._pi, chain, prompt, ctx.wd, gate)
        return RunResult(model=cr.model, success=bool(cr.output), output=cr.output, cost_usd=cr.cost_usd)

    def _emit_status(self, agent: str, status: str) -> None:
        """Forward an ``agent_status`` event to the status callback, if one was given."""
        if self._status_cb:
            self._status_cb({"type": "agent_status", "agent": agent, "status": status})
    async def _verify_red(self, ctx: SessionCtx) -> bool:
        """RED phase: tests must exist and must currently fail.

        Returns False (and marks the session failed) on the first check that
        does not pass; otherwise logs the detail of the last check.
        """
        from maggy.services.tdd_verifier import verify_tests_exist, verify_tests_fail
        for check, prefix in [(verify_tests_exist, "RED: no tests"), (verify_tests_fail, "RED")]:
            r = await check(ctx.wd)
            if not r.passed:
                ctx.session["status"], ctx.session["error"] = "failed", f"{prefix}: {r.detail}"
                return False
        # ``r`` is the result of the last check in the list (verify_tests_fail).
        ctx.session["output"] += f"\n=== RED ===\n{r.detail}\n"
        return True
    async def _verify_green(self, ctx: SessionCtx) -> bool:
        """GREEN phase: tests must pass; lint and coverage are advisory.

        A failing test run marks the session failed and returns False. Lint
        or coverage failures are only appended to the session output and do
        not change the return value.
        """
        from maggy.services.tdd_verifier import verify_coverage, verify_lint, verify_tests_pass
        if not (green := await verify_tests_pass(ctx.wd)).passed:
            ctx.session["status"], ctx.session["error"] = "failed", f"GREEN: {green.detail}"
            return False
        for label, check in [("LINT", verify_lint), ("COVERAGE", verify_coverage)]:
            if not (r := await check(ctx.wd)).passed:
                ctx.session["output"] += f"\n=== {label} ===\n{r.detail}\n"
        ctx.session["output"] += "\n=== VALIDATE ===\nPassed\n"
        return True

    async def _dual_plan(self, ctx: SessionCtx) -> None:
        """Best-effort dual planning; failures are logged and otherwise ignored.

        Stores up to 2000 characters of the primary plan and the list of
        plan conflicts on the session dict.
        """
        try:
            r = await self._planner.dual_plan(ctx.task.title, ctx.task.description[:1500], ctx.wd)
            ctx.session.update(dual_plan=r.primary_plan[:2000], plan_conflicts=r.conflicts or [])
        except Exception as exc:
            logger.warning("DualPlanner failed: %s", exc)


================================================
FILE: maggy/maggy/services/executor_helpers.py
================================================
"""Executor helpers — routing, rollback, fatigue, iCPG."""

from __future__ import annotations

import asyncio
import logging
from typing import TYPE_CHECKING

from maggy.adapters.pi import RunResult
from maggy.mnemos.fatigue import FatigueTracker
from maggy.mnemos.signals import SignalLog
from maggy.process.model_router import RoutingDecision
from maggy.routing import RoutingContext, RoutingService

if TYPE_CHECKING:
    from maggy.checkpoint import CheckpointManager
    from maggy.config import MaggyConfig
    from maggy.escalation.protocol import Escalator
    from maggy.providers.base import Task
    from maggy.recovery.rollback import RollbackManager

logger = logging.getLogger(__name__)


def route_model(task: Task, routing: RoutingService) -> RoutingDecision:
    """Build a routing context from task metadata and ask ``routing`` for a decision.

    Metadata comes from ``task.raw`` when it is a dict; otherwise defaults
    apply (first label as task type, ``task.board`` as project key).
    """
    from maggy.services.stakes import classify_stakes

    meta = task.raw if isinstance(task.raw, dict) else {}
    kind = str(meta.get("task_type") or _task_type(task))
    level = classify_stakes(task).level
    context = RoutingContext(
        blast_score=int_value(meta.get("blast_score")),
        task_type=kind,
        security_sensitive=_security_flag(meta, kind),
        project_key=str(meta.get("project_key") or task.board),
        stakes=level,
    )
    return routing.route(context)


def blast_score(task: Task) -> int:
    """Return the task's ``blast_score`` metadata as an int (0 when absent or invalid)."""
    meta = task.raw
    if not isinstance(meta, dict):
        meta = {}
    return int_value(meta.get("blast_score"))


def int_value(value: object) -> int:
    """Safely convert ``value`` to ``int``, returning 0 when it cannot be converted.

    Handles ``None`` and other non-numeric objects (``TypeError``),
    unparsable strings and NaN (``ValueError``), and infinite floats
    (``OverflowError``), so metadata from external trackers can never make
    this raise.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — previously escaped the "safe" path.
        return 0


def model_name(primary: object) -> str:
    """Return the model name: ``primary`` itself if it is a string, else ``primary.name``."""
    return primary if isinstance(primary, str) else str(primary.name)


def track_fatigue(fatigue: FatigueTracker, result: RunResult) -> None:
    """Report a ``context_load`` reading derived from the output size.

    The load is the output length divided by 50,000 characters, capped at 1.0.
    """
    ratio = len(result.output) / 50_000
    fatigue.record("context_load", ratio if ratio < 1.0 else 1.0)


def log_signal(signals: SignalLog, sid: str, label: str, result: RunResult) -> None:
    """Record which model ran a step of a session and whether it succeeded."""
    entry = dict(
        session_id=sid,
        step=label,
        model=result.model,
        success=result.success,
    )
    signals.append(entry)


def write_checkpoint(
    checkpoint: "CheckpointManager", task: Task, model: str,
) -> None:
    """Persist a minimal checkpoint for ``task`` before a model run.

    The checkpoint key is the task id with ``/`` replaced by ``-``.
    """
    key = task.id.replace("/", "-")
    state = {
        "goal": task.title,
        "model_history": [model],
        "current_subgoal": "executing",
    }
    checkpoint.write(key, state)


async def save_rollback(
    rollback: "RollbackManager", sid: str, wd: str,
) -> None:
    """Best-effort savepoint for session ``sid`` in ``wd``.

    Any error from the rollback manager is logged as a warning and
    swallowed so the pipeline can continue.
    """
    try:
        await rollback.create_savepoint(sid, wd)
    except Exception as err:
        logger.warning("Savepoint failed: %s", err)
    return None


async def try_rollback(
    rollback: "RollbackManager", sid: str, wd: str,
) -> None:
    """Best-effort rollback of session ``sid`` in ``wd``.

    Any error from the rollback manager is logged as a warning and
    swallowed; the caller is never interrupted.
    """
    try:
        await rollback.rollback(sid, wd)
    except Exception as err:
        logger.warning("Rollback failed: %s", err)
    return None


def maybe_escalate(
    escalator: "Escalator", session: dict, task: Task,
) -> None:
    """Count a failure on ``session`` and escalate from the third one onwards.

    The running count is kept in ``session["_fail_count"]``.
    """
    count = session.get("_fail_count", 0) + 1
    session["_fail_count"] = count
    if count < 3:
        return
    details = {"task_id": task.id, "failures": count}
    escalator.escalate(session["id"], "repeated_failure", details)


async def build_icpg_context(cfg: "MaggyConfig", task: Task) -> str:
    """Query the iCPG CLI for code-intelligence context about ``task``.

    Runs ``python3 -m scripts.icpg ... query prior`` inside the bootstrap
    directory with up to 8 keywords extracted from the task title and
    description, waiting at most 10 seconds.

    Returns:
        A markdown block containing up to 2000 characters of CLI output, or
        ``""`` when the bootstrap path or CLI is missing, no keywords can be
        extracted, the command fails or times out, or it prints nothing.
    """
    bp = cfg.resolve_bootstrap_path()
    if not bp or not (bp / "scripts" / "icpg" / "__main__.py").exists():
        return ""
    from maggy.services.executor_prompts import extract_keywords
    kw = extract_keywords(f"{task.title} {task.description}")
    if not kw:
        return ""
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "python3", "-m", "scripts.icpg", "--project", str(bp),
            "query", "prior", "--text", " ".join(kw[:8]), "--limit", "8",
            stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=str(bp))
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
    except asyncio.TimeoutError:
        # wait_for() only cancels communicate(); the child keeps running
        # unless it is killed and reaped explicitly.
        if proc is not None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # already exited between the timeout and the kill
            await proc.wait()
        return ""
    except (FileNotFoundError, OSError):
        return ""
    if proc.returncode != 0:
        return ""
    text = (stdout or b"").decode("utf-8", errors="replace").strip()
    if not text:
        return ""
    return ("## iCPG Code Intelligence\n"
            "Pre-queried from Maggy's intent code property graph:\n\n"
            + text[:2000] + "\n\n**Use this to target your file reads.**")


def resolve_working_dir(cfg: "MaggyConfig", requested: str | None, task: "Task") -> str:
    """Return an absolute working directory that lies inside a configured codebase.

    With ``requested`` given, it must resolve to a codebase root or a path
    below one; otherwise the directory is chosen by ``pick_working_dir``.

    Raises:
        ValueError: No codebases are configured, or ``requested`` is outside
            all of them.
    """
    from pathlib import Path

    if not cfg.codebases:
        raise ValueError("No codebases configured")
    allowed = [Path(cb.path).expanduser().resolve() for cb in cfg.codebases]
    if not requested:
        return pick_working_dir(cfg, task)

    wanted = Path(requested).expanduser().resolve()

    def _within(base: Path) -> bool:
        # relative_to() raises ValueError when ``wanted`` is not under ``base``.
        try:
            wanted.relative_to(base)
        except ValueError:
            return False
        return True

    if any(_within(base) for base in allowed):
        return str(wanted)
    raise ValueError(f"working_dir {requested!r} not inside codebases")


def pick_working_dir(cfg: "MaggyConfig", task: "Task") -> str:
    """Choose the codebase whose key or directory name is mentioned by the task.

    A single configured codebase is returned directly. Otherwise each
    codebase scores 5 if its key appears in the task's title, description
    or board (case-insensitive), plus 3 if its directory name appears and
    differs from the key. The highest score wins; ties and all-zero scores
    go to the earliest codebase.
    """
    from pathlib import Path

    cbs = cfg.codebases
    if len(cbs) == 1:
        return str(Path(cbs[0].path).expanduser().resolve())

    haystack = f"{task.title} {task.description} {task.board}".lower()

    def _score(cb) -> int:
        key = cb.key.lower()
        dirname = Path(cb.path).name.lower()
        points = 5 if key in haystack else 0
        if dirname != key and dirname in haystack:
            points += 3
        return points

    winner, top = cbs[0].key, 0
    for cb in cbs:
        points = _score(cb)
        if points > top:
            winner, top = cb.key, points

    # Resolve by key: the first codebase carrying the winning key is used.
    chosen = next(cb for cb in cbs if cb.key == winner)
    return str(Path(chosen.path).expanduser().resolve())


async def post_plan(provider, task_id: str, output: str) -> None:
    """Comment the plan (first 4000 characters) on the task; failures are only logged."""
    try:
        comment = f"## Maggy Plan\n\n{output[:4000]}"
        await provider.add_comment(task_id, comment)
    except Exception as err:
        logger.warning("Failed to post plan: %s", err)


def _task_type(task: "Task") -> str:
    return task.labels[0] if task.labels else "general"


def _security_flag(raw: dict, task_type: str) -> bool:
    if "security_sensitive" in raw:
        return bool(raw["security_sensitive"])
    return task_type in {"security", "auth", "billing"}


================================================
FILE: maggy/maggy/services/executor_prompts.py
================================================
"""Executor prompt templates for TDD pipeline steps."""

from __future__ import annotations

import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from maggy.providers.base import Task
    from maggy.routing import RoutingService

from maggy.routing_rules import conventions_for

# Words dropped by extract_keywords(): common English function words plus
# generic ticket verbs ("add", "fix", "update", ...). Compared against
# lower-cased tokens, so every entry is lower case.
STOP = frozenset({
    "the", "and", "for", "to", "in", "of", "a", "is", "with",
    "on", "from", "be", "as", "by", "an", "or", "not", "all",
    "that", "this", "are", "can", "should", "would", "when",
    "how", "what", "where", "which", "we", "need", "also",
    "been", "has", "have", "it", "its", "new", "add", "fix",
    "update", "create", "delete", "get", "set", "use",
})


def plan_prompt(task: Task, icpg_ctx: str, routing: RoutingService) -> str:
    """Prompt for plan-only mode: ask for a plan without any code changes.

    The ticket description is truncated to 1500 characters; the iCPG and
    convention blocks are included when non-empty.
    """
    conventions = _conventions_block(task, routing)
    pieces = [
        "Create an implementation plan for this ticket. ",
        "No code changes — just a plan.\n\n",
        f"Ticket: {task.title}\n{task.description[:1500]}",
        _icpg_block(icpg_ctx),
        conventions,
        "\n",
        "Output: numbered steps, files to touch, risks, tests.",
    ]
    return "".join(pieces)


def analysis_prompt(task: Task, icpg_ctx: str, routing: RoutingService) -> str:
    """Prompt for the ANALYZE step: files, functions, tests and risks for the ticket.

    The ticket description is truncated to 1500 characters; the iCPG and
    convention blocks are included when non-empty.
    """
    conventions = _conventions_block(task, routing)
    pieces = [
        "Analyze this ticket against the codebase and output ",
        "a concise plan.\nIdentify: files to change, functions ",
        "affected, tests needed, risks.\n\n",
        f"Ticket: {task.title}\n{task.description[:1500]}",
        _icpg_block(icpg_ctx),
        conventions,
    ]
    return "".join(pieces)


def tests_prompt(
    task: Task, icpg_ctx: str, analysis: str, routing: RoutingService,
) -> str:
    """Prompt for the WRITE TESTS step: failing tests only, no implementation.

    Includes the ticket (description truncated to 1500 characters), the
    optional iCPG and convention blocks, and the first 1000 characters of
    the earlier analysis.
    """
    conventions = _conventions_block(task, routing)
    pieces = [
        "Write failing test cases for this ticket ",
        "(TDD — no implementation yet).\n",
        "Use the project's existing test patterns. ",
        "Commit tests separately.\n\n",
        f"Ticket: {task.title}\n{task.description[:1500]}",
        _icpg_block(icpg_ctx),
        conventions,
        "\n",
        f"Analysis:\n{analysis[:1000]}",
    ]
    return "".join(pieces)


def impl_prompt(task: Task, icpg_ctx: str, routing: RoutingService) -> str:
    """Prompt for the IMPLEMENT step: make the failing tests pass with minimal changes.

    The ticket description is truncated to 1500 characters; the iCPG and
    convention blocks are included when non-empty.
    """
    conventions = _conventions_block(task, routing)
    pieces = [
        "Implement the feature to make the failing tests pass.\n",
        "Follow existing code patterns. Keep changes minimal.\n\n",
        f"Ticket: {task.title}\n{task.description[:1500]}",
        _icpg_block(icpg_ctx),
        conventions,
        "\n",
        "Run tests to verify, then commit with a conventional ",
        "commit message.",
    ]
    return "".join(pieces)


def extract_keywords(text: str) -> list[str]:
    """Return up to 20 distinct identifier-like words from ``text``.

    Words are lower-cased and kept in order of first appearance; stop
    words and words shorter than three characters are discarded.
    """
    tokens = re.findall(r"[a-zA-Z_][a-zA-Z0-9_]*", text.lower())
    # dict.fromkeys() de-duplicates while preserving first-seen order.
    keywords = [
        token for token in dict.fromkeys(tokens)
        if len(token) >= 3 and token not in STOP
    ]
    return keywords[:20]


def _icpg_block(icpg_ctx: str) -> str:
    if not icpg_ctx:
        return ""
    return f"\n\n{icpg_ctx}\n"


def _task_type(task: Task) -> str:
    if task.labels:
        return task.labels[0]
    return "general"


def _conventions_block(task: Task, routing: RoutingService) -> str:
    """Render the conventions that apply to ``task`` as a padded prompt block.

    Task type and project key come from ``task.raw`` when it is a dict (the
    type falls back to the first label). An empty project key is passed to
    ``conventions_for`` as ``None``. Returns ``""`` when no conventions apply.
    """
    meta = task.raw if isinstance(task.raw, dict) else {}
    kind = str(meta.get("task_type") or _task_type(task))
    key = str(meta.get("project_key") or "")
    rendered = conventions_for(routing.rules, kind, key or None)
    return f"\n\n{rendered}\n" if rendered else ""


================================================
FILE: maggy/maggy/services/executor_types.py
================================================
"""Executor shared types — context and step descriptors."""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from maggy.providers.base import Task


@dataclass
class SessionCtx:
    """Bundles session state, task, and working dir for executor.

    One instance is created per executor session and passed to every
    pipeline step instead of separate arguments.
    """

    # Mutable session record (status, output, error, ...) shared with the
    # executor's session table; steps update it in place.
    session: dict
    # The tracker task being worked on.
    task: Task
    # Working directory in which the models and verifiers run.
    wd: str
    # iCPG code-intelligence text injected into prompts; empty when unavailable.
    icpg: str = ""


@dataclass
class StepSpec:
    """Describes a single TDD pipeline step."""

    # Step name used in session output headers and signal logs (e.g. "ANALYZE").
    label: str
    # Full prompt text sent to the model for this step.
    prompt: str
    # Turn budget requested for this step.
    max_turns: int


================================================
FILE: maggy/maggy/services/inbox.py
================================================
"""AI-prioritized inbox — ranks tasks by urgency, OKR alignment, and age.

Works with any IssueTrackerProvider. Caches ranking for 30 minutes in SQLite.
"""

from __future__ import annotations

import json
import logging
import sqlite3
from datetime import datetime, timezone
from pathlib import Path

from maggy.config import MaggyConfig
from maggy.services.ai_client import ai_complete
from maggy.providers.base import IssueTrackerProvider, Task

logger = logging.getLogger(__name__)

CACHE_TTL_SECONDS = 30 * 60  # 30 min


def _connect_sqlite(path: Path) -> sqlite3.Connection:
    """Open a SQLite connection with sensible defaults for concurrent use.

    FastAPI serves requests concurrently, and the heartbeat worker writes from
    a different thread. WAL lets readers and writers coexist; foreign_keys
    enforces referential integrity; busy_timeout avoids 'database is locked'
    errors under contention. Matches the convention used by scripts/icpg/store.py.
    """
    db = sqlite3.connect(path, timeout=30.0)
    db.execute("PRAGMA journal_mode=WAL")
    db.execute("PRAGMA foreign_keys=ON")
    db.execute("PRAGMA busy_timeout=30000")
    return db


class InboxService:
    """AI-prioritized task inbox with a single-row SQLite cache.

    Tasks are fetched from the issue-tracker provider, ranked by the AI
    client and cached for ``CACHE_TTL_SECONDS``.
    """

    def __init__(self, cfg: MaggyConfig, provider: IssueTrackerProvider):
        """Store collaborators and make sure the cache database exists."""
        self.cfg = cfg
        self.provider = provider
        self.db_path = Path(cfg.storage.path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _init_db(self) -> None:
        """Create the ``inbox_cache`` table if it is missing."""
        db = _connect_sqlite(self.db_path)
        try:
            # ``with db`` only commits/rolls back; it does not close the
            # connection, hence the explicit close() below.
            with db:
                db.execute("""
                    CREATE TABLE IF NOT EXISTS inbox_cache (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        cached_at TEXT NOT NULL,
                        payload TEXT NOT NULL
                    )
                """)
        finally:
            db.close()

    def _read_cache(self, ignore_ttl: bool = False) -> list[dict] | None:
        """Return the most recent cached ranking.

        Args:
            ignore_ttl: When True, return the cached payload even if it is
                older than ``CACHE_TTL_SECONDS``.

        Returns:
            The cached list, or ``None`` when nothing is cached or the
            entry has expired.
        """
        db = _connect_sqlite(self.db_path)
        try:
            with db:
                row = db.execute(
                    "SELECT cached_at, payload FROM inbox_cache ORDER BY id DESC LIMIT 1"
                ).fetchone()
        finally:
            db.close()
        if not row:
            return None
        if not ignore_ttl:
            cached_at = datetime.fromisoformat(row[0])
            age = (datetime.now(timezone.utc) - cached_at).total_seconds()
            if age > CACHE_TTL_SECONDS:
                return None
        return json.loads(row[1])

    def _write_cache(self, items: list[dict]) -> None:
        """Replace the cache content with ``items`` stamped with the current UTC time."""
        db = _connect_sqlite(self.db_path)
        try:
            with db:
                db.execute("DELETE FROM inbox_cache")  # keep just latest
                db.execute(
                    "INSERT INTO inbox_cache (cached_at, payload) VALUES (?, ?)",
                    (datetime.now(timezone.utc).isoformat(), json.dumps(items)),
                )
        finally:
            db.close()

    async def get_prioritized(self, force_refresh: bool = False) -> list[dict]:
        """Return AI-ranked tasks. Cached 30 min.

        On provider failure (GitHub/Asana down), fall back to the last cached
        ranking — even if stale — rather than 500ing the whole endpoint.
        Staleness is indicated to clients via the `stale` flag on items.
        """
        if not force_refresh:
            cached = self._read_cache()
            if cached is not None:
                return cached

        try:
            tasks = await self.provider.list_tasks(state="open", limit=50)
        except Exception as e:
            logger.warning("provider.list_tasks failed, falling back to stale cache: %s", e)
            stale = self._read_cache(ignore_ttl=True) or []
            for item in stale:
                item["stale"] = True
            return stale

        if not tasks:
            return []

        ranked = await self._rank_with_ai(tasks)
        self._write_cache(ranked)
        return ranked

    async def _rank_with_ai(self, tasks: list[Task]) -> list[dict]:
        """Ask the AI to rank ``tasks``; fall back to provider order when it cannot.

        Tasks the AI did not (validly) rank keep their positional rank
        ``index + 1``. The result is sorted by rank.
        """
        prompt = self._build_rank_prompt(tasks)
        text = await self._call_ai(prompt)
        if not text:
            return [self._task_to_dict(t, rank=i + 1, reason="AI not available; sorted by recency")
                    for i, t in enumerate(tasks)]
        try:
            # Tolerate chatter around the JSON object: parse from the first
            # "{" to the last "}".
            start = text.find("{")
            end = text.rfind("}")
            data = json.loads(text[start:end + 1]) if start >= 0 else {"rankings": []}
        except Exception as e:
            logger.warning("AI ranking parse failed: %s", e)
            return [self._task_to_dict(t, rank=i + 1, reason="AI ranking unavailable")
                    for i, t in enumerate(tasks)]

        # The model may return {"rankings": null} or a non-list value;
        # treat anything that is not a list as "no rankings".
        rankings = data.get("rankings") if isinstance(data, dict) else None
        if not isinstance(rankings, list):
            rankings = []

        # Apply rankings — validate each row before trusting it.
        # LLMs routinely return missing indices, string ranks, or out-of-range values.
        rank_map: dict[int, dict] = {}
        for r in rankings:
            if not isinstance(r, dict):
                continue
            idx = r.get("index")
            rank = r.get("rank")
            if not isinstance(idx, int) or idx < 0 or idx >= len(tasks):
                continue
            # Coerce rank defensively
            try:
                rank_int = int(rank)
            except (TypeError, ValueError):
                continue
            if rank_int < 1:
                continue
            # First write wins — LLM occasionally emits duplicate indices
            rank_map.setdefault(idx, {"rank": rank_int, "reason": str(r.get("reason", ""))[:300]})

        ranked: list[dict] = []
        for i, t in enumerate(tasks):
            r = rank_map.get(i) or {"rank": i + 1, "reason": ""}
            ranked.append(self._task_to_dict(t, rank=r["rank"], reason=r["reason"]))
        ranked.sort(key=lambda x: x["rank"])
        return ranked

    def _build_rank_prompt(self, tasks: list[Task]) -> str:
        """Build the ranking prompt for AI."""
        okr_block = ""
        if self.cfg.okrs.source == "yaml" and self.cfg.okrs.items:
            okr_lines = [f"- {o.id}: {o.title}" for o in self.cfg.okrs.items]
            okr_block = "## Current OKRs\n" + "\n".join(okr_lines) + "\n"
        task_lines = []
        for i, t in enumerate(tasks):
            snippet = (t.description or "")[:200].replace("\n", " ")
            task_lines.append(f"[{i}] id={t.id} board={t.board} labels={','.join(t.labels[:3])}\n    {t.title}\n    {snippet}")
        return f"""You are the AI triage assistant for {self.cfg.org.name}.

{okr_block}
Rank the following {len(tasks)} open tasks by priority. Consider:
- OKR alignment (if OKRs provided)
- Urgency signals (labels like "bug", "critical", "urgent")
- Age (older + stale = deprioritize, older + active = maybe important)

Respond with STRICT JSON only:
{{"rankings": [{{"index": 0, "rank": 1, "reason": "<20 word explanation>"}}, ...]}}

Tasks:
{chr(10).join(task_lines)}"""

    async def _call_ai(self, prompt: str) -> str | None:
        """Call AI via API key or CLI subscription."""
        return await ai_complete(prompt, self.cfg)

    def _task_to_dict(self, t: Task, rank: int, reason: str) -> dict:
        """Serialize a task plus its rank and AI reason into a JSON-friendly dict.

        The description is truncated to 500 characters; a missing
        description is emitted as ``""``.
        """
        return {
            "id": t.id,
            "title": t.title,
            # Guard against description=None, as _build_rank_prompt does.
            "description": (t.description or "")[:500],
            "status": t.status,
            "assignee": t.assignee,
            "author": t.author,
            "url": t.url,
            "labels": t.labels,
            "board": t.board,
            "created_at": t.created_at,
            "updated_at": t.updated_at,
            "rank": rank,
            "ai_reason": reason,
        }


================================================
FILE: maggy/maggy/services/monitor.py
================================================
"""MonitorService — background polling for issue trackers."""

from __future__ import annotations

import logging
import sqlite3
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path

import httpx

logger = logging.getLogger(__name__)

GITHUB_API = "https://api.github.com"
MONDAY_API = "https://api.monday.com/v2"


@dataclass
class MonitorConfig:
    """Config for a single project monitor.

    Persisted one row per ``project_key`` in the ``monitors`` table by
    ``MonitorService.add``.
    """

    # Unique key of the monitored project (primary key in the monitors table).
    project_key: str
    provider: str  # "github" | "asana" | "monday"
    # Optional command string stored alongside the monitor; empty by default.
    poll_command: str = ""
    # Polling interval in seconds (default 300).
    interval_seconds: int = 300
    # Disabled monitors are excluded by MonitorService.list_active().
    enabled: bool = True


@dataclass
class MonitorEvent:
    """A detected new item from a tracker."""

    # Identifier of the item within its tracker.
    id: str
    # Human-readable title of the item.
    title: str
    # Link to the item.
    url: str
    # Name of the tracker the item came from.
    provider: str
    # Key of the monitored project the item belongs to.
    project_key: str
    # Time the item was first seen; empty until set.
    seen_at: str = ""


class MonitorService:
    """Tracker polling service that keeps its state in SQLite.

    Two tables are used: ``monitors`` (one row per configured project) and
    ``seen_events`` (event ids already reported, keyed per project).
    """

    def __init__(self, db_path: Path) -> None:
        """Open the database at ``db_path`` and ensure both tables exist."""
        self._db = sqlite3.connect(str(db_path))
        self._init_tables()

    def _init_tables(self) -> None:
        """Create the ``monitors`` and ``seen_events`` tables if missing."""
        schema = """
            CREATE TABLE IF NOT EXISTS monitors (
                project_key TEXT PRIMARY KEY,
                provider TEXT NOT NULL,
                poll_command TEXT DEFAULT '',
                interval_seconds INTEGER DEFAULT 300,
                enabled INTEGER DEFAULT 1
            );
            CREATE TABLE IF NOT EXISTS seen_events (
                event_id TEXT,
                project_key TEXT,
                seen_at TEXT,
                PRIMARY KEY (event_id, project_key)
            );
        """
        self._db.executescript(schema)

    def add(self, cfg: MonitorConfig) -> None:
        """Store ``cfg``, replacing any existing row with the same project key."""
        row = (
            cfg.project_key,
            cfg.provider,
            cfg.poll_command,
            cfg.interval_seconds,
            int(cfg.enabled),  # persisted as 0/1
        )
        self._db.execute(
            "INSERT OR REPLACE INTO monitors VALUES (?,?,?,?,?)", row,
        )
        self._db.commit()

    def remove(self, project_key: str) -> None:
        """Delete the monitor row for ``project_key`` (no-op when absent)."""
        params = (project_key,)
        self._db.execute("DELETE FROM monitors WHERE project_key=?", params)
        self._db.commit()

    def list_active(self) -> list[MonitorConfig]:
        """Return the configs of all monitors whose ``enabled`` flag is 1."""
        cursor = self._db.execute("SELECT * FROM monitors WHERE enabled=1")
        configs = []
        for row in cursor.fetchall():
            configs.append(_row_to_config(row))
        return configs

    def is_new(self, event_id: str, project_key: str) -> bool:
        """Return True when this event has not been recorded for the project."""
        cursor = self._db.execute(
            "SELECT 1 FROM seen_events WHERE event_id=? AND project_key=?",
            (event_id, project_key),
        )
        return cursor.fetchone() is None

    def mark_seen(self, event_id: str, project_key: str) -> None:
        """Record the event with the current UTC time; duplicates are ignored."""
        stamp = datetime.now(timezone.utc).isoformat()
        self._db.execute(
            "INSERT OR IGNORE INTO seen_events VALUES (?,?,?)",
            (event_id, project_key, stamp),
        )
        self._db.commit()

    def status(self) -> dict:
        """Return counts of active monitors and recorded events."""
        active_count = len(self.list_active())
        (seen_total,) = self._db.execute(
            "SELECT COUNT(*) FROM seen_events",
        ).fetchone()
        return {"active": active_count, "seen_events": seen_total}

    async def poll(self, cfg: MonitorConfig) -> list[MonitorEvent]:
        """Poll the tracker named by ``cfg.provider`` and return new events.

        Providers other than "github" and "monday" yield an empty list.
        """
        if cfg.provider == "github":
            events = await _poll_github(self, cfg)
        elif cfg.provider == "monday":
            events = await _poll_monday(self, cfg)
        else:
            events = []
        return events


def _row_to_config(row: tuple) -> MonitorConfig:
    """Build a ``MonitorConfig`` from a positional ``monitors`` table row."""
    fields = {
        "project_key": row[0],
        "provider": row[1],
        "poll_command": row[2],
        "interval_seconds": row[3],
        "enabled": bool(row[4]),  # stored as 0/1
    }
    return MonitorConfig(**fields)


async def _poll_github(svc: MonitorService, cfg: MonitorConfig) -> list[MonitorEvent]:
    """Fetch open pull requests for the configured repo and report unseen ones.

    ``cfg.poll_command`` is placed into the ``/repos/{repo}/pulls`` path.
    Each pull request not yet recorded for this project becomes a
    ``MonitorEvent`` and is marked seen straight away. An empty repo value
    or a non-200 response produces an empty list.
    """
    repo = cfg.poll_command or ""
    if not repo:
        return []
    found: list[MonitorEvent] = []
    pulls_url = f"{GITHUB_API}/repos/{repo}/pulls"
    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.get(pulls_url, params={"state": "open"})
        if resp.status_code != 200:
            return []
        for pr in resp.json():
            event_id = f"gh-pr-{pr.get('number', '')}"
            if not svc.is_new(event_id, cfg.project_key):
                continue
            event = MonitorEvent(
                id=event_id,
                title=pr.get("title", ""),
                url=pr.get("html_url", ""),
                provider="github",
                project_key=cfg.project_key,
            )
            found.append(event)
            svc.mark_seen(event_id, cfg.project_key)
    return found


async def _poll_monday(svc: MonitorService, cfg: MonitorConfig) -> list[MonitorEvent]:
    """Query a Monday board for items and report the ones not yet seen.

    ``cfg.poll_command`` is used as the board id. Up to 20 items are
    requested; each item not yet recorded for this project becomes a
    ``MonitorEvent`` (with an empty ``url``) and is marked seen. An empty
    board id, a non-200 response, or a response without usable board data
    produces an empty list.
    """
    board_id = cfg.poll_command or ""
    if not board_id:
        return []
    events: list[MonitorEvent] = []
    # NOTE(review): board_id is interpolated into the GraphQL document as-is,
    # and no Authorization header is sent — confirm both are intended.
    query = f'{{ boards(ids: [{board_id}]) {{ items_page(limit: 20) {{ items {{ id name }} }} }} }}'
    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.post(
            MONDAY_API,
            json={"query": query},
        )
        if resp.status_code != 200:
            return []
        payload = resp.json()
        # A GraphQL error reply can be HTTP 200 with "data": null, so a plain
        # .get("data", {}) would hand back None. Treat every level as optional.
        data = payload.get("data") if isinstance(payload, dict) else None
        boards = (data or {}).get("boards") or []
        if not boards:
            return []
        page = (boards[0] or {}).get("items_page") or {}
        items = page.get("items") or []
        for item in items:
            eid = f"mon-{item.get('id', '')}"
            if svc.is_new(eid, cfg.project_key):
                events.append(MonitorEvent(
                    id=eid, title=item.get("name", ""),
                    url="", provider="monday",
                    project_key=cfg.project_key,
                ))
                svc.mark_seen(eid, cfg.project_key)
    return events


================================================
FILE: maggy/maggy/services/output_reviewer.py
================================================
"""Inter-task output quality reviewer.

Sends step output to a fast local model for quality scoring.
Falls back to pass-through (score=3) on any failure so it
never blocks the pipeline.
"""

from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from maggy.adapters.pi import PiAdapter

logger = logging.getLogger(__name__)

_SCORE_RE = re.compile(r"SCORE:\s*(\d+)", re.IGNORECASE)
_REASON_RE = re.compile(r"REASON:\s*(.+)", re.IGNORECASE)

REVIEW_MODEL = "local"
REVIEW_MAX_TURNS = 1


@dataclass
class ReviewResult:
    """Quality verdict for one step's output."""

    # Parsed scores are clamped to 1..5; 3 is the pass-through fallback used
    # when no score can be obtained.
    score: int
    # Reviewer's one-line explanation, or a fallback note such as
    # "review unavailable" / "review error"; may be empty.
    reason: str = ""


def _parse_review(text: str) -> ReviewResult:
    """Pull the ``SCORE:`` and ``REASON:`` fields out of reviewer output.

    A missing score yields the neutral result (score 3, no reason).
    Otherwise the score is clamped into 1..5 and the reason, if present,
    is whitespace-trimmed.
    """
    score_match = _SCORE_RE.search(text)
    if score_match is None:
        return ReviewResult(score=3)
    clamped = min(5, max(1, int(score_match.group(1))))
    reason_match = _REASON_RE.search(text)
    if reason_match is None:
        explanation = ""
    else:
        explanation = reason_match.group(1).strip()
    return ReviewResult(score=clamped, reason=explanation)


def _build_prompt(step_label: str, output: str) -> str:
    """Build the review prompt for the local model."""
    trimmed = output[:3000]
    return (
        f"Review this {step_label} output for quality.\n"
        "Rate 1-5 (1=wrong, 3=acceptable, 5=excellent).\n"
        "Reply ONLY in this format:\n"
        "SCORE: <number>\nREASON: <one sentence>\n\n"
        f"--- OUTPUT ---\n{trimmed}"
    )


async def review_output(
    pi: "PiAdapter", step_label: str, output: str, wd: str,
) -> ReviewResult:
    """Ask the local model to score a step's output.

    Never raises for reviewer problems: an unsuccessful run returns score 3
    with reason "review unavailable", and any exception from the call or
    the parsing returns score 3 with reason "review error".
    """
    review_prompt = _build_prompt(step_label, output)
    try:
        outcome = await pi.send_prompt(
            REVIEW_MODEL, review_prompt, wd,
            max_turns=REVIEW_MAX_TURNS, timeout=30,
        )
        if outcome.success:
            return _parse_review(outcome.output)
        return ReviewResult(score=3, reason="review unavailable")
    except Exception as exc:
        # Best-effort by design: a broken reviewer must not block the pipeline.
        logger.debug("Review failed: %s", exc)
        return ReviewResult(score=3, reason="review error")


================================================
FILE: maggy/maggy/services/planner.py
================================================
"""Dual-model planning service."""

from __future__ import annotations

from dataclasses import dataclass, field

from maggy.adapters.pi import PiAdapter, RunResult


@dataclass
class PlanResult:
    """Outcome of one dual-model planning round."""

    # Plan text returned by the planning model.
    primary_plan: str
    # Review text returned by the second model for that plan.
    counter_check: str
    # Notes extracted from the review's "CONFLICT:" lines.
    conflicts: list[str] = field(default_factory=list)


class DualPlanner:
    """Produces a plan with one model and has a second model review it."""

    def __init__(self, pi: PiAdapter):
        """Keep the adapter used to reach the models."""
        self._pi = pi

    async def plan(
        self, task_title: str, task_desc: str, wd: str,
    ) -> str:
        """Ask the "claude" model for an implementation plan."""
        request = _plan_prompt(task_title, task_desc)
        plan_text = await self._send("claude", request, wd)
        return plan_text

    async def counter_check(self, plan_text: str, wd: str) -> str:
        """Ask the "codex" model to review ``plan_text``."""
        request = _review_prompt(plan_text)
        critique = await self._send("codex", request, wd)
        return critique

    async def dual_plan(
        self, task_title: str, task_desc: str, wd: str,
    ) -> PlanResult:
        """Plan, review the plan, and bundle both with the flagged conflicts."""
        primary_plan = await self.plan(task_title, task_desc, wd)
        critique = await self.counter_check(primary_plan, wd)
        return PlanResult(
            primary_plan=primary_plan,
            counter_check=critique,
            conflicts=_conflicts(critique),
        )

    async def _send(self, model: str, prompt: str, wd: str) -> str:
        """Run ``prompt`` on ``model`` and return its text (raises on failure)."""
        outcome = await self._pi.send_prompt(model, prompt, wd, 5)
        return _result_text(outcome, model)


def _plan_prompt(task_title: str, task_desc: str) -> str:
    return (
        "Create an implementation plan.\n"
        "Return numbered steps, files to touch, risks, and tests.\n\n"
        f"Title: {task_title}\n"
        f"Description: {task_desc}"
    )


def _review_prompt(plan_text: str) -> str:
    return (
        "Review this implementation plan.\n"
        "Flag conflicts as 'CONFLICT:' and keep the note short.\n"
        "Call out risky omissions and invalid assumptions.\n\n"
        f"Plan:\n{plan_text}"
    )


def _result_text(result: RunResult, model: str) -> str:
    if result.success:
        return result.output.strip()
    message = result.output or result.error
    raise RuntimeError((message or f"{model} planning failed").strip())


def _conflicts(text: str) -> list[str]:
    return [
        line.partition(":")[2].strip()
        for line in text.splitlines()
        if line.upper().startswith("CONFLICT:")
    ]


================================================
FILE: maggy/maggy/services/session_detect.py
================================================
"""Multi-CLI session detection.

Scans Claude, Kimi, Codex state directories to find
previous sessions for a given working directory.
"""

from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from pathlib import Path

logger = logging.getLogger(__name__)


def _home() -> Path:
    """Testable home directory getter."""
    return Path.home()


@dataclass
class CliSessionInfo:
    """Detected session from a CLI tool."""

    # Tool name; the detectors in this module use "claude", "kimi", "codex".
    cli: str
    # Session identifier as read from the tool's state files.
    session_id: str
    # Working directory the session belongs to; the detectors pass it with
    # any trailing "/" removed.
    project_path: str = ""


@dataclass
class DetectedSessions:
    """Results from scanning all CLIs."""

    # At most one entry per CLI: detect_all() appends the single match each
    # detector returns, in detector order.
    sessions: list[CliSessionInfo] = field(
        default_factory=list,
    )


def detect_all(working_dir: str) -> DetectedSessions:
    """Scan every supported CLI for a previous session in ``working_dir``.

    Detectors run in the order Claude, Kimi, Codex. A detector that raises
    is skipped so one unreadable state directory cannot hide the others;
    the failure is logged at debug level instead of being dropped silently.
    """
    result = DetectedSessions()
    for fn in (detect_claude, detect_kimi, detect_codex):
        try:
            info = fn(working_dir)
        except Exception as exc:
            # Best-effort scan: keep going, but leave a trace for debugging.
            logger.debug(
                "%s failed for %s: %s", fn.__name__, working_dir, exc,
            )
            continue
        if info:
            result.sessions.append(info)
    return result


def detect_claude(working_dir: str) -> CliSessionInfo | None:
    """Return the most recent Claude session recorded for ``working_dir``.

    Reads ``~/.claude/history.jsonl`` from the last line backwards and
    returns the first entry whose ``project`` equals the directory
    (trailing slashes ignored) and that carries a ``sessionId``.
    """
    history = _home() / ".claude" / "history.jsonl"
    if not history.exists():
        return None
    wanted = working_dir.rstrip("/")
    raw_lines = history.read_text().splitlines()
    for raw in raw_lines[::-1]:  # newest entries are at the end of the file
        record = _parse_json(raw)
        if not record:
            continue
        project = record.get("project", "").rstrip("/")
        session_id = record.get("sessionId", "")
        if project != wanted or not session_id:
            continue
        return CliSessionInfo("claude", session_id, wanted)
    return None


def detect_kimi(working_dir: str) -> CliSessionInfo | None:
    """Return the Kimi session recorded for ``working_dir`` in kimi.json.

    Looks through the ``work_dirs`` list of ``~/.kimi/kimi.json`` for the
    first entry whose ``path`` equals the directory (trailing slashes
    ignored) and that has a ``last_session_id``.
    """
    state_file = _home() / ".kimi" / "kimi.json"
    if not state_file.exists():
        return None
    state = _parse_json(state_file.read_text())
    if not state:
        return None
    wanted = working_dir.rstrip("/")
    for work_dir in state.get("work_dirs", []):
        recorded_path = work_dir.get("path", "").rstrip("/")
        session_id = work_dir.get("last_session_id")
        if recorded_path != wanted or not session_id:
            continue
        return CliSessionInfo("kimi", session_id, wanted)
    return None


def detect_codex(working_dir: str) -> CliSessionInfo | None:
    """Return a Codex session for ``working_dir`` found among rollout files.

    ``rollout-*.jsonl`` files under ``~/.codex/sessions`` are sorted by path
    in descending order, and the first line of the first 50 is inspected.
    A match needs ``payload.cwd`` equal to the directory (trailing slashes
    ignored) and a ``payload.id``.
    """
    sessions_root = _home() / ".codex" / "sessions"
    if not sessions_root.exists():
        return None
    wanted = working_dir.rstrip("/")
    rollouts = list(sessions_root.rglob("rollout-*.jsonl"))
    rollouts.sort(reverse=True)
    for rollout in rollouts[:50]:
        header = _parse_json(_read_first_line(rollout))
        if not header:
            continue
        payload = header.get("payload", {})
        cwd = payload.get("cwd", "").rstrip("/")
        session_id = payload.get("id", "")
        if cwd != wanted or not session_id:
            continue
        return CliSessionInfo("codex", session_id, wanted)
    return None


def _parse_json(text: str) -> dict | None:
    """Safe JSON parse, returns None on failure."""
    text = text.strip()
    if not text:
        return None
    try:
        return json.loads(text)
    except (json.JSONDecodeError, ValueError):
        return None


def _read_first_line(path: Path) -> str:
    """Read first line of a file safely."""
    try:
        with path.open() as f:
            return f.readline()
    except OSError:
        return ""


================================================
FILE: maggy/maggy/services/stakes.py
================================================
"""Stakes classification — HIGH/MEDIUM/LOW from task metadata."""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from maggy.providers.base import Task
    from maggy.routing_rules import StakesLevel, StakesPatterns


@dataclass
class StakesResult:
    """Result of stakes classification."""

    level: str  # "high" | "medium" | "low"
    # Why the level was chosen: entries look like "file:<pattern>",
    # "type:<task_type>" or "keyword:<kw>"; the low fallback uses ["default"].
    reasons: list[str] = field(default_factory=list)


def classify_stakes(
    task: Task,
    patterns: StakesPatterns | None = None,
) -> StakesResult:
    """Rate a task as high, medium or low stakes.

    The lowercased title and description, plus ``raw["task_type"]`` when
    ``task.raw`` is a dict, are checked against the high patterns first and
    the medium patterns second. The first level that matches wins; with no
    match the result is "low" with the single reason "default". When
    ``patterns`` is omitted the project defaults are loaded.
    """
    if patterns is None:
        from maggy.routing_rules_defaults import default_stakes
        patterns = default_stakes()

    haystack = f"{task.title} {task.description}".lower()
    raw = task.raw
    if not isinstance(raw, dict):
        raw = {}
    task_type = str(raw.get("task_type", ""))

    for level_name in ("high", "medium"):
        hits: list[str] = []
        level_patterns = getattr(patterns, level_name)
        if _matches(haystack, task_type, level_patterns, hits):
            return StakesResult(level_name, hits)
    return StakesResult("low", ["default"])


def _matches(
    text: str, task_type: str,
    level: "StakesLevel", reasons: list[str],
) -> bool:
    """Check if text/task_type matches a stakes level."""
    matched = False
    for pat in level.file_patterns:
        if re.search(re.escape(pat), text):
            reasons.append(f"file:{pat}")
            matched = True
    if task_type and task_type in level.task_types:
        reasons.append(f"type:{task_type}")
        matched = True
    for kw in level.keywords:
        if kw.lower() in text:
            reasons.append(f"keyword:{kw}")
            matched = True
    return matched


================================================
FILE: maggy/maggy/services/tdd_verifier.py
================================================
"""TDD verification — runs pytest/ruff/coverage between executor steps."""

from __future__ import annotations

import asyncio
import logging
import re
from dataclasses import dataclass

logger = logging.getLogger(__name__)

DEFAULT_TIMEOUT = 120
COVERAGE_THRESHOLD = 80.0


@dataclass
class VerifyResult:
    """Outcome of a verification step."""

    passed: bool
    # Short summary on success, or a message / truncated tool output on failure.
    detail: str
    # Number of collected tests; only verify_tests_exist fills this in.
    tests_found: int = 0
    # Number of failed tests; only verify_tests_fail fills this in.
    tests_failed: int = 0


async def verify_tests_exist(wd: str) -> VerifyResult:
    """Check via ``pytest --collect-only`` that tests exist in ``wd``.

    Passes when pytest exits 0 and reports at least one collected test;
    otherwise the first 500 characters of the output are returned.
    """
    collect_cmd = ["python3", "-m", "pytest", "--collect-only", "-q"]
    code, output = await _run_cmd(collect_cmd, wd)
    count = _count_collected(output)
    if code == 0 and count != 0:
        return VerifyResult(True, f"{count} tests collected", count)
    return VerifyResult(False, output[:500], count)


async def verify_tests_fail(wd: str) -> VerifyResult:
    """Confirm the RED phase: ``pytest -x`` must report failing tests.

    Fails when pytest exits 0, and also when it exits non-zero without any
    "N failed" count in its output (treated as a non-test error).
    """
    red_cmd = ["python3", "-m", "pytest", "-x", "--tb=short", "-q"]
    code, output = await _run_cmd(red_cmd, wd)
    failed = _count_failures(output)
    if code == 0:
        outcome = VerifyResult(
            False, "Tests passed — expected failures (RED)",
        )
    elif failed == 0:
        outcome = VerifyResult(False, f"Non-test error:\n{output[:500]}")
    else:
        outcome = VerifyResult(
            True, f"{failed} tests failed (RED)", 0, failed,
        )
    return outcome


async def verify_tests_pass(wd: str) -> VerifyResult:
    """Confirm the GREEN phase: ``pytest -x`` must exit with status 0.

    On a non-zero exit the first 500 characters of output are reported.
    """
    green_cmd = ["python3", "-m", "pytest", "-x", "--tb=short", "-q"]
    code, output = await _run_cmd(green_cmd, wd)
    if code == 0:
        return VerifyResult(True, "All tests pass (GREEN)")
    return VerifyResult(False, f"Tests failing:\n{output[:500]}")


async def verify_lint(wd: str) -> VerifyResult:
    """Run ``ruff check .`` in ``wd`` and report whether it exited cleanly.

    On a non-zero exit the first 500 characters of output are reported.
    """
    lint_cmd = ["python3", "-m", "ruff", "check", "."]
    code, output = await _run_cmd(lint_cmd, wd)
    if code == 0:
        return VerifyResult(True, "Lint clean")
    return VerifyResult(False, f"Lint errors:\n{output[:500]}")


async def verify_coverage(
    wd: str, threshold: float = COVERAGE_THRESHOLD,
) -> VerifyResult:
    """Run ``pytest --cov`` and compare the TOTAL percentage to ``threshold``.

    Only the parsed percentage decides the outcome; pytest's exit status is
    not consulted.
    """
    cov_cmd = ["python3", "-m", "pytest", "--cov", "-q"]
    _exit_code, report = await _run_cmd(cov_cmd, wd)
    pct = _parse_coverage(report)
    if pct >= threshold:
        return VerifyResult(True, f"Coverage {pct:.0f}%")
    return VerifyResult(
        False, f"Coverage {pct:.0f}% < {threshold:.0f}%",
    )


async def _run_cmd(
    cmd: list[str], cwd: str,
) -> tuple[int, str]:
    """Run a subprocess, return (exit_code, output)."""
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=cwd,
        )
        stdout, _ = await asyncio.wait_for(
            proc.communicate(), timeout=DEFAULT_TIMEOUT,
        )
        text = (stdout or b"").decode("utf-8", errors="replace")
        return proc.returncode or 0, text
    except asyncio.TimeoutError:
        return 1, "Command timed out"
    except FileNotFoundError:
        return 1, f"Command not found: {cmd[0]}"


def _count_collected(output: str) -> int:
    """Parse 'N tests collected' from pytest output."""
    m = re.search(r"(\d+)\s+tests?\s+collected", output)
    return int(m.group(1)) if m else 0


def _count_failures(output: str) -> int:
    """Parse 'N failed' from pytest summary."""
    m = re.search(r"(\d+)\s+failed", output)
    return int(m.group(1)) if m else 0


def _parse_coverage(output: str) -> float:
    """Parse 'TOTAL ... NN%' from coverage output."""
    m = re.search(r"TOTAL\s+.*?(\d+)%", output)
    return float(m.group(1)) if m else 0.0


================================================
FILE: maggy/maggy/services/vision.py
================================================
"""Vision analysis via Ollama Qwen3-VL — screenshot review."""

from __future__ import annotations

import base64
import json
import logging
from pathlib import Path
from typing import Generator

import httpx

logger = logging.getLogger(__name__)

OLLAMA_URL = "http://localhost:11434"
VISION_MODEL = "qwen3-vl:32b"
_IMAGE_EXTS = frozenset({
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp",
})
_DEFAULT_PROMPT = (
    "Analyze this screenshot. Describe what you see, "
    "identify any UI issues, and suggest improvements."
)


def _validate(path: str) -> Path | None:
    """Check file exists and is an image."""
    p = Path(path).expanduser().resolve()
    if not p.exists():
        return None
    if p.suffix.lower() not in _IMAGE_EXTS:
        return None
    return p


def _encode(path: Path) -> str:
    """Base64-encode an image file."""
    return base64.b64encode(path.read_bytes()).decode()


def analyze_image(
    path: str,
    prompt: str | None = None,
) -> Generator[dict, None, None]:
    """Stream vision analysis from Ollama Qwen3-VL.

    Args:
        path: Image file to analyze; it must pass ``_validate``.
        prompt: Instruction for the model; the default screenshot-review
            prompt is used when omitted or empty.

    Yields:
        ``{"type": "text", "content": ...}`` for each non-empty content
        chunk, followed by ``{"type": "done"}`` on success. Any failure
        (invalid or unreadable image, connection problem, error reported by
        the server, malformed stream) yields a single
        ``{"type": "error", "content": ...}`` and ends the stream without
        a ``done`` event.
    """
    resolved = _validate(path)
    if resolved is None:
        yield _err(f"Invalid image: {path}")
        return
    try:
        img_b64 = _encode(resolved)
    except OSError as e:
        # The file can vanish or be unreadable after validation; report it
        # as an event rather than raising out of the generator.
        yield _err(f"Cannot read image: {e}")
        return
    body = {
        "model": VISION_MODEL,
        "messages": [{
            "role": "user",
            "content": prompt or _DEFAULT_PROMPT,
            "images": [img_b64],
        }],
        "stream": True,
    }
    try:
        with httpx.stream(
            "POST", f"{OLLAMA_URL}/api/chat",
            json=body, timeout=120.0,
        ) as resp:
            for line in resp.iter_lines():
                if not line.strip():
                    continue  # tolerate blank separator lines
                chunk = json.loads(line)
                if chunk.get("error"):
                    # Server-side failures (e.g. unknown model) arrive as an
                    # {"error": ...} object; surface them instead of ending
                    # with an empty, apparently successful stream.
                    yield _err(str(chunk["error"]))
                    return
                if chunk.get("done"):
                    break
                text = chunk.get("message", {}).get(
                    "content", "",
                )
                if text:
                    yield {"type": "text", "content": text}
    except httpx.ConnectError as e:
        yield _err(f"Cannot connect to Ollama: {e}")
        return
    except Exception as e:
        yield _err(str(e))
        return
    yield {"type": "done"}


def _err(msg: str) -> dict:
    return {"type": "error", "content": msg}


================================================
FILE: maggy/maggy/static/app.js
================================================
// Maggy dashboard — vanilla JS, no build step.
// Talks to /api/* routes. Single-user local install; no auth by default.

const API = '/api';
let CURRENT_TAB = 'chat';

// ── Fetch helper ────────────────────────────────────────────────────────
// Call `${API}${path}` and return the parsed JSON body.
// Sends a JSON Content-Type (caller headers override it) and, when a key is
// stored under 'maggy-api-key', an X-API-Key header. Non-2xx responses reject
// with "<status>: <body or statusText>".
async function api(path, opts = {}) {
  const storedKey = localStorage.getItem('maggy-api-key') || '';
  const headers = { 'Content-Type': 'application/json', ...(opts.headers || {}) };
  if (storedKey) {
    headers['X-API-Key'] = storedKey;
  }
  const request = { ...opts, headers };
  const resp = await fetch(`${API}${path}`, request);
  if (resp.ok) {
    return resp.json();
  }
  // The body is optional context; fall back to the status text if unreadable.
  const body = await resp.text().catch(() => '');
  throw new Error(`${resp.status}: ${body || resp.statusText}`);
}

// ── HTML escape ─────────────────────────────────────────────────────────
// Encode & < > " ' as HTML entities. null/undefined become ''; other
// non-strings are stringified first.
function esc(s) {
  if (s == null) return '';
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  const text = typeof s === 'string' ? s : String(s);
  return text.replace(/[&<>"']/g, ch => entities[ch]);
}

// Return `url` HTML-escaped for use in an `href`, or '' when it must not be
// rendered. Only http:, https: and mailto: URLs are accepted (checked
// case-insensitively after trimming). esc() alone is not enough here: it
// encodes HTML metacharacters but does nothing about script-capable schemes
// such as javascript:, data: or vbscript:.
function safeHref(url) {
  if (typeof url !== 'string' || url === '') return '';
  const candidate = url.trim();
  const allowedScheme = /^(https?:|mailto:)/i;
  return allowedScheme.test(candidate) ? esc(candidate) : '';
}

// Escape a value for use inside a single-quoted JS string literal that is
// itself embedded in a double-quoted HTML attribute, e.g.
// onclick="executeTask('${jsStr(id)}', ...)".
//
// The browser HTML-decodes the attribute value first and only then runs the
// result as JavaScript, so the output has to be safe in both layers:
//   1. backslash is escaped first (so later escapes are not double-encoded)
//   2. the single quote that delimits the JS string is escaped
//   3. the double quote is escaped, or it would end the HTML attribute
//   4. the ampersand is escaped, or an input such as `&#39;` would be decoded
//      back into a quote before the script runs (this is also why esc() alone
//      is not sufficient: its entities are undone by that decoding step)
//   5. angle brackets are escaped in case the value is interpreted as HTML
//   6. line terminators are escaped so they cannot break the statement
// null/undefined become ''. "\r\n" collapses to a single "\n" escape.
function jsStr(s) {
  if (s === null || s === undefined) return '';
  return String(s)
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/"/g, '\\u0022')
    .replace(/&/g, '\\u0026')
    .replace(/</g, '\\u003C')
    .replace(/>/g, '\\u003E')
    .replace(/\r?\n/g, '\\n')
    .replace(/\r/g, '\\r')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');
}

// Format a timestamp relative to now: "just now", "Nm ago", "Nh ago",
// "Nd ago" (up to 30 days), otherwise the locale date string.
// Returns '' for a missing value and for one that Date cannot parse, so the
// UI never shows "Invalid Date".
function relDate(iso) {
  if (!iso) return '';
  const d = new Date(iso);
  const ms = d.getTime();
  if (Number.isNaN(ms)) return '';
  const diff = (Date.now() - ms) / 1000;
  if (diff < 60) return 'just now';
  if (diff < 3600) return `${Math.floor(diff/60)}m ago`;
  if (diff < 86400) return `${Math.floor(diff/3600)}h ago`;
  if (diff < 2592000) return `${Math.floor(diff/86400)}d ago`;
  return d.toLocaleDateString();
}

// ── Tabs ────────────────────────────────────────────────────────────────
// Activate `tab`: remember it, close the system dropdown, update the
// highlighted nav / dropdown entries, show only the matching pane and kick
// off that tab's loader.
function switchTab(tab) {
  CURRENT_TAB = tab;

  // The system dropdown never stays open across a tab change.
  const dropdown = document.getElementById('system-menu');
  if (dropdown) dropdown.classList.add('hidden');

  // Nav bar: exactly the button for this tab is active.
  document.querySelectorAll('.tab-btn').forEach((btn) => {
    btn.classList.toggle('active', btn.dataset.tab === tab);
  });

  // The gear is highlighted while any system tab is selected.
  const systemTabs = ['budget', 'routing', 'forge', 'settings'];
  const gear = document.getElementById('system-gear');
  if (gear) gear.classList.toggle('active', systemTabs.includes(tab));

  document.querySelectorAll('.sys-item').forEach((item) => {
    item.classList.toggle('text-orange-400', item.dataset.tab === tab);
  });

  // Panes: hide everything except pane-<tab>.
  const activePaneId = `pane-${tab}`;
  document.querySelectorAll('.pane').forEach((pane) => {
    pane.classList.toggle('hidden', pane.id !== activePaneId);
  });

  switch (tab) {
    case 'chat': loadChat(); break;
    case 'inbox': loadInbox(); break;
    case 'followed': loadFollowed(); break;
    case 'competitors': loadCompetitors(); break;
    case 'process': loadProcess(); break;
    case 'budget': loadBudget(); break;
    case 'routing': loadRouting(); break;
    case 'forge': loadForge(); break;
    case 'settings': loadSettings(); break;
    default: break;
  }
}

// Show the system dropdown if hidden, hide it if shown (no-op when absent).
function toggleSystemMenu() {
  const dropdown = document.getElementById('system-menu');
  if (!dropdown) return;
  dropdown.classList.toggle('hidden');
}

// Hide the system menu on any click that lands outside both the gear button
// and the menu itself.
document.addEventListener('click', (e) => {
  const menu = document.getElementById('system-menu');
  const gear = document.getElementById('system-gear');
  if (!(menu && gear)) return;
  const clickedInside = gear.contains(e.target) || menu.contains(e.target);
  if (!clickedInside) {
    menu.classList.add('hidden');
  }
});

// ── Drawer ──────────────────────────────────────────────────────────────
// Fill the drawer and slide it into view. `title` is set as plain text;
// `html` is inserted as markup, so callers must pass already-escaped HTML.
function openDrawer(title, html) {
  const byId = (id) => document.getElementById(id);
  byId('drawer-title').textContent = title;
  byId('drawer-body').innerHTML = html;
  byId('drawer').classList.remove('translate-x-full');
}
// Slide the drawer back off-screen.
function closeDrawer() {
  const drawer = document.getElementById('drawer');
  drawer.classList.add('translate-x-full');
}

// ── Inbox ───────────────────────────────────────────────────────────────
// Render the inbox pane: active CLI sessions, recent activity (first 10) and
// ranked issues. `refresh` adds ?refresh=true to the inbox request.
// Both requests run in parallel and each falls back to an empty result on
// failure, so one failing endpoint does not blank the other sections.
// Every API-supplied value is escaped before it is placed into the markup
// (esc() for HTML text, jsStr() inside inline handlers).
async function loadInbox(refresh = false) {
  const pane = document.getElementById('pane-inbox');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading…</div>`;
  const [activity, inbox] = await Promise.all([
    api('/activity').catch(() => ({ sessions: [], recent: [] })),
    api(`/inbox${refresh ? '?refresh=true' : ''}`).catch(() => ({ items: [] })),
  ]);
  const sessions = activity.sessions || [];
  const recent = activity.recent || [];
  const items = inbox.items || [];
  let html = '';
  if (sessions.length) {
    html += `<div class="mb-4"><h2 class="text-sm font-bold text-white mb-2"><i class="fas fa-terminal mr-1 text-green-400"></i>Active Sessions (${sessions.length})</h2><div class="space-y-2">`;
    for (const s of sessions) {
      const badge = s.status === 'agent'
        ? '<span class="text-[10px] px-1.5 py-0.5 rounded bg-purple-900 text-purple-300">agent</span>'
        : '<span class="text-[10px] px-1.5 py-0.5 rounded bg-green-900 text-green-300">running</span>';
      const label = s.status === 'agent' ? `${esc(s.agent_name)} @ ${esc(s.team_name)}` : esc(s.project || 'unknown');
      // pid goes through esc() like every other field from the API.
      html += `<div class="card p-3"><div class="flex items-center gap-2">
        <span class="text-[10px] font-mono text-blue-400 uppercase">${esc(s.cli)}</span>
        ${badge}
        <span class="text-sm text-white">${label}</span>
        <span class="text-[10px] text-gray-500 ml-auto">PID ${esc(s.pid)}</span>
      </div>
      ${s.last_prompt ? `<div class="text-[11px] text-gray-400 mt-1 truncate">"${esc(s.last_prompt)}"</div>` : ''}
      </div>`;
    }
    html += `</div></div>`;
  }
  if (recent.length) {
    html += `<div class="mb-4"><h2 class="text-sm font-bold text-white mb-2"><i class="fas fa-clock-rotate-left mr-1 text-yellow-400"></i>Recent Activity</h2><div class="space-y-1">`;
    for (const r of recent.slice(0, 10)) {
      html += `<div class="card p-2 flex items-center gap-2">
        <span class="text-[10px] font-mono text-blue-400 uppercase w-10">${esc(r.cli)}</span>
        <span class="text-[11px] text-gray-300 flex-1 truncate">${esc(r.text)}</span>
        <span class="text-[10px] text-gray-500 shrink-0">${r.project ? esc(r.project) + ' · ' : ''}${esc(relDate(r.timestamp))}</span>
      </div>`;
    }
    html += `</div></div>`;
  }
  if (items.length) {
    html += `<div class="mb-4"><div class="flex items-center gap-3 mb-2">
      <h2 class="text-sm font-bold text-white"><i class="fas fa-inbox mr-1 text-orange-400"></i>Issues (${items.length})</h2>
      <button onclick="loadInbox(true)" class="text-[10px] text-gray-400 hover:text-white"><i class="fas fa-rotate mr-1"></i>Re-rank</button>
    </div><div class="space-y-2">`;
    for (const i of items) {
      const labels = (i.labels || []).slice(0, 4).map(l => `<span class="text-[10px] px-1.5 py-0.5 rounded bg-gray-800 text-gray-400">${esc(l)}</span>`).join(' ');
      // The inner button group stops propagation so Plan/Execute do not also
      // open the task drawer via the card's own click handler.
      html += `<div class="card p-3 hover:bg-gray-900 cursor-pointer" onclick="openTaskDetail('${jsStr(i.id)}')">
        <div class="flex items-start gap-3">
          <div class="text-xs font-mono text-orange-400 mt-0.5">#${esc(i.rank)}</div>
          <div class="flex-1 min-w-0">
            <div class="text-sm text-white">${esc(i.title)}</div>
            <div class="text-[11px] text-gray-500 mt-0.5">
              <span class="text-blue-400">${esc(i.board || '')}</span>
              ${i.assignee ? `· ${esc(i.assignee)}` : ''}
              · ${esc(relDate(i.updated_at))}
              ${labels ? '· ' + labels : ''}
            </div>
            ${i.ai_reason ? `<div class="text-[11px] text-gray-400 mt-1 italic">"${esc(i.ai_reason)}"</div>` : ''}
          </div>
          <div class="flex gap-1 shrink-0" onclick="event.stopPropagation()">
            <button onclick="executeTask('${jsStr(i.id)}', 'plan')" class="text-[10px] px-2 py-1 rounded bg-gray-800 hover:bg-gray-700 text-gray-300">Plan</button>
            <button onclick="executeTask('${jsStr(i.id)}', 'tdd')" class="text-[10px] px-2 py-1 rounded bg-orange-600 hover:bg-orange-700 text-white">Execute</button>
          </div>
        </div>
      </div>`;
    }
    html += `</div></div>`;
  }
  if (!sessions.length && !recent.length && !items.length) {
    html = `<div class="card p-4 text-sm text-gray-400">No activity detected. Start a Claude, Codex, or Kimi session to see it here.</div>`;
  }
  pane.innerHTML = html;
}

// ── Followed ────────────────────────────────────────────────────────────
// Render the "followed" pane from /followed: a placeholder when the list is
// empty, one clickable card per task otherwise, or an error card when the
// request fails.
async function loadFollowed() {
  const pane = document.getElementById('pane-followed');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading followed tasks…</div>`;
  try {
    const response = await api('/followed');
    const followed = response.items || [];
    if (!followed.length) {
      pane.innerHTML = `<div class="card p-4 text-sm text-gray-400">Nothing you're following right now.</div>`;
      return;
    }
    const parts = [
      `<h2 class="text-sm font-bold text-white mb-3">Following (${followed.length})</h2><div class="space-y-2">`,
    ];
    for (const task of followed) {
      parts.push(`<div class="card p-3 hover:bg-gray-900 cursor-pointer" onclick="openTaskDetail('${jsStr(task.id)}')">
        <div class="text-sm text-white">${esc(task.title)}</div>
        <div class="text-[11px] text-gray-500 mt-0.5">
          <span class="text-blue-400">${esc(task.board || '')}</span>
          ${task.assignee ? `· ${esc(task.assignee)}` : ''}
          · ${esc(relDate(task.updated_at))}
        </div>
      </div>`);
    }
    parts.push(`</div>`);
    pane.innerHTML = parts.join('');
  } catch (e) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(e.message)}</div>`;
  }
}

// ── Task detail drawer ──────────────────────────────────────────────────
// Open the drawer for a single task. Fetches /task/<id>, sets the drawer title
// to the task title, then renders: details, optional description, Plan/Execute
// buttons, optional comment thread, and a reply box. Any failure is shown in
// the drawer body.
async function openTaskDetail(taskId) {
  openDrawer('Loading…', '<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading task…</div>');
  try {
    const payload = await api(`/task/${encodeURIComponent(taskId)}`);
    const task = payload.task;
    const thread = payload.comments || [];
    document.getElementById('drawer-title').textContent = task.title;

    // Each section is pushed in display order and joined at the end.
    const sections = [];
    sections.push(`<div class="space-y-3">
      <div class="card p-3">
        <div class="text-[10px] text-gray-500 uppercase mb-1">Details</div>
        <div class="flex flex-wrap gap-2 text-[11px] text-gray-400">
          <span class="text-blue-400">${esc(task.board)}</span>
          <span>${esc(task.status)}</span>
          ${task.assignee ? `<span>@${esc(task.assignee)}</span>` : ''}
          <span>${esc(relDate(task.updated_at))}</span>
          ${safeHref(task.url) ? `<a href="${safeHref(task.url)}" target="_blank" rel="noopener noreferrer" class="text-orange-400">Open ↗</a>` : ''}
        </div>
      </div>`);

    if (task.description) {
      sections.push(`<div class="card p-3"><div class="text-[10px] text-gray-500 uppercase mb-1">Description</div><pre class="text-xs text-gray-300 max-h-48 overflow-y-auto">${esc(task.description)}</pre></div>`);
    }

    sections.push(`<div class="flex gap-2">
      <button onclick="executeTask('${jsStr(task.id)}', 'plan')" class="flex-1 text-xs px-3 py-1.5 rounded bg-gray-700 hover:bg-gray-600 text-white"><i class="fas fa-list-check mr-1"></i>Plan</button>
      <button onclick="executeTask('${jsStr(task.id)}', 'tdd')" class="flex-1 text-xs px-3 py-1.5 rounded bg-orange-600 hover:bg-orange-700 text-white"><i class="fas fa-play mr-1"></i>Execute (TDD)</button>
    </div>`);

    if (thread.length) {
      sections.push(`<div class="card p-3"><div class="text-[10px] text-gray-500 uppercase mb-2">Comments (${thread.length})</div><div class="space-y-2 max-h-64 overflow-y-auto">`);
      for (const note of thread) {
        sections.push(`<div class="bg-gray-900 rounded p-2">
          <div class="flex justify-between text-[10px] text-gray-500 mb-1"><span>${esc(note.author)}</span><span>${esc(relDate(note.created_at))}</span></div>
          <div class="text-xs text-gray-300 whitespace-pre-wrap">${esc(note.text)}</div>
        </div>`);
      }
      sections.push(`</div></div>`);
    }

    sections.push(`<div class="card p-3">
      <div class="text-[10px] text-gray-500 uppercase mb-1">Reply</div>
      <textarea id="reply-box" rows="3" class="w-full bg-gray-900 text-xs text-white rounded px-2 py-1.5 border border-gray-700"></textarea>
      <button onclick="postReply('${jsStr(task.id)}')" class="mt-2 text-xs px-3 py-1 rounded bg-blue-600 text-white">Post</button>
    </div>`);
    sections.push(`</div>`);

    document.getElementById('drawer-body').innerHTML = sections.join('');
  } catch (err) {
    document.getElementById('drawer-body').innerHTML = `<div class="text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// Post the trimmed contents of #reply-box as a comment on the task, then
// re-open the task drawer so the new comment shows up. Blank input is ignored.
async function postReply(taskId) {
  const replyText = document.getElementById('reply-box').value.trim();
  if (replyText === '') return;

  const request = {
    method: 'POST',
    body: JSON.stringify({ text: replyText }),
  };
  try {
    await api(`/task/${encodeURIComponent(taskId)}/comment`, request);
    openTaskDetail(taskId);  // refresh
  } catch (err) {
    alert('Failed to post: ' + err.message);
  }
}

// Start an execution session for a task via POST /execute, announce the new
// session id, and switch to the Sessions tab. `mode` is passed through to the
// backend unchanged (the buttons in this file send 'plan' or 'tdd').
async function executeTask(taskId, mode) {
  const request = {
    method: 'POST',
    body: JSON.stringify({ task_id: taskId, mode: mode }),
  };
  try {
    const started = await api('/execute', request);
    alert(`Started session ${started.session_id} (${mode}). Open the Sessions tab to follow progress.`);
    switchTab('sessions');
  } catch (err) {
    alert('Execute failed: ' + err.message);
  }
}

// ── Competitors ─────────────────────────────────────────────────────────
// Active sub-view of the Competitors pane. loadCompetitors() reads it to pick
// what to render; the inline onclick handlers on the two view pills assign it.
let COMP_VIEW = 'news';  // 'news' | 'list'

// Render the Competitors pane. Fetches the competitor list and the news feed
// in parallel (a failed news request is treated as an empty feed), draws the
// view-switch toolbar, then renders either the news view (briefing slot +
// feed) or the competitor card grid depending on COMP_VIEW.
async function loadCompetitors() {
  const pane = document.getElementById('pane-competitors');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading competitors…</div>`;
  try {
    const [comps, news] = await Promise.all([
      api('/competitors'),
      api('/competitors/news?limit=100').catch(() => []),
    ]);

    const showingNews = COMP_VIEW === 'news';
    const showingList = COMP_VIEW === 'list';
    const PILL_ON = 'bg-orange-600 text-white';
    const PILL_OFF = 'bg-gray-800 text-gray-300';
    // The right-hand action depends on the view: Scan for news, Discover for the list.
    const actionButton = showingNews
      ? '<button onclick="scanCompetitors()" class="text-[10px] px-3 py-1 rounded bg-gray-700 text-gray-300 hover:bg-gray-600"><i class="fas fa-rotate mr-1"></i>Scan</button>'
      : '<button onclick="discoverCompetitors()" class="text-[10px] px-3 py-1 rounded bg-purple-600 text-white hover:bg-purple-700"><i class="fas fa-magnifying-glass-plus mr-1"></i>Discover More</button>';

    const toolbar = `<div class="flex items-center gap-2 mb-3">
      <button onclick="COMP_VIEW='news'; loadCompetitors()" class="text-[10px] px-3 py-1.5 rounded-full ${showingNews ? PILL_ON : PILL_OFF}"><i class="fas fa-newspaper mr-1"></i>News (${news.length})</button>
      <button onclick="COMP_VIEW='list'; loadCompetitors()" class="text-[10px] px-3 py-1.5 rounded-full ${showingList ? PILL_ON : PILL_OFF}"><i class="fas fa-list mr-1"></i>Competitors (${comps.length})</button>
      <div class="flex-1"></div>
      ${actionButton}
    </div>`;

    if (showingNews) {
      const briefingSlot = `<div id="briefing" class="card p-4 mb-3 border-purple-700/50"><div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading daily briefing…</div></div>`;
      pane.innerHTML = toolbar + briefingSlot + renderNewsFeed(news);
      loadBriefing();
      return;
    }

    let listing;
    if (!comps.length) {
      listing = `<div class="card p-4 text-sm text-gray-400">No competitors yet. Click <b>Discover More</b> to have Maggy find competitors in your domain.</div>`;
    } else {
      const cards = [];
      for (const rival of comps) {
        cards.push(`<div class="card p-3">
            <div class="text-sm font-bold text-white">${esc(rival.name)}</div>
            <div class="text-[10px] text-gray-500">${esc(rival.category || '')} · ${esc(rival.website || '')}</div>
            <div class="text-xs text-gray-400 mt-2">${esc(rival.description || '')}</div>
          </div>`);
      }
      listing = `<div class="grid grid-cols-1 md:grid-cols-2 gap-3">` + cards.join('') + `</div>`;
    }
    pane.innerHTML = toolbar + listing;
  } catch (err) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// Build the HTML for the competitor news feed. Returns a placeholder card for
// an empty list; otherwise renders at most the first 80 items, each with an
// icon chosen by event_type (falling back to a grey circle), title, competitor
// name, a "blog"/"news" source label, relative date and an optional link.
function renderNewsFeed(news) {
  if (!news.length) return '<div class="card p-4 text-sm text-gray-400">No competitor news yet. Click <b>Scan</b> to fetch.</div>';

  const iconByEvent = {
    feature_launch: 'fa-rocket text-cyan-400',
    acquisition: 'fa-handshake text-yellow-400',
    partnership: 'fa-link text-green-400',
    pricing_change: 'fa-tag text-orange-400',
    funding: 'fa-dollar-sign text-green-400',
    blog_post: 'fa-rss text-blue-400',
    news: 'fa-newspaper text-gray-400',
  };

  const rows = [];
  for (const entry of news.slice(0, 80)) {
    const iconClass = iconByEvent[entry.event_type] || 'fa-circle text-gray-500';
    rows.push(`<div class="card px-3 py-2 flex items-start gap-2">
      <i class="fas ${iconClass} text-[10px] mt-1.5"></i>
      <div class="flex-1 min-w-0">
        <div class="text-xs text-white">${esc(entry.title)}</div>
        <div class="text-[10px] text-gray-500 mt-0.5">
          <span class="text-orange-400">${esc(entry.competitor_name)}</span>
          · ${esc(entry.source === 'rss' ? 'blog' : 'news')}
          · ${esc(relDate(entry.created_at))}
        </div>
      </div>
      ${safeHref(entry.url) ? `<a href="${safeHref(entry.url)}" target="_blank" rel="noopener noreferrer" class="text-blue-400 text-[10px]"><i class="fas fa-external-link-alt"></i></a>` : ''}
    </div>`);
  }
  return `<div class="space-y-1.5 max-h-[70vh] overflow-y-auto">` + rows.join('') + `</div>`;
}

// Fetch the daily competitor briefing and render it into #briefing.
//
// #briefing only exists while the Competitors pane is showing its news view,
// and the pane can be re-rendered while the request is in flight. The element
// is therefore looked up after the await and every write is guarded; without
// the guard a missing element throws inside the catch handler as well and the
// promise rejects unhandled.
async function loadBriefing() {
  try {
    const data = await api('/competitors/news/summary');
    const box = document.getElementById('briefing');
    if (!box) return;  // view changed while loading; nothing to update
    // total_signals comes from the server, so it is escaped like every other
    // interpolated value before going into innerHTML.
    box.innerHTML = `
      <div class="flex items-center justify-between mb-2">
        <div class="text-[10px] text-purple-400 uppercase font-bold"><i class="fas fa-robot mr-1"></i>Daily Briefing — ${esc(data.date || '')}</div>
        <button onclick="regenerateBriefing()" class="text-[10px] text-gray-500 hover:text-purple-400"><i class="fas fa-sync-alt mr-1"></i>Regenerate</button>
      </div>
      <pre class="text-xs text-gray-300">${esc(data.summary || '')}</pre>
      <div class="text-[10px] text-gray-600 mt-2">${esc(data.total_signals || 0)} signals analyzed</div>`;
  } catch (e) {
    const box = document.getElementById('briefing');
    if (box) box.innerHTML = `<div class="text-xs text-red-400">Briefing failed: ${esc(e.message)}</div>`;
  }
}

// Ask the backend to rebuild the daily briefing (summary endpoint with
// refresh=true), then reload the briefing card. #briefing is captured up front
// and every write to it is skipped when the element is absent.
async function regenerateBriefing() {
  const box = document.getElementById('briefing');
  const hasBox = Boolean(box);
  if (hasBox) {
    box.innerHTML = '<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Regenerating…</div>';
  }
  try {
    await api('/competitors/news/summary?refresh=true');
    loadBriefing();
  } catch (err) {
    if (hasBox) {
      box.innerHTML = `<div class="text-xs text-red-400">Regenerate failed: ${esc(err.message)}</div>`;
    }
  }
}

// After user confirmation, POST /competitors/discover, report how many
// competitors were added, and refresh the Competitors pane.
async function discoverCompetitors() {
  const approved = confirm('Ask Maggy to discover competitors for your domain? This calls the AI.');
  if (!approved) return;
  try {
    const outcome = await api('/competitors/discover', { method: 'POST' });
    alert(`Added ${outcome.added} new competitors (total: ${outcome.total})`);
    loadCompetitors();
  } catch (err) {
    alert('Discovery failed: ' + err.message);
  }
}

// POST /competitors/monitor, report the blog-post and news counts from the
// response (missing counts shown as 0), and refresh the Competitors pane.
async function scanCompetitors() {
  try {
    const scan = await api('/competitors/monitor', { method: 'POST' });
    const blogCount = scan.rss || 0;
    const newsCount = scan.news || 0;
    alert(`Found ${blogCount} blog posts + ${newsCount} news items across ${scan.total_competitors} competitors`);
    loadCompetitors();
  } catch (err) {
    alert('Scan failed: ' + err.message);
  }
}

// ── Chat ────────────────────────────────────────────────────────────────
// Id of the chat session currently shown in the chat pane. Stays null until
// loadChat() auto-picks the first session or the user opens/creates one.
let CHAT_SESSION_ID = null;
// Session list returned by the most recent /chat/auto-connect call; the
// sidebar is rendered from this cache.
let CHAT_SESSIONS_CACHE = [];

// Entry point for the Chat tab: POST /chat/auto-connect, cache the returned
// sessions, select the first one when nothing is selected yet, and render the
// chat UI. A failed request is shown as an error card.
async function loadChat() {
  const pane = document.getElementById('pane-chat');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Auto-connecting to active projects…</div>`;
  try {
    const connected = await api('/chat/auto-connect', { method: 'POST' });
    CHAT_SESSIONS_CACHE = connected.sessions || [];
    const nothingSelected = !CHAT_SESSION_ID;
    if (nothingSelected && CHAT_SESSIONS_CACHE.length) {
      const [first] = CHAT_SESSIONS_CACHE;
      CHAT_SESSION_ID = first.id;
    }
    renderChatUI(pane);
  } catch (err) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// Draw the two-column chat layout (session sidebar + main area) into `pane`
// from the cached session list, then load messages for the selected session
// when there is one.
function renderChatUI(pane) {
  const sidebar = renderChatSidebar(CHAT_SESSIONS_CACHE);
  const main = renderChatMain();
  pane.innerHTML = `<div class="flex h-[calc(100vh-10rem)]">${sidebar}${main}</div>`;
  if (CHAT_SESSION_ID) {
    loadChatMessages(CHAT_SESSION_ID);
  }
}

// Build the chat sidebar HTML: a header with a "New" button followed by one
// clickable card per session. The card whose id equals CHAT_SESSION_ID is
// highlighted; sessions with history_context get it as a tooltip plus a
// "has history" marker. An empty list renders a placeholder line.
function renderChatSidebar(sessions) {
  const chunks = [];
  chunks.push(`<div class="w-60 shrink-0 border-r border-gray-800 pr-3 overflow-y-auto">`);
  chunks.push(`<div class="flex items-center justify-between mb-2">
    <span class="text-[10px] text-gray-500 uppercase font-bold"><i class="fas fa-circle text-green-400 text-[8px] mr-1"></i>Connected Projects</span>
    <button onclick="newChatSession()" class="text-[10px] px-2 py-1 rounded bg-orange-600 hover:bg-orange-700 text-white"><i class="fas fa-plus mr-1"></i>New</button>
  </div><div class="space-y-1">`);

  if (!sessions.length) {
    chunks.push(`<div class="text-[10px] text-gray-500 p-2">No active CLI sessions detected</div>`);
  }

  for (const session of sessions) {
    const isCurrent = session.id === CHAT_SESSION_ID;
    const stateClasses = isCurrent ? 'bg-gray-800 border-orange-500' : 'border-transparent hover:bg-gray-900';
    let tooltipAttr = '';
    if (session.history_context) {
      tooltipAttr = ' title="' + esc(session.history_context) + '"';
    }
    chunks.push(`<div class="card px-2 py-1.5 cursor-pointer border ${stateClasses}" onclick="openChatSession('${jsStr(session.id)}')"${tooltipAttr}>
      <div class="flex items-center gap-1"><i class="fas fa-circle text-green-400 text-[6px]"></i><span class="text-xs text-white truncate">${esc(session.project_key)}</span></div>
      <div class="text-[10px] text-gray-500 truncate">${esc(session.working_dir)}</div>
      ${session.history_context ? '<div class="text-[9px] text-gray-600 mt-0.5 truncate"><i class="fas fa-history mr-0.5"></i>has history</div>' : ''}
    </div>`);
  }

  chunks.push(`</div></div>`);
  return chunks.join('');
}

// Build the main chat column. With a selected session it contains the message
// list (#chat-messages) and the input row (#chat-input + send button);
// without one it shows a "no active sessions" placeholder.
function renderChatMain() {
  const opening = `<div class="flex-1 flex flex-col pl-4">`;
  const closing = `</div>`;

  if (!CHAT_SESSION_ID) {
    return opening + `<div class="flex-1 flex items-center justify-center">
      <div class="text-center">
        <i class="fas fa-robot text-4xl text-gray-700 mb-3"></i>
        <div class="text-sm text-gray-400 mb-2">No active CLI sessions detected</div>
        <div class="text-xs text-gray-500">Start a Claude Code session in any project and Maggy will auto-connect</div>
      </div>
    </div>` + closing;
  }

  const messageList = `<div id="chat-messages" class="flex-1 overflow-y-auto space-y-3 mb-3"></div>`;
  const composer = `<div class="shrink-0 flex gap-2">
      <input id="chat-input" type="text" placeholder="Type a message to Claude…"
        class="flex-1 bg-gray-900 text-sm text-white rounded px-3 py-2 border border-gray-700 focus:border-orange-500 outline-none"
        onkeydown="if(event.key==='Enter')sendChatMessage()" />
      <button onclick="sendChatMessage()" class="px-4 py-2 rounded bg-orange-600 hover:bg-orange-700 text-white text-sm"><i class="fas fa-paper-plane"></i></button>
    </div>`;
  return opening + messageList + composer + closing;
}

// Create a new chat session for a project chosen by the user.
//
// Candidate projects are the union of active sessions (from /activity) and
// configured codebases (from /config), de-duplicated by key with active
// sessions taking precedence. With a single candidate it is used directly;
// otherwise the user is prompted with a numbered list.
//
// The prompt accepts either a project name or its list number. A typed name
// always wins over a number, so a project that is literally named "2" is still
// selectable by name. Anything that matches neither is sent as a new project
// key with an empty path.
async function newChatSession() {
  let projects;
  try {
    const [cfg, activity] = await Promise.all([
      api('/config').catch(() => ({ codebases: [] })),
      api('/activity').catch(() => ({ sessions: [] })),
    ]);
    const configProjects = (cfg.codebases || []).map(c => ({ key: c.key, path: c.path }));
    const activeProjects = (activity.sessions || []).map(s => ({ key: s.project, path: s.project_path }));
    const seen = new Set();
    projects = [];
    for (const p of [...activeProjects, ...configProjects]) {
      if (p.key && !seen.has(p.key)) { seen.add(p.key); projects.push(p); }
    }
  } catch { projects = []; }
  if (!projects.length) { alert('No codebases found.'); return; }
  let chosen = projects[0];
  if (projects.length > 1) {
    const menu = projects.map((p, i) => `${i+1}. ${p.key}`).join('\n');
    const answer = prompt('Select project:\n' + menu + '\n\nEnter name or number:', projects[0].key);
    // prompt() returns null on cancel; surrounding whitespace is never part of a key.
    const name = (answer || '').trim();
    if (!name) return;
    let match = projects.find(p => p.key === name);
    if (!match && /^\d+$/.test(name)) {
      // Out-of-range numbers index to undefined and fall through to the
      // free-text case below.
      match = projects[Number(name) - 1];
    }
    chosen = match || { key: name, path: '' };
  }
  try {
    const data = await api('/chat/sessions', { method: 'POST', body: JSON.stringify({ project_key: chosen.key, project_path: chosen.path }) });
    CHAT_SESSION_ID = data.id;
    loadChat();
  } catch (e) { alert('Failed: ' + e.message); }
}

// Select a chat session and redraw the chat pane (when it exists) so the
// sidebar highlight and message list follow the selection.
function openChatSession(id) {
  CHAT_SESSION_ID = id;
  const chatPane = document.getElementById('pane-chat');
  if (!chatPane) return;
  renderChatUI(chatPane);
}

// Fetch one chat session and render it into #chat-messages: a header line,
// the stored history context (only while the session has no messages yet),
// then every message in order. The list is scrolled to the bottom afterwards.
// Does nothing when #chat-messages is not in the DOM.
async function loadChatMessages(id) {
  const el = document.getElementById('chat-messages');
  if (!el) return;
  try {
    // The id is a URL path segment, so it is encoded the same way the task
    // endpoints encode theirs.
    const data = await api(`/chat/sessions/${encodeURIComponent(id)}`);
    const messages = data.messages || [];
    let html = renderSessionHeader(data);
    if (data.history_context && !messages.length) {
      html += renderHistoryContext(data.history_context);
    }
    for (const m of messages) {
      html += m.role === 'user' ? renderUserMsg(m) : renderAssistantMsg(m);
    }
    el.innerHTML = html;
    el.scrollTop = el.scrollHeight;
  } catch (e) {
    el.innerHTML = `<div class="text-xs text-red-400">${esc(e.message)}</div>`;
  }
}

// One-line header for a chat session: project key and working directory,
// both escaped.
function renderSessionHeader(data) {
  const project = esc(data.project_key);
  const directory = esc(data.working_dir);
  return `<div class="text-[10px] text-gray-500 mb-2"><i class="fas fa-folder-open mr-1"></i>${project} · <span class="font-mono">${directory}</span></div>`;
}

// Card showing a session's stored history context as escaped preformatted text.
function renderHistoryContext(ctx) {
  const safeContext = esc(ctx);
  return `<div class="card px-3 py-2 mb-2 border border-gray-700 bg-gray-900/50">
    <div class="text-[10px] text-gray-400 font-bold mb-1"><i class="fas fa-history mr-1"></i>Session History (Maggy knows this)</div>
    <pre class="text-[10px] text-gray-500 whitespace-pre-wrap">${safeContext}</pre>
  </div>`;
}

// Right-aligned bubble for a user message: escaped content plus the relative
// timestamp.
function renderUserMsg(m) {
  const text = esc(m.content);
  const when = esc(relDate(m.timestamp));
  return `<div class="flex justify-end"><div class="max-w-[80%] bg-orange-600/20 border border-orange-600/30 rounded-lg px-3 py-2">
    <div class="text-xs text-white">${text}</div>
    <div class="text-[10px] text-gray-500 mt-1">${when}</div>
  </div></div>`;
}

// Left-aligned card for a non-user message: escaped content in a wrapping
// <pre>, plus the relative timestamp.
function renderAssistantMsg(m) {
  const text = esc(m.content);
  const when = esc(relDate(m.timestamp));
  return `<div class="flex justify-start"><div class="max-w-[80%] card px-3 py-2">
    <pre class="text-xs text-gray-300 whitespace-pre-wrap">${text}</pre>
    <div class="text-[10px] text-gray-500 mt-1">${when}</div>
  </div></div>`;
}

// Send the text in #chat-input to the current chat session and stream the
// reply into a freshly appended placeholder bubble.
//
// Does nothing when the input or message list is missing, the message is
// blank, or no session is selected. The input is disabled while the request
// runs and is always re-enabled afterwards.
async function sendChatMessage() {
  const input = document.getElementById('chat-input');
  if (!input) return;
  const message = input.value.trim();
  if (!message || !CHAT_SESSION_ID) return;
  // Resolve the message list before touching the input, so a missing list
  // cannot leave the input cleared and disabled.
  const el = document.getElementById('chat-messages');
  if (!el) return;
  input.value = '';
  input.disabled = true;

  // Bubbles from earlier exchanges still carry the stream ids. Ids must be
  // unique for getElementById to find the new bubble rather than the first
  // old one, so retire the stale ids before adding a new placeholder.
  document.querySelectorAll('#stream-response, #stream-text').forEach((stale) => {
    stale.removeAttribute('id');
  });

  // insertAdjacentHTML appends without re-parsing the existing messages,
  // which `innerHTML +=` would do.
  el.insertAdjacentHTML('beforeend', renderUserMsg({ content: message, timestamp: '' }));
  el.insertAdjacentHTML('beforeend', `<div id="stream-response" class="flex justify-start"><div class="max-w-[80%] card px-3 py-2">
    <pre id="stream-text" class="text-xs text-gray-300"><i class="fas fa-spinner fa-spin text-orange-400"></i> Claude is thinking…</pre>
  </div></div>`);
  el.scrollTop = el.scrollHeight;
  try {
    await streamChatResponse(message, el);
  } catch (e) {
    const streamEl = document.getElementById('stream-text');
    if (streamEl) streamEl.innerHTML = `<span class="text-red-400">Error: ${esc(e.message)}</span>`;
  } finally {
    input.disabled = false;
    input.focus();
  }
}

// POST `message` to the current chat session and stream the reply into the
// placeholder bubble (#stream-text), scrolling `el` as text arrives.
//
// The response body is read as newline-delimited `data: <json>` lines. Each
// JSON payload may carry `type` ('done' | 'error') and/or `content`.
//
// Throws when the request fails at the network level or returns a non-2xx
// status; the caller is expected to display the error.
async function streamChatResponse(message, el) {
  const apiKey = localStorage.getItem('maggy-api-key') || '';
  const resp = await fetch(`${API}/chat/sessions/${encodeURIComponent(CHAT_SESSION_ID)}/send`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', ...(apiKey ? { 'X-API-Key': apiKey } : {}) },
    body: JSON.stringify({ message }),
  });
  // fetch() resolves on HTTP errors too; without this check a 4xx/5xx body
  // would be parsed as an empty stream and shown as "(no response)".
  if (!resp.ok || !resp.body) {
    throw new Error(`Request failed (HTTP ${resp.status})`);
  }

  // Earlier bubbles may still carry the same id; the newest placeholder is the
  // last match in document order.
  const candidates = document.querySelectorAll('#stream-text');
  const streamEl = candidates.length ? candidates[candidates.length - 1] : null;

  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  let responseText = '';
  let sawError = false;
  let pending = '';  // trailing partial line carried over to the next chunk

  const handleLine = (line) => {
    if (!line.startsWith('data: ')) return;
    let data;
    try {
      data = JSON.parse(line.slice(6));
    } catch {
      return;  // not JSON: ignore this line, keep streaming
    }
    if (!data || data.type === 'done') return;
    if (data.type === 'error') {
      sawError = true;
      if (streamEl) streamEl.innerHTML = `<span class="text-red-400">${esc(data.content)}</span>`;
      return;
    }
    if (data.content) {
      responseText += data.content;
      if (streamEl) streamEl.textContent = responseText;
      if (el) el.scrollTop = el.scrollHeight;
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Network chunks do not align with line boundaries, so only complete
    // lines are processed and the remainder is kept for the next read.
    pending += decoder.decode(value, { stream: true });
    const lines = pending.split('\n');
    pending = lines.pop();
    lines.forEach(handleLine);
  }
  // Flush any bytes still buffered in the decoder plus a final unterminated line.
  pending += decoder.decode();
  if (pending) handleLine(pending);

  // Keep a streamed error visible instead of replacing it with the placeholder.
  if (!responseText && !sawError && streamEl) streamEl.textContent = '(no response)';
}

// ── Settings ────────────────────────────────────────────────────────────
// Render the read-only Settings pane from GET /config: org, issue tracker,
// codebases, competitor categories, OKR source/count and AI provider/model
// with an indicator for whether an API key is set. A failed request, or a
// config missing one of the sections read here, ends up in the error card.
async function loadSettings() {
  const pane = document.getElementById('pane-settings');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading settings…</div>`;
  try {
    const cfg = await api('/config');
    // Every server-supplied value is escaped before it reaches innerHTML,
    // including the OKR count. Categories go through an explicit arrow so
    // esc() only ever receives the value, not map()'s index/array arguments.
    const categories = cfg.competitors.categories.map(c => esc(c)).join(', ') || '—';
    pane.innerHTML = `
      <h2 class="text-sm font-bold text-white mb-3">Settings</h2>
      <div class="card p-4 space-y-3 text-sm text-gray-300">
        <div><span class="text-gray-500 text-[10px] uppercase">Org</span> — <b>${esc(cfg.org.name)}</b> ${cfg.org.domain ? `(domain: <span class="text-orange-400">${esc(cfg.org.domain)}</span>)` : ''}</div>
        <div><span class="text-gray-500 text-[10px] uppercase">Issue Tracker</span> — ${esc(cfg.issue_tracker.provider)}</div>
        <div><span class="text-gray-500 text-[10px] uppercase">Codebases</span>
          <ul class="ml-4 text-xs">${cfg.codebases.map(c => `<li>${esc(c.key)} → <code class="text-gray-400">${esc(c.path)}</code></li>`).join('')}</ul>
        </div>
        <div><span class="text-gray-500 text-[10px] uppercase">Competitors</span> — categories: ${categories}</div>
        <div><span class="text-gray-500 text-[10px] uppercase">OKRs</span> — source: ${esc(cfg.okrs.source)} (${esc(cfg.okrs.count || 0)} items)</div>
        <div><span class="text-gray-500 text-[10px] uppercase">AI</span> — ${esc(cfg.ai.provider)} / ${esc(cfg.ai.model)} · API key ${cfg.ai.has_key ? '<span class="text-green-400">set</span>' : '<span class="text-red-400">MISSING</span>'}</div>
      </div>
      <p class="text-[11px] text-gray-500 mt-4">Edit <code>~/.maggy/config.yaml</code> and restart Maggy to apply changes.</p>
    `;
  } catch (e) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(e.message)}</div>`;
  }
}

// ── Budget ──────────────────────────────────────────────────────────────
// Render the Token Budget pane: three summary tiles (spent today, daily
// limit, utilization %) coloured by budget status, followed by a per-provider
// spend list when the by-provider response is non-empty.
async function loadBudget() {
  const pane = document.getElementById('pane-budget');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading budget…</div>`;
  try {
    const [status, byProvider] = await Promise.all([
      api('/budget'),
      api('/budget/by-provider'),
    ]);

    // 'ok' is green, 'warning' is yellow, anything else is red.
    let statusColor = 'text-red-400';
    if (status.status === 'ok') {
      statusColor = 'text-green-400';
    } else if (status.status === 'warning') {
      statusColor = 'text-yellow-400';
    }

    const parts = [];
    parts.push(`<h2 class="text-sm font-bold text-white mb-3">Token Budget</h2>`);
    parts.push(`<div class="grid grid-cols-1 md:grid-cols-3 gap-3 mb-4">
      <div class="card p-4 text-center">
        <div class="text-2xl font-bold ${statusColor}">$${esc(status.spent_today_usd)}</div>
        <div class="text-[10px] text-gray-500">Spent Today</div>
      </div>
      <div class="card p-4 text-center">
        <div class="text-2xl font-bold text-gray-300">$${esc(status.daily_limit_usd)}</div>
        <div class="text-[10px] text-gray-500">Daily Limit</div>
      </div>
      <div class="card p-4 text-center">
        <div class="text-2xl font-bold ${statusColor}">${esc(Math.round(status.utilization * 100))}%</div>
        <div class="text-[10px] text-gray-500">${esc(status.status)}</div>
      </div>
    </div>`);

    // The endpoint may answer with { providers: [...] } or with a bare array.
    const providers = byProvider.providers || byProvider || [];
    if (providers.length) {
      parts.push(`<h3 class="text-xs font-bold text-gray-400 mb-2">By Provider</h3><div class="space-y-1">`);
      for (const entry of providers) {
        parts.push(`<div class="card px-3 py-2 flex justify-between"><span class="text-xs text-white">${esc(entry.provider)}</span><span class="text-xs text-orange-400">$${esc(entry.spent_usd)}</span></div>`);
      }
      parts.push(`</div>`);
    }
    pane.innerHTML = parts.join('');
  } catch (err) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// ── Model Routing ───────────────────────────────────────────────────────
// Render the Model Performance Heatmap pane from GET /routing/heatmap as a
// table of model / task type / blast tier / average reward / samples. The
// reward cell is green at >= 0.7, yellow at >= 0.4 and red otherwise.
async function loadRouting() {
  const pane = document.getElementById('pane-routing');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading model performance…</div>`;

  const rewardColor = (reward) => {
    if (reward >= 0.7) return 'text-green-400';
    if (reward >= 0.4) return 'text-yellow-400';
    return 'text-red-400';
  };

  try {
    const response = await api('/routing/heatmap');
    // The endpoint may answer with { heatmap: [...] } or with a bare array.
    const rows = response.heatmap || response || [];
    const parts = [`<h2 class="text-sm font-bold text-white mb-3">Model Performance Heatmap</h2>`];
    if (!rows.length) {
      parts.push(`<div class="card p-4 text-sm text-gray-400">No reward data yet. Execute some tasks to build the heatmap.</div>`);
    } else {
      parts.push(`<div class="overflow-x-auto"><table class="text-xs w-full"><thead><tr class="text-gray-500">
        <th class="text-left p-2">Model</th><th class="text-left p-2">Task Type</th><th class="text-left p-2">Blast Tier</th><th class="text-right p-2">Avg Reward</th><th class="text-right p-2">Samples</th>
      </tr></thead><tbody>`);
      for (const row of rows) {
        const color = rewardColor(row.avg_reward);
        parts.push(`<tr class="border-t border-gray-800"><td class="p-2 text-white">${esc(row.model)}</td><td class="p-2">${esc(row.task_type)}</td><td class="p-2">${esc(row.blast_tier)}</td><td class="p-2 text-right ${color}">${esc(row.avg_reward)}</td><td class="p-2 text-right text-gray-500">${esc(row.samples)}</td></tr>`);
      }
      parts.push(`</tbody></table></div>`);
    }
    pane.innerHTML = parts.join('');
  } catch (err) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// ── Process Intelligence ────────────────────────────────────────────────
// Render the Process Intelligence pane. Five endpoints are fetched in
// parallel; each has its own fallback value so one failing request does not
// blank the whole pane. The sections are stats, session patterns, health
// signals, live activity and quick actions.
async function loadProcess() {
  const pane = document.getElementById('pane-process');
  pane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading process intelligence…</div>`;
  try {
    const requests = [
      api('/events/count').catch(() => ({ count: 0 })),
      api('/history/report').catch(() => ({ status: 'no_data' })),
      api('/improve/report').catch(() => ({ report: null })),
      api('/cikg/landscape').catch(() => ({ technologies: 0 })),
      api('/activity').catch(() => ({ sessions: [], recent: [] })),
    ];
    const [events, history, improve, landscape, activity] = await Promise.all(requests);
    pane.innerHTML = `<h2 class="text-sm font-bold text-white mb-3">Process Intelligence</h2>`
      + renderPIStats(events, history, landscape)
      + renderPIPatterns(history)
      + renderPIHealth(improve)
      + renderPIActivity(activity)
      + renderPIActions();
  } catch (err) {
    pane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// Four headline counters for the Process Intelligence pane: events, CLI
// sessions, total prompts and technologies. Missing values render as 0.
function renderPIStats(events, history, landscape) {
  const tile = (colorClass, value, label) =>
    `<div class="card p-3 text-center"><div class="text-xl font-bold ${colorClass}">${esc(value)}</div><div class="text-[10px] text-gray-500">${label}</div></div>`;

  const tiles = [
    tile('text-orange-400', events.count || 0, 'Events'),
    tile('text-blue-400', history.total_sessions || 0, 'CLI Sessions'),
    tile('text-green-400', history.total_prompts || 0, 'Total Prompts'),
    tile('text-purple-400', landscape.technologies || 0, 'Technologies'),
  ];
  // Join with the same newline + four-space separator the markup uses between tiles.
  return `<div class="grid grid-cols-2 md:grid-cols-4 gap-3 mb-4">\n    ${tiles.join('\n    ')}\n  </div>`;
}

// "Session Patterns" card listing up to five patterns from the history
// report. String patterns are shown as-is, anything else as JSON. Returns ''
// when the report has no patterns.
function renderPIPatterns(history) {
  const patterns = history.patterns;
  if (!patterns || !patterns.length) return '';

  const lines = [];
  for (const pattern of patterns.slice(0, 5)) {
    const label = typeof pattern === 'string' ? pattern : JSON.stringify(pattern);
    lines.push(`<div class="text-xs text-gray-300">- ${esc(label)}</div>`);
  }
  const header = `<div class="card p-4 mb-3"><div class="text-[10px] text-gray-500 uppercase mb-2"><i class="fas fa-chart-bar mr-1"></i>Session Patterns</div><div class="space-y-1">`;
  return header + lines.join('') + `</div></div>`;
}

// "Health Signals" card built from the improve report. Each entry of
// report.health_summary is shown as a rounded percentage (value * 100),
// coloured green at >= 80, yellow at >= 50 and red otherwise, followed by the
// report's top_actions when present. Returns '' when there is no report or
// the summary is empty.
function renderPIHealth(improve) {
  const report = improve.report;
  if (!report) return '';
  const summary = report.health_summary || {};
  const signalNames = Object.keys(summary);
  if (!signalNames.length) return '';

  const pieces = [];
  pieces.push(`<div class="card p-4 mb-3"><div class="text-[10px] text-gray-500 uppercase mb-2"><i class="fas fa-heartbeat mr-1"></i>Health Signals</div>`);
  pieces.push(`<div class="grid grid-cols-2 md:grid-cols-4 gap-2">`);
  for (const name of signalNames) {
    const percent = Math.round(summary[name] * 100);
    let tone = 'text-red-400';
    if (percent >= 80) {
      tone = 'text-green-400';
    } else if (percent >= 50) {
      tone = 'text-yellow-400';
    }
    pieces.push(`<div class="text-center"><div class="text-lg font-bold ${tone}">${percent}%</div><div class="text-[10px] text-gray-500 capitalize">${esc(name)}</div></div>`);
  }
  pieces.push(`</div>`);

  const actions = report.top_actions;
  if (actions && actions.length) {
    pieces.push(`<div class="mt-3 space-y-1">`);
    for (const action of actions) {
      pieces.push(`<div class="text-xs text-yellow-300"><i class="fas fa-lightbulb mr-1"></i>${esc(action)}</div>`);
    }
    pieces.push(`</div>`);
  }
  pieces.push(`</div>`);
  return pieces.join('');
}

// "Live Activity" card: the active-session count with one tile per distinct
// project (first session per project wins), then up to five recent prompts.
// Returns '' when there are neither sessions nor recent prompts.
function renderPIActivity(activity) {
  const sessions = activity.sessions || [];
  const recent = activity.recent || [];
  if (!(sessions.length || recent.length)) return '';

  const out = [];
  out.push(`<div class="card p-4 mb-3"><div class="text-[10px] text-gray-500 uppercase mb-2"><i class="fas fa-bolt mr-1"></i>Live Activity</div>`);

  if (sessions.length) {
    const plural = sessions.length > 1 ? 's' : '';
    out.push(`<div class="mb-2"><span class="text-[10px] text-green-400 font-bold">${sessions.length} active session${plural}</span></div>`);
    out.push(`<div class="grid grid-cols-2 md:grid-cols-4 gap-2 mb-3">`);
    const shownProjects = new Set();
    for (const session of sessions) {
      if (!shownProjects.has(session.project)) {
        shownProjects.add(session.project);
        out.push(`<div class="bg-gray-900 rounded px-2 py-1.5"><div class="text-xs text-white truncate"><i class="fas fa-circle text-green-400 text-[6px] mr-1"></i>${esc(session.project)}</div><div class="text-[9px] text-gray-500">${esc(session.status)}</div></div>`);
      }
    }
    out.push(`</div>`);
  }

  if (recent.length) {
    out.push(`<div class="text-[10px] text-gray-500 mb-1">Recent prompts:</div><div class="space-y-1">`);
    for (const prompt of recent.slice(0, 5)) {
      out.push(`<div class="text-[10px] text-gray-400 truncate"><span class="text-gray-600">${esc(prompt.project)}</span> ${esc(prompt.text)}</div>`);
    }
    out.push(`</div>`);
  }

  out.push(`</div>`);
  return out.join('');
}

// Static "Quick Actions" card: two buttons that call triggerAnalysis() and
// two links that open raw API endpoints in a new tab.
function renderPIActions() {
  const quickActionsCard = `<div class="card p-4"><div class="text-[10px] text-gray-500 uppercase mb-2">Quick Actions</div>
    <div class="flex flex-wrap gap-2">
      <button id="btn-history" onclick="triggerAnalysis('history')" class="text-[10px] px-3 py-1.5 rounded bg-gray-800 hover:bg-gray-700 text-gray-300"><i class="fas fa-clock-rotate-left mr-1"></i>Analyze History</button>
      <button id="btn-improve" onclick="triggerAnalysis('improve')" class="text-[10px] px-3 py-1.5 rounded bg-gray-800 hover:bg-gray-700 text-gray-300"><i class="fas fa-brain mr-1"></i>Self-Improve</button>
      <a href="/api/events?limit=20" target="_blank" class="text-[10px] px-3 py-1.5 rounded bg-gray-800 hover:bg-gray-700 text-blue-400">Events JSON</a>
      <a href="/api/cikg/landscape" target="_blank" class="text-[10px] px-3 py-1.5 rounded bg-gray-800 hover:bg-gray-700 text-blue-400">CIKG Landscape</a>
    </div>
  </div>`;
  return quickActionsCard;
}

// Run a backend analysis ('history' or 'improve') from the Quick Actions
// card. The matching button (#btn-<type>) shows a spinner and is disabled
// while the request runs. On success a toast summarises the result and the
// Process pane is reloaded; on failure the error is alerted and the button is
// restored.
async function triggerAnalysis(type) {
  const button = document.getElementById('btn-' + type);
  const savedMarkup = button ? button.innerHTML : '';
  if (button) {
    button.innerHTML = `<i class="fas fa-spinner fa-spin mr-1"></i>Running…`;
    button.disabled = true;
  }
  try {
    let outcome;
    switch (type) {
      case 'history':
        outcome = await api('/history/analyze', { method: 'POST' });
        break;
      case 'improve':
        outcome = await api('/improve/analyze', { method: 'POST' });
        break;
    }
    let summary;
    if (type === 'history') {
      summary = `History: ${outcome.total_sessions || 0} sessions, ${outcome.total_prompts || 0} prompts`;
    } else {
      summary = `Improve: ${(outcome.report || {}).total_signals || 0} signals collected`;
    }
    showToast(summary);
    loadProcess();
  } catch (err) {
    alert('Analysis failed: ' + err.message);
    if (button) {
      button.innerHTML = savedMarkup;
      button.disabled = false;
    }
  }
}

// Show a green confirmation toast in the bottom-right corner with the escaped
// message, and remove it after three seconds.
function showToast(msg) {
  const toast = document.createElement('div');
  toast.className = 'fixed bottom-4 right-4 bg-green-600 text-white text-xs px-4 py-2 rounded shadow-lg z-50';
  const safeMessage = esc(msg);
  toast.innerHTML = `<i class="fas fa-check mr-1"></i>${safeMessage}`;
  document.body.appendChild(toast);
  const dismiss = () => toast.remove();
  setTimeout(dismiss, 3000);
}

// ── Forge ───────────────────────────────────────────────────────────────
/**
 * Render the Forge pane (`#pane-forge`).
 *
 * Shows a loading placeholder, requests `/forge/status` and `/forge/gaps`
 * together, then renders three summary cards followed by the capability-gap
 * list when it is non-empty. Any error replaces the pane with a red
 * "Failed: …" card.
 *
 * @returns {Promise<void>}
 */
async function loadForge() {
  const forgePane = document.getElementById('pane-forge');
  forgePane.innerHTML = `<div class="text-xs text-gray-500"><i class="fas fa-spinner fa-spin mr-1"></i>Loading forge…</div>`;
  try {
    const statusRequest = api('/forge/status');
    const gapsRequest = api('/forge/gaps');
    const [status, gaps] = await Promise.all([statusRequest, gapsRequest]);

    // Collect markup fragments in order and join once at the end.
    const fragments = [];
    fragments.push(`<h2 class="text-sm font-bold text-white mb-3">MCP Forge</h2>`);
    fragments.push(`<div class="grid grid-cols-1 md:grid-cols-3 gap-3 mb-4">
      <div class="card p-4 text-center">
        <div class="text-xl font-bold ${status.available ? 'text-green-400' : 'text-red-400'}">${status.available ? 'Online' : 'Offline'}</div>
        <div class="text-[10px] text-gray-500">Status</div>
      </div>
      <div class="card p-4 text-center">
        <div class="text-xl font-bold text-orange-400">${esc(status.registry_count || 0)}</div>
        <div class="text-[10px] text-gray-500">Tools in Registry</div>
      </div>
      <div class="card p-4 text-center">
        <div class="text-xl font-bold text-yellow-400">${esc(status.pending_gaps || 0)}</div>
        <div class="text-[10px] text-gray-500">Detected Gaps</div>
      </div>
    </div>`);

    const detectedGaps = gaps.gaps || [];
    if (detectedGaps.length) {
      fragments.push(`<h3 class="text-xs font-bold text-gray-400 mb-2">Capability Gaps</h3><div class="space-y-1">`);
      // One row per gap: capability name on the left, hit count (+ badge) on the right.
      fragments.push(detectedGaps.map((gap) =>
        `<div class="card px-3 py-2 flex justify-between"><span class="text-xs text-white">${esc(gap.capability)}</span><span class="text-xs text-gray-400">${esc(gap.occurrences)} hits ${gap.triggered ? '<span class="text-orange-400">TRIGGERED</span>' : ''}</span></div>`
      ).join(''));
      fragments.push(`</div>`);
    }
    forgePane.innerHTML = fragments.join('');
  } catch (err) {
    forgePane.innerHTML = `<div class="card p-4 text-sm text-red-400">Failed: ${esc(err.message)}</div>`;
  }
}

// ── Setup Wizard ────────────────────────────────────────────────────────
/**
 * Decide whether the dashboard can proceed to its normal tabs.
 *
 * Queries `/setup/status`; when the backend reports it is not configured the
 * setup wizard is rendered and `false` is returned. Any failure along the way
 * is treated as "ready" so the UI is never blocked by this check.
 *
 * @returns {Promise<boolean>} true when configured (or when the check failed).
 */
async function checkSetup() {
  try {
    const setup = await api('/setup/status');
    const needsWizard = !setup.configured;
    if (needsWizard) showSetupWizard(setup);
    return !needsWizard;
  } catch (_ignored) {
    return true;
  }
}

/**
 * Render the first-run setup wizard into `#pane-inbox`.
 *
 * Sections, in order: a header card listing every entry of `status.steps`
 * (green when `step.status === 'done'`, otherwise red with an optional hint),
 * a "Detected CLI Tools" card built from `status.discovery.clis` /
 * `status.discovery.cli_auth` (omitted when no CLIs were found), a
 * "Credential Sources" card built from `status.discovery.tokens`, and the
 * Auto-Configure / Reload / Local Mode action buttons.
 *
 * @param {object} status - Response of `/setup/status`. Reads `steps`,
 *   `progress` and the optional `discovery` object.
 */
function showSetupWizard(status) {
  const pane = document.getElementById('pane-inbox');
  // Every discovery sub-object is optional; fall back to empty objects.
  const disc = status.discovery || {};
  const clis = disc.clis || {};
  const cliAuth = disc.cli_auth || {};
  const tokens = disc.tokens || {};
  let html = `<div class="max-w-2xl mx-auto mt-4 space-y-4">`;
  // Header
  html += `<div class="card p-6">
    <div class="flex items-center gap-3 mb-3">
      <i class="fas fa-wand-magic-sparkles text-orange-500 text-xl"></i>
      <h2 class="text-lg font-bold text-white">Welcome to Maggy</h2>
      <span class="text-[10px] text-gray-500">${esc(status.progress)} configured</span>
    </div>
    <div class="space-y-2">`;
  for (const step of status.steps) {
    const icon = step.status === 'done'
      ? '<i class="fas fa-check-circle text-green-400"></i>'
      : '<i class="fas fa-circle-xmark text-red-400/60"></i>';
    // The hint is only shown for steps that still need attention.
    html += `<div class="flex items-center gap-3 px-3 py-2 rounded ${step.status === 'done' ? 'bg-green-900/20' : 'bg-red-900/10'}">
      ${icon}
      <span class="text-sm ${step.status === 'done' ? 'text-green-300' : 'text-gray-300'}">${esc(step.label)}</span>
      ${step.status !== 'done' && step.hint ? `<span class="text-[10px] text-gray-500 ml-auto">${esc(step.hint)}</span>` : ''}
    </div>`;
  }
  html += `</div></div>`;
  // Discovered CLIs
  const cliNames = Object.keys(clis);
  if (cliNames.length) {
    html += `<div class="card p-4">
      <div class="text-[10px] text-gray-500 uppercase mb-2"><i class="fas fa-terminal mr-1"></i>Detected CLI Tools</div>
      <div class="space-y-1">`;
    for (const name of cliNames) {
      const auth = cliAuth[name];
      html += `<div class="flex items-center gap-2 text-xs">
        <i class="fas fa-check text-green-400"></i>
        <span class="text-white font-mono">${esc(name)}</span>
        <span class="text-gray-500">${esc(clis[name])}</span>
        ${auth ? '<span class="text-[10px] px-1.5 py-0.5 rounded bg-green-900/40 text-green-400">authenticated</span>' : '<span class="text-[10px] px-1.5 py-0.5 rounded bg-gray-800 text-gray-500">not logged in</span>'}
      </div>`;
    }
    html += `</div></div>`;
  }
  // Token sources
  html += `<div class="card p-4">
    <div class="text-[10px] text-gray-500 uppercase mb-2"><i class="fas fa-key mr-1"></i>Credential Sources</div>
    <div class="space-y-1 text-xs">`;
  // GitHub: env var takes precedence over the git credential helper.
  if (tokens.GITHUB_TOKEN) html += `<div class="text-green-400"><i class="fas fa-check mr-1"></i>GITHUB_TOKEN (env var)</div>`;
  else if (tokens.GIT_CREDENTIAL) html += `<div class="text-green-400"><i class="fas fa-check mr-1"></i>GitHub token (git credential helper)</div>`;
  else html += `<div class="text-red-400/60"><i class="fas fa-xmark mr-1"></i>No GitHub token found</div>`;
  // Anthropic: env var first, then an authenticated Claude CLI; absence is informational only.
  if (tokens.ANTHROPIC_API_KEY) html += `<div class="text-green-400"><i class="fas fa-check mr-1"></i>ANTHROPIC_API_KEY (env var)</div>`;
  else if (cliAuth.claude) html += `<div class="text-green-400"><i class="fas fa-check mr-1"></i>Claude Code subscription (CLI auth)</div>`;
  else html += `<div class="text-gray-500"><i class="fas fa-info-circle mr-1"></i>No Anthropic API key (Claude CLI can be used instead)</div>`;
  html += `</div></div>`;
  // Actions
  html += `<div class="flex gap-2">
    <button onclick="autoConfigureSetup()" class="text-xs px-4 py-2 rounded bg-orange-600 hover:bg-orange-700 text-white"><i class="fas fa-wand-magic mr-1"></i>Auto-Configure</button>
    <button onclick="reloadConfig()" class="text-xs px-4 py-2 rounded bg-gray-700 hover:bg-gray-600 text-gray-300"><i class="fas fa-rotate mr-1"></i>Reload</button>
    <button onclick="enterLocalMode()" class="text-xs px-4 py-2 rounded bg-gray-800 hover:bg-gray-700 text-gray-400"><i class="fas fa-laptop mr-1"></i>Local Mode</button>
  </div>`;
  html += `</div>`;
  pane.innerHTML = html;
}

/**
 * Replace the inbox pane with the "Local Mode" launcher: a 2-column grid of
 * shortcuts to the tabs listed below, plus a "Back to setup" link that calls
 * `loadAll()`.
 */
function enterLocalMode() {
  // [tab id, icon classes, title, subtitle] for each shortcut tile.
  const shortcuts = [
    ['budget', 'fa-wallet text-orange-400', 'Budget', 'Track token spend'],
    ['routing', 'fa-route text-blue-400', 'Model Routing', 'Performance heatmap'],
    ['process', 'fa-chart-line text-green-400', 'Process', 'Events + knowledge graph'],
    ['forge', 'fa-hammer text-yellow-400', 'Forge', 'MCP tool gaps'],
  ];
  const tiles = shortcuts
    .map(([tab, icon, title, subtitle]) =>
      `<button onclick="switchTab('${tab}')" class="card p-3 text-left hover:bg-gray-900"><div class="text-xs text-white"><i class="fas ${icon} mr-1"></i>${title}</div><div class="text-[10px] text-gray-500">${subtitle}</div></button>`)
    .join('\n      ');
  const inbox = document.getElementById('pane-inbox');
  inbox.innerHTML = `<div class="card p-6 max-w-2xl mx-auto mt-4">
    <div class="flex items-center gap-3 mb-3">
      <i class="fas fa-laptop text-blue-400 text-lg"></i>
      <h2 class="text-sm font-bold text-white">Local Mode</h2>
    </div>
    <p class="text-xs text-gray-400 mb-3">These features work without provider credentials:</p>
    <div class="grid grid-cols-2 gap-2">
      ${tiles}
    </div>
    <button onclick="loadAll()" class="mt-3 text-[10px] text-gray-500 hover:text-white"><i class="fas fa-arrow-left mr-1"></i>Back to setup</button>
  </div>`;
}

/**
 * Ask the backend to reload its configuration (`POST /setup/reload`).
 *
 * When the response reports `mode === 'full'` the whole UI is reloaded;
 * otherwise the setup status is fetched again and the wizard is re-rendered.
 * Failures are reported with an alert.
 *
 * @returns {Promise<void>}
 */
async function reloadConfig() {
  try {
    const reloaded = await api('/setup/reload', { method: 'POST' });
    if (reloaded.mode !== 'full') {
      const refreshed = await api('/setup/status');
      showSetupWizard(refreshed);
      return;
    }
    loadAll();
  } catch (err) {
    alert('Reload failed: ' + err.message);
  }
}

/**
 * Run backend auto-configuration (`POST /setup/auto-configure`).
 *
 * The triggering button shows a spinner and is disabled while the request is
 * in flight. When the response reports `mode === 'full'` the whole UI is
 * reloaded; otherwise the setup status is re-fetched and the wizard is
 * re-rendered. On failure the error is shown via `alert` and the button is
 * restored.
 *
 * @param {Event} [evt] - Click event. Optional so existing inline
 *   `onclick="autoConfigureSetup()"` handlers keep working; falls back to
 *   `window.event` in that case.
 * @returns {Promise<void>}
 */
async function autoConfigureSetup(evt) {
  const trigger = evt || window.event;
  const origin = trigger ? trigger.target : null;
  // A click on the inner <i> icon reports the icon as `target`; resolve to the
  // enclosing <button> so the spinner/disabled state lands on the right node.
  const btn = origin && typeof origin.closest === 'function'
    ? (origin.closest('button') || origin)
    : origin;
  if (btn) {
    btn.innerHTML = '<i class="fas fa-spinner fa-spin mr-1"></i>Discovering...';
    btn.disabled = true;
  }
  try {
    const result = await api('/setup/auto-configure', { method: 'POST' });
    if (result.mode === 'full') {
      loadAll();
    } else {
      const status = await api('/setup/status');
      showSetupWizard(status);
    }
  } catch (e) {
    alert('Auto-configure failed: ' + e.message);
    if (btn) {
      btn.innerHTML = '<i class="fas fa-wand-magic mr-1"></i>Auto-Configure';
      btn.disabled = false;
    }
  }
}

// ── Init ────────────────────────────────────────────────────────────────
/**
 * Entry point for (re)loading the dashboard.
 *
 * Best-effort: fills the `#org-badge` header from `/health`, ignoring any
 * failure. Then runs the setup check and, when it reports ready, activates
 * the tab stored in `CURRENT_TAB`.
 *
 * @returns {Promise<void>}
 */
async function loadAll() {
  try {
    const health = await api('/health');
    const badge = document.getElementById('org-badge');
    const summary = `${health.org} · ${health.provider} · ${health.codebases} codebases`;
    badge.textContent = summary;
  } catch (_ignored) {
    // The badge is decorative; a failed health call must not block startup.
  }
  if (await checkSetup()) switchTab(CURRENT_TAB);
}

// Boot the UI as soon as the script is evaluated.
loadAll();


================================================
FILE: maggy/maggy/static/index.html
================================================
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Maggy</title>

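  <!--
    Content Security Policy — limits where scripts/styles can load from.
    This reduces the risk of code being injected from arbitrary origins, which
    matters because Maggy runs with local file-system access to your codebases
    and can spawn `claude --dangerously-skip-permissions`. It does not protect
    against a compromise of the allow-listed CDNs themselves, and
    'unsafe-inline' is still permitted.

    Note: browsers ignore `frame-ancestors` when the policy is delivered via a
    meta element; send it as an HTTP response header for it to take effect.

    For production / air-gapped installs: run `maggy/scripts/vendor-assets.sh`
    (TODO) to copy Tailwind + Font Awesome locally, then replace the two
    external references below with /static/tailwind.css and /static/fontawesome.css.
  -->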
  <meta http-equiv="Content-Security-Policy" content="
    default-src 'self';
    script-src 'self' https://cdn.tailwindcss.com 'unsafe-inline';
    style-src 'self' https://cdn.tailwindcss.com https://cdnjs.cloudflare.com 'unsafe-inline';
    font-src 'self' https://cdnjs.cloudflare.com data:;
    connect-src 'self';
    img-src 'self' data:;
    frame-ancestors 'none';
    base-uri 'self';
  " />

  <!-- Tailwind Play CDN: no stable SRI hash (generated on demand). Vendor for prod. -->
  <script src="https://cdn.tailwindcss.com"></script>

  <!-- Font Awesome 6.5.0 all.min.css — SHA-512 subresource integrity per cdnjs -->
  <link rel="stylesheet"
        href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.0/css/all.min.css"
        integrity="sha512-SzlrxWUlpfuzQ+pcUCosxcglQRNAq/DZjVsC0lE40xsADsfeQoEypE+enwcOiGjk/bSuGGKHEyjSoQ1zVisanQ=="
        crossorigin="anonymous"
        referrerpolicy="no-referrer" />
  <style>
    body { background:#0b0e14; color:#e6e6e6; font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif; }
    .tab-btn.active { background:#ea580c; color:white; }
    #system-gear.active { background:#ea580c; color:white; }
    .card { background:#151922; border:1px solid #262b3a; border-radius: 0.5rem; }
    pre { white-space: pre-wrap; word-break: break-word; }
  </style>
</head>
<body>
  <div class="min-h-screen">
    <header class="border-b border-gray-800 bg-black/40 px-6 py-3 flex items-center gap-4">
      <div class="flex items-center gap-2">
        <i class="fas fa-robot text-orange-500 text-xl"></i>
        <h1 class="text-lg font-bold text-white">Maggy</h1>
        <span class="text-[10px] text-gray-500">v0.1.0</span>
      </div>
      <div id="org-badge" class="text-xs text-gray-400"></div>
      <div class="flex-1"></div>
      <button onclick="loadAll()" class="text-xs text-gray-400 hover:text-white"><i class="fas fa-sync-alt mr-1"></i>Refresh</button>
    </header>

    <nav class="border-b border-gray-800 px-6 py-2 flex items-center gap-1 bg-black/20">
      <span class="text-[9px] text-gray-600 uppercase tracking-wider mr-1">Work</span>
      <button class="tab-btn active text-xs px-3 py-1.5 rounded bg-gray-800 text-gray-300" data-tab="chat" onclick="switchTab('chat')"><i class="fas fa-terminal mr-1"></i>Chat</button>
      <button class="tab-btn text-xs px-3 py-1.5 rounded bg-gray-800 text-gray-300" data-tab="inbox" onclick="switchTab('inbox')"><i class="fas fa-list-check mr-1"></i>Tasks</button>
      <button class="tab-btn text-xs px-3 py-1.5 rounded bg-gray-800 text-gray-300" data-tab="followed" onclick="switchTab('followed')"><i class="fas fa-eye mr-1"></i>Watching</button>
      <span class="mx-2 border-l border-gray-700 h-4 inline-block"></span>
      <span class="text-[9px] text-gray-600 uppercase tracking-wider mr-1">Intel</span>
      <button class="tab-btn text-xs px-3 py-1.5 rounded bg-gray-800 text-gray-300" data-tab="competitors" onclick="switchTab('competitors')"><i class="fas fa-chess mr-1"></i>Competitors</button>
      <button class="tab-btn text-xs px-3 py-1.5 rounded bg-gray-800 text-gray-300" data-tab="process" onclick="switchTab('process')"><i class="fas fa-chart-line mr-1"></i>Insights</button>
      <div class="flex-1"></div>
      <div class="relative">
        <button onclick="toggleSystemMenu()" class="text-xs px-2.5 py-1.5 rounded bg-gray-800 text-gray-400 hover:text-white" id="system-gear"><i class="fas fa-gear"></i></button>
        <div id="system-menu" class="hidden absolute right-0 top-full mt-1 w-40 rounded border border-gray-700 bg-[#151922] shadow-xl z-30 py-1">
          <button onclick="switchTab('budget')" class="sys-item w-full text-left text-xs px-3 py-2 text-gray-300 hover:bg-gray-800" data-tab="budget"><i class="fas fa-wallet mr-2 text-gray-500"></i>Budget</button>
          <button onclick="switchTab('routing')" class="sys-item w-full text-left text-xs px-3 py-2 text-gray-300 hover:bg-gray-800" data-tab="routing"><i class="fas fa-route mr-2 text-gray-500"></i>Models</button>
          <button onclick="switchTab('forge')" class="sys-item w-full text-left text-xs px-3 py-2 text-gray-300 hover:bg-gray-800" data-tab="forge"><i class="fas fa-hammer mr-2 text-gray-500"></i>Forge</button>
          <button onclick="switchTab('settings')" class="sys-item w-full text-left text-xs px-3 py-2 text-gray-300 hover:bg-gray-800" data-tab="settings"><i class="fas fa-sliders mr-2 text-gray-500"></i>Settings</button>
        </div>
      </div>
    </nav>

    <main class="px-6 py-4">
      <div id="pane-chat" class="pane"></div>
      <div id="pane-inbox" class="pane hidden"></div>
      <div id="pane-followed" class="pane hidden"></div>
      <div id="pane-competitors" class="pane hidden"></div>
      <div id="pane-budget" class="pane hidden"></div>
      <div id="pane-routing" class="pane hidden"></div>
      <div id="pane-process" class="pane hidden"></div>
      <div id="pane-forge" class="pane hidden"></div>
      <div id="pane-settings" class="pane hidden"></div>
    </main>

    <div id="drawer" class="fixed top-0 right-0 h-full w-[40rem] max-w-full card border-l border-gray-800 p-4 overflow-y-auto translate-x-full transition-transform z-20">
      <div class="flex items-center justify-between mb-3">
        <h3 id="drawer-title" class="text-sm font-bold text-white">Task</h3>
        <button onclick="closeDrawer()" class="text-gray-400 hover:text-white text-lg"><i class="fas fa-xmark"></i></button>
      </div>
      <div id="drawer-body" class="text-sm text-gray-300"></div>
    </div>
  </div>

  <script src="/static/app.js?v=3"></script>
</body>
</html>


================================================
FILE: maggy/pyproject.toml
================================================
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "maggy"
version = "0.1.0"
description = "Generic AI engineering command center — part of the Maggy platform"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "MIT" }
authors = [{ name = "Maggy Contributors" }]
dependencies = [
    "fastapi>=0.115",
    "uvicorn[standard]>=0.30",
    "httpx>=0.27",
    "anthropic>=0.40",
    "bcrypt>=4.1",
    "email-validator>=2.0",
    "pyyaml>=6.0",
    "feedparser>=6.0",
    "pydantic>=2.6",
    "typer>=0.12",
    "rich>=13.0",
]

[project.scripts]
maggy = "maggy.cli:app"

[tool.setuptools.packages.find]
where = ["."]
include = ["maggy*"]


================================================
FILE: maggy/tests/conftest.py
================================================
"""Shared test fixtures for Maggy test suite."""

from __future__ import annotations

import tempfile
from pathlib import Path
from unittest.mock import MagicMock

import pytest

from maggy.config import (
    BudgetConfig,
    DashboardConfig,
    MaggyConfig,
    MeshConfig,
    OrgConfig,
    RoutingConfig,
    StorageConfig,
)


@pytest.fixture
def tmp_dir(tmp_path: Path) -> Path:
    """Expose pytest's per-test ``tmp_path`` under the name ``tmp_dir``."""
    scratch_dir = tmp_path
    return scratch_dir


@pytest.fixture
def mock_cfg(tmp_path: Path) -> MaggyConfig:
    """Build a small ``MaggyConfig`` whose storage path lives under ``tmp_path``.

    Only the org name, storage path and daily budget limit are set
    explicitly; the remaining sections use their constructors' defaults.
    """
    db_file = tmp_path / "store.db"
    sections = {
        "org": OrgConfig(name="test-org"),
        "storage": StorageConfig(path=str(db_file)),
        "dashboard": DashboardConfig(),
        "budget": BudgetConfig(daily_limit_usd=10.0),
        "routing": RoutingConfig(),
        "mesh": MeshConfig(),
    }
    return MaggyConfig(**sections)


================================================
FILE: maggy/tests/integration/__init__.py
================================================
"""Integration tests for cross-module flows."""


================================================
FILE: maggy/tests/integration/test_full_task_flow.py
================================================
"""Integration test: Ticket -> Route -> Execute -> Reward.

Tests the full lifecycle of a task through routing, event
emission, and reward recording.
"""

from __future__ import annotations

from pathlib import Path

from maggy.event_spine.emitter import EventEmitter
from maggy.event_spine.events import (
    ExecutionEvent,
    IntentEvent,
    OutcomeEvent,
)
from maggy.event_spine.store import EventStore
from maggy.routing import RoutingContext, RoutingService
from maggy.scores import MIN_SAMPLES


class TestFullTaskFlow:
    """End-to-end checks of routing, event emission and reward recording."""

    @staticmethod
    def _primary_name(decision):
        """Return the primary model name whether ``primary`` is a str or an object."""
        primary = decision.primary
        return primary if isinstance(primary, str) else primary.name

    def test_route_emit_reward(self, mock_cfg, tmp_path: Path):
        """Route a task, emit three events for it, then record the reward."""
        task_id = "task-123"

        # Step 1: obtain a routing decision for a mid-blast feature task.
        router = RoutingService(mock_cfg)
        decision = router.route(
            RoutingContext(blast_score=5, task_type="feature"),
        )
        name = self._primary_name(decision)
        assert name  # Got a routing decision

        # Step 2: push intent, execution and outcome events through the spine.
        emitter = EventEmitter(EventStore(tmp_path / "events.db"))

        def emit_for_task(event):
            event.header.task_id = task_id
            emitter.emit(event)

        emit_for_task(IntentEvent(
            intent_text="Add user dashboard",
            decomposed_steps=["create component", "add api"],
        ))
        emit_for_task(ExecutionEvent(
            tool_name="code_edit",
            duration_ms=500,
            success=True,
        ))
        emit_for_task(OutcomeEvent(success=True, reward=0.85))

        # Step 3: all three events must be retrievable by task id.
        assert len(emitter.trace(task_id)) == 3

        # Step 4: feed the reward back and confirm the heatmap is populated.
        router.record_outcome(name, "feature", 5, 0.85)
        assert len(router.get_heatmap()) >= 1

    def test_multi_task_routing(self, mock_cfg):
        """Route a low- and a high-blast task and compare the chosen models."""
        router = RoutingService(mock_cfg)

        low_name = self._primary_name(
            router.route(RoutingContext(blast_score=1)),
        )
        high_name = self._primary_name(
            router.route(RoutingContext(blast_score=9)),
        )

        # Either the two tiers differ, or both resolved to "claude".
        assert low_name != high_name or low_name == "claude"


================================================
FILE: maggy/tests/integration/test_model_fallback.py
================================================
"""Integration test: Quota -> Checkpoint -> Switch -> Continue.

Tests fatigue-based checkpointing and model switching.
"""

from __future__ import annotations

from maggy.fatigue import create_profile
from maggy.services.checkpoint import Checkpoint, create_checkpoint


class TestModelFallback:
    """Checkpoint creation and hand-off between models."""

    def test_fatigue_triggers_checkpoint(self):
        """A heavily used profile asks for a checkpoint that survives a round trip."""
        worn = create_profile("claude")
        worn.tokens_used = 170_000
        worn.turns = 40
        assert worn.should_checkpoint()

        goal = "Refactor auth module"
        snapshot = create_checkpoint(
            goal=goal,
            progress=["Extracted interface", "Updated tests"],
            model="claude",
            working_state="Mid-refactor, 3 files changed",
            files=["auth.py", "test_auth.py"],
        )

        # Hand-off: serialize, then rebuild on the receiving side.
        revived = Checkpoint.deserialize(snapshot.serialize())
        assert revived.goal == goal
        assert revived.source_model == "claude"

        # The prompt for the next model must carry goal and working state.
        handoff_prompt = revived.to_prompt()
        for fragment in (goal, "Mid-refactor"):
            assert fragment in handoff_prompt

    def test_cross_model_checkpoint_round_trip(self):
        """Constraints and source model are preserved through serialization."""
        constraint = "Don't break existing tests"
        original = create_checkpoint(
            goal="Fix API pagination",
            progress=["Found bug in offset calc"],
            model="gpt",
            constraints=[constraint],
            files=["api/routes.py"],
        )

        # serialize -> transfer -> restore
        wire_format = original.serialize()
        received = Checkpoint.deserialize(wire_format)

        assert received.source_model == "gpt"
        assert constraint in received.to_prompt()

    def test_fresh_model_low_fatigue(self):
        """A newly created profile reports no fatigue and no checkpoint need."""
        fresh = create_profile("kimi")
        assert not fresh.should_checkpoint()
        assert fresh.fatigue_score == 0.0


================================================
FILE: maggy/tests/integration/test_process_loop.py
================================================
"""Integration test: CI fail -> Signal -> Pattern -> Fix.

Tests the process intelligence pipeline with CIKG and Engram.
"""

from __future__ import annotations

from pathlib import Path

from maggy.cikg.graph import KnowledgeGraphService
from maggy.cikg.models import Edge, Node
from maggy.cikg.queries import find_gaps, get_landscape
from maggy.engram.diagnostics import diagnose
from maggy.engram.record import EngramRecord
from maggy.engram.retrieval import EngramRetrieval
from maggy.engram.store import EngramStore
from maggy.lexon.router import LexonRouter


class TestProcessLoop:
    """Cross-module flows between CIKG, Lexon and the Engram store."""

    def test_cikg_gap_to_engram(self, tmp_path: Path):
        """A feature gap found in the graph is stored and retrieved by tag."""
        # Landscape: three competitors, one feature, only c0 has it.
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        competitors = [
            Node(id=f"c{idx}", node_type="competitor", name=f"Competitor{idx}")
            for idx in range(3)
        ]
        for competitor in competitors:
            graph.add_node(competitor)
        graph.add_node(Node(id="f1", node_type="feature", name="SSO"))
        graph.add_edge(Edge("c0", "f1", "has_feature"))

        # Two of the three competitors lack the feature.
        gap = find_gaps(graph, "SSO")
        assert gap.gap_count == 2

        # Persist the finding as a decision memory.
        memory = EngramStore(tmp_path / "engram.db")
        insight = EngramRecord(
            engram_id="gap-sso",
            namespace="process",
            memory_type="decision",
            content=f"Gap detected: {gap.recommendation}",
            tags=["cikg", "gap", "sso"],
        )
        memory.write(insight)

        # Exactly that record comes back for the "cikg" tag.
        tagged = EngramRetrieval(memory).by_tag("cikg")
        assert len(tagged) == 1
        assert "Gap detected" in tagged[0].content

    def test_lexon_to_engram(self, tmp_path: Path):
        """A routed phrase is written to Engram and read back by id."""
        resolution = LexonRouter().route("deploy the app to production")
        assert resolution.confidence > 0.5

        memory = EngramStore(tmp_path / "engram.db")
        summary = f"User said '{resolution.phrase}' -> {resolution.resolved_tool}"
        memory.write(EngramRecord(
            engram_id="intent-deploy",
            namespace="session-1",
            memory_type="fact",
            content=summary,
            tags=["lexon", "intent"],
        ))

        stored = memory.get("intent-deploy")
        assert stored is not None
        assert "deploy" in stored.content

    def test_full_diagnostics(self, tmp_path: Path):
        """Diagnostics over one record per memory type report a healthy store."""
        memory = EngramStore(tmp_path / "engram.db")
        memory_types = ("fact", "decision", "code_ref", "handoff")
        for index, kind in enumerate(memory_types):
            memory.write(EngramRecord(
                engram_id=f"e{index}",
                namespace="test",
                memory_type=kind,
                content=f"Content for {kind}",
            ))

        report = diagnose(memory)
        assert report.total_memories == 4
        assert report.health_score > 0.8


================================================
FILE: maggy/tests/test_account_guide.py
================================================
"""Tests for account switching guidance."""

from __future__ import annotations

from maggy.services.account_guide import (
    AccountProfile,
    detect_accounts,
    suggest_switch,
)


def test_account_profile_dataclass():
    """The profile keeps the provider and auth command it was built with."""
    fields = {
        "name": "claude-work",
        "provider": "anthropic",
        "auth_command": "claude auth login",
    }
    profile = AccountProfile(**fields)
    assert profile.provider == "anthropic"
    assert "login" in profile.auth_command


def test_detect_accounts_finds_claude(tmp_path):
    """A ``.claude/credentials.json`` under home yields an anthropic account."""
    claude_home = tmp_path / ".claude"
    claude_home.mkdir()
    (claude_home / "credentials.json").write_text("{}")

    found = detect_accounts(home=tmp_path)

    assert any(account.provider == "anthropic" for account in found)


def test_detect_accounts_finds_codex(tmp_path):
    """An existing ``.codex`` directory under home yields an openai account."""
    codex_home = tmp_path / ".codex"
    codex_home.mkdir()

    found = detect_accounts(home=tmp_path)

    assert any(account.provider == "openai" for account in found)


def test_suggest_switch_anthropic():
    """The anthropic guidance mentions claude and a login/auth step."""
    advice = suggest_switch("anthropic").lower()
    assert "claude" in advice
    assert any(word in advice for word in ("login", "auth"))


def test_suggest_switch_openai():
    """The openai guidance mentions codex or openai."""
    advice = suggest_switch("openai").lower()
    assert any(word in advice for word in ("codex", "openai"))


================================================
FILE: maggy/tests/test_activity.py
================================================
"""Tests for CLI activity scanner."""

from __future__ import annotations

import json
from pathlib import Path
from unittest.mock import patch

import pytest

from maggy.services.activity import (
    ActiveSession,
    ActivityService,
    RecentPrompt,
    _parse_claude_processes,
    _recent_prompts,
)


class TestParseClaudeProcesses:
    """Behaviour of ``_parse_claude_processes`` on ps-style lines."""

    @staticmethod
    def _parse_with_cwd(ps_lines, cwd):
        """Parse ``ps_lines`` while ``_get_cwd`` is patched to return ``cwd``."""
        with patch("maggy.services.activity._get_cwd", return_value=cwd):
            return _parse_claude_processes(ps_lines)

    def test_detects_running_session(self):
        ps_line = (
            "user  1234  0.0  0.1  claude --dangerously-skip-permissions --continue"
        )
        found = self._parse_with_cwd([ps_line], "/Users/me/proj-a")
        assert len(found) == 1
        only = found[0]
        assert only.cli == "claude"
        assert only.pid == 1234
        assert only.status == "running"
        assert only.project == "proj-a"

    def test_detects_agent_subprocess(self):
        ps_line = " ".join([
            "user  5678  0.1  0.3  /path/to/claude",
            "--agent-id be-schema@maia-demo",
            "--agent-name be-schema",
            "--team-name maia-demo",
            "--parent-session-id abc-123",
        ])
        found = self._parse_with_cwd([ps_line], "/Users/me/proj-b")
        assert len(found) == 1
        agent = found[0]
        assert agent.status == "agent"
        assert agent.agent_name == "be-schema"
        assert agent.team_name == "maia-demo"

    def test_ignores_non_cli_processes(self):
        # The desktop app binary and a grep for "claude" are not CLI sessions.
        unrelated = [
            "user  9999  0.0  0.0  /Applications/Claude.app/Contents/MacOS/Claude",
            "user  8888  0.0  0.0  grep claude",
        ]
        assert _parse_claude_processes(unrelated) == []

    def test_empty_input(self):
        no_lines = []
        assert _parse_claude_processes(no_lines) == []


class TestRecentPrompts:
    """Behaviour of ``_recent_prompts`` across Claude and Codex history files."""

    @staticmethod
    def _write_history(directory: Path, entries) -> None:
        """Write ``entries`` as newline-terminated JSON lines to ``history.jsonl``."""
        payload = "\n".join(json.dumps(entry) for entry in entries) + "\n"
        (directory / "history.jsonl").write_text(payload)

    def test_reads_claude_history(self, tmp_path: Path):
        self._write_history(tmp_path, [
            {"display": "fix the bug", "timestamp": 1000, "project": "/Users/me/app", "sessionId": "s1"},
            {"display": "run tests", "timestamp": 2000, "project": "/Users/me/app", "sessionId": "s1"},
        ])
        found = _recent_prompts(
            claude_dir=tmp_path,
            codex_dir=tmp_path / "none",
            kimi_dir=tmp_path / "none2",
            limit=5,
        )
        assert len(found) == 2
        newest = found[0]
        assert newest.text == "run tests"
        assert newest.cli == "claude"
        assert newest.project == "app"

    def test_reads_codex_history(self, tmp_path: Path):
        self._write_history(tmp_path, [
            {"session_id": "c1", "ts": 3000, "text": "deploy it"},
        ])
        found = _recent_prompts(
            claude_dir=tmp_path / "none",
            codex_dir=tmp_path,
            kimi_dir=tmp_path / "none2",
            limit=5,
        )
        assert len(found) == 1
        only = found[0]
        assert only.cli == "codex"
        assert only.text == "deploy it"

    def test_merges_and_sorts_by_time(self, tmp_path: Path):
        claude_home = tmp_path / "claude"
        codex_home = tmp_path / "codex"
        for home in (claude_home, codex_home):
            home.mkdir()
        self._write_history(claude_home, [
            {"display": "old", "timestamp": 1000, "project": "/p", "sessionId": "s"},
        ])
        self._write_history(codex_home, [
            {"session_id": "c1", "ts": 5000, "text": "new"},
        ])
        found = _recent_prompts(
            claude_dir=claude_home,
            codex_dir=codex_home,
            kimi_dir=tmp_path / "none",
            limit=5,
        )
        # The entry with the larger timestamp comes first.
        assert found[0].text == "new"
        assert found[1].text == "old"

    def test_limits_output(self, tmp_path: Path):
        twenty_entries = [
            {
                "display": f"msg-{idx}", "timestamp": idx * 1000,
                "project": "/p", "sessionId": "s",
            }
            for idx in range(20)
        ]
        self._write_history(tmp_path, twenty_entries)
        found = _recent_prompts(
            claude_dir=tmp_path,
            codex_dir=tmp_path / "x",
            kimi_dir=tmp_path / "y",
            limit=5,
        )
        assert len(found) == 5

    def test_no_history_files(self, tmp_path: Path):
        missing = {
            "claude_dir": tmp_path / "a",
            "codex_dir": tmp_path / "b",
            "kimi_dir": tmp_path / "c",
        }
        assert _recent_prompts(limit=5, **missing) == []

    def test_malformed_json_skipped(self, tmp_path: Path):
        valid_line = json.dumps(
            {"display": "ok", "timestamp": 1000, "project": "/p", "sessionId": "s"},
        )
        # First line is not JSON; only the second should be returned.
        (tmp_path / "history.jsonl").write_text(f"not-json\n{valid_line}\n")
        found = _recent_prompts(
            claude_dir=tmp_path,
            codex_dir=tmp_path / "x",
            kimi_dir=tmp_path / "y",
            limit=5,
        )
        assert len(found) == 1
        assert found[0].text == "ok"


class TestActivityService:
    """``ActivityService.get_activity`` with both collectors stubbed out."""

    @staticmethod
    def _activity(sessions, prompts):
        """Run ``get_activity`` while the process scan and prompt reader
        are patched to return the given lists."""
        service = ActivityService()
        scan_stub = patch(
            "maggy.services.activity._scan_processes",
            return_value=sessions,
        )
        prompts_stub = patch(
            "maggy.services.activity._recent_prompts",
            return_value=prompts,
        )
        with scan_stub, prompts_stub:
            return service.get_activity()

    def test_get_activity_shape(self):
        payload = self._activity(sessions=[], prompts=[])
        assert "sessions" in payload
        assert "recent" in payload

    def test_serializes_sessions(self):
        running = ActiveSession(
            cli="claude",
            session_id="",
            project="myapp",
            project_path="/Users/me/myapp",
            status="running",
            last_prompt="fix bug",
            agent_name="",
            team_name="",
            pid=1234,
        )
        payload = self._activity(sessions=[running], prompts=[])

        assert len(payload["sessions"]) == 1
        serialized = payload["sessions"][0]
        assert serialized["cli"] == "claude"
        assert serialized["project"] == "myapp"
        assert serialized["pid"] == 1234

    def test_serializes_prompts(self):
        deploy_prompt = RecentPrompt(
            cli="codex",
            text="deploy",
            project="api",
            timestamp="2026-05-10T12:00:00",
            session_id="c1",
        )
        payload = self._activity(sessions=[], prompts=[deploy_prompt])

        assert len(payload["recent"]) == 1
        assert payload["recent"][0]["text"] == "deploy"


================================================
FILE: maggy/tests/test_api_endpoints.py
================================================
"""Full API endpoint validation tests.

Creates a real FastAPI app with all services wired in
(using tmp directories for SQLite) and validates every
endpoint from all 14 phases.
"""

from __future__ import annotations

from pathlib import Path
from types import SimpleNamespace

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from maggy.budget import BudgetManager
from maggy.cikg.graph import KnowledgeGraphService
from maggy.cikg.models import Edge, Node
from maggy.config import (
    BudgetConfig,
    DashboardConfig,
    MaggyConfig,
    MeshConfig,
    OrgConfig,
    RoutingConfig,
    StorageConfig,
)
from maggy.deploy import DeployService
from maggy.engram.record import EngramRecord
from maggy.engram.store import EngramStore
from maggy.event_spine.emitter import EventEmitter
from maggy.event_spine.events import IntentEvent
from maggy.event_spine.store import EventStore
from maggy.forge.connector import ForgeConnector
from maggy.lexon.router import LexonRouter
from maggy.mesh.manager import MeshManager
from maggy.mesh.store import MeshStore
from maggy.planning import PlanningService
from maggy.history.service import HistoryService
from maggy.improve.service import Introspector
from maggy.routing import RoutingService


@pytest.fixture
def app_with_services(tmp_path: Path) -> FastAPI:
    """Assemble a FastAPI app with every service attached to ``app.state``.

    All ``*.db`` files live under ``tmp_path``. The heartbeat scheduler is
    deliberately left as ``None``; tests that need one install it themselves.
    """
    config = MaggyConfig(
        org=OrgConfig(name="test-org", domain="devtools"),
        storage=StorageConfig(path=str(tmp_path / "store.db")),
        dashboard=DashboardConfig(auth_mode="local"),
        budget=BudgetConfig(daily_limit_usd=10.0),
        routing=RoutingConfig(),
        mesh=MeshConfig(enabled=True),
    )

    application = FastAPI()
    state = application.state
    state.cfg = config
    state.configured = True
    state.mode = "local"

    # Services are created in the same order as before; some take the config,
    # others only a path under tmp_path.
    state.budget = BudgetManager(config)
    state.routing = RoutingService(config)
    state.events = EventEmitter(EventStore(tmp_path / "events.db"))
    state.cikg = KnowledgeGraphService(tmp_path / "cikg.db")
    state.planning = PlanningService(config)
    state.deploy = DeployService()
    state.forge = ForgeConnector(forge_path=tmp_path / "fake-forge")
    state.engram = EngramStore(tmp_path / "engram.db")
    state.lexon = LexonRouter()

    # Mesh manager with a single pre-registered network.
    peer_settings = SimpleNamespace(
        peer_id="test-peer",
        org_key_secret="secret",
        port=8080,
        tunnel_url="",
        git_discovery=False,
    )
    mesh_manager = MeshManager(peer_settings, MeshStore(tmp_path / "mesh.db"))
    mesh_manager.add_network("test-org")
    state.mesh = mesh_manager

    # History service pointed at CLI directories that are never created.
    state.history = HistoryService(
        db_path=tmp_path / "history.db",
        cli_dirs={
            "claude": tmp_path / "no_claude",
            "codex": tmp_path / "no_codex",
            "kimi": tmp_path / "no_kimi",
        },
    )
    state.introspector = Introspector(state)
    state.heartbeat = None

    # Route modules are imported only after the state is populated.
    from maggy.api.routes import router as core_router
    from maggy.api.routes_budget import router as budget_router
    from maggy.api.routes_cikg import router as cikg_router
    from maggy.api.routes_deploy import router as deploy_router
    from maggy.api.routes_engram import router as engram_router
    from maggy.api.routes_events import router as events_router
    from maggy.api.routes_forge import router as forge_router
    from maggy.api.routes_heartbeat import router as heartbeat_router
    from maggy.api.routes_history import router as history_router
    from maggy.api.routes_improve import router as improve_router
    from maggy.api.routes_lexon import router as lexon_router
    from maggy.api.routes_mesh import router as mesh_router
    from maggy.api.routes_planning import router as planning_router
    from maggy.api.routes_routing import router as routing_router
    from maggy.api.routes_setup import router as setup_router
    from maggy.api.routes_users import router as users_router

    all_routers = (
        core_router,
        budget_router,
        cikg_router,
        deploy_router,
        engram_router,
        events_router,
        forge_router,
        heartbeat_router,
        history_router,
        improve_router,
        lexon_router,
        mesh_router,
        planning_router,
        routing_router,
        setup_router,
        users_router,
    )
    for api_router in all_routers:
        application.include_router(api_router)

    return application


@pytest.fixture
def client(app_with_services: FastAPI) -> TestClient:
    """Provide a ``TestClient`` bound to the fully wired app fixture."""
    test_client = TestClient(app_with_services)
    return test_client


# ── Phase 1: Budget ─────────────────────────────────────


class TestBudgetAPI:
    """Budget summary and per-provider breakdown endpoints."""

    def test_get_budget_empty(self, client: TestClient):
        response = client.get("/api/budget")
        assert response.status_code == 200

        summary = response.json()
        for field in ("daily_limit_usd", "spent_today_usd"):
            assert field in summary
        assert summary["spent_today_usd"] == 0.0

    def test_budget_by_provider_empty(self, client: TestClient):
        response = client.get("/api/budget/by-provider")
        assert response.status_code == 200
        assert response.json() == []

    def test_budget_with_spend(
        self, app_with_services: FastAPI,
    ):
        # Record spend straight on the manager, then read it back over HTTP.
        budget = app_with_services.state.budget
        recorded = (
            ("anthropic", "claude", 2.5),
            ("openai", "gpt-4", 1.0),
        )
        for provider, model, usd in recorded:
            budget.record_spend(provider, model, usd)

        http = TestClient(app_with_services)
        summary = http.get("/api/budget").json()
        assert summary["spent_today_usd"] == 3.5

        rows = http.get("/api/budget/by-provider").json()
        spent_by_provider = {
            row["provider"]: row["spent_usd"] for row in rows
        }
        assert spent_by_provider["anthropic"] == 2.5
        assert spent_by_provider["openai"] == 1.0


# ── Phase 2: Routing ────────────────────────────────────


class TestRoutingAPI:
    """Routing heatmap and routing-decision endpoints."""

    def test_heatmap_empty(self, client: TestClient):
        response = client.get("/api/routing/heatmap")
        assert response.status_code == 200
        assert response.json() == []

    def test_decide_low_blast(self, client: TestClient):
        response = client.get(
            "/api/routing/decide?blast=1&task_type=bugfix",
        )
        assert response.status_code == 200

        decision = response.json()
        for field in ("primary", "reason"):
            assert field in decision

    def test_decide_high_blast(self, client: TestClient):
        decision = client.get(
            "/api/routing/decide?blast=9&task_type=feature",
        ).json()
        assert decision["primary"] is not None

    def test_heatmap_after_recording(
        self, app_with_services: FastAPI,
    ):
        # Seed one outcome on the service before hitting the endpoint.
        routing = app_with_services.state.routing
        routing.record_outcome("claude", "feature", 5, 0.9)

        http = TestClient(app_with_services)
        heatmap = http.get("/api/routing/heatmap").json()
        assert len(heatmap) >= 1


class TestUsersAPI:
    """User creation endpoint."""

    def test_create_user(self, client: TestClient):
        credentials = {"email": "user@example.com", "password": "secret123"}
        response = client.post("/api/users", json=credentials)

        assert response.status_code == 201
        created = response.json()
        assert created["email"] == "user@example.com"
        # The response body must not expose the stored hash.
        assert "password_hash" not in created


# ── Phase 14: Event Spine ───────────────────────────────


class TestEventsAPI:
    """Event listing, trace and count endpoints."""

    def test_events_empty(self, client: TestClient):
        response = client.get("/api/events")
        assert response.status_code == 200
        assert response.json() == []

    def test_event_count_empty(self, client: TestClient):
        response = client.get("/api/events/count")
        assert response.status_code == 200
        assert response.json()["count"] == 0

    def test_trace_empty(self, client: TestClient):
        response = client.get("/api/events/trace/nope")
        assert response.status_code == 200
        assert response.json() == []

    def test_events_after_emit(
        self, app_with_services: FastAPI,
    ):
        # Emit one intent event tagged with task id "t-1".
        intent = IntentEvent(
            intent_text="Add login",
            decomposed_steps=["create form", "add auth"],
        )
        intent.header.task_id = "t-1"
        app_with_services.state.events.emit(intent)

        http = TestClient(app_with_services)
        # Both the filtered listing and the trace view return the one event.
        for url in ("/api/events?task_id=t-1", "/api/events/trace/t-1"):
            assert len(http.get(url).json()) == 1

        assert http.get("/api/events/count").json()["count"] == 1


# ── Phase 4: CIKG ───────────────────────────────────────


class TestCIKGAPI:
    """Knowledge-graph landscape and feature-gap endpoints."""

    def test_landscape_empty(self, client: TestClient):
        response = client.get("/api/cikg/landscape")
        assert response.status_code == 200
        assert response.json()["competitors"] == 0

    def test_gaps_no_feature(self, client: TestClient):
        response = client.get("/api/cikg/gaps/SSO")
        assert response.status_code == 200
        assert "gap_count" in response.json()

    def test_landscape_with_data(
        self, app_with_services: FastAPI,
    ):
        # One competitor linked to one feature.
        knowledge = app_with_services.state.cikg
        rival = Node(id="c1", node_type="competitor", name="Rival")
        sso = Node(id="f1", node_type="feature", name="SSO")
        knowledge.add_node(rival)
        knowledge.add_node(sso)
        knowledge.add_edge(Edge("c1", "f1", "has_feature"))

        http = TestClient(app_with_services)
        landscape = http.get("/api/cikg/landscape").json()
        assert landscape["competitors"] == 1
        assert landscape["features_tracked"] == 1

        gaps = http.get("/api/cikg/gaps/SSO").json()
        assert gaps["feature"] == "SSO"


# ── Phase 6: Planning ───────────────────────────────────


class TestPlanningAPI:
    """Plan generation: blast score 2 yields "single", 7 yields "dual"."""

    def test_single_plan(self, client: TestClient):
        request_body = {"task": "Add auth", "blast_score": 2}
        response = client.post("/api/planning/generate", json=request_body)
        assert response.status_code == 200

        generated = response.json()
        assert generated["mode"] == "single"
        assert len(generated["plan"]["steps"]) == 3

    def test_dual_plan(self, client: TestClient):
        request_body = {"task": "Refactor core", "blast_score": 7}
        generated = client.post(
            "/api/planning/generate", json=request_body,
        ).json()
        assert generated["mode"] == "dual"
        assert "diff" in generated


# ── Phase 7: Deploy ─────────────────────────────────────


class TestDeployAPI:
    """Deploy-session listing, creation and lookup endpoints."""

    def test_sessions_empty(self, client: TestClient):
        response = client.get("/api/deploy/sessions")
        assert response.status_code == 200
        assert response.json()["sessions"] == []

    def test_create_and_get(self, client: TestClient):
        created = client.post(
            "/api/deploy/sessions",
            json={"project": "web", "branch": "feat-x"},
        )
        assert created.status_code == 200
        session = created.json()
        session_id = session["session_id"]
        assert session["status"] == "building"

        # Fetch the new session back by the id the API returned.
        fetched = client.get(f"/api/deploy/sessions/{session_id}")
        assert fetched.json()["project"] == "web"

    def test_missing_session(self, client: TestClient):
        body = client.get("/api/deploy/sessions/nope").json()
        assert body.get("error") == "session not found"


# ── Phase 9: Forge ──────────────────────────────────────


class TestForgeAPI:
    """Forge status, search and capability-gap endpoints."""

    def test_forge_status(self, client: TestClient):
        response = client.get("/api/forge/status")
        assert response.status_code == 200

        status = response.json()
        for field in ("available", "registry_count"):
            assert field in status

    def test_forge_search(self, client: TestClient):
        response = client.get("/api/forge/search?q=test")
        assert response.status_code == 200
        assert "results" in response.json()

    def test_forge_gaps_empty(self, client: TestClient):
        response = client.get("/api/forge/gaps")
        assert response.status_code == 200
        assert response.json()["gaps"] == []

    def test_report_gap(self, client: TestClient):
        reported = client.post(
            "/api/forge/gaps",
            json={"capability": "slack-notify"},
        )
        assert reported.status_code == 200
        assert reported.json()["capability"] == "slack-notify"

        # The reported gap now appears in the listing.
        listed = client.get("/api/forge/gaps").json()["gaps"]
        assert len(listed) == 1


# ── Phase 12: Engram ────────────────────────────────────


class TestEngramAPI:
    """Engram query and diagnostics endpoints."""

    def test_query_empty(self, client: TestClient):
        response = client.get("/api/engram/query")
        assert response.status_code == 200
        assert response.json()["records"] == []

    def test_diagnostics_empty(self, client: TestClient):
        response = client.get("/api/engram/diagnostics")
        assert response.status_code == 200
        assert "total_memories" in response.json()

    def test_query_with_data(
        self, app_with_services: FastAPI,
    ):
        fact = EngramRecord(
            engram_id="e1",
            namespace="test",
            memory_type="fact",
            content="Test memory",
            tags=["test"],
        )
        app_with_services.state.engram.write(fact)

        http = TestClient(app_with_services)
        found = http.get("/api/engram/query?namespace=test").json()["records"]
        assert len(found) == 1
        assert found[0]["content"] == "Test memory"

    def test_diagnostics_with_data(
        self, app_with_services: FastAPI,
    ):
        decision = EngramRecord(
            engram_id="e2",
            namespace="test",
            memory_type="decision",
            content="Chose X over Y",
        )
        app_with_services.state.engram.write(decision)

        http = TestClient(app_with_services)
        diagnostics = http.get("/api/engram/diagnostics").json()
        assert diagnostics["total_memories"] >= 1


# ── Phase 13: Lexon ─────────────────────────────────────


class TestLexonAPI:
    """Lexon phrase parsing and phrase-learning endpoints."""

    def test_parse_known(self, client: TestClient):
        response = client.get("/api/lexon/parse?q=deploy")
        assert response.status_code == 200

        parsed = response.json()
        assert "resolved_tool" in parsed
        assert parsed["confidence"] > 0

    def test_parse_unknown(self, client: TestClient):
        parsed = client.get(
            "/api/lexon/parse?q=xyzzy_unknown_phrase",
        ).json()
        assert parsed["resolved_tool"] == ""

    def test_learn(self, client: TestClient):
        taught = client.post(
            "/api/lexon/learn",
            json={"phrase": "ship it", "tool": "deploy"},
        )
        assert taught.status_code == 200
        assert taught.json()["status"] == "learned"

        # The phrase just taught now resolves to the taught tool.
        parsed = client.get("/api/lexon/parse?q=ship+it").json()
        assert parsed["resolved_tool"] == "deploy"


# ── Phase 11: Mesh ──────────────────────────────────────


class TestMeshAPI:
    """Mesh status, peers, networks and quarantine endpoints."""

    def test_mesh_status_enabled(self, client: TestClient):
        response = client.get("/api/mesh/status")
        assert response.status_code == 200

        status = response.json()
        assert status["enabled"] is True
        assert status["peers"] == 0
        assert "networks" in status

    def test_mesh_peers_empty(self, client: TestClient):
        response = client.get("/api/mesh/peers")
        assert response.status_code == 200
        assert response.json()["peers"] == []

    def test_mesh_networks(self, client: TestClient):
        response = client.get("/api/mesh/networks")
        assert response.status_code == 200

        networks = response.json()["networks"]
        assert len(networks) == 1
        assert networks[0]["org"] == "test-org"

    def test_mesh_quarantine_requires_org(
        self, client: TestClient,
    ):
        # No ``org`` query parameter is supplied.
        response = client.get("/api/mesh/quarantine")
        assert response.status_code == 422
        assert "error" in response.json()

    def test_mesh_quarantine_with_org(
        self, client: TestClient,
    ):
        response = client.get("/api/mesh/quarantine?org=test-org")
        assert response.status_code == 200
        assert response.json()["items"] == []

    def test_mesh_add_peer(self, client: TestClient):
        new_peer = {
            "org": "test-org",
            "peer_id": "p1",
            "name": "remote",
            "address": "ws://x",
        }
        added = client.post("/api/mesh/peers", json=new_peer)
        assert added.json()["status"] == "added"

        listing = client.get("/api/mesh/peers?org=test-org")
        assert len(listing.json()["peers"]) == 1


# ── Unconfigured state ──────────────────────────────────


class TestUnconfiguredState:
    """Endpoints must respond sensibly when every service is ``None``."""

    @pytest.fixture
    def unconfigured_client(self) -> TestClient:
        """Client for an app whose service slots are all set to ``None``."""
        bare_app = FastAPI()
        bare_app.state.cfg = MaggyConfig()
        bare_app.state.configured = False
        service_slots = (
            "budget", "routing", "events", "cikg", "planning",
            "deploy", "forge", "engram", "lexon", "mesh",
        )
        for slot in service_slots:
            setattr(bare_app.state, slot, None)

        from maggy.api.routes_budget import router as budget_router
        from maggy.api.routes_cikg import router as cikg_router
        from maggy.api.routes_deploy import router as deploy_router
        from maggy.api.routes_engram import router as engram_router
        from maggy.api.routes_events import router as events_router
        from maggy.api.routes_forge import router as forge_router
        from maggy.api.routes_lexon import router as lexon_router
        from maggy.api.routes_mesh import router as mesh_router
        from maggy.api.routes_planning import router as planning_router
        from maggy.api.routes_routing import router as routing_router

        for api_router in (
            budget_router, cikg_router, deploy_router, engram_router,
            events_router, forge_router, lexon_router, mesh_router,
            planning_router, routing_router,
        ):
            bare_app.include_router(api_router)
        return TestClient(bare_app)

    def test_budget_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        response = unconfigured_client.get("/api/budget")
        assert response.status_code == 200
        assert response.json()["status"] == "unconfigured"

    def test_routing_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        response = unconfigured_client.get("/api/routing/heatmap")
        assert response.status_code == 200
        assert response.json() == []

    def test_events_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.get("/api/events").json()
        assert body == []

    def test_mesh_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        status = unconfigured_client.get("/api/mesh/status").json()
        assert status["enabled"] is False

    def test_engram_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.get("/api/engram/query").json()
        assert "error" in body

    def test_lexon_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.get("/api/lexon/parse?q=hi").json()
        assert "error" in body

    def test_deploy_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.get("/api/deploy/sessions").json()
        assert "error" in body

    def test_forge_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.get("/api/forge/status").json()
        assert "error" in body

    def test_planning_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.post(
            "/api/planning/generate",
            json={"task": "test"},
        ).json()
        assert "error" in body

    def test_cikg_unconfigured(
        self, unconfigured_client: TestClient,
    ):
        body = unconfigured_client.get("/api/cikg/landscape").json()
        assert "error" in body


# --- History Endpoint Tests ---


class TestHistoryEndpoints:
    """Smoke tests for the /api/history/* endpoints."""

    def test_providers(self, client: TestClient):
        response = client.get("/api/history/providers")
        assert response.status_code == 200
        assert "providers" in response.json()

    def test_analyze(self, client: TestClient):
        response = client.post("/api/history/analyze")
        assert response.status_code == 200

        totals = response.json()
        for field in ("total_sessions", "total_prompts"):
            assert field in totals

    def test_report_empty(self, client: TestClient):
        assert client.get("/api/history/report").status_code == 200

    def test_sessions(self, client: TestClient):
        # Run an analysis first so the session listing has been populated.
        client.post("/api/history/analyze")

        response = client.get("/api/history/sessions")
        assert response.status_code == 200
        assert "sessions" in response.json()

    def test_sessions_filter(self, client: TestClient):
        filtered = client.get(
            "/api/history/sessions?provider=claude",
        )
        assert filtered.status_code == 200


# --- Discovery + Enhanced Health ---


class TestDiscoveryEndpoint:
    """Discovery payload keys and the ``mode`` field of the health check."""

    def test_discovery_returns_data(
        self, client: TestClient,
    ):
        response = client.get("/api/discovery")
        assert response.status_code == 200

        discovered = response.json()
        for section in ("clis", "repos", "tokens"):
            assert section in discovered

    def test_health_has_mode(
        self, client: TestClient,
    ):
        response = client.get("/api/health")
        assert response.status_code == 200

        health = response.json()
        assert "mode" in health
        assert health["mode"] in ("full", "local")


# --- Heartbeat Endpoint Tests ---


class TestHeartbeatEndpoints:
    """Heartbeat status and trigger endpoints, with and without a scheduler."""

    def test_status_no_scheduler(self, client: TestClient):
        response = client.get("/api/heartbeat/status")
        assert response.status_code == 200
        assert response.json() == []

    def test_trigger_no_scheduler(self, client: TestClient):
        triggered = client.post("/api/heartbeat/trigger/nope")
        assert triggered.status_code == 503

    def test_status_with_scheduler(
        self, app_with_services: FastAPI,
    ):
        from maggy.heartbeat.scheduler import HeartbeatScheduler
        from unittest.mock import AsyncMock

        # Install a scheduler holding a single mocked job.
        scheduler = HeartbeatScheduler()
        scheduler.register("test_job", AsyncMock(), 60)
        app_with_services.state.heartbeat = scheduler

        response = TestClient(app_with_services).get("/api/heartbeat/status")
        assert response.status_code == 200

        jobs = response.json()
        assert len(jobs) == 1
        assert jobs[0]["name"] == "test_job"


# --- Self-Improvement Endpoint Tests ---


class TestImproveEndpoints:
    """Self-improvement report and analyze endpoints."""

    def test_report_empty(self, client: TestClient):
        response = client.get("/api/improve/report")
        assert response.status_code == 200
        assert response.json()["report"] is None

    def test_analyze_returns_report(
        self, client: TestClient,
    ):
        response = client.post("/api/improve/analyze")
        assert response.status_code == 200

        body = response.json()
        assert "report" in body
        generated = body["report"]
        for field in ("generated_at", "recommendations"):
            assert field in generated

    def test_report_after_analyze(
        self, client: TestClient,
    ):
        # After an analysis run, the report endpoint no longer returns None.
        client.post("/api/improve/analyze")
        body = client.get("/api/improve/report").json()
        assert body["report"] is not None


================================================
FILE: maggy/tests/test_benchmark_scenario.py
================================================
"""Benchmark scenario — simulate a 10-task sprint across 3 models.

Measures Maggy's effectiveness at:
  1. Routing accuracy  — correct model for each complexity tier
  2. Budget efficiency — spend distribution across providers
  3. Fallback resilience — recovery when models hit quota
  4. Fatigue awareness — detects and reacts to context overload
  5. Lock safety — prevents file clobbering between agents
  6. Escalation — auto-escalates repeated failures
  7. Checkpoint continuity — survives model handoff
  8. Calibration learning — penalizes bad models over time
  9. Dual planning — counter-checks high-blast tasks
  10. Observability — signals recorded for all activity
"""

from __future__ import annotations

from unittest.mock import AsyncMock

import pytest

from maggy.adapters.pi import PiAdapter, RunResult
from maggy.budget import BudgetManager
from maggy.calibration.tracker import CalibrationTracker
from maggy.checkpoint import CheckpointManager
from maggy.config import (
    CodebaseConfig,
    MaggyConfig,
    OrgConfig,
    ProjectConfig,
    StorageConfig,
)
from maggy.coordination.lock_manager import LockManager
from maggy.escalation.protocol import Escalator
from maggy.mnemos.fatigue import FatigueTracker
from maggy.mnemos.signals import SignalLog
from maggy.observability.collector import ObservabilityCollector
from maggy.providers.base import Task
from maggy.registry import ProjectRegistry
from maggy.routing import RoutingContext, RoutingService
from maggy.services.executor import ExecutorService
from maggy.services.executor_types import SessionCtx
from maggy.services.planner import DualPlanner


# -- fixtures ----------------------------------------------------------------

def _cfg(tmp_path) -> MaggyConfig:
    """Build the benchmark config: one codebase and one project under
    ``tmp_path / "repo"``, with storage at ``tmp_path / "store.db"``."""
    repo_path = str(tmp_path / "repo")
    store_path = str(tmp_path / "store.db")

    codebase = CodebaseConfig(path=repo_path, key="app")
    project = ProjectConfig(
        name="app",
        repo="bench/app",
        path=repo_path,
        default_branch="main",
    )
    return MaggyConfig(
        org=OrgConfig(name="benchmark-org"),
        storage=StorageConfig(path=store_path),
        codebases=[codebase],
        projects=[project],
    )


# The ten-task sprint, built from a compact table:
# (id, title, description, blast score, task type, extra raw flags).
SPRINT_TASKS = [
    Task(
        id=task_id,
        title=title,
        description=summary,
        raw={"blast_score": blast, "task_type": kind, **flags},
    )
    for task_id, title, summary, blast, kind, flags in (
        ("T-1", "Fix README typo", "Typo fix", 1, "docs", {}),
        ("T-2", "Lint cleanup", "Format files", 1, "formatting", {}),
        ("T-3", "Add health endpoint", "GET /health", 3, "feature", {}),
        ("T-4", "Pagination for /users", "Cursor pagination",
         5, "feature", {}),
        ("T-5", "Refactor auth service", "Extract middleware",
         6, "refactor", {}),
        ("T-6", "Add rate limiter", "Redis rate limit", 7, "feature", {}),
        ("T-7", "Migrate to v2 API", "Breaking change", 8, "refactor", {}),
        ("T-8", "Fix XSS in comments", "Sanitize HTML",
         9, "security", {"security_sensitive": True}),
        ("T-9", "OAuth2 PKCE flow", "Full OAuth impl",
         10, "security", {"security_sensitive": True}),
        ("T-10", "Performance audit", "Profile + optimize",
         7, "performance", {}),
    )
]


# -- 1. Routing accuracy -----------------------------------------------------

class TestRoutingAccuracy:
    """Every task lands on the right model tier."""

    @staticmethod
    def _primary_by_task(tmp_path) -> dict:
        """Route every sprint task and map task id -> primary model name."""
        router = RoutingService(_cfg(tmp_path))
        chosen = {}
        for task in SPRINT_TASKS:
            meta = task.raw or {}
            decision = router.route(RoutingContext(
                blast_score=meta.get("blast_score", 0),
                task_type=meta.get("task_type", "general"),
                security_sensitive=meta.get("security_sensitive", False),
            ))
            primary = decision.primary
            # ``primary`` is either a plain name or an object with ``.name``.
            chosen[task.id] = (
                primary if isinstance(primary, str) else primary.name
            )
        return chosen

    def test_all_10_tasks_route_correctly(self, tmp_path):
        routed = self._primary_by_task(tmp_path)

        acceptable = {
            # Low blast (1-3) → cheap tier unless rules override
            # T-1 is docs → rules force claude
            "T-1": ("claude",),
            "T-2": ("local", "kimi"),
            "T-3": ("local", "kimi"),
            # Blast 5 → local(0-5) cheapest, codex(4-10), claude(5-10)
            "T-4": ("local", "codex"),
            # Blast 6 → codex(4-10) cheapest, claude(5-10)
            "T-5": ("codex", "claude"),
            # Blast 7 → codex or claude
            "T-6": ("codex", "claude"),
            # Blast 8+ → codex or claude (security→claude)
            "T-7": ("codex", "claude"),
            # Security always premium (claude)
            "T-8": ("claude",),
            "T-9": ("claude",),
            "T-10": ("codex", "claude"),
        }
        for task_id, models in acceptable.items():
            assert routed[task_id] in models

    def test_routing_accuracy_score(self, tmp_path):
        """Compute accuracy as % of correct routing decisions."""
        expected_tiers = {
            "T-1": "premium",
            "T-2": "cheap",
            "T-3": "cheap",
            "T-4": "cheap",   # local covers 0-5
            "T-5": "mid",     # codex covers 4-10
            "T-6": "mid",     # codex covers 4-10
            "T-7": "mid",     # codex (no security override)
            "T-8": "premium",
            "T-9": "premium",
            "T-10": "mid",    # codex covers 4-10
        }
        tier_of_model = {
            "local": "cheap",
            "kimi": "cheap",
            "codex": "mid",
            "claude": "premium",
        }

        routed = self._primary_by_task(tmp_path)
        hits = sum(
            1
            for task_id, model in routed.items()
            if tier_of_model.get(model, "unknown") == expected_tiers[task_id]
        )

        accuracy = hits / len(SPRINT_TASKS)
        assert accuracy >= 0.9, f"Routing accuracy {accuracy:.0%} < 90%"


# -- 2. Budget efficiency ----------------------------------------------------

class TestBudgetEfficiency:
    """Checks how recorded spend is split across providers."""

    def test_spend_distribution(self, tmp_path):
        """Record a fixed spend ledger and assert the moonshot and anthropic shares."""
        budget = BudgetManager(_cfg(tmp_path))

        # Simulate spend from a 10-task sprint: (provider, model, usd).
        ledger = (
            ("moonshot", "kimi-k2", 0.03),
            ("moonshot", "kimi-k2", 0.03),
            ("moonshot", "kimi-k2", 0.02),
            ("openai", "gpt-4o", 0.30),
            ("openai", "gpt-4o", 0.25),
            ("anthropic", "claude-sonnet-4", 1.20),
            ("anthropic", "claude-sonnet-4", 1.50),
            ("anthropic", "claude-sonnet-4", 1.80),
            ("anthropic", "claude-sonnet-4", 1.60),
            ("anthropic", "claude-sonnet-4", 1.40),
        )
        for provider, model, usd in ledger:
            budget.record_spend(provider, model, usd)

        spent = {row["provider"]: row["spent_usd"] for row in budget.by_provider()}
        grand_total = sum(spent.values())

        # Cheap tasks should be < 5% of total
        cheap_pct = spent.get("moonshot", 0) / grand_total
        assert cheap_pct < 0.05, f"Cheap tier {cheap_pct:.0%} >= 5%"

        # Premium should be > 70% (complex tasks dominate)
        premium_pct = spent.get("anthropic", 0) / grand_total
        assert premium_pct > 0.70, f"Premium {premium_pct:.0%} <= 70%"


# -- 3. Fallback resilience --------------------------------------------------

class TestFallbackResilience:
    """Exercises ``PiAdapter.send_with_fallback`` with a stubbed ``send_prompt``."""

    @pytest.mark.asyncio
    async def test_quota_recovery(self):
        """Quota failures on kimi/deepseek must end with a success from another model."""
        adapter = PiAdapter()
        tried: list[str] = []
        quota_limited = ("kimi", "deepseek")

        async def stub_send(model, prompt, wd, max_turns=20, timeout=600):
            # Log the attempt, then fail with a quota hit for the limited models only.
            tried.append(model)
            if model not in quota_limited:
                return RunResult(model=model, success=True, output="recovered")
            return RunResult(model=model, success=False, error="quota", quota_hit=True)

        adapter.send_prompt = stub_send
        outcome = await adapter.send_with_fallback("kimi", "test", "/tmp")

        assert outcome.success
        assert len(tried) >= 3, "Should try multiple models"
        assert tried[0] == "kimi"
        assert outcome.model not in quota_limited

    @pytest.mark.asyncio
    async def test_full_chain_failure(self):
        """When every model reports failure the overall result is unsuccessful."""
        adapter = PiAdapter()

        async def always_down(model, prompt, wd, max_turns=20, timeout=600):
            return RunResult(model=model, success=False, error="down")

        adapter.send_prompt = always_down
        outcome = await adapter.send_with_fallback("kimi", "test", "/tmp")

        assert not outcome.success


# -- 4. Fatigue awareness ----------------------------------------------------

class TestFatigueAwareness:
    """FatigueTracker behaviour under rising load, model switches and saturation."""

    def test_progressive_fatigue(self):
        """Five rounds of growing load push the composite score above 0.3."""
        tracker = FatigueTracker(context_window=200_000)
        assert tracker.state() == "ok"

        # Simulate 5 steps of increasing context
        for step in range(1, 6):
            tracker.record("context_load", 0.15 * step)
            tracker.record("turn_pressure", 0.1 * step)

        assert tracker.composite() > 0.3

    def test_model_switch_degrades_fatigue(self):
        """Each model switch raises ``reread_ratio`` and updates the context window."""
        tracker = FatigueTracker(context_window=200_000)
        tracker.record("reread_ratio", 0.2)

        # First switch: 0.2 -> 0.35, window follows the new model.
        tracker.on_model_switch(128_000)
        assert tracker.dimensions["reread_ratio"] == pytest.approx(0.35)
        assert tracker.context_window == 128_000

        # Second switch: 0.35 -> 0.50.
        tracker.on_model_switch(128_000)
        assert tracker.dimensions["reread_ratio"] == pytest.approx(0.50)

    def test_critical_state_detection(self):
        """All four dimensions at 0.85 put the tracker in the "critical" state."""
        tracker = FatigueTracker()
        dimensions = ("context_load", "turn_pressure", "reread_ratio", "handoff_risk")
        for dimension in dimensions:
            tracker.record(dimension, 0.85)
        assert tracker.state() == "critical"


# -- 5. Lock safety ----------------------------------------------------------

class TestLockSafety:
    """LockManager: exclusive acquisition, release and bulk release per holder."""

    def test_concurrent_agent_protection(self, tmp_path):
        """A second agent cannot take a held file but can take a free one."""
        manager = LockManager(tmp_path / "bench-locks.db")

        first_grab = manager.acquire("src/auth.py", "kimi-agent")
        assert first_grab
        contested = manager.acquire("src/auth.py", "claude-agent")
        assert not contested
        other_file = manager.acquire("src/api.py", "claude-agent")
        assert other_file

        # Both paths are now held, so both are reported.
        held = manager.conflicts(["src/auth.py", "src/api.py"])
        assert len(held) == 2

    def test_release_allows_reacquire(self, tmp_path):
        """Once released, a file can be acquired by a different agent."""
        manager = LockManager(tmp_path / "bench-locks.db")
        path = "src/main.py"
        manager.acquire(path, "agent-a")
        manager.release(path, "agent-a")
        assert manager.acquire(path, "agent-b")

    def test_release_all_by_session(self, tmp_path):
        """``release_all`` returns the number of locks the holder had."""
        manager = LockManager(tmp_path / "bench-locks.db")
        for path in ("f1.py", "f2.py", "f3.py"):
            manager.acquire(path, "sess-1")
        released = manager.release_all("sess-1")
        assert released == 3


# -- 6. Escalation -----------------------------------------------------------

class TestEscalation:
    """Escalator bookkeeping: escalations show up as pending until resolved."""

    def test_auto_escalate_after_failures(self, tmp_path):
        """An escalation on an empty store yields exactly one pending packet."""
        escalator = Escalator(tmp_path / "bench-esc.db")
        assert len(escalator.list_pending()) == 0

        escalator.escalate("sess-1", "repeated_failure", {"failures": 3})

        queue = escalator.list_pending()
        assert len(queue) == 1
        assert queue[0].reason == "repeated_failure"

    def test_resolve_clears_pending(self, tmp_path):
        """Resolving the only packet leaves nothing pending."""
        escalator = Escalator(tmp_path / "bench-esc.db")
        packet = escalator.escalate("sess-2", "stuck", {})
        escalator.resolve(packet.id, "retry with claude")
        remaining = escalator.list_pending()
        assert len(remaining) == 0


# -- 7. Checkpoint continuity ------------------------------------------------

class TestCheckpointContinuity:
    """CheckpointManager round-trips session state and forgets it after delete."""

    def test_model_handoff_preserves_state(self, tmp_path):
        """A written checkpoint reads back with goal, history and fatigue intact."""
        store = CheckpointManager(tmp_path / "bench-cp")
        snapshot = {
            "goal": "Add OAuth2",
            "model_history": ["kimi", "gpt", "claude"],
            "progress": ["Step 1 by kimi", "Step 2 by gpt"],
            "current_subgoal": "Write tests",
            "fatigue_score": 0.45,
        }
        store.write("session-x", snapshot)

        restored = store.read("session-x")
        assert restored["goal"] == "Add OAuth2"
        assert len(restored["model_history"]) == 3
        assert restored["fatigue_score"] == 0.45

    def test_checkpoint_cleanup(self, tmp_path):
        """``read`` returns None once the checkpoint has been deleted."""
        store = CheckpointManager(tmp_path / "bench-cp")
        session_id = "temp-sess"
        store.write(session_id, {"goal": "temp"})
        assert store.read(session_id) is not None

        store.delete(session_id)
        assert store.read(session_id) is None


# -- 8. Calibration learning -------------------------------------------------

class TestCalibrationLearning:
    """Calibration records separate good from bad models and do not break routing."""

    def test_bad_model_gets_penalized(self, tmp_path):
        """Ten far-off records score below 0.5; ten close records score above 0.9."""
        tracker = CalibrationTracker(tmp_path / "bench-cal.db")

        # Consistently bad predictions for "kimi" first, then good ones for "claude".
        history = (
            ("kimi", (0.9, 0.1)),
            ("claude", (0.8, 0.85)),
        )
        for model, pair in history:
            for _ in range(10):
                tracker.record(model, "feature", *pair)

        kimi_acc = tracker.accuracy("kimi")
        claude_acc = tracker.accuracy("claude")

        assert kimi_acc < 0.5, f"Bad model accuracy {kimi_acc} >= 0.5"
        assert claude_acc > 0.9, f"Good model accuracy {claude_acc} <= 0.9"

    def test_routing_penalizes_uncalibrated(self, tmp_path):
        """Routing still yields a primary model after kimi's calibration is poisoned."""
        router = RoutingService(_cfg(tmp_path))

        # Poison kimi's calibration
        for _ in range(10):
            router.calibration.record("kimi", "feature", 0.9, 0.1)

        decision = router.route(RoutingContext(blast_score=1, task_type="feature"))
        if isinstance(decision.primary, str):
            name = decision.primary
        else:
            name = decision.primary.name

        # kimi should be penalized — routing skips it
        # (only applies if kimi was the primary)
        assert name is not None  # routing still works


# -- 9. Dual planning -------------------------------------------------------

class TestDualPlanning:
    """DualPlanner consults both claude and codex and surfaces reported conflicts."""

    @pytest.mark.asyncio
    async def test_counter_check_runs(self):
        """The codex reply starting with "CONFLICT:" ends up in ``result.conflicts``."""
        consulted: list[str] = []

        async def fake_send(model, prompt, wd, turns=5, timeout=600):
            consulted.append(model)
            # Only codex objects; every other model returns a plain plan step.
            if model == "codex":
                reply = "CONFLICT: Missing error handling"
            else:
                reply = "Step 1: implement"
            return RunResult(model=model, success=True, output=reply)

        adapter = PiAdapter()
        adapter.send_prompt = fake_send
        plan = await DualPlanner(adapter).dual_plan(
            "Add OAuth", "Implement OAuth2", "/tmp",
        )

        assert "claude" in consulted
        assert "codex" in consulted
        assert len(plan.conflicts) >= 1
        assert "Missing error handling" in plan.conflicts[0]


# -- 10. Observability -------------------------------------------------------

class TestObservability:
    """Signal storage: per-source queries and the JSONL tail reader."""

    def test_signal_recording(self, tmp_path):
        """Signals are returned per source: two for "app", one for "api"."""
        collector = ObservabilityCollector(tmp_path / "bench-obs.db")
        readings = (
            ("app", "deploy_status", 1.0),
            ("app", "test_coverage", 0.87),
            ("api", "latency_p99", 0.250),
        )
        for source, signal_type, value in readings:
            collector.record_signal(source, signal_type, value)

        from_app = collector.recent_signals("app", limit=10)
        assert len(from_app) == 2

        from_api = collector.recent_signals("api", limit=10)
        assert len(from_api) == 1
        assert from_api[0]["signal_type"] == "latency_p99"

    def test_signal_log_jsonl(self, tmp_path):
        """``recent(3)`` after five appends returns three entries starting at step 2."""
        signal_log = SignalLog(tmp_path / "bench-signals.jsonl")
        for step in range(5):
            signal_log.append({"step": step, "model": "claude"})

        tail = signal_log.recent(3)
        assert len(tail) == 3
        assert tail[0]["step"] == 2


# -- 11. Full executor pipeline (E2E) ----------------------------------------

class TestFullExecutorPipeline:
    """End-to-end: push every ``SPRINT_TASKS`` entry through ``ExecutorService`` in plan mode.

    Both tests stub the adapter's ``send_prompt`` and temporarily replace the
    module attribute ``executor_helpers.build_icpg_context``. The original is
    put back in a ``finally`` block so the patch cannot leak into tests that
    run afterwards.
    """

    @staticmethod
    async def _run_sprint(executor, repo_dir):
        """Register one running plan-mode session per sprint task and run it.

        Args:
            executor: the ``ExecutorService`` under test.
            repo_dir: working directory (as a string) handed to every session.
        """
        for task in SPRINT_TASKS:
            sid = f"s-{task.id}"
            session = {
                "id": sid, "task_id": task.id,
                "task_title": task.title, "mode": "plan",
                "working_dir": repo_dir,
                "status": "running", "started_at": "", "output": "",
            }
            executor._sessions[sid] = session
            ctx = SessionCtx(session, task, repo_dir)
            await executor._run(ctx, "plan")

    @pytest.mark.asyncio
    async def test_10_task_sprint(self, tmp_path):
        """Simulate a full 10-task sprint through the executor."""
        cfg = _cfg(tmp_path)
        repo = tmp_path / "repo"
        repo.mkdir()
        provider = AsyncMock()
        executor = ExecutorService(cfg, provider)

        models_used: list[str] = []

        async def fake_send(model, prompt, wd, max_turns=20, timeout=600):
            # Log which model was asked; always succeed at a flat cost.
            models_used.append(model)
            return RunResult(model=model, success=True, output="done", cost_usd=0.10)

        async def fake_ctx(cfg, task):
            return ""

        executor._pi.send_prompt = fake_send
        from maggy.services import executor_helpers
        _orig_icpg = executor_helpers.build_icpg_context
        executor_helpers.build_icpg_context = fake_ctx
        try:
            await self._run_sprint(executor, str(repo))
        finally:
            # Undo the module-level patch so later tests see the real helper.
            executor_helpers.build_icpg_context = _orig_icpg

        # Verify multi-model distribution
        unique_models = set(models_used)
        assert len(unique_models) >= 3, f"Only {unique_models} used"
        assert "claude" in unique_models
        assert "codex" in unique_models
        cheap = {"kimi", "local"}
        assert cheap & unique_models, "No cheap model used"

        # Verify fatigue was tracked
        assert executor._fatigue.dimensions["context_load"] > 0

        # No checkpoint may remain readable for any task once the sprint is over.
        for task in SPRINT_TASKS:
            clean_id = task.id.replace("/", "-")
            assert executor._checkpoint.read(clean_id) is None

    @pytest.mark.asyncio
    async def test_sprint_budget_summary(self, tmp_path):
        """After a sprint, budget tracks all providers."""
        cfg = _cfg(tmp_path)
        repo = tmp_path / "repo"
        repo.mkdir()
        provider = AsyncMock()
        executor = ExecutorService(cfg, provider)

        # Per-model cost reported by the stub; other models fall back to 0.05.
        cost_map = {"kimi": 0.01, "local": 0.0, "claude": 0.80, "codex": 0.10}

        async def fake_send(model, prompt, wd, max_turns=20, timeout=600):
            return RunResult(model=model, success=True, output="ok", cost_usd=cost_map.get(model, 0.05))

        async def fake_ctx(cfg, task):
            return ""

        executor._pi.send_prompt = fake_send
        from maggy.services import executor_helpers
        _orig_icpg = executor_helpers.build_icpg_context
        executor_helpers.build_icpg_context = fake_ctx
        try:
            await self._run_sprint(executor, str(repo))
        finally:
            # Undo the module-level patch so later tests see the real helper.
            executor_helpers.build_icpg_context = _orig_icpg

        breakdown = executor._budget.by_provider()
        providers = {r["provider"] for r in breakdown}
        assert len(providers) >= 2, f"Only {providers}"


# -- 12. Project Registry CRUD -----------------------------------------------

class TestProjectRegistry:
    """ProjectRegistry add / get / remove round trip."""

    def test_full_lifecycle(self, tmp_path):
        """Start with one project, add "api", then remove it again."""
        registry = ProjectRegistry(_cfg(tmp_path))
        assert len(registry.list()) == 1

        extra = ProjectConfig(
            name="api",
            repo="bench/api",
            path="/tmp/api",
            default_branch="main",
        )
        registry.add(extra)
        assert len(registry.list()) == 2
        assert registry.get("api") is not None

        registry.remove("api")
        assert registry.get("api") is None
        assert len(registry.list()) == 1


================================================
FILE: maggy/tests/test_bootstrap.py
================================================
"""Tests for startup bootstrap — auto-populate services."""

from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest


def _make_cfg(tmp_path: Path):
    """Create two throwaway codebases under ``tmp_path`` and return a config listing them.

    ``repo-a`` receives a Python and a TypeScript file, ``repo-b`` a Go file.
    """
    from maggy.config import CodebaseConfig, MaggyConfig

    # Directory name -> {file name: contents}; dict order fixes creation order.
    layout = {
        "repo-a": {
            "main.py": "print('hello')",
            "utils.ts": "export const x = 1;",
        },
        "repo-b": {
            "app.go": "package main",
        },
    }

    roots = []
    for key, files in layout.items():
        root = tmp_path / key
        root.mkdir()
        for filename, text in files.items():
            (root / filename).write_text(text)
        roots.append((key, root))

    # Config entries are built only after every directory exists on disk.
    return MaggyConfig(
        codebases=[
            CodebaseConfig(path=str(root), key=key) for key, root in roots
        ],
    )


class TestSeedCIKG:
    """Test CIKG seeding from codebases."""

    @staticmethod
    def _seeded_graph(tmp_path, cfg, passes=1):
        """Open a graph at ``tmp_path / "cikg.db"`` and seed it ``passes`` times from ``cfg``."""
        from maggy.main import _seed_cikg
        from maggy.cikg.graph import KnowledgeGraphService
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        for _ in range(passes):
            _seed_cikg(graph, cfg)
        return graph

    def test_creates_codebase_nodes(self, tmp_path):
        """One "codebase" node per configured codebase, named after its key."""
        graph = self._seeded_graph(tmp_path, _make_cfg(tmp_path))
        codebases = graph.list_nodes("codebase")
        assert len(codebases) == 2
        assert {node.name for node in codebases} == {"repo-a", "repo-b"}

    def test_creates_language_nodes(self, tmp_path):
        """The fixture's .py, .ts and .go files yield matching "technology" nodes."""
        graph = self._seeded_graph(tmp_path, _make_cfg(tmp_path))
        found = {node.name for node in graph.list_nodes("technology")}
        for language in ("python", "typescript", "go"):
            assert language in found

    def test_creates_edges(self, tmp_path):
        """repo-a has an outgoing "uses_technology" edge."""
        graph = self._seeded_graph(tmp_path, _make_cfg(tmp_path))
        outgoing = graph.get_edges("codebase:repo-a", "out")
        assert "uses_technology" in {edge.edge_type for edge in outgoing}

    def test_skips_missing_dirs(self, tmp_path):
        """A codebase whose path does not exist produces no codebase node."""
        from maggy.config import CodebaseConfig, MaggyConfig
        ghost = CodebaseConfig(path="/nonexistent/path", key="missing")
        graph = self._seeded_graph(tmp_path, MaggyConfig(codebases=[ghost]))
        assert graph.list_nodes("codebase") == []

    def test_idempotent(self, tmp_path):
        """Seeding twice does not duplicate codebase nodes."""
        graph = self._seeded_graph(tmp_path, _make_cfg(tmp_path), passes=2)
        codebases = graph.list_nodes("codebase")
        assert len(codebases) == 2  # no duplicates


class TestBootstrap:
    """Test the full _bootstrap function."""

    @staticmethod
    def _app(history=None, introspector=None, cfg=None):
        """Build a mock app whose ``state`` carries the given services; ``cikg`` is always None."""
        app = MagicMock()
        app.state.history = history
        app.state.introspector = introspector
        app.state.cikg = None
        app.state.cfg = cfg
        return app

    @pytest.mark.asyncio
    async def test_calls_services(self):
        """With history and introspector present, each ``analyze`` is called exactly once."""
        from maggy.main import _bootstrap
        app = self._app(
            history=MagicMock(),
            introspector=MagicMock(),
            cfg=MagicMock(),
        )
        await _bootstrap(app)
        for service in (app.state.history, app.state.introspector):
            service.analyze.assert_called_once()

    @pytest.mark.asyncio
    async def test_handles_missing_services(self):
        """Every service set to None: bootstrap must finish without raising."""
        from maggy.main import _bootstrap
        await _bootstrap(self._app())  # should not raise

    @pytest.mark.asyncio
    async def test_handles_analyze_error(self):
        """A RuntimeError from ``history.analyze`` must not escape bootstrap."""
        from maggy.main import _bootstrap
        broken_history = MagicMock()
        broken_history.analyze.side_effect = RuntimeError("db locked")
        await _bootstrap(self._app(history=broken_history))  # should not raise


================================================
FILE: maggy/tests/test_budget.py
================================================
"""Tests for BudgetManager — spend tracking and status."""

from __future__ import annotations

from maggy.budget import ProviderBudget, TaskSpendTracker
from maggy.config import BudgetConfig
from maggy.budget import BudgetManager


class TestBudgetTracking:
    """``today_spend`` starts at zero and grows with each recorded spend."""

    def test_initial_spend_is_zero(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        spent = manager.today_spend()
        assert spent == 0.0

    def test_record_and_read(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", 0.5)
        spent = manager.today_spend()
        assert spent >= 0.5

    def test_multiple_records_sum(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        entries = (
            ("anthropic", "claude", 0.3),
            ("openai", "gpt-4o", 0.2),
        )
        for provider, model, usd in entries:
            manager.record_spend(provider, model, usd)
        assert manager.today_spend() >= 0.5


class TestBudgetStatus:
    """``budget_status()["status"]`` after a single anthropic spend of a given size.

    The thresholds come from the ``mock_cfg`` fixture, which is defined elsewhere.
    """

    @staticmethod
    def _status_after(mock_cfg, usd):
        """Record ``usd`` of spend on a fresh manager and return its status label."""
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", usd)
        report = manager.budget_status()
        return report["status"]

    def test_ok_status(self, mock_cfg):
        assert self._status_after(mock_cfg, 1.0) == "ok"

    def test_warning_status(self, mock_cfg):
        assert self._status_after(mock_cfg, 8.5) == "warning"

    def test_exhausted_status(self, mock_cfg):
        assert self._status_after(mock_cfg, 10.0) == "exhausted"


class TestByProvider:
    """``by_provider`` returns one row per provider that has recorded spend."""

    def test_breakdown(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", 0.5)
        manager.record_spend("openai", "gpt-4o", 0.3)

        rows = manager.by_provider()
        assert len(rows) == 2

        seen = {row["provider"] for row in rows}
        for expected in ("anthropic", "openai"):
            assert expected in seen


class TestIsExhausted:
    """``is_exhausted`` is false on a fresh manager and true after an 11.0 spend."""

    def test_not_exhausted(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        exhausted = manager.is_exhausted()
        assert not exhausted

    def test_exhausted(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", 11.0)
        exhausted = manager.is_exhausted()
        assert exhausted


class TestProviderBudgets:
    """Per-provider limits: exhaustion checks and cheapest-available selection."""

    def test_provider_exhaustion_uses_provider_limit(self, mock_cfg):
        """Spending 1.1 against moonshot's 1.0 entry exhausts moonshot but not openai."""
        per_provider = [
            ProviderBudget("moonshot", 1.0, "kimi"),
            ProviderBudget("openai", 5.0, "gpt"),
        ]
        mock_cfg.budget = BudgetConfig(daily_limit_usd=20.0, providers=per_provider)

        manager = BudgetManager(mock_cfg)
        manager.record_spend("moonshot", "kimi", 1.1)

        assert manager.is_provider_exhausted("moonshot")
        assert not manager.is_provider_exhausted("openai")

    def test_cheapest_available_skips_exhausted_provider(self, mock_cfg):
        """With moonshot spent up to its 1.0 entry, the choice falls to "gpt"."""
        per_provider = [
            ProviderBudget("moonshot", 1.0, "kimi"),
            ProviderBudget("openai", 5.0, "gpt"),
        ]
        mock_cfg.budget = BudgetConfig(providers=per_provider)

        manager = BudgetManager(mock_cfg)
        manager.record_spend("moonshot", "kimi", 1.0)

        choice = manager.cheapest_available()
        assert choice == "gpt"


class TestTokenTracking:
    """Input/output token counters passed as the 4th and 5th ``record_spend`` arguments."""

    def test_initial_tokens_zero(self, mock_cfg):
        manager = BudgetManager(mock_cfg)
        assert manager.today_tokens() == {"input": 0, "output": 0}

    def test_record_and_read_tokens(self, mock_cfg):
        """Token counts from two providers are summed in the overall totals."""
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", 0.5, 1000, 500)
        manager.record_spend("openai", "gpt-4o", 0.3, 2000, 800)

        totals = manager.today_tokens()
        assert totals["input"] == 3000
        assert totals["output"] == 1300

    def test_tokens_by_provider(self, mock_cfg):
        """Passing a provider name restricts the totals to that provider."""
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", 0.5, 1000, 500)
        manager.record_spend("openai", "gpt", 0.3, 2000, 800)

        anthropic_only = manager.today_tokens("anthropic")
        assert anthropic_only["input"] == 1000

    def test_budget_status_includes_tokens(self, mock_cfg):
        """``budget_status`` exposes ``input_tokens`` and ``output_tokens``."""
        manager = BudgetManager(mock_cfg)
        manager.record_spend("anthropic", "claude", 0.5, 1500, 600)

        report = manager.budget_status()
        assert report["input_tokens"] == 1500
        assert report["output_tokens"] == 600


class TestTaskSpendTracker:
    """Per-task spend totals, limit detection and edit-loop detection."""

    def test_records_total_cost(self) -> None:
        """Recorded costs accumulate into ``total()``."""
        spend = TaskSpendTracker(5.0)
        for cost in (1.5, 0.5):
            spend.record(cost)
        assert spend.total() == 2.0

    def test_detects_exceeded_spend(self) -> None:
        """Recording exactly the constructor value (2.0) counts as exceeded."""
        spend = TaskSpendTracker(2.0)
        spend.record(2.0)
        assert spend.is_exceeded()

    def test_tracks_edit_loops(self) -> None:
        """Four edits to one file are reported as a loop; a single edit is not."""
        spend = TaskSpendTracker(5.0)
        edits = ["maggy/services/planner.py"] * 4 + ["maggy/budget.py"]
        for path in edits:
            spend.record_edit(path)
        assert spend.detect_loop() == ["maggy/services/planner.py"]

    def test_budget_config_has_task_limit(self) -> None:
        """``BudgetConfig`` stores ``max_spend_per_task`` as given."""
        config = BudgetConfig(max_spend_per_task=3.5)
        assert config.max_spend_per_task == 3.5


================================================
FILE: maggy/tests/test_calibration.py
================================================
"""Tests for calibration tracking."""

from __future__ import annotations

import pytest

from maggy.calibration import CalibrationTracker


def test_records_accuracy_and_error(tmp_path) -> None:
    """Two records that are each 0.1 apart give accuracy 0.9 and calibration error 0.1."""
    cal = CalibrationTracker(tmp_path / "calibration.db")
    for pair in ((0.8, 0.7), (0.4, 0.5)):
        cal.record("claude", "planning", *pair)

    accuracy = cal.accuracy("claude")
    error = cal.calibration_error("claude")
    assert accuracy == pytest.approx(0.9)
    assert error == pytest.approx(0.1)


def test_unknown_model_returns_zero(tmp_path) -> None:
    """A model with no records reports 0.0 for both accuracy and calibration error."""
    cal = CalibrationTracker(tmp_path / "calibration.db")
    unseen = "codex"
    assert cal.accuracy(unseen) == 0.0
    assert cal.calibration_error(unseen) == 0.0


def test_accuracy_clamps_at_zero_for_large_errors(tmp_path) -> None:
    """A record whose two values are 2.0 apart must not give a negative accuracy."""
    cal = CalibrationTracker(tmp_path / "calibration.db")
    cal.record("claude", "planning", 0.0, 2.0)
    accuracy = cal.accuracy("claude")
    assert accuracy >= 0.0


================================================
FILE: maggy/tests/test_cascade.py
================================================
"""Tests for cascade execution — quality-gate-based model escalation."""

from __future__ import annotations

import pytest

from maggy.adapters.pi import PiAdapter, RunResult
from maggy.services.cascade import cascade_execute


class TestCascadeNoEscalation:
    """The cascade stops at the first model when the gate returns 4."""

    @pytest.mark.asyncio
    async def test_first_model_passes(self):
        adapter = PiAdapter()
        asked: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            asked.append(model_name)
            return RunResult(model=model_name, success=True, output="good")

        async def good_gate(output: str) -> int:
            return 4

        adapter.send_prompt = fake_send
        ladder = ["local", "gpt", "claude"]
        outcome = await cascade_execute(
            adapter, ladder, "test", "/tmp", good_gate,
        )

        assert outcome.model == "local"
        assert not outcome.escalated
        assert len(asked) == 1


class TestCascadeEscalation:
    """Low gate scores move the cascade on to the next model in the list."""

    @pytest.mark.asyncio
    async def test_low_quality_escalates(self):
        """Gate scores 2 then 4: the second model ("gpt") wins after one escalation."""
        adapter = PiAdapter()
        asked: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            asked.append(model_name)
            return RunResult(model=model_name, success=True, output="ok")

        # One gate score per attempt, consumed in order.
        verdicts = iter([2, 4])

        async def improving_gate(output: str) -> int:
            return next(verdicts)

        adapter.send_prompt = fake_send
        ladder = ["local", "gpt", "claude"]
        outcome = await cascade_execute(
            adapter, ladder, "test", "/tmp", improving_gate,
        )

        assert outcome.model == "gpt"
        assert outcome.escalated
        assert len(asked) == 2

    @pytest.mark.asyncio
    async def test_max_3_attempts(self):
        """A gate that always returns 1 makes the cascade try all three models."""
        adapter = PiAdapter()
        asked: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            asked.append(model_name)
            return RunResult(model=model_name, success=True, output="bad")

        async def always_bad(output: str) -> int:
            return 1

        adapter.send_prompt = fake_send
        ladder = ["local", "gpt", "claude"]
        outcome = await cascade_execute(
            adapter, ladder, "test", "/tmp", always_bad,
        )

        assert len(outcome.attempts) == 3
        # All scored equally — returns best (first with highest score)
        assert len(asked) == 3


class TestCascadeFailure:
    """Behaviour when a send fails outright or there is no model left to escalate to."""

    @pytest.mark.asyncio
    async def test_send_failure_escalates(self):
        """A failed send on "local" escalates to "gpt"."""
        adapter = PiAdapter()
        asked: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            asked.append(model_name)
            # Only the "local" model fails; every other model succeeds.
            if model_name != "local":
                return RunResult(model=model_name, success=True, output="ok")
            return RunResult(
                model=model_name, success=False, error="crash",
            )

        async def ok_gate(output: str) -> int:
            return 4

        adapter.send_prompt = fake_send
        outcome = await cascade_execute(
            adapter, ["local", "gpt"], "test", "/tmp", ok_gate,
        )

        assert outcome.model == "gpt"
        assert outcome.escalated

    @pytest.mark.asyncio
    async def test_single_model_no_escalation(self):
        """With a single model and a gate score of 2, exactly one attempt is recorded."""
        adapter = PiAdapter()

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            return RunResult(model=model_name, success=True, output="ok")

        async def low_gate(output: str) -> int:
            return 2

        adapter.send_prompt = fake_send
        outcome = await cascade_execute(
            adapter, ["claude"], "test", "/tmp", low_gate,
        )

        assert outcome.model == "claude"
        assert len(outcome.attempts) == 1


class TestCascadeAttemptTracking:
    """Each cascade attempt is recorded with its model and gate score."""

    @pytest.mark.asyncio
    async def test_attempts_recorded(self):
        adapter = PiAdapter()

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            return RunResult(model=model_name, success=True, output="ok")

        # One gate score per attempt, consumed in order.
        verdicts = iter([1, 4])

        async def gate(output: str) -> int:
            return next(verdicts)

        adapter.send_prompt = fake_send
        outcome = await cascade_execute(
            adapter, ["local", "gpt"], "test", "/tmp", gate,
        )

        assert len(outcome.attempts) == 2
        first, second = outcome.attempts[0], outcome.attempts[1]
        assert first.model == "local"
        assert first.score == 1
        assert second.model == "gpt"
        assert second.score == 4


================================================
FILE: maggy/tests/test_chat.py
================================================
"""Tests for ChatManager — interactive Claude sessions."""

from __future__ import annotations

from pathlib import Path

import pytest

from maggy.config import CodebaseConfig, MaggyConfig


def _make_cfg(tmp_path: Path) -> MaggyConfig:
    """Create ``tmp_path / "my-project"`` and return a config with it as the only codebase."""
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()
    codebase = CodebaseConfig(path=str(project_dir), key="my-project")
    return MaggyConfig(codebases=[codebase])


class TestChatManager:
    """Test ChatManager session lifecycle."""

    @staticmethod
    def _manager(tmp_path):
        """Return a ChatManager configured with the single "my-project" codebase."""
        from maggy.services.chat import ChatManager
        return ChatManager(_make_cfg(tmp_path))

    def test_create_session(self, tmp_path):
        """A new session is idle, rooted at the codebase path, with no messages."""
        chat = self._manager(tmp_path).create_session("my-project")
        assert chat.project_key == "my-project"
        assert chat.status == "idle"
        expected_dir = str(tmp_path / "my-project")
        assert chat.working_dir == expected_dir
        assert chat.messages == []

    def test_create_session_invalid_project(self, tmp_path):
        """An unknown project key raises ValueError matching "not found"."""
        manager = self._manager(tmp_path)
        with pytest.raises(ValueError, match="not found"):
            manager.create_session("nonexistent")

    def test_create_with_project_path(self, tmp_path):
        """An explicit path inside the codebase becomes the working directory."""
        manager = self._manager(tmp_path)
        # Subdirectory of configured codebase is allowed
        subdir = tmp_path / "my-project" / "src"
        subdir.mkdir()
        chat = manager.create_session("my-project", str(subdir))
        assert chat.project_key == "my-project"
        assert chat.working_dir == str(subdir)

    def test_create_rejects_outside_path(self, tmp_path):
        """A path outside the configured codebase raises ValueError matching "not inside"."""
        manager = self._manager(tmp_path)
        stranger = tmp_path / "other-repo"
        stranger.mkdir()
        with pytest.raises(ValueError, match="not inside"):
            manager.create_session("other", str(stranger))

    def test_list_sessions(self, tmp_path):
        """Two sessions created for the same project are both listed."""
        manager = self._manager(tmp_path)
        for _ in range(2):
            manager.create_session("my-project")
        listed = manager.list_sessions()
        assert len(listed) == 2

    def test_get_session(self, tmp_path):
        """A session can be fetched back by its id."""
        manager = self._manager(tmp_path)
        created = manager.create_session("my-project")
        fetched = manager.get_session(created.id)
        assert fetched is not None
        assert fetched.id == created.id

    def test_get_missing_session(self, tmp_path):
        """Looking up an unknown id returns None."""
        manager = self._manager(tmp_path)
        assert manager.get_session("missing") is None

    def test_build_cmd_new_session(self, tmp_path):
        """A fresh session's command carries the prompt and output flags, but no --resume."""
        manager = self._manager(tmp_path)
        from maggy.services.chat_stream import build_cmd
        chat = manager.create_session("my-project")
        argv = build_cmd(chat, "fix the bug")
        assert "claude" in argv[0]
        for token in ("-p", "fix the bug", "--output-format"):
            assert token in argv
        assert "--resume" not in argv

    def test_build_cmd_resume(self, tmp_path):
        """With ``claude_session_id`` set, --resume is followed by that id."""
        manager = self._manager(tmp_path)
        from maggy.services.chat_stream import build_cmd
        chat = manager.create_session("my-project")
        chat.claude_session_id = "abc123"
        argv = build_cmd(chat, "continue working")
        assert "--resume" in argv
        flag_at = argv.index("--resume")
        assert argv[flag_at + 1] == "abc123"

    def test_delete_session(self, tmp_path):
        """Deleting an existing session returns True and the session is gone."""
        manager = self._manager(tmp_path)
        chat = manager.create_session("my-project")
        assert manager.delete_session(chat.id) is True
        assert manager.get_session(chat.id) is None

    def test_delete_missing(self, tmp_path):
        """Deleting an unknown id returns False."""
        manager = self._manager(tmp_path)
        assert manager.delete_session("nope") is False

    def test_working_dir_security_bad_key(self, tmp_path):
        """An unconfigured project key is rejected with "not found"."""
        manager = self._manager(tmp_path)
        with pytest.raises(ValueError, match="not found"):
            manager.create_session("hacker-repo")

    def test_working_dir_security_bad_path(self, tmp_path):
        """A working directory of /etc is rejected with "not inside"."""
        manager = self._manager(tmp_path)
        with pytest.raises(ValueError, match="not inside"):
            manager.create_session("x", "/etc")


class TestAutoConnect:
    """ChatManager.auto_connect and find_by_project behavior."""

    def test_auto_connect_creates_sessions(self, tmp_path):
        """A single active entry yields a single session keyed by its project."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        project_dir = tmp_path / "my-project"
        entry = {"project": "my-project", "project_path": str(project_dir)}

        sessions = manager.auto_connect([entry])

        assert len(sessions) == 1
        assert sessions[0].project_key == "my-project"

    def test_auto_connect_deduplicates(self, tmp_path):
        """Two identical active entries still produce only one session."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        project_dir = tmp_path / "my-project"
        # Two distinct-but-equal dicts, mirroring duplicated activity records.
        duplicated = [
            {"project": "my-project", "project_path": str(project_dir)}
            for _ in range(2)
        ]

        sessions = manager.auto_connect(duplicated)

        assert len(sessions) == 1

    def test_auto_connect_multiple_projects(self, tmp_path):
        """Two configured projects each get their own session."""
        from maggy.services.chat import ChatManager

        roots = [tmp_path / name for name in ("proj-a", "proj-b")]
        for root in roots:
            root.mkdir()
        cfg = MaggyConfig(codebases=[
            CodebaseConfig(path=str(root), key=root.name) for root in roots
        ])
        manager = ChatManager(cfg)
        active = [
            {"project": root.name, "project_path": str(root)}
            for root in roots
        ]

        sessions = manager.auto_connect(active)

        assert len(sessions) == 2
        assert {sess.project_key for sess in sessions} == {"proj-a", "proj-b"}

    def test_auto_connect_skips_empty(self, tmp_path):
        """An entry with empty project and path does not produce a session."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        project_dir = tmp_path / "my-project"
        blank_entry = {"project": "", "project_path": ""}
        real_entry = {"project": "my-project", "project_path": str(project_dir)}

        sessions = manager.auto_connect([blank_entry, real_entry])

        assert len(sessions) == 1

    def test_find_by_project(self, tmp_path):
        """find_by_project returns the session created for that project key."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        created = manager.create_session("my-project")

        located = manager.find_by_project("my-project")

        assert located is not None
        assert located.id == created.id

    def test_find_by_project_missing(self, tmp_path):
        """find_by_project returns None for a key with no session."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        located = manager.find_by_project("nope")
        assert located is None


class TestMessageQueue:
    """Queueing of messages on a session, including while its lock is held."""

    def test_enqueue_returns_position(self, tmp_path):
        """The first two enqueued messages are reported at positions 1 and 2."""
        from maggy.services.chat import ChatManager, enqueue_msg

        manager = ChatManager(_make_cfg(tmp_path))
        session = manager.create_session("my-project")

        first = enqueue_msg(session, "msg 1")
        assert first == 1
        second = enqueue_msg(session, "msg 2")
        assert second == 2

    def test_enqueue_full_returns_negative(self, tmp_path):
        """After five queued messages, a sixth enqueue returns -1."""
        from maggy.services.chat import ChatManager, enqueue_msg

        manager = ChatManager(_make_cfg(tmp_path))
        session = manager.create_session("my-project")
        for index in range(5):
            enqueue_msg(session, f"msg {index}")

        overflow_result = enqueue_msg(session, "overflow")

        assert overflow_result == -1

    def test_session_has_pending_queue(self, tmp_path):
        """A new session exposes an empty pending_queue."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        session = manager.create_session("my-project")

        assert hasattr(session, "pending_queue")
        assert len(session.pending_queue) == 0

    @pytest.mark.asyncio
    async def test_send_while_locked_enqueues(self, tmp_path):
        """Sending while the session lock is held yields a "queued" chunk."""
        from maggy.services.chat import ChatManager

        manager = ChatManager(_make_cfg(tmp_path))
        session = manager.create_session("my-project")
        session_lock = manager._locks[session.id]

        received = []
        # Hold the lock ourselves so send() observes a busy session.
        async with session_lock:
            async for chunk in manager.send(session.id, "queued"):
                received.append(chunk)

        assert any(chunk.get("type") == "queued" for chunk in received)
        assert len(session.pending_queue) == 1


================================================
FILE: maggy/tests/test_chat_context.py
================================================
"""Tests for chat context builder."""

from __future__ import annotations

import json
from unittest.mock import MagicMock

import pytest

from maggy.services.chat_context import (
    _format_recent_prompts,
    _match_from_report,
    _match_history,
    _path_candidates,
    build_project_context,
    resolve_claude_session_id,
)


class TestPathCandidates:
    """Candidate names produced by _path_candidates for a path and key."""

    def test_basic_path(self):
        """The project and user directory are candidates; Users/Documents are not."""
        candidates = _path_candidates(
            "/Users/ali/Documents/protaige", "protaige"
        )
        for kept in ("protaige", "ali"):
            assert kept in candidates
        # These path components are expected to be skipped.
        for dropped in ("Documents", "Users"):
            assert dropped not in candidates

    def test_nested_path(self):
        """Both the leaf directory and its parent directory are candidates."""
        project_path = (
            "/Users/ali/Documents/AI-Playground/"
            "claude-skills-package"
        )
        candidates = _path_candidates(project_path, "claude-skills-package")
        for kept in ("claude-skills-package", "AI-Playground"):
            assert kept in candidates

    def test_empty_path(self):
        """With an empty path the project key is still a candidate."""
        candidates = _path_candidates("", "my-project")
        assert "my-project" in candidates


class TestMatchFromReport:
    """Matching a project against entries of an aggregated report dict."""

    @staticmethod
    def _entry(project, sessions, prompts, providers, topics):
        """Build one report project entry with the keys used by these tests."""
        return {
            "project": project,
            "total_sessions": sessions,
            "total_prompts": prompts,
            "providers_used": providers,
            "top_topics": topics,
        }

    def test_exact_project_match(self):
        """An entry named like the project key contributes counts and topics."""
        report = {
            "projects": [
                self._entry(
                    "protaige", 22, 2369, ["claude"],
                    ["maia", "api", "auth"],
                ),
            ],
        }
        summary = _match_from_report(
            report, "/Users/ali/protaige", "protaige"
        )
        for fragment in ("22 sessions", "2369 prompts", "maia"):
            assert fragment in summary

    def test_parent_dir_match(self):
        """claude-skills-package is matched through its AI-Playground parent."""
        report = {
            "projects": [
                self._entry(
                    "AI-Playground", 5, 51, ["claude"],
                    ["setup", "config"],
                ),
            ],
        }
        project_path = (
            "/Users/ali/Documents/AI-Playground/"
            "claude-skills-package"
        )
        summary = _match_from_report(
            report, project_path, "claude-skills-package"
        )
        for fragment in ("5 sessions", "51 prompts"):
            assert fragment in summary

    def test_multiple_matches(self):
        """Both the direct entry and the parent-directory entry show up."""
        report = {
            "projects": [
                self._entry("plugins", 22, 990, ["claude"], ["plugin"]),
                self._entry("edubites", 10, 200, ["claude"], ["platform"]),
            ],
        }
        summary = _match_from_report(
            report, "/Users/ali/edubites/plugins", "plugins"
        )
        # Either the project name or its session count may identify a match.
        assert any(
            token in summary for token in ("plugins", "22 sessions")
        )
        assert any(
            token in summary for token in ("edubites", "10 sessions")
        )

    def test_no_match(self):
        """A report with only an unrelated project yields an empty string."""
        report = {
            "projects": [self._entry("unrelated", 1, 5, [], [])],
        }
        summary = _match_from_report(
            report, "/Users/ali/my-project", "my-project"
        )
        assert summary == ""


class TestMatchHistory:
    """Behavior of _match_history with and without a history service/report."""

    def test_uses_report_when_available(self):
        """A report returned by get_report is used to build the summary."""
        report = {
            "projects": [
                {
                    "project": "myapp",
                    "total_sessions": 5,
                    "total_prompts": 100,
                    "providers_used": ["claude"],
                    "top_topics": ["api"],
                },
            ],
        }
        history_service = MagicMock()
        history_service.get_report.return_value = report

        summary = _match_history(
            history_service, "/Users/ali/myapp", "myapp"
        )

        assert "5 sessions" in summary

    def test_returns_empty_when_no_history(self):
        """Passing None as the history service yields an empty string."""
        summary = _match_history(None, "/some/path", "proj")
        assert summary == ""

    def test_returns_empty_when_no_report(self):
        """A history service whose get_report returns None yields an empty string."""
        history_service = MagicMock()
        history_service.get_report.return_value = None

        summary = _match_history(history_service, "/some/path", "proj")

        assert summary == ""


class TestFormatRecentPrompts:
    """Formatting of recent prompts filtered by project."""

    def test_matching_prompts(self):
        """Only prompts of the requested project appear in the output."""
        own_prompt = {
            "project": "protaige", "text": "fix the auth bug",
            "timestamp": "2026-05-10T14:00:00",
        }
        foreign_prompt = {
            "project": "other", "text": "unrelated",
            "timestamp": "2026-05-10T13:00:00",
        }

        formatted = _format_recent_prompts(
            [own_prompt, foreign_prompt], "protaige"
        )

        assert "fix the auth bug" in formatted
        assert "unrelated" not in formatted

    def test_no_matching_prompts(self):
        """No prompt for the project gives an empty string."""
        foreign_prompt = {
            "project": "other", "text": "something",
            "timestamp": "2026-05-10T14:00:00",
        }
        formatted = _format_recent_prompts([foreign_prompt], "protaige")
        assert formatted == ""

    def test_limits_to_five(self):
        """Ten matching prompts produce exactly five "- [" list items."""
        prompts = []
        for hour in range(10):
            prompts.append({
                "project": "x", "text": f"msg {hour}",
                "timestamp": f"2026-05-10T1{hour}:00:00",
            })

        formatted = _format_recent_prompts(prompts, "x")

        assert formatted.count("- [") == 5


class TestResolveSessionId:
    """resolve_claude_session_id against a history file under a patched home."""

    @staticmethod
    def _write_history(home, records):
        """Write ``records`` as newline-joined JSON to home/.claude/history.jsonl."""
        history_file = home / ".claude" / "history.jsonl"
        history_file.parent.mkdir(parents=True)
        history_file.write_text(
            "\n".join(json.dumps(record) for record in records)
        )

    @staticmethod
    def _resolve(home, project_path):
        """Call resolve_claude_session_id with Path.home patched to ``home``."""
        from unittest.mock import patch

        with patch(
            "maggy.services.chat_context.Path.home",
            return_value=home,
        ):
            return resolve_claude_session_id(project_path)

    def test_finds_session_id(self, tmp_path):
        """Of two entries for the project, the second one ("def-456") is returned.

        That entry is both later in the file and carries the larger timestamp.
        """
        self._write_history(tmp_path, [
            {
                "project": "/Users/ali/protaige",
                "sessionId": "abc-123",
                "timestamp": 1715000000000,
            },
            {
                "project": "/Users/ali/protaige",
                "sessionId": "def-456",
                "timestamp": 1715100000000,
            },
        ])

        resolved = self._resolve(tmp_path, "/Users/ali/protaige")

        assert resolved == "def-456"

    def test_no_match(self, tmp_path):
        """History containing only another project resolves to an empty string."""
        self._write_history(tmp_path, [
            {
                "project": "/Users/ali/other",
                "sessionId": "xyz",
                "timestamp": 1715000000000,
            },
        ])

        resolved = self._resolve(tmp_path, "/Users/ali/protaige")

        assert resolved == ""

    def test_missing_file(self, tmp_path):
        """With no history file under the patched home the result is empty."""
        resolved = self._resolve(tmp_path, "/some/path")
        assert resolved == ""


class TestBuildProjectContext:
    """Assembly of the combined project context string."""

    def test_combines_history_and_prompts(self):
        """The context contains both report statistics and recent prompt text."""
        report = {
            "projects": [
                {
                    "project": "myapp",
                    "total_sessions": 8,
                    "total_prompts": 200,
                    "providers_used": ["claude"],
                    "top_topics": ["api", "auth"],
                },
            ],
        }
        history_service = MagicMock()
        history_service.get_report.return_value = report
        recent = [
            {"project": "myapp", "text": "add endpoint",
             "timestamp": "2026-05-10T14:00:00"},
        ]

        context = build_project_context(
            history_service, "/Users/ali/myapp", "myapp", recent,
        )

        for fragment in ("8 sessions", "add endpoint"):
            assert fragment in context

    def test_empty_when_nothing(self):
        """An empty report and no prompts give an empty context string."""
        history_service = MagicMock()
        history_service.get_report.return_value = {"projects": []}

        context = build_project_context(
            history_service, "/some/path", "proj", [],
        )

        assert context == ""


================================================
FILE: maggy/tests/test_chat_routed.py
================================================
"""Tests for routed chat — multi-model routing in ChatManager."""

from __future__ import annotations

from unittest.mock import MagicMock

import pytest

from maggy.services.chat_router import estimate_blast, estimate_type


class TestBlastEstimation:
    """Expected blast-score ranges returned by estimate_blast."""

    def test_low_blast_simple_fix(self):
        """A README typo fix scores at most 3."""
        score = estimate_blast("fix the typo in README")
        assert score <= 3

    def test_high_blast_security(self):
        """Designing an OAuth auth system scores at least 7."""
        score = estimate_blast("design auth system with OAuth")
        assert score >= 7

    def test_high_blast_architecture(self):
        """Refactoring the database schema scores at least 5."""
        score = estimate_blast("refactor database schema")
        assert score >= 5

    def test_medium_blast_feature(self):
        """Adding pagination lands in the 3..6 band."""
        score = estimate_blast("add pagination to the API")
        assert score >= 3 and score <= 6

    def test_empty_returns_default(self):
        """An empty message scores exactly 5."""
        score = estimate_blast("")
        assert score == 5

    # --- Intent-based scoring ---

    def test_retrieval_find_key_low_blast(self):
        """'find the API key' is retrieval, so it scores at most 3."""
        score = estimate_blast("find the API key in ~/Documents")
        assert score <= 3

    def test_retrieval_show_config(self):
        """Showing the config scores at most 3."""
        score = estimate_blast("show me the current config")
        assert score <= 3

    def test_retrieval_check_env(self):
        """Checking env variables scores at most 3."""
        score = estimate_blast("check the env variables")
        assert score <= 3

    def test_retrieval_where_is_file(self):
        """Asking where a file is scores at most 3."""
        score = estimate_blast("where is the routes file")
        assert score <= 3

    def test_retrieval_list_endpoints(self):
        """Listing API endpoints scores at most 3."""
        score = estimate_blast("list all API endpoints")
        assert score <= 3

    def test_retrieval_read_file(self):
        """Reading package.json scores at most 3."""
        score = estimate_blast("read the package.json")
        assert score <= 3

    def test_creation_still_mid(self):
        """Creation requests stay within the 4..6 band."""
        score = estimate_blast("create a new user service")
        assert score >= 4 and score <= 6

    def test_multi_step_high(self):
        """Refactor plus migrate scores at least 7."""
        score = estimate_blast("refactor and migrate the database")
        assert score >= 7

    def test_retrieval_with_action_not_capped(self):
        """A message mixing retrieval ('find') and mutation ('fix') scores >= 4."""
        score = estimate_blast("find the bug and fix the auth")
        assert score >= 4


class TestTypeEstimation:
    """Expected task-type labels returned by estimate_type."""

    def test_security_type(self):
        """An authentication bug is labelled "security"."""
        label = estimate_type("fix authentication bug")
        assert label == "security"

    def test_docs_type(self):
        """A documentation request is labelled "docs"."""
        label = estimate_type("write documentation for API")
        assert label == "docs"

    def test_test_type(self):
        """A unit-test request is labelled "tests"."""
        label = estimate_type("add unit tests with mock fixtures")
        assert label == "tests"

    def test_general_default(self):
        """A message like "make it faster" is labelled "general"."""
        label = estimate_type("make it faster")
        assert label == "general"


class TestRoutedEndpoint:
    """Routing decision produced by RoutedChat ahead of a routed send."""

    @pytest.mark.asyncio
    async def test_send_routed_yields_routing_chunk(self):
        """decide() returns a decision and consults the routing service once.

        Only the routing decision is exercised here; the full send path
        (and the SSE chunks it would emit) is not driven by this test.
        """
        from maggy.services.chat_router import RoutedChat

        # ``MagicMock(name="claude")`` would only label the mock's repr and
        # leave ``.name`` as another mock. Assign the attribute after
        # construction so the primary provider really is named "claude".
        primary = MagicMock()
        primary.name = "claude"

        mock_routing = MagicMock()
        mock_routing.route.return_value = MagicMock(
            primary=primary,
            reason="blast 8 → claude",
        )
        mock_budget = MagicMock()
        mock_budget.check.return_value = True

        rc = RoutedChat(mock_routing, mock_budget)
        decision = rc.decide("design auth system", None, None)

        assert decision is not None
        mock_routing.route.assert_called_once()


class TestRewardRecording:
    """Reward values passed to record_outcome by _record_routing_outcome."""

    def test_success_records_reward(self):
        """Without an error, the outcome is recorded with reward 1.0."""
        from maggy.api.routes_chat import _record_routing_outcome

        routing_service = MagicMock()
        routed = MagicMock(model="local", task_type="general", blast=5)

        _record_routing_outcome(routing_service, routed, had_error=False)

        routing_service.record_outcome.assert_called_once_with(
            "local", "general", 5, 1.0,
        )

    def test_error_records_zero_reward(self):
        """With an error, the outcome is recorded with reward 0.0."""
        from maggy.api.routes_chat import _record_routing_outcome

        routing_service = MagicMock()
        routed = MagicMock(model="claude", task_type="security", blast=8)

        _record_routing_outcome(routing_service, routed, had_error=True)

        routing_service.record_outcome.assert_called_once_with(
            "claude", "security", 8, 0.0,
        )

    def test_no_routing_service_noop(self):
        """Passing None for both service and decision must not raise."""
        from maggy.api.routes_chat import _record_routing_outcome

        _record_routing_outcome(None, None, had_error=False)


================================================
FILE: maggy/tests/test_chat_router.py
================================================
"""Tests for blast-score estimation and task-type detection."""

from __future__ import annotations

from maggy.services.chat_router import (
    DEFAULT_BLAST,
    estimate_blast,
    estimate_type,
)


def test_blast_hi_scores_low():
    """A bare "hi" must score exactly 1 rather than 5."""
    score = estimate_blast("hi")
    assert score == 1


def test_blast_exit_scores_low():
    """The message "exit" must score exactly 1."""
    score = estimate_blast("exit")
    assert score == 1


def test_blast_empty_returns_default():
    """An empty message falls back to DEFAULT_BLAST."""
    score = estimate_blast("")
    assert score == DEFAULT_BLAST


def test_blast_security_audit_scores_high():
    """Several high-tier keywords together push the score to 7 or more."""
    message = "security audit migration"
    assert estimate_blast(message) >= 7


def test_blast_fix_typo_scores_low():
    """Low-tier keywords keep the score at 3 or below."""
    message = "fix typo in readme"
    assert estimate_blast(message) <= 3


def test_type_security_detected():
    """A message with security keywords is typed as "security"."""
    label = estimate_type("fix auth vulnerability")
    assert label == "security"


def test_type_general_default():
    """A message matching no keyword is typed as "general"."""
    label = estimate_type("hello world")
    assert label == "general"


def test_type_search_detected():
    """A "find ..." query is typed as "search"."""
    label = estimate_type("find the utils module")
    assert label == "search"


def test_type_search_grep():
    """A "grep ..." query is typed as "search"."""
    label = estimate_type("grep for config files")
    assert label == "search"


def test_blast_search_scores_low():
    """A search query scores 3 or below (intended for a cheap model)."""
    message = "find the utils module"
    assert estimate_blast(message) <= 3


================================================
FILE: maggy/tests/test_chat_stream.py
================================================
"""Tests for chat streaming JSON parser and usage extraction."""

from __future__ import annotations

import json

from maggy.services.chat_stream import parse_chunk


class _FakeSession:
    def __init__(self):
        self.claude_session_id = ""


def test_parse_result_extracts_usage():
    """A result event exposes its text, cost and token counts on the chunk."""
    event = {
        "type": "result",
        "result": "Done",
        "cost_usd": 0.05,
        "usage": {"input_tokens": 1500, "output_tokens": 800},
    }

    chunk = parse_chunk(json.dumps(event), _FakeSession())

    expected_fields = {
        "type": "result",
        "content": "Done",
        "cost_usd": 0.05,
        "input_tokens": 1500,
        "output_tokens": 800,
    }
    for field, value in expected_fields.items():
        assert chunk[field] == value


def test_parse_result_without_usage():
    """A result event lacking cost data yields a chunk without "cost_usd"."""
    event = {"type": "result", "result": "Done"}

    chunk = parse_chunk(json.dumps(event), _FakeSession())

    assert chunk["type"] == "result"
    assert chunk["content"] == "Done"
    assert "cost_usd" not in chunk


def test_parse_assistant_text():
    """An assistant event with one text part becomes a "text" chunk."""
    text_part = {"type": "text", "text": "Hello"}
    event = {"type": "assistant", "message": {"content": [text_part]}}

    chunk = parse_chunk(json.dumps(event), _FakeSession())

    assert chunk["type"] == "text"
    assert chunk["content"] == "Hello"


def test_parse_captures_session_id():
    """A session_id in the event is stored on the session object."""
    fake = _FakeSession()
    event = {"session_id": "abc123", "type": "system"}

    parse_chunk(json.dumps(event), fake)

    assert fake.claude_session_id == "abc123"


def test_parse_result_zero_cost_preserved():
    """Zero cost and zero token counts must be present, not dropped as falsy."""
    event = {
        "type": "result",
        "result": "Done",
        "cost_usd": 0.0,
        "usage": {"input_tokens": 0, "output_tokens": 0},
    }

    chunk = parse_chunk(json.dumps(event), _FakeSession())

    assert chunk["cost_usd"] == 0.0
    for token_field in ("input_tokens", "output_tokens"):
        assert chunk[token_field] == 0


def test_parse_invalid_json():
    """Input that is not valid JSON is returned as a "text" chunk."""
    malformed = "not json {{"
    chunk = parse_chunk(malformed, _FakeSession())
    assert chunk["type"] == "text"


================================================
FILE: maggy/tests/test_checkpoint.py
================================================
"""Tests for cross-model checkpoint serializer."""

from __future__ import annotations

from maggy.services.checkpoint import (
    Checkpoint,
    create_checkpoint,
)


class TestCheckpoint:
    """Serialization and prompt rendering of Checkpoint."""

    def test_serialize_round_trip(self):
        """serialize() followed by deserialize() preserves the given fields."""
        original = Checkpoint(
            goal="Fix auth bug",
            progress=["Found root cause"],
            source_model="claude",
        )

        restored = Checkpoint.deserialize(original.serialize())

        assert restored.goal == "Fix auth bug"
        assert restored.source_model == "claude"
        assert len(restored.progress) == 1

    def test_serialize_sets_timestamp(self):
        """A round-tripped checkpoint has a non-empty created_at."""
        payload = Checkpoint(goal="test").serialize()
        restored = Checkpoint.deserialize(payload)
        assert restored.created_at != ""

    def test_to_prompt_format(self):
        """Every populated field appears in the rendered prompt."""
        checkpoint = Checkpoint(
            goal="Add logout button",
            constraints=["No breaking changes"],
            progress=["Created component"],
            working_state="Testing phase",
            file_context=["src/auth.ts"],
        )

        rendered = checkpoint.to_prompt()

        expected_fragments = (
            "Add logout button",
            "No breaking changes",
            "Created component",
            "Testing phase",
            "src/auth.ts",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_to_prompt_minimal(self):
        """A goal-only checkpoint renders the goal and a confirmation request."""
        rendered = Checkpoint(goal="Simple task").to_prompt()
        assert "Simple task" in rendered
        assert "confirm you understand" in rendered


class TestCreateCheckpoint:
    """Behavior of the create_checkpoint convenience helper."""

    def test_helper_function(self):
        """Arguments are mapped onto goal, source_model and file_context."""
        checkpoint = create_checkpoint(
            goal="Refactor DB layer",
            progress=["Extracted interface"],
            model="gpt",
            working_state="mid-refactor",
            files=["db.py", "models.py"],
            constraints=["Keep API stable"],
        )

        assert checkpoint.goal == "Refactor DB layer"
        assert checkpoint.source_model == "gpt"
        assert len(checkpoint.file_context) == 2

    def test_defaults(self):
        """Omitted constraints and files come back as empty lists."""
        checkpoint = create_checkpoint(
            goal="Test", progress=[], model="claude",
        )

        assert checkpoint.constraints == []
        assert checkpoint.file_context == []


================================================
FILE: maggy/tests/test_checkpoint_mgr.py
================================================
"""Tests for CheckpointManager persistence."""

from __future__ import annotations

from maggy.checkpoint import CheckpointManager


def _checkpoint() -> dict:
    return {
        "goal": "Ship Phase 2",
        "constraints": ["Keep tests green"],
        "progress": ["Planner added"],
        "model_history": ["claude"],
        "current_subgoal": "Add checkpoints",
        "fatigue_score": 0.2,
    }


class TestCheckpointManager:
    """Write/read/delete/list behavior of CheckpointManager on a temp dir."""

    def test_write_and_read(self, tmp_path) -> None:
        """A written checkpoint is read back equal to what was stored."""
        manager = CheckpointManager(tmp_path)
        manager.write("session-1", _checkpoint())

        loaded = manager.read("session-1")

        assert loaded == _checkpoint()

    def test_read_missing_returns_none(self, tmp_path) -> None:
        """Reading an id that was never written gives None."""
        manager = CheckpointManager(tmp_path)
        loaded = manager.read("missing")
        assert loaded is None

    def test_delete_returns_true_when_removed(self, tmp_path) -> None:
        """delete() returns True and the checkpoint is gone afterwards."""
        manager = CheckpointManager(tmp_path)
        manager.write("session-1", _checkpoint())

        removed = manager.delete("session-1")

        assert removed is True
        assert manager.read("session-1") is None

    def test_list_checkpoints_returns_session_ids(self, tmp_path) -> None:
        """Ids written as "b" then "a" are listed as ["a", "b"]."""
        manager = CheckpointManager(tmp_path)
        for session_id in ("b", "a"):
            manager.write(session_id, _checkpoint())

        listed = manager.list_checkpoints()

        assert listed == ["a", "b"]

    def test_path_traversal_rejected(self, tmp_path) -> None:
        """A session id containing path traversal raises ValueError."""
        import pytest

        manager = CheckpointManager(tmp_path)
        expect_rejection = pytest.raises(
            ValueError, match="Invalid session id"
        )
        with expect_rejection:
            manager.write("../../etc/passwd", _checkpoint())

    def test_read_corrupt_json_returns_none(self, tmp_path) -> None:
        """A checkpoint file overwritten with invalid JSON reads as None."""
        manager = CheckpointManager(tmp_path)
        manager.write("sess-1", _checkpoint())
        # Clobber the stored file with something that is not valid JSON.
        (tmp_path / "sess-1.json").write_text("{corrupt")

        loaded = manager.read("sess-1")

        assert loaded is None


================================================
FILE: maggy/tests/test_cikg.py
================================================
"""Tests for CIKG — knowledge graph, queries, market scoring."""

from __future__ import annotations

from pathlib import Path

import pytest

from maggy.cikg.graph import KnowledgeGraphService
from maggy.cikg.models import Edge, Node
from maggy.cikg.queries import (
    compare_entities,
    find_gaps,
    find_gaps_raw,
    get_landscape,
    get_segment_landscape,
)


class TestKnowledgeGraph:
    """Node storage and listing on KnowledgeGraphService."""

    def test_add_and_get_node(self, tmp_path: Path):
        """A node that was added can be fetched by id with its name intact."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        graph.add_node(
            Node(id="c1", node_type="competitor", name="Acme")
        )

        fetched = graph.get_node("c1")

        assert fetched is not None
        assert fetched.name == "Acme"

    def test_get_missing_node(self, tmp_path: Path):
        """Fetching an unknown id gives None."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        fetched = graph.get_node("nonexistent")
        assert fetched is None

    def test_list_nodes_by_type(self, tmp_path: Path):
        """Listing by node type returns only nodes of that type."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        graph.add_node(Node(id="c1", node_type="competitor", name="A"))
        graph.add_node(Node(id="f1", node_type="feature", name="B"))

        competitors = graph.list_nodes("competitor")

        assert len(competitors) == 1
        assert competitors[0].name == "A"

    def test_list_all_nodes(self, tmp_path: Path):
        """Listing without a type returns every stored node."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        graph.add_node(Node(id="c1", node_type="competitor", name="A"))
        graph.add_node(Node(id="f1", node_type="feature", name="B"))

        everything = graph.list_nodes()

        assert len(everything) == 2


class TestEdges:
    """Edge storage, direction filtering and neighbor lookup."""

    @staticmethod
    def _linked_graph(tmp_path: Path) -> KnowledgeGraphService:
        """Graph with competitor c1 linked to feature f1 via has_feature."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        graph.add_node(Node(id="c1", node_type="competitor", name="A"))
        graph.add_node(Node(id="f1", node_type="feature", name="SSO"))
        graph.add_edge(Edge(
            source_id="c1", target_id="f1",
            edge_type="has_feature",
        ))
        return graph

    def test_add_and_get_edge(self, tmp_path: Path):
        """The outbound edges of c1 contain the single edge to f1."""
        graph = self._linked_graph(tmp_path)

        outgoing = graph.get_edges("c1", "out")

        assert len(outgoing) == 1
        assert outgoing[0].target_id == "f1"

    def test_inbound_edges(self, tmp_path: Path):
        """The inbound edges of f1 contain the single edge from c1."""
        graph = self._linked_graph(tmp_path)

        incoming = graph.get_edges("f1", "in")

        assert len(incoming) == 1
        assert incoming[0].source_id == "c1"

    def test_neighbors(self, tmp_path: Path):
        """neighbors("c1") returns exactly the node f1."""
        graph = self._linked_graph(tmp_path)

        adjacent = graph.neighbors("c1")

        assert len(adjacent) == 1
        assert adjacent[0].id == "f1"


class TestDeleteNode:
    """Deleting a node from the knowledge graph."""

    def test_delete_removes_node_and_edges(self, tmp_path: Path):
        """After deletion the node is gone and it has no outbound edges."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        graph.add_node(Node(id="c1", node_type="competitor", name="A"))
        graph.add_node(Node(id="f1", node_type="feature", name="SSO"))
        link = Edge(
            source_id="c1", target_id="f1",
            edge_type="has_feature",
        )
        graph.add_edge(link)

        graph.delete_node("c1")

        assert graph.get_node("c1") is None
        assert graph.get_edges("c1", "out") == []


class TestQueries:
    """Query helpers run against a small seeded graph."""

    def _seed_graph(self, tmp_path: Path) -> KnowledgeGraphService:
        """Three competitors, one feature (SSO) and one technology (React)."""
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        for index in range(3):
            competitor = Node(
                id=f"c{index}", node_type="competitor",
                name=f"Comp{index}",
            )
            graph.add_node(competitor)
        graph.add_node(Node(id="f1", node_type="feature", name="SSO"))
        graph.add_node(Node(id="t1", node_type="technology", name="React"))
        # Only c0 and c1 have SSO; c2 does not.
        for owner in ("c0", "c1"):
            graph.add_edge(Edge(owner, "f1", "has_feature"))
        return graph

    def test_find_gaps_existing(self, tmp_path: Path):
        """SSO held by two of three competitors: one gap, medium threat."""
        graph = self._seed_graph(tmp_path)

        result = find_gaps(graph, "SSO")

        assert result.feature == "SSO"
        assert result.gap_count == 1
        assert result.threat_level == "medium"

    def test_find_gaps_unknown(self, tmp_path: Path):
        """An untracked feature: three gaps, low threat, differentiator advice."""
        graph = self._seed_graph(tmp_path)

        result = find_gaps(graph, "AI Chat")

        assert result.gap_count == 3
        assert result.threat_level == "low"
        assert "differentiator" in result.recommendation.lower()

    def test_get_landscape(self, tmp_path: Path):
        """Landscape counts reflect the seeded nodes per type."""
        graph = self._seed_graph(tmp_path)

        landscape = get_landscape(graph)

        assert landscape["competitors"] == 3
        assert landscape["features_tracked"] == 1
        assert landscape["technologies"] == 1

    def test_compare_entities(self, tmp_path: Path):
        """c0 and c1 share the feature f1."""
        graph = self._seed_graph(tmp_path)

        comparison = compare_entities(graph, "c0", "c1")

        assert "f1" in comparison["shared"]


class TestServiceQueries:
    """Query helpers exercised against a richer graph with segments and threats."""

    def _seed_graph(self, tmp_path: Path) -> KnowledgeGraphService:
        """Build a graph of 3 competitors, 2 features, 1 technology, 2 segments.

        Nodes and edges are inserted in the order listed below.
        """
        graph = KnowledgeGraphService(tmp_path / "cikg.db")
        node_specs = (
            ("c0", "competitor", "Alpha"),
            ("c1", "competitor", "Bravo"),
            ("c2", "competitor", "Charlie"),
            ("f1", "feature", "SSO"),
            ("f2", "feature", "AI Chat"),
            ("t1", "technology", "React"),
            ("s1", "market_segment", "SMB"),
            ("s2", "market_segment", "Enterprise"),
        )
        for node_id, kind, label in node_specs:
            graph.add_node(Node(id=node_id, node_type=kind, name=label))
        edge_specs = (
            ("c0", "f1", "has_feature"),
            ("c1", "f1", "has_feature"),
            ("c1", "f2", "has_feature"),
            ("c0", "c1", "competes_with"),
            ("c0", "t1", "uses_technology"),
            ("c1", "t1", "uses_technology"),
            ("c0", "s1", "targets_market"),
            ("c1", "s1", "targets_market"),
            ("c2", "s2", "targets_market"),
            ("c1", "c0", "threatens"),
        )
        for src, dst, relation in edge_specs:
            graph.add_edge(Edge(src, dst, relation))
        return graph

    def test_find_gaps_raw(self, tmp_path: Path):
        """Every competitor is listed for SSO with a has/lacks status."""
        rows = find_gaps_raw(self._seed_graph(tmp_path), "SSO")
        names = {row["entity"] for row in rows}
        assert names == {"Alpha", "Bravo", "Charlie"}
        by_entity = {row["entity"]: row["status"] for row in rows}
        expected = {"Alpha": "has", "Bravo": "has", "Charlie": "lacks"}
        assert by_entity == expected

    def test_compare_entities(self, tmp_path: Path):
        """c0 vs c1: f1 shared, f2 only on c1, first relationship is competes_with."""
        comparison = compare_entities(self._seed_graph(tmp_path), "c0", "c1")
        assert comparison["shared"] == ["f1"]
        assert comparison["only_a"] == []
        assert comparison["only_b"] == ["f2"]
        first_relationship = comparison["relationships"][0]
        assert first_relationship["edge_type"] == "competes_with"

    def test_segment_landscape(self, tmp_path: Path):
        """The SMB segment summary reports the expected per-category counts."""
        summary = get_segment_landscape(self._seed_graph(tmp_path), "SMB")
        assert summary["segment"] == "SMB"
        assert summary["competitors"] == 2
        assert summary["features_tracked"] == 2
        assert summary["technologies"] == 1
        assert summary["threat_count"] == 1


class TestTypeValidation:
    """Node and Edge constructors accept known type names and reject "bogus"."""

    def test_valid_node_type_accepted(self):
        """A "competitor" node is constructed and keeps its type."""
        accepted = Node(id="c1", node_type="competitor", name="Test")
        assert accepted.node_type == "competitor"

    def test_invalid_node_type_rejected(self):
        """An unknown node type raises ValueError mentioning the field."""
        bad_kwargs = dict(id="c1", node_type="bogus", name="Test")
        with pytest.raises(ValueError, match="Invalid node_type"):
            Node(**bad_kwargs)

    def test_valid_edge_type_accepted(self):
        """A "has_feature" edge is constructed and keeps its type."""
        accepted = Edge(source_id="a", target_id="b", edge_type="has_feature")
        assert accepted.edge_type == "has_feature"

    def test_invalid_edge_type_rejected(self):
        """An unknown edge type raises ValueError mentioning the field."""
        bad_kwargs = dict(source_id="a", target_id="b", edge_type="bogus")
        with pytest.raises(ValueError, match="Invalid edge_type"):
            Edge(**bad_kwargs)


================================================
FILE: maggy/tests/test_cli.py
================================================
"""Tests for Maggy CLI — thin client over REST API."""

from __future__ import annotations

import json
import subprocess
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from typer.testing import CliRunner

from maggy.cli import app

# Module-level Typer test runner; tests below call ``runner.invoke(app, [...])``.
runner = CliRunner()


# ── Fixtures ────────────────────────────────────────────────────────


@pytest.fixture(autouse=True)
def _mock_server_running(monkeypatch):
    """Autouse fixture: make the client's health check always report True."""

    def _always_healthy(self):
        return True

    monkeypatch.setattr(
        "maggy.cli_client.MaggyClient._check_health", _always_healthy,
    )


def _mock_get(response_json: dict | list):
    """Return a mock httpx response."""
    resp = MagicMock()
    resp.status_code = 200
    resp.json.return_value = response_json
    resp.raise_for_status = MagicMock()
    return resp


# ── Status ──────────────────────────────────────────────────────────


def test_status_shows_health():
    """`status` exits 0 and prints the org name from the health payload."""
    payload = dict(
        status="ok",
        mode="full",
        org="Protaige",
        codebases=5,
        provider="github",
    )
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["status"])
    assert outcome.exit_code == 0
    assert "Protaige" in outcome.output


def test_status_json_flag():
    """`status --json` emits parseable JSON whose status field is "ok"."""
    payload = {"status": "ok", "mode": "full", "org": "X", "codebases": 1}
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["status", "--json"])
    assert outcome.exit_code == 0
    decoded = json.loads(outcome.output)
    assert decoded["status"] == "ok"


# ── Inbox ───────────────────────────────────────────────────────────


def test_inbox_renders_table():
    """`inbox` exits 0 and the output includes a task title from the payload."""
    first_task = {
        "rank": 1, "title": "Fix auth bug", "labels": ["bug"],
        "ai_reason": "critical", "id": "1", "board": "repo",
    }
    second_task = {
        "rank": 2, "title": "Add tests", "labels": ["test"],
        "ai_reason": "coverage", "id": "2", "board": "repo",
    }
    payload = {"items": [first_task, second_task], "total": 2}
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["inbox"])
    assert outcome.exit_code == 0
    assert "Fix auth bug" in outcome.output


def test_inbox_empty():
    """An empty inbox payload produces a "No tasks" message."""
    empty_payload = {"items": [], "total": 0}
    fake_response = _mock_get(empty_payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["inbox"])
    assert outcome.exit_code == 0
    assert "No tasks" in outcome.output


# ── Sessions ────────────────────────────────────────────────────────


def test_sessions_renders():
    """`sessions` exits 0 and the output names the session's tool."""
    only_session = {
        "pid": 1234,
        "tool": "claude",
        "project": "myapp",
        "prompts": 42,
        "duration": "1h 20m",
    }
    payload = {"sessions": [only_session], "total": 1}
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["sessions"])
    assert outcome.exit_code == 0
    assert "claude" in outcome.output


# ── Route ───────────────────────────────────────────────────────────


def test_route_decision():
    """`route 8` exits 0 and the output names the primary model."""
    payload = {
        "primary": "claude",
        "validator": "codex",
        "fallback": ["kimi", "ollama"],
        "reason": "blast 8 → premium tier",
    }
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["route", "8"])
    assert outcome.exit_code == 0
    assert "claude" in outcome.output


# ── Budget ──────────────────────────────────────────────────────────


def test_budget_renders():
    """`budget` exits 0 and the output names a provider from the payload."""
    provider_rows = [
        {"name": "anthropic", "used": 2.50, "limit": 5.0},
        {"name": "openai", "used": 1.00, "limit": 3.0},
    ]
    payload = {
        "daily_limit_usd": 10.0,
        "used_today_usd": 3.50,
        "providers": provider_rows,
    }
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["budget"])
    assert outcome.exit_code == 0
    assert "anthropic" in outcome.output


# ── Competitors ─────────────────────────────────────────────────────


def test_competitors_news():
    """`competitors` exits 0 and the output includes the headline text."""
    headline_row = {
        "date": "2026-05-11",
        "source": "TechCrunch",
        "event_type": "funding",
        "headline": "Rival raises $50M",
    }
    fake_response = _mock_get([headline_row])
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["competitors"])
    assert outcome.exit_code == 0
    assert "Rival" in outcome.output


# ── Models ──────────────────────────────────────────────────────────


def test_models_heatmap():
    """`models` exits 0 and the output names a model from the heatmap."""
    rows = [
        {"model": "claude", "task_type": "security", "reward": 0.92},
        {"model": "codex", "task_type": "crud", "reward": 0.85},
    ]
    fake_response = _mock_get(rows)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["models"])
    assert outcome.exit_code == 0
    assert "claude" in outcome.output


# ── Server auto-start ───────────────────────────────────────────────


def test_server_not_running_starts_it(monkeypatch):
    """`status` still succeeds when the first health probe reports down.

    The health check returns False once and True afterwards; the server
    start hook is replaced with a no-op.
    """
    monkeypatch.undo()  # drop the autouse always-healthy patch
    probes = []

    def fake_check(self):
        probes.append(1)
        return len(probes) > 1

    def fake_start(self):
        return None

    monkeypatch.setattr(
        "maggy.cli_client.MaggyClient._check_health", fake_check,
    )
    monkeypatch.setattr(
        "maggy.cli_client.MaggyClient._start_server", fake_start,
    )
    payload = {"status": "ok", "mode": "local", "org": "Test", "codebases": 0}
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["status"])
    assert outcome.exit_code == 0


def test_stale_port_killed_before_start(monkeypatch):
    """The stale-port kill hook is invoked while `status` brings the server up.

    The health check reports down for the first two probes; start and kill
    hooks are stubbed, with the kill stub recording that it ran.
    """
    monkeypatch.undo()
    state = {"health": 0, "kill": 0}

    def fake_check(self):
        state["health"] += 1
        return state["health"] > 2

    def fake_start(self):
        return None

    def fake_kill(self):
        state["kill"] = 1

    client_path = "maggy.cli_client.MaggyClient"
    monkeypatch.setattr(client_path + "._check_health", fake_check)
    monkeypatch.setattr(client_path + "._start_server", fake_start)
    monkeypatch.setattr(client_path + "._kill_stale_port", fake_kill)

    payload = {"status": "ok", "mode": "local", "org": "T", "codebases": 0}
    fake_response = _mock_get(payload)
    with patch("maggy.cli_client.httpx.get", return_value=fake_response):
        outcome = runner.invoke(app, ["status"])
    assert outcome.exit_code == 0
    assert state["kill"] == 1


def test_server_log_written_to_file(monkeypatch, tmp_path):
    """Starting the server creates ``server.log`` under the config directory.

    ``CONFIG_DIR`` is redirected to ``tmp_path`` and ``Popen`` is replaced by
    a recorder, so no process is spawned; the recorded ``stdout`` argument
    must not be ``subprocess.DEVNULL``.
    """
    from maggy.cli_client import MaggyClient

    popen_kwargs = {}

    def fake_popen(cmd, **kw):
        popen_kwargs.update(kw)

    monkeypatch.setattr("maggy.cli_client.CONFIG_DIR", tmp_path)
    monkeypatch.setattr("maggy.cli_client.subprocess.Popen", fake_popen)

    MaggyClient()._start_server()

    assert popen_kwargs.get("stdout") is not subprocess.DEVNULL
    log_file = tmp_path / "server.log"
    assert log_file.exists()


================================================
FILE: maggy/tests/test_cli_chat.py
================================================
"""Tests for maggy chat CLI — interactive REPL."""

from __future__ import annotations

from unittest.mock import patch

import pytest
from typer.testing import CliRunner

from maggy.cli import app

# Module-level Typer test runner; chat tests call ``runner.invoke(app, [...])``.
runner = CliRunner()

# Session payload with zero messages; returned by the mocked ``chat_create``.
SESSION = {
    "id": "abc123",
    "project_key": "my-proj",
    "working_dir": "/tmp/my-proj",
    "status": "idle",
    "messages": 0,
}

# Same id and project as SESSION but with 5 messages; returned by the mocked
# ``chat_sessions`` in the resume test.
RESUMED = {
    "id": "abc123",
    "project_key": "my-proj",
    "working_dir": "/tmp/my-proj",
    "status": "idle",
    "messages": 5,
}

# History payload holding one user/assistant exchange; returned by the mocked
# ``chat_history``.
HISTORY = {
    "id": "abc123",
    "messages": [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "hi"},
    ],
}


@pytest.fixture(autouse=True)
def _no_detect(monkeypatch):
    """Autouse fixture: replace ``detect_all`` with a stub returning an empty result."""
    from maggy.services import session_detect

    def _empty_detection(wd):
        return session_detect.DetectedSessions()

    monkeypatch.setattr(session_detect, "detect_all", _empty_detection)


def _setup_new(mock_client):
    """Prime ``mock_client`` return values for the new-session flow.

    The server is reported as up, no sessions exist yet, ``chat_create``
    yields ``SESSION``, history is empty, and budget/heatmap are stubbed.
    """
    budget = {"spent_today_usd": 0, "daily_limit_usd": 10, "status": "ok"}
    mock_client.configure_mock(**{
        "ensure_server.return_value": True,
        "chat_sessions.return_value": [],
        "chat_create.return_value": SESSION,
        "chat_history.return_value": {"messages": []},
        "budget_summary.return_value": budget,
        "models_heatmap.return_value": [],
    })


@patch("maggy.cli._client")
def test_chat_creates_session(mock_client):
    """With no prior sessions, `chat` creates one for the given project."""
    _setup_new(mock_client)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    assert "my-proj" in outcome.output
    mock_client.chat_create.assert_called_once_with("my-proj")


@patch("maggy.cli._client")
def test_chat_resumes_existing(mock_client):
    """An existing session is resumed and no new session is created."""
    budget = {"spent_today_usd": 0, "daily_limit_usd": 10, "status": "ok"}
    mock_client.configure_mock(**{
        "ensure_server.return_value": True,
        "chat_sessions.return_value": [RESUMED],
        "chat_history.return_value": HISTORY,
        "budget_summary.return_value": budget,
        "models_heatmap.return_value": [],
    })
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    assert "Resuming" in outcome.output
    mock_client.chat_create.assert_not_called()


@patch("maggy.cli._client")
def test_chat_routed_streams(mock_client):
    """A plain message goes through ``chat_send_routed`` with no overrides."""
    _setup_new(mock_client)
    chunks = [
        {"type": "routing", "model": "kimi", "blast": 3,
         "reason": "low blast"},
        {"type": "text", "content": "Hello"},
        {"type": "done"},
    ]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["say hi", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    mock_client.chat_send_routed.assert_called_once_with(
        "abc123", "say hi", blast=None, allowed_models=None,
    )


@patch("maggy.cli._client")
def test_chat_direct_mode(mock_client):
    """With ``--direct`` the message is sent through ``chat_send_stream``."""
    _setup_new(mock_client)
    chunks = [
        {"type": "text", "content": "Hi"},
        {"type": "done"},
    ]
    mock_client.chat_send_stream.return_value = iter(chunks)
    argv = ["chat", "my-proj", "--direct"]
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["say hi", "/quit"]
        outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    mock_client.chat_send_stream.assert_called_once_with("abc123", "say hi")


@patch("maggy.cli._client")
def test_chat_history_command(mock_client):
    """`/history` followed by `/quit` exits cleanly."""
    _setup_new(mock_client)
    # Override the empty history installed by _setup_new.
    mock_client.chat_history.return_value = HISTORY
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["/history", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0


@patch("maggy.cli._client")
def test_chat_blast_override(mock_client):
    """After '/blast 8', the next message is routed with ``blast=8``."""
    _setup_new(mock_client)
    chunks = [
        {"type": "routing", "model": "claude", "blast": 8,
         "reason": "override"},
        {"type": "text", "content": "Done"},
        {"type": "done"},
    ]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["/blast 8", "do it", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    mock_client.chat_send_routed.assert_called_once_with(
        "abc123", "do it", blast=8, allowed_models=None,
    )


@patch("maggy.cli._client")
def test_chat_ctrl_c_exits(mock_client):
    """A KeyboardInterrupt raised at the prompt ends the chat with exit code 0."""
    _setup_new(mock_client)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = KeyboardInterrupt
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0


@patch("maggy.cli._client")
def test_chat_empty_input_ignored(mock_client):
    """Empty and whitespace-only inputs are never sent for routing."""
    _setup_new(mock_client)
    typed = ["", "  ", "/quit"]
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = typed
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    mock_client.chat_send_routed.assert_not_called()


@patch("maggy.cli._client")
def test_chat_error_displayed(mock_client):
    """An error chunk in the stream does not change the exit code."""
    _setup_new(mock_client)
    chunks = [
        {"type": "error", "content": "CLI not found"},
        {"type": "done"},
    ]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["test", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0


@patch("maggy.cli._client")
def test_chat_shows_queued_status(mock_client):
    """A stream containing only a queued chunk still exits cleanly."""
    _setup_new(mock_client)
    chunks = [{"type": "queued", "position": 2}]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["test", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0


@patch("maggy.cli._client")
def test_chat_shows_warning(mock_client):
    """A warning chunk ahead of the text does not change the exit code."""
    _setup_new(mock_client)
    chunks = [
        {"type": "warning", "content": "Context: ~25000 tokens"},
        {"type": "text", "content": "Hi"},
        {"type": "done"},
    ]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["test", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0


@patch("maggy.cli._client")
def test_chat_exit_word_quits(mock_client):
    """The word 'exit' ends the REPL and is never sent for routing."""
    _setup_new(mock_client)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["exit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    mock_client.chat_send_routed.assert_not_called()


@patch("maggy.cli._client")
def test_chat_agent_status_rendered(mock_client):
    """An agent_status chunk causes its status text to appear in the output."""
    _setup_new(mock_client)
    chunks = [
        {"type": "agent_status", "agent": "local",
         "step": "ANALYZE", "status": "running"},
        {"type": "text", "content": "Done"},
        {"type": "done"},
    ]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["test", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    assert "running" in outcome.output


@patch("maggy.cli._client")
def test_chat_quota_error_shows_guide(mock_client):
    """A rate-limit error makes the output mention switch, login or account."""
    _setup_new(mock_client)
    chunks = [
        {"type": "error",
         "content": "rate_limit_exceeded: quota hit"},
        {"type": "done"},
    ]
    mock_client.chat_send_routed.return_value = iter(chunks)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["test", "/quit"]
        outcome = runner.invoke(app, ["chat", "my-proj"])
    assert outcome.exit_code == 0
    lowered = outcome.output.lower()
    assert any(hint in lowered for hint in ("switch", "login", "account"))


@patch("maggy.cli._client")
def test_chat_prompt_uses_angle_bracket(mock_client):
    """The prompt text passed to ``Prompt.ask`` contains '>' and not 'maggy'."""
    _setup_new(mock_client)
    with patch("maggy.cli_chat.Prompt") as prompt:
        prompt.ask.side_effect = ["/quit"]
        runner.invoke(app, ["chat", "my-proj"])
    # First positional argument of the most recent ask() call.
    prompt_text = prompt.ask.call_args.args[0]
    assert ">" in prompt_text
    assert "maggy" not in prompt_text.lower()


@patch("maggy.cli._client")
def test_screenshot_command_dispatches(mock_client):
    """'/screenshot test.png' invokes ``_handle_screenshot`` once with that path."""
    _setup_new(mock_client)
    with patch("maggy.cli_chat.Prompt") as prompt:
        with patch("maggy.cli_chat._handle_screenshot") as handler:
            prompt.ask.side_effect = ["/screenshot test.png", "/quit"]
            runner.invoke(app, ["chat", "my-proj"])
    handler.assert_called_once()
    first_arg = handler.call_args.args[0]
    assert "test.png" in first_arg


================================================
FILE: maggy/tests/test_cli_discovery.py
================================================
"""Tests for CLI auto-discovery and command building."""

from __future__ import annotations

from maggy.adapters.cli_discovery import (
    CliProfile,
    discover_all,
    discover_cli,
)


def test_discover_all_returns_profiles():
    """``discover_all`` returns profiles keyed by claude, codex and kimi."""
    discovered = discover_all()
    for cli_name in ("claude", "codex", "kimi"):
        assert cli_name in discovered.profiles


def test_claude_discovered():
    """The discovered claude profile is installed and carries the expected flags."""
    profile = discover_cli("claude")
    assert profile.installed is True
    assert profile.prompt_is_positional is True
    assert profile.prompt_flag == "-p"
    assert "skip-permissions" in profile.auto_approve_flag
    assert profile.output_format_flag == "--output-format"
    assert profile.work_dir_flag == ""


def test_codex_discovered():
    """The discovered codex profile is installed and uses the exec subcommand."""
    profile = discover_cli("codex")
    assert profile.installed is True
    assert profile.uses_exec_subcommand is True
    assert profile.prompt_is_positional is True
    assert "bypass" in profile.auto_approve_flag
    assert profile.work_dir_flag == "-C"


def test_kimi_discovered():
    """The discovered kimi profile is installed and carries the expected flags."""
    profile = discover_cli("kimi")
    assert profile.installed is True
    assert profile.prompt_flag == "-p"
    assert profile.auto_approve_flag == "--yolo"
    assert profile.afk_flag == "--afk"
    assert profile.work_dir_flag == "-w"


def test_missing_cli():
    """A CLI name that does not exist is reported as not installed."""
    profile = discover_cli("nonexistent_xyz")
    assert profile.installed is False


def test_claude_build_command():
    """A claude profile builds `claude -p <prompt>` plus approve and JSON flags."""
    spec = dict(
        name="claude",
        binary="claude",
        installed=True,
        prompt_flag="-p",
        prompt_is_positional=True,
        auto_approve_flag="--dangerously-skip-permissions",
        output_format_flag="--output-format",
    )
    argv = CliProfile(**spec).build_command("do stuff", "/tmp/repo", 20)
    assert argv[:3] == ["claude", "-p", "do stuff"]
    assert "--dangerously-skip-permissions" in argv
    assert "--output-format" in argv
    assert "json" in argv


def test_codex_build_command():
    """A codex profile builds `codex exec <prompt>` and passes the repo via -C."""
    spec = dict(
        name="codex",
        binary="codex",
        installed=True,
        uses_exec_subcommand=True,
        prompt_is_positional=True,
        work_dir_flag="-C",
        auto_approve_flag="--dangerously-bypass-approvals-and-sandbox",
    )
    argv = CliProfile(**spec).build_command("do stuff", "/tmp/repo", 10)
    assert argv[:3] == ["codex", "exec", "do stuff"]
    assert "-C" in argv
    assert "/tmp/repo" in argv


def test_kimi_build_command():
    """A kimi profile builds `kimi -p <prompt>` with -w, --yolo and --afk."""
    spec = dict(
        name="kimi",
        binary="kimi",
        installed=True,
        prompt_flag="-p",
        work_dir_flag="-w",
        auto_approve_flag="--yolo",
        afk_flag="--afk",
    )
    argv = CliProfile(**spec).build_command("do stuff", "/tmp/repo", 10)
    assert argv[:3] == ["kimi", "-p", "do stuff"]
    assert "-w" in argv
    assert "--yolo" in argv
    assert "--afk" in argv


def test_ollama_discovered():
    """The discovered ollama profile uses `run` with a qwen coder model."""
    profile = discover_cli("ollama")
    assert profile.installed is True
    assert profile.uses_run_subcommand is True
    assert profile.prompt_is_positional is True
    assert all(part in profile.run_model for part in ("qwen", "coder"))


def test_ollama_build_command():
    """An ollama profile builds `ollama run <model> <prompt>` without --output-format."""
    model_tag = "qwen3-coder:30b-a3b-q8_0"
    profile = CliProfile(
        name="ollama",
        binary="ollama",
        installed=True,
        uses_run_subcommand=True,
        run_model=model_tag,
        prompt_is_positional=True,
    )
    argv = profile.build_command("do stuff", "/tmp/repo", 5)
    assert argv[:4] == ["ollama", "run", model_tag, "do stuff"]
    assert "--output-format" not in argv


def test_pi_adapter_uses_discovery():
    """``PiAdapter.discovered_profiles`` lists claude and ollama as installed."""
    from maggy.adapters.pi import PiAdapter

    found = PiAdapter().discovered_profiles
    assert "claude" in found
    assert found["claude"].installed is True
    assert "ollama" in found
    assert found["ollama"].installed is True


================================================
FILE: maggy/tests/test_cli_sessions.py
================================================
"""Tests for CLI session management commands."""

from __future__ import annotations

from unittest.mock import patch

from typer.testing import CliRunner

from maggy.cli import app

# Module-level Typer test runner; session tests call ``runner.invoke(app, [...])``.
runner = CliRunner()


@patch("maggy.cli._client")
def test_spawn_creates_session(mock_client):
    """`spawn` calls the client once and prints the returned session id."""
    mock_client.ensure_server.return_value = True
    spawned = {"session_id": "abc123"}
    mock_client.spawn.return_value = spawned
    outcome = runner.invoke(app, ["spawn", "add unit tests"])
    assert outcome.exit_code == 0
    assert "abc123" in outcome.output
    mock_client.spawn.assert_called_once()


@patch("maggy.cli._client")
def test_ps_lists_sessions(mock_client):
    """`ps` prints the project name of a session returned by the client."""
    mock_client.ensure_server.return_value = True
    running_session = dict(
        id="abc",
        project="edubites-core",
        model="claude",
        status="running",
        type="chat",
    )
    mock_client.all_sessions.return_value = [running_session]
    outcome = runner.invoke(app, ["ps"])
    assert outcome.exit_code == 0
    assert "edubites-core" in outcome.output


@patch("maggy.cli._client")
def test_kill_stops_session(mock_client):
    """`kill <id>` forwards the session id to ``kill_session`` exactly once."""
    target_id = "abc123"
    mock_client.ensure_server.return_value = True
    mock_client.kill_session.return_value = {"ok": True}
    outcome = runner.invoke(app, ["kill", target_id])
    assert outcome.exit_code == 0
    mock_client.kill_session.assert_called_once_with(target_id)


================================================
FILE: maggy/tests/test_cli_welcome.py
================================================
"""Tests for the CLI welcome banner."""

from __future__ import annotations

from unittest.mock import MagicMock

from maggy.cli_welcome import render_welcome


def _mock_client():
    c = MagicMock()
    c.budget_summary.return_value = {
        "spent_today_usd": 1.50,
        "daily_limit_usd": 10.0,
        "status": "ok",
    }
    c.models_heatmap.return_value = [
        {"model": "claude"},
        {"model": "kimi"},
    ]
    return c


# Session dict passed to ``render_welcome`` by the tests below; individual
# tests copy it and override single keys where needed.
SESSION = {
    "id": "abc123",
    "project_key": "edubites",
    "working_dir": "/tmp/edubites",
    "status": "idle",
    "messages": 5,
}


def test_render_welcome_shows_project(capsys):
    """The project name appears in the banner written to stdout."""
    render_welcome("edubites", SESSION, _mock_client())
    printed = capsys.readouterr().out
    assert "edubites" in printed


def test_render_welcome_shows_budget(capsys):
    """The amount spent today appears in the banner."""
    render_welcome("edubites", SESSION, _mock_client())
    printed = capsys.readouterr().out
    assert any(amount in printed for amount in ("1.50", "$1.50"))


def test_render_welcome_shows_models(capsys):
    """With two heatmap entries, the digit "2" appears in the banner."""
    render_welcome("edubites", SESSION, _mock_client())
    printed = capsys.readouterr().out
    assert "2" in printed


def test_render_welcome_shows_health(capsys):
    """A 0.85 health score is shown either as "85%" or as "0.85"."""
    client = _mock_client()
    client.engram_diagnostics.return_value = {"health_score": 0.85}
    render_welcome("edubites", SESSION, client)
    printed = capsys.readouterr().out
    assert any(form in printed for form in ("85%", "0.85"))


def test_render_welcome_shows_session_history(capsys):
    """The session's message count (12 here) appears in the banner."""
    with_history = dict(SESSION, messages=12)
    render_welcome("edubites", with_history, _mock_client())
    printed = capsys.readouterr().out
    assert "12" in printed


def test_dir_shows_cwd_fallback(capsys):
    """With an empty ``working_dir`` the banner shows the current directory name."""
    import os

    without_dir = dict(SESSION, working_dir="")
    render_welcome("edubites", without_dir, _mock_client())
    printed = capsys.readouterr().out
    # Only the last path component of the real cwd is checked.
    expected_tail = os.path.basename(os.getcwd())
    assert expected_tail in printed


def test_models_shows_available_count(capsys):
    """With an empty heatmap the banner mentions models being "available"."""
    client = _mock_client()
    client.models_heatmap.return_value = []
    render_welcome("edubites", SESSION, client)
    printed = capsys.readouterr().out
    assert any(text in printed for text in ("5 available", "available"))


def test_budget_subscription_welcome(capsys):
    """A budget summary with plan "subscription" is reflected in the banner."""
    subscription_budget = {
        "spent_today_usd": 0,
        "daily_limit_usd": 10.0,
        "status": "ok",
        "plan": "subscription",
    }
    client = _mock_client()
    client.budget_summary.return_value = subscription_budget
    render_welcome("edubites", SESSION, client)
    printed = capsys.readouterr().out
    assert "subscription" in printed.lower()


================================================
FILE: maggy/tests/test_context_compactor.py
================================================
"""Tests for context compactor — message summarization."""

from __future__ import annotations

import pytest

from maggy.services.context_compactor import (
    CompactionResult,
    estimate_tokens,
    should_compact,
)


class TestEstimateTokens:
    """Checks on ``estimate_tokens`` for empty, small and sized inputs."""

    def test_empty_list(self):
        """No messages means zero tokens."""
        assert estimate_tokens([]) == 0

    def test_single_message(self):
        """One short message yields a positive estimate."""
        conversation = [{"role": "user", "content": "hello world"}]
        assert estimate_tokens(conversation) > 0

    def test_approximation(self):
        """400 characters of content estimate to 100 tokens, within 10."""
        conversation = [{"role": "user", "content": "a" * 400}]
        estimate = estimate_tokens(conversation)
        assert estimate == pytest.approx(100, abs=10)


class TestShouldCompact:
    """Checks on ``should_compact`` below and above its trigger point."""

    def test_below_threshold_no_compact(self):
        """A tiny message in a 200,000 context window needs no compaction."""
        conversation = [{"role": "user", "content": "short"}]
        assert not should_compact(conversation, context_window=200_000)

    def test_above_threshold_compact(self):
        """160,000 characters with context_window=40_000 triggers compaction."""
        conversation = [{"role": "user", "content": "x" * 160_000}]
        assert should_compact(conversation, context_window=40_000)

    def test_threshold_at_80_pct(self):
        """32,800 characters with context_window=10_000 triggers compaction."""
        conversation = [{"role": "user", "content": "a" * 32_800}]
        assert should_compact(conversation, context_window=10_000)


class TestCompact:
    """Behaviour of ``compact`` when given an injected async summarizer."""

    @pytest.mark.asyncio
    async def test_keeps_recent_messages(self):
        """Ten messages with keep_recent=4 become one system summary plus four."""
        from maggy.services.context_compactor import compact

        async def fake_summarize(text):
            return "summary of old messages"

        history = []
        for i in range(10):
            history.append({"role": "user", "content": f"msg {i}"})

        outcome = await compact(
            history, keep_recent=4, summarizer=fake_summarize,
        )
        assert isinstance(outcome, CompactionResult)
        assert len(outcome.messages) == 5
        head = outcome.messages[0]
        assert head["role"] == "system"
        assert "summary" in head["content"]
        assert outcome.messages[-1]["content"] == "msg 9"

    @pytest.mark.asyncio
    async def test_nothing_to_compact(self):
        """A single message with keep_recent=6 is returned unchanged."""
        from maggy.services.context_compactor import compact

        async def fake_summarize(text):
            return "summary"

        history = [{"role": "user", "content": "hi"}]
        outcome = await compact(
            history, keep_recent=6, summarizer=fake_summarize,
        )
        assert outcome.messages == history
        assert outcome.tokens_saved == 0

    @pytest.mark.asyncio
    async def test_summarizer_failure_passthrough(self):
        """When the summarizer raises, the original messages come back untouched."""
        from maggy.services.context_compactor import compact

        async def broken_summarize(text):
            raise RuntimeError("model down")

        history = []
        for i in range(10):
            history.append({"role": "user", "content": f"msg {i}"})

        outcome = await compact(
            history, keep_recent=4, summarizer=broken_summarize,
        )
        assert outcome.messages == history
        assert outcome.tokens_saved == 0


================================================
FILE: maggy/tests/test_contracts.py
================================================
"""Tests for contract generation."""

from __future__ import annotations

from maggy.contracts import ContractGenerator


def test_generates_test_code_from_postcondition() -> None:
    """Generated code embeds the postcondition, the target and a derived test name."""
    postcondition = "returns sorted results"
    target = "maggy.services.planner.DualPlanner.plan"

    generated = ContractGenerator().from_postcondition(postcondition, target)

    assert "returns sorted results" in generated
    assert "DualPlanner.plan" in generated
    assert "def test_dualplanner_plan_contract()" in generated


================================================
FILE: maggy/tests/test_convention_inferrer.py
================================================
"""Tests for LLM-based dynamic convention inference."""

from __future__ import annotations

from pathlib import Path

import pytest

from maggy.adapters.pi import PiAdapter, RunResult
from maggy.routing_rules import Convention, RoutingRules
from maggy.services.convention_inferrer import (
    collect_fingerprint,
    ensure_inferred,
    infer_conventions,
    parse_conventions,
)


def test_collect_fingerprint_includes_files(tmp_path: Path):
    """Directory and file names from the project appear in the fingerprint."""
    source_dir = tmp_path / "src"
    source_dir.mkdir()
    (source_dir / "main.py").write_text("print('hi')")
    (tmp_path / "README.md").write_text("# Hello")

    fingerprint = collect_fingerprint(str(tmp_path))

    assert "src" in fingerprint
    assert "README.md" in fingerprint


def test_collect_fingerprint_excludes_noise(tmp_path: Path):
    """Dependency, VCS and bytecode directories are left out of the fingerprint."""
    nested_noise = (
        tmp_path / "node_modules" / "pkg",
        tmp_path / ".git" / "objects",
    )
    for directory in nested_noise:
        directory.mkdir(parents=True)
    (tmp_path / "__pycache__").mkdir()
    (tmp_path / "src").mkdir()

    fingerprint = collect_fingerprint(str(tmp_path))

    assert "node_modules" not in fingerprint
    assert ".git" not in fingerprint
    assert "__pycache__" not in fingerprint
    assert "src" in fingerprint


def test_collect_fingerprint_includes_config(tmp_path: Path):
    """Content of ``pyproject.toml`` is surfaced in the fingerprint."""
    config = tmp_path / "pyproject.toml"
    config.write_text("[tool.ruff]\nline-length = 88\n")

    fingerprint = collect_fingerprint(str(tmp_path))

    assert "tool.ruff" in fingerprint


def test_collect_fingerprint_includes_git_log(tmp_path: Path):
    """Text from a commit message shows up in the fingerprint.

    Builds a throwaway repository in ``tmp_path`` with a single commit whose
    subject mentions ``prisma`` and checks that the word reaches the
    fingerprint.
    """
    import subprocess

    def git(*args: str) -> None:
        # check=True: a broken setup step (git missing, commit rejected)
        # must fail loudly here instead of surfacing later as a misleading
        # assertion failure on the fingerprint.
        subprocess.run(
            ["git", *args],
            cwd=tmp_path,
            capture_output=True,
            check=True,
        )

    git("init")
    git("config", "user.email", "t@t.com")
    git("config", "user.name", "T")
    (tmp_path / "f.txt").write_text("x")
    git("add", ".")
    git("commit", "-m", "chore: run prisma migrate")

    fp = collect_fingerprint(str(tmp_path))
    assert "prisma" in fp


def test_parse_conventions_from_llm_output():
    """Each bullet line in the reply becomes one convention, in order."""
    reply = "Here are the conventions:\n- Use prisma migrate\n- Use turbo build\n"

    parsed = parse_conventions(reply)

    assert len(parsed) == 2
    first, second = parsed[0], parsed[1]
    assert "prisma" in first.text.lower()
    assert "turbo" in second.text.lower()


def test_parse_ignores_non_convention_lines():
    """Prose lines around a bullet are dropped; only the bullet is kept."""
    reply = "Analysis:\nThe project uses X.\n- Use X for builds\nEnd."

    parsed = parse_conventions(reply)

    assert len(parsed) == 1
    assert "Use X" in parsed[0].text


def test_parse_caps_at_10():
    """Fifteen bullet lines yield only ten conventions."""
    bullets = [f"- Convention {i}" for i in range(15)]
    parsed = parse_conventions("\n".join(bullets))
    assert len(parsed) == 10


def test_parse_empty_response():
    """Empty text and bullet-free text both parse to an empty list."""
    for reply in ("", "No conventions found."):
        assert parse_conventions(reply) == []


def _seed_project(tmp_path: Path) -> None:
    """Add a config file so fingerprint exceeds the 20-char minimum."""
    (tmp_path / "pyproject.toml").write_text("[tool.ruff]\nline-length=88\n")


@pytest.mark.asyncio
async def test_infer_calls_local_model(tmp_path: Path):
    """The first model asked is ``local`` and its bullet reply is parsed."""
    _seed_project(tmp_path)
    adapter = PiAdapter()
    seen_models = []

    async def fake_send(model, prompt, wd, **kw):
        seen_models.append(model)
        return RunResult(
            model=model,
            success=True,
            output="- Use custom deploy\n",
        )

    adapter.send_prompt = fake_send
    inferred = await infer_conventions(adapter, str(tmp_path))

    assert seen_models[0] == "local"
    assert len(inferred) >= 1
    assert "custom deploy" in inferred[0].text.lower()


@pytest.mark.asyncio
async def test_infer_falls_back_on_local_failure(tmp_path: Path):
    """A failed ``local`` run is followed by a ``kimi`` attempt that succeeds."""
    _seed_project(tmp_path)
    adapter = PiAdapter()
    seen_models = []

    async def fake_send(model, prompt, wd, **kw):
        seen_models.append(model)
        if model != "local":
            return RunResult(model=model, success=True, output="- Use yarn\n")
        return RunResult(model=model, success=False, error="offline")

    adapter.send_prompt = fake_send
    inferred = await infer_conventions(adapter, str(tmp_path))

    assert "local" in seen_models
    assert "kimi" in seen_models
    assert len(inferred) >= 1


@pytest.mark.asyncio
async def test_infer_returns_empty_on_all_failures(tmp_path: Path):
    """If every model run fails, inference yields an empty list."""
    _seed_project(tmp_path)
    adapter = PiAdapter()

    async def fail_send(model, prompt, wd, **kw):
        return RunResult(model=model, success=False, error="down")

    adapter.send_prompt = fail_send
    inferred = await infer_conventions(adapter, str(tmp_path))
    assert inferred == []


@pytest.mark.asyncio
async def test_ensure_inferred_caches(tmp_path: Path):
    """A repeated ``ensure_inferred`` for the same project makes no new model calls.

    The first invocation must reach the (fake) model at least once;
    the second one, on the same ``RoutingRules`` and project key, must not
    add any further calls.
    """
    _seed_project(tmp_path)
    pi = PiAdapter()
    calls = 0

    async def counting_send(model, prompt, wd, **kw):
        nonlocal calls
        calls += 1
        return RunResult(model=model, success=True, output="- Use X\n")

    pi.send_prompt = counting_send
    rules = RoutingRules()

    await ensure_inferred(rules, "proj", str(tmp_path), pi)
    first_count = calls
    # Guard against a vacuous pass: if the first run never called the model,
    # "no additional calls" (0 == 0) would prove nothing about caching.
    assert first_count >= 1

    await ensure_inferred(rules, "proj", str(tmp_path), pi)
    assert calls == first_count


@pytest.mark.asyncio
async def test_ensure_inferred_deduplicates(tmp_path: Path):
    """An inferred convention already present is not added a second time."""
    _seed_project(tmp_path)
    adapter = PiAdapter()

    async def fake_send(model, prompt, wd, **kw):
        reply = "- Use npm install\n- Use custom script\n"
        return RunResult(model=model, success=True, output=reply)

    adapter.send_prompt = fake_send
    existing = Convention("Use npm install", ["all"], "auto-detected")
    rules = RoutingRules(project_conventions={"proj": [existing]})

    await ensure_inferred(rules, "proj", str(tmp_path), adapter)

    texts = []
    for conv in rules.project_conventions["proj"]:
        texts.append(conv.text)
    assert texts.count("Use npm install") == 1
    assert "Use custom script" in texts


@pytest.mark.asyncio
async def test_all_inferred_have_llm_source(tmp_path: Path):
    """At least one stored convention carries the ``llm-inferred`` source."""
    _seed_project(tmp_path)
    adapter = PiAdapter()

    async def fake_send(model, prompt, wd, **kw):
        return RunResult(model=model, success=True, output="- Use X\n")

    adapter.send_prompt = fake_send
    rules = RoutingRules()
    await ensure_inferred(rules, "proj", str(tmp_path), adapter)

    stored = rules.project_conventions.get("proj", [])
    from_llm = [conv for conv in stored if conv.source == "llm-inferred"]
    assert len(from_llm) >= 1


================================================
FILE: maggy/tests/test_convention_scanner.py
================================================
"""Tests for project-specific convention detection from filesystem."""

from __future__ import annotations

from pathlib import Path

from maggy.routing_rules import Convention, RoutingRules
from maggy.services.convention_scanner import (
    ensure_scanned,
    scan_project,
)


def test_detects_supabase_migrations(tmp_path: Path):
    """A ``supabase/migrations`` directory yields a convention mentioning supabase."""
    migrations = tmp_path / "supabase" / "migrations"
    migrations.mkdir(parents=True)

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "supabase" in combined


def test_detects_alembic(tmp_path: Path):
    """An ``alembic.ini`` file yields a convention mentioning alembic."""
    marker = tmp_path / "alembic.ini"
    marker.write_text("[alembic]\n")

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "alembic" in combined


def test_detects_npm(tmp_path: Path):
    """A ``package-lock.json`` file yields a convention mentioning npm."""
    lockfile = tmp_path / "package-lock.json"
    lockfile.write_text("{}")

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "npm" in combined


def test_detects_pnpm(tmp_path: Path):
    """A ``pnpm-lock.yaml`` file yields a convention mentioning pnpm."""
    lockfile = tmp_path / "pnpm-lock.yaml"
    lockfile.write_text("")

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "pnpm" in combined


def test_detects_pytest_in_pyproject(tmp_path: Path):
    """A ``[tool.pytest.ini_options]`` table yields a convention mentioning pytest."""
    config = tmp_path / "pyproject.toml"
    config.write_text("[tool.pytest.ini_options]\ntestpaths = ['tests']\n")

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "pytest" in combined


def test_detects_ruff_in_pyproject(tmp_path: Path):
    """A ``[tool.ruff]`` table yields a convention mentioning ruff."""
    config = tmp_path / "pyproject.toml"
    config.write_text("[tool.ruff]\nline-length=88\n")

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "ruff" in combined


def test_empty_dir_no_conventions(tmp_path: Path):
    """Scanning a directory with no files returns an empty list."""
    assert scan_project(str(tmp_path)) == []


def test_all_conventions_have_auto_source(tmp_path: Path):
    """Every convention found by the scanner is tagged ``auto-detected``."""
    makefile = tmp_path / "Makefile"
    makefile.write_text("all:\n\techo hi\n")

    found = scan_project(str(tmp_path))

    assert len(found) >= 1
    sources = [conv.source for conv in found]
    assert all(source == "auto-detected" for source in sources)


def test_conventions_for_merges_project():
    """With a project key, both global and project conventions are rendered."""
    from maggy.routing_rules import conventions_for

    global_rule = Convention("Global rule", ["all"], "manual")
    project_rule = Convention("Use supabase db push", ["all"], "auto")
    rules = RoutingRules(
        conventions=[global_rule],
        project_conventions={"protaige": [project_rule]},
    )

    rendered = conventions_for(rules, "feature", "protaige")

    assert "Global rule" in rendered
    assert "supabase" in rendered


def test_conventions_for_without_project():
    """Without a project key, project-specific conventions are left out."""
    from maggy.routing_rules import conventions_for

    global_rule = Convention("Global rule", ["all"], "manual")
    project_rule = Convention("Use supabase db push", ["all"], "auto")
    rules = RoutingRules(
        conventions=[global_rule],
        project_conventions={"protaige": [project_rule]},
    )

    rendered = conventions_for(rules, "feature")

    assert "Global rule" in rendered
    assert "supabase" not in rendered


def test_ensure_scanned_caches(tmp_path: Path):
    """A second ``ensure_scanned`` for the same key leaves the convention count unchanged."""
    (tmp_path / "alembic.ini").write_text("[alembic]\n")
    rules = RoutingRules()
    project_dir = str(tmp_path)

    ensure_scanned(rules, "my-proj", project_dir)
    assert "my-proj" in rules.project_conventions
    before = len(rules.project_conventions["my-proj"])

    ensure_scanned(rules, "my-proj", project_dir)
    after = len(rules.project_conventions["my-proj"])
    assert after == before


def test_yaml_roundtrip_project_conventions(tmp_path: Path):
    """Per-project conventions written by ``save`` are read back by ``load``."""
    from maggy.routing_rules_io import load, save

    per_project = {
        "protaige": [Convention("Use supabase", ["all"], "auto-detected")],
        "edubites": [Convention("Use alembic", ["all"], "auto-detected")],
    }
    target = tmp_path / "rules.yaml"

    save(RoutingRules(project_conventions=per_project), target)
    restored = load(target)

    assert "protaige" in restored.project_conventions
    assert "edubites" in restored.project_conventions
    assert "supabase" in restored.project_conventions["protaige"][0].text


def test_detects_docker_compose(tmp_path: Path):
    """A ``docker-compose.yml`` file yields a convention mentioning docker."""
    compose_file = tmp_path / "docker-compose.yml"
    compose_file.write_text("version: '3'\n")

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "docker" in combined


def test_detects_github_actions(tmp_path: Path):
    """A ``.github/workflows`` directory yields a convention mentioning GitHub Actions."""
    workflows = tmp_path / ".github" / "workflows"
    workflows.mkdir(parents=True)

    found = scan_project(str(tmp_path))
    combined = " ".join(conv.text for conv in found).lower()

    assert "github actions" in combined


================================================
FILE: maggy/tests/test_coordination.py
================================================
"""Tests for multi-agent coordination locks."""

from __future__ import annotations

import sqlite3
from datetime import datetime, timedelta, timezone

from maggy.coordination.lock_manager import LockManager


class TestLockManager:
    """Acquire/release/conflict behaviour of ``LockManager`` on a temp database."""

    def test_acquire_and_release(self, tmp_path):
        """A held lock can be released once; releasing it again reports False."""
        manager = LockManager(tmp_path / "locks.db")
        assert manager.acquire("maggy/a.py", "agent-1") is True
        assert manager.release("maggy/a.py", "agent-1") is True
        assert manager.release("maggy/a.py", "agent-1") is False

    def test_blocks_other_agent(self, tmp_path):
        """A path locked by one agent cannot be acquired by another."""
        manager = LockManager(tmp_path / "locks.db")
        assert manager.acquire("maggy/a.py", "agent-1") is True
        assert manager.acquire("maggy/a.py", "agent-2") is False

    def test_release_all_returns_count(self, tmp_path):
        """``release_all`` returns how many locks it dropped and spares other agents."""
        manager = LockManager(tmp_path / "locks.db")
        manager.acquire("maggy/a.py", "agent-1")
        manager.acquire("maggy/b.py", "agent-1")
        manager.acquire("maggy/c.py", "agent-2")
        assert manager.release_all("agent-1") == 2
        assert manager.conflicts(["maggy/a.py", "maggy/c.py"]) == ["maggy/c.py"]

    def test_conflicts_returns_locked_paths(self, tmp_path):
        """``conflicts`` lists only the queried paths that are currently locked."""
        manager = LockManager(tmp_path / "locks.db")
        manager.acquire("maggy/a.py", "agent-1")
        manager.acquire("maggy/c.py", "agent-2")
        conflicts = manager.conflicts(["maggy/a.py", "maggy/b.py", "maggy/c.py"])
        assert conflicts == ["maggy/a.py", "maggy/c.py"]

    def test_expired_locks_are_removed(self, tmp_path):
        """A lock row whose expiry lies in the past does not block another agent."""
        from contextlib import closing

        db_path = tmp_path / "locks.db"
        manager = LockManager(db_path)
        expired_at = datetime.now(timezone.utc) - timedelta(minutes=31)
        # sqlite3's own ``with conn`` only manages the transaction and leaves
        # the connection open; closing() makes sure the handle is released
        # before the manager touches the same database file again.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.execute(
                "INSERT INTO locks(file_path, agent_id, acquired_at, expires_at) "
                "VALUES (?, ?, ?, ?)",
                (
                    "maggy/a.py",
                    "agent-1",
                    expired_at.isoformat(),
                    expired_at.isoformat(),
                ),
            )
            conn.commit()
        assert manager.acquire("maggy/a.py", "agent-2") is True


================================================
FILE: maggy/tests/test_deploy.py
================================================
"""Tests for deploy service — session management."""

from __future__ import annotations

from maggy.deploy import DeployService, DeploySession


class TestDeployService:
    """Session lifecycle of ``DeployService``: create, look up, update, tear down."""

    def test_create_session(self):
        """A new session records project and branch and starts as ``building``."""
        created = DeployService().create_session("myapp", "main")
        assert created.project == "myapp"
        assert created.branch == "main"
        assert created.status == "building"

    def test_get_session(self):
        """A created session can be fetched again by its id."""
        service = DeployService()
        created = service.create_session("myapp", "feat")
        fetched = service.get_session(created.session_id)
        assert fetched is not None
        assert fetched.branch == "feat"

    def test_get_missing_session(self):
        """Looking up an unknown id returns ``None``."""
        service = DeployService()
        assert service.get_session("nonexistent") is None

    def test_list_sessions(self):
        """Listing returns every created session."""
        service = DeployService()
        service.create_session("app1", "main")
        service.create_session("app2", "dev")
        assert len(service.list_sessions()) == 2

    def test_update_status(self):
        """Updating sets the new status and the supplied URL."""
        service = DeployService()
        created = service.create_session("myapp", "main")
        preview_url = "https://preview.vercel.app"
        updated = service.update_status(
            created.session_id,
            "live",
            url=preview_url,
        )
        assert updated.status == "live"
        assert updated.url == "https://preview.vercel.app"

    def test_update_missing_returns_none(self):
        """Updating an unknown id returns ``None``."""
        service = DeployService()
        assert service.update_status("nope", "live") is None

    def test_teardown(self):
        """Tearing down a session reports success and removes it."""
        service = DeployService()
        session_id = service.create_session("myapp", "main").session_id
        assert service.teardown(session_id)
        assert service.get_session(session_id) is None

    def test_teardown_missing(self):
        """Tearing down an unknown id reports failure."""
        service = DeployService()
        assert not service.teardown("nonexistent")


================================================
FILE: maggy/tests/test_discovery.py
================================================
"""Tests for environment auto-discovery."""

from __future__ import annotations

import json
from pathlib import Path
from unittest.mock import patch

import pytest

from maggy.discovery import (
    DiscoveryResult,
    _parse_org_from_url,
    discover_active_projects,
    discover_clis,
    discover_env_tokens,
    discover_repos,
    full_discovery,
)
from maggy.process.discovery import discover_local


class TestDiscoverLocal:
    """``discover_local`` maps marker files/directories to tool names per category."""

    def test_empty_project(self, tmp_path: Path):
        """An empty directory yields empty lists for every category."""
        found = discover_local(tmp_path)
        assert found["ci"] == []
        assert found["quality"] == []
        assert found["review"] == []
        assert found["deps"] == []

    def test_detects_github_actions(self, tmp_path: Path):
        """``.github/workflows`` is reported as ``github_actions`` under ``ci``."""
        workflows = tmp_path / ".github" / "workflows"
        workflows.mkdir(parents=True)
        assert "github_actions" in discover_local(tmp_path)["ci"]

    def test_detects_jenkins(self, tmp_path: Path):
        """A ``Jenkinsfile`` is reported as ``jenkins`` under ``ci``."""
        (tmp_path / "Jenkinsfile").touch()
        assert "jenkins" in discover_local(tmp_path)["ci"]

    def test_detects_circleci(self, tmp_path: Path):
        """A ``.circleci`` directory is reported as ``circleci`` under ``ci``."""
        (tmp_path / ".circleci").mkdir()
        assert "circleci" in discover_local(tmp_path)["ci"]

    def test_detects_gitlab_ci(self, tmp_path: Path):
        """``.gitlab-ci.yml`` is reported as ``gitlab_ci`` under ``ci``."""
        (tmp_path / ".gitlab-ci.yml").touch()
        assert "gitlab_ci" in discover_local(tmp_path)["ci"]

    def test_detects_eslint(self, tmp_path: Path):
        """``.eslintrc.json`` is reported as ``eslint`` under ``quality``."""
        (tmp_path / ".eslintrc.json").touch()
        assert "eslint" in discover_local(tmp_path)["quality"]

    def test_detects_ruff_in_pyproject(self, tmp_path: Path):
        """A ``[tool.ruff]`` table is reported as ``ruff`` under ``quality``."""
        config = tmp_path / "pyproject.toml"
        config.write_text("[tool.ruff]\nline-length = 88\n")
        assert "ruff" in discover_local(tmp_path)["quality"]

    def test_detects_pre_commit(self, tmp_path: Path):
        """``.pre-commit-config.yaml`` is reported as ``pre-commit`` under ``quality``."""
        (tmp_path / ".pre-commit-config.yaml").touch()
        assert "pre-commit" in discover_local(tmp_path)["quality"]

    def test_detects_codeowners(self, tmp_path: Path):
        """A ``CODEOWNERS`` file is reported as ``codeowners`` under ``review``."""
        (tmp_path / "CODEOWNERS").touch()
        assert "codeowners" in discover_local(tmp_path)["review"]

    def test_detects_dependabot(self, tmp_path: Path):
        """``.github/dependabot.yml`` is reported as ``dependabot`` under ``deps``."""
        github_dir = tmp_path / ".github"
        github_dir.mkdir(parents=True)
        (github_dir / "dependabot.yml").touch()
        assert "dependabot" in discover_local(tmp_path)["deps"]

    def test_detects_renovate(self, tmp_path: Path):
        """``renovate.json`` is reported as ``renovate`` under ``deps``."""
        (tmp_path / "renovate.json").touch()
        assert "renovate" in discover_local(tmp_path)["deps"]


# --- CLI Discovery ---


class TestDiscoverClis:
    """``discover_clis`` with ``shutil.which`` patched to control what is found."""

    def test_finds_installed(self):
        """Only the CLI that ``which`` resolves ends up in the result."""
        def only_claude(name):
            if name == "claude":
                return f"/usr/bin/{name}"
            return None

        with patch("shutil.which", side_effect=only_claude):
            found = discover_clis()
        assert found == {"claude": "/usr/bin/claude"}

    def test_finds_none(self):
        """When ``which`` resolves nothing, the result is an empty dict."""
        with patch("shutil.which", return_value=None):
            found = discover_clis()
        assert found == {}

    def test_finds_all(self):
        """When ``which`` resolves everything, three CLIs are reported."""
        def always_found(name):
            return f"/usr/bin/{name}"

        with patch("shutil.which", side_effect=always_found):
            found = discover_clis()
        assert len(found) == 3
        assert "claude" in found


# --- Repo Discovery ---


class TestDiscoverRepos:
    """``discover_repos`` scanning a fake home directory for ``.git`` folders."""

    def test_finds_git_repos(self, tmp_path: Path):
        """A repository under ``Documents`` is found and keyed by its folder name."""
        checkout = tmp_path / "Documents" / "my-proj"
        (checkout / ".git").mkdir(parents=True)

        found = discover_repos(home=tmp_path)
        assert len(found) == 1
        assert found[0]["key"] == "my-proj"

    def test_skips_hidden_dirs(self, tmp_path: Path):
        """A repository inside a dot-directory is not reported."""
        hidden_checkout = tmp_path / "Documents" / ".secret"
        (hidden_checkout / ".git").mkdir(parents=True)

        assert discover_repos(home=tmp_path) == []

    def test_depth_limited(self, tmp_path: Path):
        """A repository nested five levels below ``dev`` is not reported."""
        buried = tmp_path / "dev" / "a" / "b" / "c" / "d" / "e"
        (buried / ".git").mkdir(parents=True)

        assert discover_repos(home=tmp_path) == []

    def test_max_30_repos(self, tmp_path: Path):
        """With 35 repositories available, only 30 are returned."""
        dev_dir = tmp_path / "dev"
        dev_dir.mkdir()
        for index in range(35):
            checkout = dev_dir / f"repo-{index:02d}"
            checkout.mkdir()
            (checkout / ".git").mkdir()

        assert len(discover_repos(home=tmp_path)) == 30

    def test_no_scan_dirs(self, tmp_path: Path):
        """A home directory with nothing in it yields no repositories."""
        assert discover_repos(home=tmp_path) == []


# --- Active Projects ---


class TestDiscoverActiveProjects:
    """``discover_active_projects`` reading ``history.jsonl`` from a directory."""

    def test_parses_history(self, tmp_path: Path):
        """Project names are taken from the paths; the most frequent one is first."""
        visited = ("/Users/me/proj-a", "/Users/me/proj-a", "/Users/me/proj-b")
        records = [json.dumps({"project": path}) for path in visited]
        history = tmp_path / "history.jsonl"
        history.write_text("\n".join(records) + "\n")

        names = discover_active_projects(tmp_path)
        assert names[0] == "proj-a"
        assert "proj-b" in names

    def test_no_history_file(self, tmp_path: Path):
        """A missing history file yields an empty list."""
        assert discover_active_projects(tmp_path) == []

    def test_malformed_json(self, tmp_path: Path):
        """Unparseable lines are skipped; valid ones are still used."""
        history = tmp_path / "history.jsonl"
        history.write_text('not-json\n{"project":"/p"}\n')

        assert discover_active_projects(tmp_path) == ["p"]


# --- Env Tokens ---


class TestDiscoverEnvTokens:
    """``discover_env_tokens`` against a fully replaced ``os.environ``."""

    def test_detects_tokens(self):
        """A token set in the environment is True; an absent one is False."""
        fake_env = {"GITHUB_TOKEN": "ghp_abc"}
        with patch.dict("os.environ", fake_env, clear=True):
            flags = discover_env_tokens()
        assert flags["GITHUB_TOKEN"] is True
        assert flags["ANTHROPIC_API_KEY"] is False

    def test_no_env_tokens(self):
        """With an empty environment and no git token, every flag is False."""
        empty_env = patch.dict("os.environ", {}, clear=True)
        no_git_token = patch("maggy.discovery.discover_git_token", return_value="")
        with empty_env:
            with no_git_token:
                flags = discover_env_tokens()
        assert flags["GITHUB_TOKEN"] is False
        assert flags["ANTHROPIC_API_KEY"] is False
        assert flags["ASANA_API_KEY"] is False


# --- URL Parsing ---


class TestParseOrgFromUrl:
    """``_parse_org_from_url`` on SSH, HTTPS and non-GitHub remotes."""

    def test_ssh_url(self):
        """The organisation is taken from an SSH-style GitHub remote."""
        org = _parse_org_from_url("git@github.com:acme/webapp.git")
        assert org == "acme"

    def test_https_url(self):
        """The organisation is taken from an HTTPS GitHub remote."""
        org = _parse_org_from_url("https://github.com/acme/webapp.git")
        assert org == "acme"

    def test_non_github(self):
        """A GitLab remote yields an empty string."""
        org = _parse_org_from_url("https://gitlab.com/acme/webapp.git")
        assert org == ""


# --- Full Discovery ---


class TestFullDiscovery:
    """``full_discovery`` run against a temporary home with no CLIs on PATH."""

    def test_returns_result(self, tmp_path: Path):
        """The call returns a ``DiscoveryResult`` with a non-empty timestamp."""
        with patch("shutil.which", return_value=None):
            outcome = full_discovery(home=tmp_path)
        assert isinstance(outcome, DiscoveryResult)
        assert outcome.timestamp != ""

    def test_populates_repos(self, tmp_path: Path):
        """A repository under ``dev`` appears in the result's ``repos``."""
        checkout = tmp_path / "dev" / "my-app"
        (checkout / ".git").mkdir(parents=True)

        with patch("shutil.which", return_value=None):
            outcome = full_discovery(home=tmp_path)
        assert len(outcome.repos) == 1
        assert outcome.repos[0]["key"] == "my-app"


================================================
FILE: maggy/tests/test_dual_planner.py
================================================
"""Tests for DualPlanner orchestration."""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock

import pytest

from maggy.adapters.pi import RunResult
from maggy.services.planner import DualPlanner


def _result(output: str) -> RunResult:
    """Build a successful ``RunResult`` for model ``"test"`` carrying ``output``."""
    fields = {"model": "test", "success": True, "output": output}
    return RunResult(**fields)


@pytest.mark.asyncio
async def test_plan_uses_claude_prompt() -> None:
    """``plan`` sends one prompt to ``claude`` containing the title and description."""
    adapter = MagicMock()
    adapter.send_prompt = AsyncMock(return_value=_result("Primary plan"))

    produced = await DualPlanner(adapter).plan(
        "Fix auth", "Add logout flow", "/tmp/work",
    )

    assert produced == "Primary plan"
    adapter.send_prompt.assert_awaited_once()
    sent = adapter.send_prompt.await_args.args
    assert sent[0] == "claude"
    assert sent[2] == "/tmp/work"
    assert sent[3] == 5
    prompt_text = sent[1]
    assert "Fix auth" in prompt_text
    assert "Add logout flow" in prompt_text


@pytest.mark.asyncio
async def test_counter_check_uses_codex_prompt() -> None:
    """``counter_check`` sends the plan to ``codex`` with the conflict instruction."""
    adapter = MagicMock()
    adapter.send_prompt = AsyncMock(return_value=_result("Looks good"))
    plan_text = "1. Update auth\n2. Add tests"

    verdict = await DualPlanner(adapter).counter_check(plan_text, "/tmp/work")

    assert verdict == "Looks good"
    sent = adapter.send_prompt.await_args.args
    assert sent[0] == "codex"
    assert sent[2] == "/tmp/work"
    assert sent[3] == 5
    prompt_text = sent[1]
    assert "1. Update auth" in prompt_text
    assert "Flag conflicts as 'CONFLICT:'" in prompt_text


@pytest.mark.asyncio
async def test_dual_plan_collects_conflicts() -> None:
    """``dual_plan`` keeps both replies and extracts the ``CONFLICT:`` entries."""
    replies = [
        _result("1. Update auth\n2. Add tests"),
        _result("CONFLICT: use middleware\nkeep step 2"),
    ]
    adapter = MagicMock()
    adapter.send_prompt = AsyncMock(side_effect=replies)

    outcome = await DualPlanner(adapter).dual_plan(
        "Fix auth", "Add logout flow", "/tmp/work",
    )

    assert outcome.primary_plan.startswith("1. Update auth")
    assert outcome.counter_check.startswith("CONFLICT:")
    assert outcome.conflicts == ["use middleware"]


================================================
FILE: maggy/tests/test_engram.py
================================================
"""Tests for Engram — record, store, retrieval, diagnostics."""

from __future__ import annotations

from pathlib import Path

from maggy.engram.diagnostics import AmnesiaProfile, diagnose
from maggy.engram.record import EngramRecord, Origin, Validity
from maggy.engram.retrieval import EngramRetrieval
from maggy.engram.store import EngramStore


class TestEngramRecord:
    """Default state and superseding of an ``EngramRecord``."""

    def test_defaults(self):
        """A fresh record is active and has an explicit origin."""
        record = EngramRecord(
            engram_id="e1",
            namespace="proj-1",
            memory_type="fact",
            content="Python 3.11",
        )
        assert record.is_active
        assert record.origin == Origin.EXPLICIT

    def test_supersede(self):
        """Superseding deactivates the record and marks it ``SUPERSEDED``."""
        record = EngramRecord(
            engram_id="e1",
            namespace="proj-1",
            memory_type="fact",
            content="test",
        )
        record.supersede()
        assert not record.is_active
        assert record.validity == Validity.SUPERSEDED


class TestEngramStore:
    """Write, read, query and count operations of ``EngramStore``."""

    def test_write_and_get(self, tmp_path: Path):
        """A written record can be fetched back by its id."""
        store = EngramStore(tmp_path / "engram.db")
        record = EngramRecord(
            engram_id="e1",
            namespace="proj-1",
            memory_type="fact",
            content="Uses FastAPI",
        )
        store.write(record)

        fetched = store.get("e1")
        assert fetched is not None
        assert fetched.content == "Uses FastAPI"

    def test_get_missing(self, tmp_path: Path):
        """Fetching an unknown id returns ``None``."""
        store = EngramStore(tmp_path / "engram.db")
        assert store.get("nope") is None

    def test_query_by_namespace(self, tmp_path: Path):
        """Querying by namespace returns only records from that namespace."""
        store = EngramStore(tmp_path / "engram.db")
        first = EngramRecord(
            engram_id="e1",
            namespace="proj-1",
            memory_type="fact",
            content="A",
        )
        second = EngramRecord(
            engram_id="e2",
            namespace="proj-2",
            memory_type="fact",
            content="B",
        )
        store.write(first)
        store.write(second)

        matches = store.query(namespace="proj-1")
        assert len(matches) == 1
        assert matches[0].namespace == "proj-1"

    def test_query_by_type(self, tmp_path: Path):
        """Querying by memory type returns only records of that type."""
        store = EngramStore(tmp_path / "engram.db")
        fact = EngramRecord(
            engram_id="e1",
            namespace="p",
            memory_type="fact",
            content="A",
        )
        decision = EngramRecord(
            engram_id="e2",
            namespace="p",
            memory_type="decision",
            content="B",
        )
        store.write(fact)
        store.write(decision)

        matches = store.query(memory_type="decision")
        assert len(matches) == 1

    def test_count(self, tmp_path: Path):
        """``count`` works globally and per namespace."""
        store = EngramStore(tmp_path / "engram.db")
        record = EngramRecord(
            engram_id="e1",
            namespace="p",
            memory_type="fact",
            content="A",
        )
        store.write(record)

        assert store.count() == 1
        assert store.count(namespace="p") == 1
        assert store.count(namespace="x") == 0


class TestRetrieval:
    """Lookup helpers of ``EngramRetrieval`` over a two-record store."""

    def _seed(self, tmp_path: Path) -> EngramStore:
        """Create a store holding one tagged fact and one tagged decision."""
        seeded = EngramStore(tmp_path / "engram.db")
        fact = EngramRecord(
            engram_id="e1",
            namespace="proj",
            memory_type="fact",
            content="Uses FastAPI",
            tags=["backend", "python"],
        )
        decision = EngramRecord(
            engram_id="e2",
            namespace="proj",
            memory_type="decision",
            content="Chose SQLite",
            tags=["database"],
        )
        seeded.write(fact)
        seeded.write(decision)
        return seeded

    def test_by_keyword(self, tmp_path: Path):
        """A keyword present in one record's content matches one record."""
        retrieval = EngramRetrieval(self._seed(tmp_path))
        assert len(retrieval.by_keyword("FastAPI")) == 1

    def test_by_tag(self, tmp_path: Path):
        """A tag carried by one record matches one record."""
        retrieval = EngramRetrieval(self._seed(tmp_path))
        assert len(retrieval.by_tag("backend")) == 1

    def test_by_type(self, tmp_path: Path):
        """A memory type used by one record matches one record."""
        retrieval = EngramRetrieval(self._seed(tmp_path))
        assert len(retrieval.by_type("decision")) == 1

    def test_recent(self, tmp_path: Path):
        """``recent`` returns both seeded records."""
        retrieval = EngramRetrieval(self._seed(tmp_path))
        assert len(retrieval.recent()) == 2


class TestDiagnostics:
    """Health profile produced by ``diagnose`` for empty and populated stores."""

    def test_empty_store(self, tmp_path: Path):
        """An empty store has a health score of 0.0."""
        empty = EngramStore(tmp_path / "engram.db")
        assert diagnose(empty).health_score == 0.0

    def test_healthy_store(self, tmp_path: Path):
        """One record of each memory type gives a score above 0.8."""
        store = EngramStore(tmp_path / "engram.db")
        memory_types = ["fact", "decision", "code_ref", "handoff"]
        for index, memory_type in enumerate(memory_types):
            record = EngramRecord(
                engram_id=f"e{index}",
                namespace="p",
                memory_type=memory_type,
                content=f"content {index}",
            )
            store.write(record)

        report = diagnose(store)
        assert report.total_memories == 4
        assert report.active_count == 4
        assert report.health_score > 0.8


class TestEngramSeed:
    """``seed_if_empty`` populates missing memory types so health is non-zero."""

    def test_seed_writes_all_types(self, tmp_path: Path):
        """Seeding an empty store produces at least one record of every type."""
        from maggy.engram.seed import seed_if_empty

        store = EngramStore(tmp_path / "engram.db")
        seed_if_empty(store)

        report = diagnose(store)
        assert report.facts > 0
        assert report.decisions > 0
        assert report.code_refs > 0
        assert report.handoffs > 0

    def test_seed_gives_healthy_score(self, tmp_path: Path):
        """A freshly seeded store scores at least 0.8."""
        from maggy.engram.seed import seed_if_empty

        store = EngramStore(tmp_path / "engram.db")
        seed_if_empty(store)

        assert diagnose(store).health_score >= 0.8

    def test_seed_fills_missing_types(self, tmp_path: Path):
        """With only a fact present, the other types get seeded and the fact stays."""
        from maggy.engram.seed import seed_if_empty

        store = EngramStore(tmp_path / "engram.db")
        preexisting = EngramRecord(
            engram_id="existing",
            namespace="p",
            memory_type="fact",
            content="already here",
        )
        store.write(preexisting)
        seed_if_empty(store)

        report = diagnose(store)
        # The pre-existing fact still counts; the absent types were added.
        assert report.facts >= 1
        assert report.decisions > 0
        assert report.code_refs > 0
        assert report.handoffs > 0

    def test_seed_skips_when_all_types_present(self, tmp_path: Path):
        """When every type already exists, seeding adds nothing."""
        from maggy.engram.seed import seed_if_empty

        store = EngramStore(tmp_path / "engram.db")
        memory_types = ["fact", "decision", "code_ref", "handoff"]
        for index, memory_type in enumerate(memory_types):
            record = EngramRecord(
                engram_id=f"e{index}",
                namespace="p",
                memory_type=memory_type,
                content=f"c{index}",
            )
            store.write(record)

        seed_if_empty(store)
        assert store.count() == 4


================================================
FILE: maggy/tests/test_escalation.py
================================================
"""Tests for human escalation packets."""

from __future__ import annotations

from maggy.escalation.protocol import Escalator


class TestEscalator:
    """Create, list and resolve escalation packets via ``Escalator``."""

    def test_escalate_and_get(self, tmp_path):
        """A packet returned by ``escalate`` can be loaded again by its id."""
        esc = Escalator(tmp_path / "escalations.db")
        context = {
            "agent_state": {"task": "coordination"},
            "suggested_actions": ["review lock owner"],
        }
        created = esc.escalate(
            "session-1", "blocked on merge conflict", context,
        )
        fetched = esc.get(created.id)
        assert fetched is not None
        assert fetched.session_id == "session-1"
        assert fetched.agent_state == {"task": "coordination"}
        assert fetched.suggested_actions == ["review lock owner"]

    def test_list_pending_returns_unresolved(self, tmp_path):
        """Resolved packets are excluded from ``list_pending``."""
        esc = Escalator(tmp_path / "escalations.db")
        to_resolve = esc.escalate("session-1", "needs input", {})
        esc.escalate("session-2", "waiting on human", {})
        esc.resolve(to_resolve.id, "continue with fallback")
        remaining = [p.session_id for p in esc.list_pending()]
        assert remaining == ["session-2"]

    def test_resolve_marks_packet(self, tmp_path):
        """``resolve`` returns a packet flagged resolved with the given text."""
        esc = Escalator(tmp_path / "escalations.db")
        created = esc.escalate("session-1", "needs approval", {})
        outcome = esc.resolve(created.id, "approved")
        assert outcome.resolved is True
        assert outcome.resolution == "approved"


================================================
FILE: maggy/tests/test_event_spine.py
================================================
"""Tests for Event Spine — header, typed events, emitter, store."""

from __future__ import annotations

from pathlib import Path

from maggy.event_spine.emitter import EventEmitter
from maggy.event_spine.events import (
    EVENT_TYPES,
    ExecutionEvent,
    IntentEvent,
    MeshEvent,
    OutcomeEvent,
)
from maggy.event_spine.header import EventHeader
from maggy.event_spine.store import EventStore


class TestEventHeader:
    """Default and explicitly supplied fields of ``EventHeader``."""

    def test_defaults(self):
        """Only ``event_type`` is given; the remaining fields are auto-filled."""
        header = EventHeader(event_type="intent")
        assert header.event_type == "intent"
        # These two are expected to be populated (truthy) without being passed.
        assert header.event_id
        assert header.timestamp
        assert header.schema_version == 1
        assert header.confidence == 1.0

    def test_custom_fields(self):
        """Identifiers passed to the constructor are stored unchanged."""
        fields = {
            "event_type": "execution",
            "task_id": "t1",
            "project_id": "p1",
            "agent_id": "a1",
        }
        header = EventHeader(**fields)
        assert header.task_id == "t1"
        assert header.project_id == "p1"


class TestTypedEvents:
    """Typed event classes carry the matching ``header.event_type``."""

    def test_all_eight_types(self):
        """``EVENT_TYPES`` lists exactly eight entries."""
        assert len(EVENT_TYPES) == 8

    def test_intent_event(self):
        """An ``IntentEvent`` is tagged "intent" and keeps its steps."""
        steps = ["create component", "add route"]
        event = IntentEvent(
            intent_text="Add login button", decomposed_steps=steps,
        )
        assert event.header.event_type == "intent"
        assert len(event.decomposed_steps) == 2

    def test_execution_event(self):
        """An ``ExecutionEvent`` is tagged "execution" and keeps its duration."""
        event = ExecutionEvent(tool_name="grep", duration_ms=150, success=True)
        assert event.header.event_type == "execution"
        assert event.duration_ms == 150

    def test_outcome_event(self):
        """An ``OutcomeEvent`` is tagged "outcome" and keeps its reward."""
        event = OutcomeEvent(success=True, reward=0.9)
        assert event.header.event_type == "outcome"
        assert event.reward == 0.9


class TestEventStore:
    """Write, query, count and limit behaviour of ``EventStore``."""

    def test_write_and_query(self, tmp_path: Path):
        """A single written event is found when querying by its task id."""
        db = EventStore(tmp_path / "events.db")
        header = EventHeader(event_type="intent", task_id="t1")
        payload = {"header": {"event_type": "intent"}, "text": "hi"}
        db.write(header, payload)
        assert len(db.query(task_id="t1")) == 1

    def test_query_by_type(self, tmp_path: Path):
        """Filtering by ``event_type`` returns only the matching event."""
        db = EventStore(tmp_path / "events.db")
        intent_header = EventHeader(event_type="intent", task_id="t1")
        exec_header = EventHeader(event_type="execution", task_id="t1")
        db.write(intent_header, {"type": "intent"})
        db.write(exec_header, {"type": "execution"})
        matches = db.query(event_type="intent")
        assert len(matches) == 1

    def test_count(self, tmp_path: Path):
        """``count`` reports five execution events and zero intent events."""
        db = EventStore(tmp_path / "events.db")
        for idx in range(5):
            header = EventHeader(event_type="execution", task_id=f"t{idx}")
            db.write(header, {"i": idx})
        assert db.count(event_type="execution") == 5
        assert db.count(event_type="intent") == 0

    def test_limit(self, tmp_path: Path):
        """``limit=3`` caps the result even though ten events match."""
        db = EventStore(tmp_path / "events.db")
        for idx in range(10):
            db.write(
                EventHeader(event_type="intent", task_id="t1"),
                {"i": idx},
            )
        limited = db.query(task_id="t1", limit=3)
        assert len(limited) == 3


class TestEventEmitter:
    """``EventEmitter`` on top of a temp ``EventStore``."""

    def test_emit_returns_id(self, tmp_path: Path):
        """``emit`` returns the event id held in the event's header."""
        emitter = EventEmitter(EventStore(tmp_path / "events.db"))
        intent = IntentEvent(intent_text="test")
        returned_id = emitter.emit(intent)
        assert returned_id == intent.header.event_id

    def test_emit_invalid_raises(self, tmp_path: Path):
        """Emitting a plain dict raises ``ValueError``."""
        import pytest
        emitter = EventEmitter(EventStore(tmp_path / "events.db"))
        with pytest.raises(ValueError):
            emitter.emit({"not": "an event"})

    def test_trace(self, tmp_path: Path):
        """``trace`` returns both events that share a task id."""
        emitter = EventEmitter(EventStore(tmp_path / "events.db"))
        first = IntentEvent(intent_text="step 1")
        first.header.task_id = "task-abc"
        second = ExecutionEvent(tool_name="grep")
        second.header.task_id = "task-abc"
        for event in (first, second):
            emitter.emit(event)
        assert len(emitter.trace("task-abc")) == 2

    def test_count(self, tmp_path: Path):
        """Three emitted intent events are counted as three."""
        emitter = EventEmitter(EventStore(tmp_path / "events.db"))
        emitted = 0
        while emitted < 3:
            emitter.emit(IntentEvent(intent_text="x"))
            emitted += 1
        assert emitter.count(event_type="intent") == 3

    def test_query_by_project(self, tmp_path: Path):
        """An event tagged with a project id is found by ``query(project_id=...)``."""
        emitter = EventEmitter(EventStore(tmp_path / "events.db"))
        intent = IntentEvent(intent_text="x")
        intent.header.project_id = "proj-1"
        emitter.emit(intent)
        found = emitter.query(project_id="proj-1")
        assert len(found) == 1


================================================
FILE: maggy/tests/test_executor_routing.py
================================================
"""Tests for executor model routing and spend recording."""

from __future__ import annotations

from unittest.mock import AsyncMock

import pytest

from maggy.adapters.pi import RunResult
from maggy.providers.base import Task
from maggy.services import executor_helpers
from maggy.services import output_reviewer as reviewer_mod
from maggy.services.executor import ExecutorService
from maggy.services.executor_types import SessionCtx


def _session() -> dict[str, str]:
    return {
        "id": "session-1",
        "task_id": "task-1",
        "task_title": "Test task",
        "mode": "plan",
        "working_dir": ".",
        "status": "running",
        "started_at": "",
        "output": "",
    }


def _task(blast_score: int, task_type: str) -> Task:
    """Build the ``task-1`` ``Task`` whose ``raw`` dict carries routing metadata.

    ``security_sensitive`` is set to True only when ``task_type`` is
    ``"security"``.
    """
    routing_meta = {
        "blast_score": blast_score,
        "task_type": task_type,
        "security_sensitive": task_type == "security",
    }
    return Task(
        id="task-1",
        title="Route this task",
        description="Use task metadata for routing.",
        raw=routing_meta,
    )


def _ctx(session: dict, task: Task, wd: str) -> SessionCtx:
    """Bundle a session dict, a task and a working directory into a ``SessionCtx``."""
    context = SessionCtx(session=session, task=task, wd=wd)
    return context


def _patch_executor(executor, monkeypatch):
    """Replace the ICPG context builder and ``executor._pi.send_prompt`` with fakes.

    The fake context builder returns an empty string; the fake sender returns
    a successful ``RunResult`` with output ``"ok"`` for whichever model name
    it is given.
    """

    async def fake_send(
        model_name: str, prompt: str, working_dir: str,
        max_turns: int = 20, timeout: int = 600,
    ) -> RunResult:
        reply = RunResult(model=model_name, success=True, output="ok")
        return reply

    async def fake_context(cfg, task):
        return ""

    replacements = (
        (executor_helpers, "build_icpg_context", fake_context),
        (executor._pi, "send_prompt", fake_send),
    )
    for target, attr, stub in replacements:
        monkeypatch.setattr(target, attr, stub)


@pytest.mark.asyncio
async def test_plan_mode_routes_high_blast_to_claude(
    mock_cfg, tmp_path, monkeypatch,
):
    """Record which model a blast-9 ``general`` task is sent to in plan mode.

    NOTE(review): the test name mentions claude, but the assertion below
    expects the first prompt to be sent to ``"codex"``.
    """
    seen_models: list[str] = []

    async def fake_context(cfg, task):
        return ""

    async def tracking_send(
        model_name: str, prompt: str, working_dir: str,
        max_turns: int = 20, timeout: int = 600,
    ) -> RunResult:
        # Remember every model name the executor asks for.
        seen_models.append(model_name)
        return RunResult(model=model_name, success=True, output="ok")

    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    monkeypatch.setattr(executor_helpers, "build_icpg_context", fake_context)
    monkeypatch.setattr(svc._pi, "send_prompt", tracking_send)
    run_ctx = _ctx(sess, _task(9, "general"), str(tmp_path))
    await svc._run(run_ctx, "plan")
    # Blast 9 general → codex (cost_rank=3, covers 4-10)
    assert seen_models[0] == "codex"


@pytest.mark.asyncio
async def test_plan_records_spend(mock_cfg, tmp_path, monkeypatch):
    """A plan run whose result costs 1.25 USD shows up as "anthropic" spend."""

    async def fake_context(cfg, task):
        return ""

    async def fake_send(
        model_name: str, prompt: str, working_dir: str,
        max_turns: int = 20, timeout: int = 600,
    ) -> RunResult:
        # The reported cost is what the budget is expected to record.
        return RunResult(
            model=model_name, success=True, output="plan", cost_usd=1.25,
        )

    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    monkeypatch.setattr(executor_helpers, "build_icpg_context", fake_context)
    monkeypatch.setattr(svc._pi, "send_prompt", fake_send)
    run_ctx = _ctx(sess, _task(3, "security"), str(tmp_path))
    await svc._run(run_ctx, "plan")
    spent = svc._budget.today_spend("anthropic")
    assert spent == pytest.approx(1.25)


@pytest.mark.asyncio
async def test_tdd_high_blast_calls_dual_planner(
    mock_cfg, tmp_path, monkeypatch,
):
    """A blast-9 ``feature`` task run in tdd mode invokes ``_dual_plan``."""
    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    dual_calls: list[bool] = []

    async def track_dual(ctx):
        # Stand-in for the real planner; only notes that it was reached.
        dual_calls.append(True)

    monkeypatch.setattr(svc, "_dual_plan", track_dual)
    run_ctx = _ctx(sess, _task(9, "feature"), str(tmp_path))
    await svc._run(run_ctx, "tdd")
    assert dual_calls


@pytest.mark.asyncio
async def test_locks_released_after_run(
    mock_cfg, tmp_path, monkeypatch,
):
    """After a run, another agent can acquire the working-directory lock."""
    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    workdir = str(tmp_path)
    # The session holds the lock before the run starts.
    svc._locks.acquire(workdir, "session-1")
    await svc._run(_ctx(sess, _task(3, "docs"), workdir), "plan")
    reacquired = svc._locks.acquire(workdir, "other-agent")
    assert reacquired


@pytest.mark.asyncio
async def test_fatigue_tracked(mock_cfg, tmp_path, monkeypatch):
    """A plan run leaves the ``context_load`` fatigue dimension above zero."""
    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    run_ctx = _ctx(sess, _task(3, "docs"), str(tmp_path))
    await svc._run(run_ctx, "plan")
    context_load = svc._fatigue.dimensions["context_load"]
    assert context_load > 0


@pytest.mark.asyncio
async def test_conventions_in_prompts(
    mock_cfg, tmp_path, monkeypatch,
):
    """The first prompt sent in plan mode contains the team-conventions text."""
    captured: list[str] = []

    async def fake_context(cfg, task):
        return ""

    async def fake_send(
        model_name: str, prompt: str, working_dir: str,
        max_turns: int = 20, timeout: int = 600,
    ) -> RunResult:
        # Keep every prompt so the first one can be inspected afterwards.
        captured.append(prompt)
        return RunResult(model=model_name, success=True, output="ok")

    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    monkeypatch.setattr(executor_helpers, "build_icpg_context", fake_context)
    monkeypatch.setattr(svc._pi, "send_prompt", fake_send)
    run_ctx = _ctx(sess, _task(5, "feature"), str(tmp_path))
    await svc._run(run_ctx, "plan")
    assert captured
    first_prompt = captured[0]
    assert "Team Conventions" in first_prompt
    assert "minimum wowable product" in first_prompt


@pytest.mark.asyncio
async def test_tdd_calls_reviewer(mock_cfg, tmp_path, monkeypatch):
    """A tdd run reviews both the ANALYZE and WRITE TESTS outputs."""
    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    reviewed_labels: list[str] = []

    async def fake_review(pi, label, output, wd):
        from maggy.services.output_reviewer import ReviewResult
        # Record the label and always return a score of 4.
        reviewed_labels.append(label)
        return ReviewResult(score=4, reason="ok")

    monkeypatch.setattr(reviewer_mod, "review_output", fake_review)
    run_ctx = _ctx(sess, _task(3, "feature"), str(tmp_path))
    await svc._run(run_ctx, "tdd")
    for expected_label in ("ANALYZE", "WRITE TESTS"):
        assert expected_label in reviewed_labels


@pytest.mark.asyncio
async def test_review_retry_on_low_score(
    mock_cfg, tmp_path, monkeypatch,
):
    """A first review score of 2 leads to another review and a RETRY marker."""
    svc = ExecutorService(mock_cfg, AsyncMock())
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    review_calls = 0

    async def fake_review(pi, label, output, wd):
        nonlocal review_calls
        from maggy.services.output_reviewer import ReviewResult
        review_calls += 1
        # Only the very first review scores low; all later ones score 4.
        if review_calls > 1:
            return ReviewResult(score=4, reason="ok")
        return ReviewResult(score=2, reason="poor")

    monkeypatch.setattr(reviewer_mod, "review_output", fake_review)
    run_ctx = _ctx(sess, _task(3, "feature"), str(tmp_path))
    await svc._run(run_ctx, "tdd")
    assert review_calls >= 2
    assert "RETRY" in sess["output"]


@pytest.mark.asyncio
async def test_status_callback_fires(
    mock_cfg, tmp_path, monkeypatch,
):
    """The status callback sees at least one "running" and one "done" event."""
    events: list[dict] = []
    svc = ExecutorService(
        mock_cfg, AsyncMock(), status_cb=events.append,
    )
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    run_ctx = _ctx(sess, _task(3, "docs"), str(tmp_path))
    await svc._run(run_ctx, "plan")
    for wanted in ("running", "done"):
        assert any(evt["status"] == wanted for evt in events)


@pytest.mark.asyncio
async def test_status_shows_model_name(
    mock_cfg, tmp_path, monkeypatch,
):
    """At least one status event carries ``agent == "codex"`` for a blast-9 task."""
    events: list[dict] = []
    svc = ExecutorService(
        mock_cfg, AsyncMock(), status_cb=events.append,
    )
    sess = _session()
    svc._sessions["session-1"] = sess
    _patch_executor(svc, monkeypatch)
    run_ctx = _ctx(sess, _task(9, "general"), str(tmp_path))
    await svc._run(run_ctx, "plan")
    # Events without an "agent" key are tolerated via .get().
    assert any(evt.get("agent") == "codex" for evt in events)


================================================
FILE: maggy/tests/test_fatigue.py
================================================
"""Tests for fatigue tracking — profiles and model comparison."""

from __future__ import annotations

from maggy.fatigue import (
    FatigueProfile,
    MODEL_CONTEXT_WINDOWS,
    compare_fatigue,
    create_profile,
)


class TestFatigueProfile:
    """``fatigue_score`` and ``raw_utilization`` at several usage levels."""

    def test_zero_usage_no_fatigue(self):
        """With no tokens or turns given, both metrics are exactly 0.0."""
        profile = FatigueProfile(model="claude", context_window=200_000)
        assert profile.fatigue_score == 0.0
        assert profile.raw_utilization == 0.0

    def test_full_context_high_fatigue(self):
        """A completely used window with 50 turns scores exactly 1.0."""
        window = 200_000
        profile = FatigueProfile(
            model="claude", context_window=window,
            tokens_used=window, turns=50,
        )
        assert profile.fatigue_score == 1.0

    def test_half_context_moderate_fatigue(self):
        """Half the window with 10 turns scores strictly between 0.3 and 0.6."""
        profile = FatigueProfile(
            model="gpt", context_window=128_000,
            tokens_used=64_000, turns=10,
        )
        assert 0.3 < profile.fatigue_score < 0.6

    def test_zero_context_window_safe(self):
        """A zero-sized window yields a utilization of 0.0."""
        profile = FatigueProfile(model="x", context_window=0)
        assert profile.raw_utilization == 0.0


class TestShouldCheckpoint:
    """``should_checkpoint`` with the default and a custom threshold."""

    def test_below_threshold(self):
        """50k of 200k tokens does not trigger a checkpoint."""
        profile = FatigueProfile(
            model="claude", context_window=200_000, tokens_used=50_000,
        )
        assert not profile.should_checkpoint()

    def test_above_threshold(self):
        """180k of 200k tokens over 40 turns triggers a checkpoint."""
        profile = FatigueProfile(
            model="claude", context_window=200_000,
            tokens_used=180_000, turns=40,
        )
        assert profile.should_checkpoint()

    def test_custom_threshold(self):
        """Half the window triggers a checkpoint when the threshold is 0.3."""
        profile = FatigueProfile(
            model="claude", context_window=200_000, tokens_used=100_000,
        )
        assert profile.should_checkpoint(threshold=0.3)


class TestCreateProfile:
    """Context-window sizes chosen by ``create_profile``."""

    def test_known_model(self):
        """"claude" gets a 200,000-token window."""
        assert create_profile("claude").context_window == 200_000

    def test_unknown_model_defaults(self):
        """An unrecognised model name gets a 128,000-token window."""
        profile = create_profile("unknown-model")
        assert profile.context_window == 128_000


class TestCompareFatigue:
    """Ordering of the rows returned by ``compare_fatigue``."""

    def test_sorted_by_fatigue(self):
        """The heavily used profile is listed before the lightly used one."""
        heavy = FatigueProfile(
            model="claude", context_window=200_000,
            tokens_used=180_000, turns=40,
        )
        light = FatigueProfile(
            model="gpt", context_window=128_000,
            tokens_used=10_000, turns=2,
        )
        ranked = compare_fatigue([heavy, light])
        top, runner_up = ranked[0], ranked[1]
        assert top["model"] == "claude"
        assert top["fatigue"] > runner_up["fatigue"]


================================================
FILE: maggy/tests/test_forge.py
================================================
"""Tests for MCP Forge connector, registry, and gap detection."""

from __future__ import annotations

from pathlib import Path

from maggy.forge.connector import ForgeConnector
from maggy.forge.detector import GapDetector, TRIGGER_THRESHOLD
from maggy.forge.registry import ForgeRegistry, ToolInfo


class TestForgeRegistry:
    """``ForgeRegistry`` with and without a local MCP Forge checkout.

    Tests that need a real checkout look for it under
    ``~/Documents/protaige/mcp-forge`` and are reported as *skipped* (rather
    than silently passing) when that directory does not exist.
    """

    def test_empty_without_forge(self):
        """A registry built with ``forge_path=None`` holds no tools."""
        reg = ForgeRegistry(forge_path=None)
        assert reg.count == 0

    def test_loads_from_forge_path(self):
        """A registry pointed at a real checkout loads at least one tool."""
        import pytest
        forge = Path.home() / "Documents" / "protaige" / "mcp-forge"
        if not forge.exists():
            # Report as skipped so a missing checkout is visible in results.
            pytest.skip(f"MCP Forge checkout not found at {forge}")
        reg = ForgeRegistry(forge_path=forge)
        assert reg.count > 0

    def test_search(self):
        """Searching a real checkout for "stripe" returns the stripe tool."""
        import pytest
        forge = Path.home() / "Documents" / "protaige" / "mcp-forge"
        if not forge.exists():
            # Report as skipped so a missing checkout is visible in results.
            pytest.skip(f"MCP Forge checkout not found at {forge}")
        reg = ForgeRegistry(forge_path=forge)
        results = reg.search("stripe")
        assert any(t.slug == "stripe" for t in results)

    def test_get_missing(self):
        """``get`` returns None for an unknown slug."""
        reg = ForgeRegistry(forge_path=None)
        assert reg.get("nonexistent") is None

    def test_set_enabled(self):
        """``set_enabled`` flips a known tool and returns falsy for an unknown one."""
        reg = ForgeRegistry(forge_path=None)
        # Inject a tool directly; no forge checkout is needed for this test.
        reg._tools["test"] = ToolInfo(slug="test")
        assert reg.set_enabled("test", False)
        assert not reg._tools["test"].enabled
        assert not reg.set_enabled("nope", False)


class TestGapDetector:
    """Trigger, listing and reset behaviour of ``GapDetector``."""

    def test_first_record_no_trigger(self):
        """The first report of a gap does not trigger with the default threshold."""
        detector = GapDetector()
        triggered = detector.record_gap("email sending")
        assert not triggered

    def test_trigger_at_threshold(self):
        """With ``threshold=3`` the third report of the same gap triggers."""
        detector = GapDetector(threshold=3)
        for _ in range(2):
            detector.record_gap("email sending")
        assert detector.record_gap("email sending")

    def test_no_double_trigger(self):
        """A report made after the triggering one does not trigger again."""
        detector = GapDetector(threshold=2)
        detector.record_gap("x")
        detector.record_gap("x")  # second report reaches the threshold
        assert not detector.record_gap("x")

    def test_list_gaps(self):
        """Two distinct gaps are listed, with the twice-reported one first."""
        detector = GapDetector()
        for capability in ("email", "email", "sms"):
            detector.record_gap(capability)
        listed = detector.list_gaps()
        assert len(listed) == 2
        leading = listed[0]
        assert leading.capability == "email"
        assert leading.occurrences == 2

    def test_reset(self):
        """Resetting the only recorded gap leaves the list empty."""
        detector = GapDetector()
        detector.record_gap("x")
        detector.record_gap("x")
        detector.reset("x")
        assert len(detector.list_gaps()) == 0


class TestForgeConnector:
    """``ForgeConnector`` against a missing path and, if present, a real checkout."""

    def test_status(self):
        """A connector on a nonexistent path is unavailable with zero tools."""
        conn = ForgeConnector(forge_path=Path("/nonexistent"))
        s = conn.status()
        assert not s.available
        assert s.registry_count == 0

    def test_report_gap(self):
        """The first gap report is not marked as triggered."""
        conn = ForgeConnector(forge_path=Path("/nonexistent"))
        r1 = conn.report_gap("payment processing")
        assert not r1["triggered"]

    def test_search_tools_empty(self):
        """Searching with no forge available returns an empty list."""
        conn = ForgeConnector(forge_path=Path("/nonexistent"))
        assert conn.search_tools("stripe") == []

    def test_with_real_forge(self):
        """With a real checkout the connector is available and finds "github"."""
        import pytest
        forge = Path.home() / "Documents" / "protaige" / "mcp-forge"
        if not forge.exists():
            # Report as skipped instead of passing without checking anything.
            pytest.skip(f"MCP Forge checkout not found at {forge}")
        conn = ForgeConnector(forge_path=forge)
        assert conn.available
        assert conn.status().registry_count > 0
        results = conn.search_tools("github")
        assert len(results) > 0


================================================
FILE: maggy/tests/test_heartbeat.py
================================================
"""Tests for heartbeat scheduler."""

from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock

import pytest

from maggy.heartbeat.scheduler import HeartbeatScheduler, Job


# ── Job dataclass ────────────────────────────────────────────────────────


class TestJob:
    """Initial field values and ``is_due`` behaviour of ``Job``."""

    def test_defaults(self):
        """A new job starts enabled with zero runs and empty run/error fields."""
        job = Job(name="test", fn=AsyncMock(), interval_seconds=60)
        assert job.name == "test"
        assert job.interval_seconds == 60
        assert job.run_count == 0
        assert job.last_run == ""
        assert job.last_error == ""
        assert job.enabled is True

    def test_is_due_no_last_run(self):
        """A job that has never run is due."""
        job = Job(name="test", fn=AsyncMock(), interval_seconds=60)
        assert job.is_due() is True

    def test_is_due_after_interval(self):
        """A job last run 120 s ago with a 60 s interval is due."""
        from datetime import datetime, timezone, timedelta
        job = Job(name="test", fn=AsyncMock(), interval_seconds=60)
        two_minutes_ago = datetime.now(timezone.utc) - timedelta(seconds=120)
        job.last_run = two_minutes_ago.isoformat()
        assert job.is_due() is True

    def test_not_due_before_interval(self):
        """A job that just ran with a one-hour interval is not due."""
        from datetime import datetime, timezone
        job = Job(name="test", fn=AsyncMock(), interval_seconds=3600)
        just_now = datetime.now(timezone.utc)
        job.last_run = just_now.isoformat()
        assert job.is_due() is False


# ── Scheduler ────────────────────────────────────────────────────────────


class TestSchedulerRegister:
    """Registering jobs on ``HeartbeatScheduler`` and reading ``status``."""

    def test_register_job(self):
        """A registered job appears in the scheduler's ``_jobs`` mapping."""
        scheduler = HeartbeatScheduler()
        scheduler.register("refresh", AsyncMock(), 1800)
        assert "refresh" in scheduler._jobs

    def test_register_duplicate_raises(self):
        """Registering the same name twice raises ``ValueError``."""
        scheduler = HeartbeatScheduler()
        job_fn = AsyncMock()
        scheduler.register("dupe", job_fn, 60)
        with pytest.raises(ValueError, match="already registered"):
            scheduler.register("dupe", job_fn, 60)

    def test_status_returns_list(self):
        """``status`` returns one entry per registered job, keyed by name."""
        scheduler = HeartbeatScheduler()
        job_fn = AsyncMock()
        for job_name, interval in (("a", 60), ("b", 120)):
            scheduler.register(job_name, job_fn, interval)
        rows = scheduler.status()
        assert len(rows) == 2
        assert {row["name"] for row in rows} == {"a", "b"}


class TestSchedulerTick:
    """What a single ``tick`` does with due, disabled and failing jobs."""

    @pytest.mark.asyncio
    async def test_tick_runs_due_jobs(self):
        """A zero-interval job is awaited exactly once by one tick."""
        scheduler = HeartbeatScheduler()
        job_fn = AsyncMock()
        scheduler.register("job1", job_fn, 0)
        await scheduler.tick()
        job_fn.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_tick_skips_disabled(self):
        """A job with ``enabled = False`` is never awaited."""
        scheduler = HeartbeatScheduler()
        job_fn = AsyncMock()
        scheduler.register("disabled", job_fn, 0)
        scheduler._jobs["disabled"].enabled = False
        await scheduler.tick()
        job_fn.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_tick_records_error(self):
        """An exception raised by a job ends up in its ``last_error`` text."""
        scheduler = HeartbeatScheduler()
        failing_fn = AsyncMock(side_effect=RuntimeError("boom"))
        scheduler.register("fail", failing_fn, 0)
        await scheduler.tick()
        recorded = scheduler._jobs["fail"].last_error
        assert "boom" in recorded

    @pytest.mark.asyncio
    async def test_tick_increments_count(self):
        """Two ticks of a zero-interval job leave ``run_count`` at 2."""
        scheduler = HeartbeatScheduler()
        scheduler.register("counter", AsyncMock(), 0)
        for _ in range(2):
            await scheduler.tick()
        assert scheduler._jobs["counter"].run_count == 2


class TestSchedulerTrigger:
    """Manually triggering jobs by name."""

    @pytest.mark.asyncio
    async def test_trigger_runs_job(self):
        """``trigger`` awaits the job once and reports ``ok`` as True."""
        scheduler = HeartbeatScheduler()
        job_fn = AsyncMock(return_value=None)
        # The interval is large, so the run below comes from trigger() itself.
        scheduler.register("manual", job_fn, 9999)
        outcome = await scheduler.trigger("manual")
        job_fn.assert_awaited_once()
        assert outcome["ok"] is True

    @pytest.mark.asyncio
    async def test_trigger_unknown_raises(self):
        """Triggering an unregistered name raises ``KeyError``."""
        scheduler = HeartbeatScheduler()
        with pytest.raises(KeyError, match="nope"):
            await scheduler.trigger("nope")


class TestSchedulerLifecycle:
    """Starting and stopping the scheduler's background task."""

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """``start`` sets ``_task``, ``stop`` clears it, and the job ran meanwhile."""
        scheduler = HeartbeatScheduler()
        job_fn = AsyncMock()
        scheduler.register("tick_job", job_fn, 0)
        await scheduler.start()
        assert scheduler._task is not None
        # Yield briefly so the scheduler gets a chance to run the job.
        await asyncio.sleep(0.05)
        await scheduler.stop()
        assert scheduler._task is None
        assert job_fn.await_count >= 1


# ── Jobs ─────────────────────────────────────────────────────────────────


class TestJobs:
    """The ``refresh_history`` and ``self_improve`` heartbeat jobs."""

    @pytest.mark.asyncio
    async def test_refresh_history_calls_analyze(self):
        """``refresh_history`` calls ``analyze`` once on ``app.state.history``."""
        from types import SimpleNamespace
        from unittest.mock import MagicMock
        from maggy.heartbeat.jobs import refresh_history
        history_stub = MagicMock()
        state = SimpleNamespace(history=history_stub)
        await refresh_history(SimpleNamespace(state=state))
        history_stub.analyze.assert_called_once()

    @pytest.mark.asyncio
    async def test_refresh_history_skips_none(self):
        """``refresh_history`` completes without error when history is None."""
        from types import SimpleNamespace
        from maggy.heartbeat.jobs import refresh_history
        state = SimpleNamespace(history=None)
        await refresh_history(SimpleNamespace(state=state))

    @pytest.mark.asyncio
    async def test_self_improve_calls_analyze(self):
        """``self_improve`` calls ``analyze`` once on ``app.state.introspector``."""
        from types import SimpleNamespace
        from unittest.mock import MagicMock
        from maggy.heartbeat.jobs import self_improve
        introspector_stub = MagicMock()
        state = SimpleNamespace(introspector=introspector_stub)
        await self_improve(SimpleNamespace(state=state))
        introspector_stub.analyze.assert_called_once()

    @pytest.mark.asyncio
    async def test_self_improve_skips_none(self):
        """``self_improve`` completes without error when introspector is None."""
        from types import SimpleNamespace
        from maggy.heartbeat.jobs import self_improve
        state = SimpleNamespace(introspector=None)
        await self_improve(SimpleNamespace(state=state))


================================================
FILE: maggy/tests/test_history.py
================================================
"""Tests for history analyzer, store, and service."""

from __future__ import annotations

import json
from pathlib import Path

import pytest

from maggy.history.models import (
    HistoryReport,
    ProviderUsage,
    SessionEntry,
    TimeDistribution,
)


# --- Test Data Fixtures ---


def _make_session(
    sid: str = "s1",
    provider: str = "claude",
    project: str = "myproj",
    prompts: int = 5,
    tools: int = 3,
    started: str = "2024-01-15T10:00:00+00:00",
    ended: str = "2024-01-15T10:30:00+00:00",
) -> SessionEntry:
    """Build a ``SessionEntry`` from the given values plus fixed filler fields.

    ``models_used``, ``topics`` and ``summary`` are the same for every
    session created here; all other fields come from the arguments.
    """
    fields = {
        "session_id": sid,
        "provider": provider,
        "project": project,
        "started_at": started,
        "ended_at": ended,
        "prompt_count": prompts,
        "tool_use_count": tools,
        # Fixed filler values shared by all generated sessions.
        "models_used": ["claude-sonnet-4"],
        "topics": ["auth", "tests"],
        "summary": "fix auth bug",
    }
    return SessionEntry(**fields)


@pytest.fixture
def sample_sessions() -> list[SessionEntry]:
    """Four sessions across three providers and two projects.

    Prompt counts are 10, 8, 5 and 3; start hours are 10, 14, 9 and 22.
    """
    # Columns: id, provider, project, prompts, tools, started, ended.
    rows = (
        ("s1", "claude", "proj-a", 10, 5,
         "2024-01-15T10:00:00+00:00", "2024-01-15T10:45:00+00:00"),
        ("s2", "claude", "proj-a", 8, 3,
         "2024-01-15T14:00:00+00:00", "2024-01-15T14:20:00+00:00"),
        ("s3", "codex", "proj-b", 5, 2,
         "2024-01-16T09:00:00+00:00", "2024-01-16T09:15:00+00:00"),
        ("s4", "kimi", "proj-a", 3, 1,
         "2024-01-16T22:00:00+00:00", "2024-01-16T22:10:00+00:00"),
    )
    return [_make_session(*row) for row in rows]


# --- Analyzer Tests ---


class TestAnalyzer:
    """Checks for the functions in ``maggy.history.analyzer``."""

    def test_build_report_empty(self):
        """No sessions gives zero totals and an empty provider list."""
        from maggy.history.analyzer import build_report
        empty_report = build_report([])
        assert empty_report.total_sessions == 0
        assert empty_report.total_prompts == 0
        assert empty_report.providers == []

    def test_build_report_with_data(self, sample_sessions):
        """The sample data totals 4 sessions, 26 prompts and 3 providers."""
        from maggy.history.analyzer import build_report
        full_report = build_report(sample_sessions)
        assert full_report.total_sessions == 4
        assert full_report.total_prompts == 26
        assert len(full_report.providers) == 3

    def test_aggregate_by_provider(self, sample_sessions):
        """Claude accounts for 2 sessions and 18 prompts of the sample data."""
        from maggy.history.analyzer import aggregate_by_provider
        per_provider = aggregate_by_provider(sample_sessions)
        assert len(per_provider) == 3
        claude_row = next(
            row for row in per_provider if row.provider == "claude"
        )
        assert claude_row.session_count == 2
        assert claude_row.prompt_count == 18

    def test_aggregate_by_project(self, sample_sessions):
        """``proj-a`` has 3 sessions and lists claude among its providers."""
        from maggy.history.analyzer import aggregate_by_project
        per_project = aggregate_by_project(sample_sessions)
        proj_a_row = next(
            row for row in per_project if row.project == "proj-a"
        )
        assert proj_a_row.total_sessions == 3
        assert "claude" in proj_a_row.providers_used

    def test_compute_time_distribution(self, sample_sessions):
        """The hourly distribution contains the start hours of s1 and s4."""
        from maggy.history.analyzer import compute_time_distribution
        distribution = compute_time_distribution(sample_sessions)
        assert isinstance(distribution, TimeDistribution)
        # s1 starts at hour 10, s4 at hour 22
        for hour in (10, 22):
            assert hour in distribution.by_hour

    def test_detect_patterns(self, sample_sessions):
        """``detect_patterns`` returns a non-empty list made up of strings."""
        from maggy.history.analyzer import detect_patterns
        found = detect_patterns(sample_sessions)
        assert isinstance(found, list)
        assert len(found) > 0
        # Should produce human-readable strings
        assert all(isinstance(item, str) for item in found)

    def test_extract_top_topics(self, sample_sessions):
        """The returned topic list includes "auth"."""
        from maggy.history.analyzer import extract_top_topics
        top_topics = extract_top_topics(sample_sessions)
        assert isinstance(top_topics, list)
        assert "auth" in top_topics


# --- Store Tests ---


class TestHistoryStore:
    """Round-trip tests for history/store.py."""

    def test_save_and_load_sessions(self, tmp_path: Path):
        """Two saved sessions are both returned by an unfiltered load."""
        from maggy.history.store import HistoryStore

        db = HistoryStore(tmp_path / "history.db")
        db.save_sessions([_make_session(sid) for sid in ("s1", "s2")])
        assert len(db.load_sessions()) == 2

    def test_load_sessions_by_provider(self, tmp_path: Path):
        """Filtering by provider returns only the matching session."""
        from maggy.history.store import HistoryStore

        db = HistoryStore(tmp_path / "history.db")
        db.save_sessions([
            _make_session(sid, provider)
            for sid, provider in (("s1", "claude"), ("s2", "codex"))
        ])
        only_claude = db.load_sessions(provider="claude")
        assert len(only_claude) == 1
        assert only_claude[0]["provider"] == "claude"

    def test_save_and_load_report(self, tmp_path: Path):
        """A saved report is returned by load_latest_report with its totals."""
        from maggy.history.store import HistoryStore

        db = HistoryStore(tmp_path / "history.db")
        fields = {
            "generated_at": "2024-01-15T00:00:00Z",
            "total_sessions": 5,
            "total_prompts": 50,
            "summary": "test report",
        }
        db.save_report(HistoryReport(**fields))
        latest = db.load_latest_report()
        assert latest is not None
        assert latest["total_sessions"] == 5

    def test_load_report_empty(self, tmp_path: Path):
        """With nothing saved, load_latest_report returns None."""
        from maggy.history.store import HistoryStore

        db = HistoryStore(tmp_path / "history.db")
        latest = db.load_latest_report()
        assert latest is None


# --- Service Tests ---


class TestHistoryService:
    """Tests for history/service.py."""

    def _isolated_dirs(self, tmp_path: Path) -> dict:
        """Return CLI dirs that don't exist to isolate tests."""
        return {
            "claude": tmp_path / "no_claude",
            "codex": tmp_path / "no_codex",
            "kimi": tmp_path / "no_kimi",
        }

    def _dirs_with_claude_history(self, tmp_path: Path, entries: list) -> dict:
        """Create ``.claude/history.jsonl`` from ``entries`` and return CLI dirs.

        Each entry is JSON-encoded onto its own line. The returned mapping is
        the isolated (non-existent) dirs with "claude" pointed at the newly
        created directory, so only the Claude history is visible to the
        service under test.
        """
        claude_dir = tmp_path / ".claude"
        claude_dir.mkdir()
        lines = [json.dumps(entry) for entry in entries]
        (claude_dir / "history.jsonl").write_text("\n".join(lines) + "\n")
        dirs = self._isolated_dirs(tmp_path)
        dirs["claude"] = claude_dir
        return dirs

    def _make_service(self, tmp_path: Path, cli_dirs: dict):
        """Build a HistoryService backed by ``tmp_path / "history.db"``."""
        # Imported lazily, as in the individual tests, so that merely
        # collecting this module does not import the service.
        from maggy.history.service import HistoryService
        return HistoryService(
            db_path=tmp_path / "history.db",
            cli_dirs=cli_dirs,
        )

    def test_analyze_no_parsers(self, tmp_path: Path):
        """With no CLI directories present the report has zero sessions."""
        svc = self._make_service(tmp_path, self._isolated_dirs(tmp_path))
        report = svc.analyze()
        assert report.total_sessions == 0

    def test_analyze_with_claude(self, tmp_path: Path):
        """Two prompts sharing one sessionId produce 1 session / 2 prompts."""
        dirs = self._dirs_with_claude_history(tmp_path, [
            {"display": "fix", "project": "/p", "sessionId": "s1", "timestamp": 1700000000000},
            {"display": "test", "project": "/p", "sessionId": "s1", "timestamp": 1700000300000},
        ])
        svc = self._make_service(tmp_path, dirs)
        report = svc.analyze()
        assert report.total_sessions == 1
        assert report.total_prompts == 2

    def test_get_report_cached(self, tmp_path: Path):
        """After analyze(), get_report() returns a report with the session total."""
        dirs = self._dirs_with_claude_history(tmp_path, [
            {"display": "x", "project": "/p", "sessionId": "s1", "timestamp": 1700000000000},
        ])
        svc = self._make_service(tmp_path, dirs)
        svc.analyze()
        cached = svc.get_report()
        assert cached is not None
        assert cached["total_sessions"] == 1

    def test_get_sessions(self, tmp_path: Path):
        """After analyze(), get_sessions() returns the single parsed session."""
        dirs = self._dirs_with_claude_history(tmp_path, [
            {"display": "x", "project": "/p", "sessionId": "s1", "timestamp": 1700000000000},
        ])
        svc = self._make_service(tmp_path, dirs)
        svc.analyze()
        sessions = svc.get_sessions()
        assert len(sessions) == 1


================================================
FILE: maggy/tests/test_history_parsers.py
================================================
"""Tests for CLI history parsers — Claude, Codex, Kimi."""

from __future__ import annotations

import json
from pathlib import Path

import pytest

from maggy.history.parsers.claude import ClaudeHistoryParser
from maggy.history.parsers.codex import CodexHistoryParser
from maggy.history.parsers.kimi import KimiHistoryParser


# --- Claude Parser ---


class TestClaudeParser:
    """Tests for ClaudeHistoryParser."""

    @staticmethod
    def _claude_home(tmp_path: Path, records=()) -> Path:
        """Create ``.claude`` with a ``history.jsonl`` holding ``records``.

        Each record is written as one JSON line terminated by a newline; with
        no records the file is created empty.
        """
        home = tmp_path / ".claude"
        home.mkdir()
        payload = "".join(json.dumps(record) + "\n" for record in records)
        (home / "history.jsonl").write_text(payload)
        return home

    def test_not_available_missing_dir(self, tmp_path: Path):
        """A parser pointed at a directory that was never created is unavailable."""
        parser = ClaudeHistoryParser(tmp_path / ".claude")
        assert parser.is_available() is False

    def test_available_with_history(self, tmp_path: Path):
        """An empty history.jsonl is enough for the parser to be available."""
        parser = ClaudeHistoryParser(self._claude_home(tmp_path))
        assert parser.is_available() is True

    def test_session_count_empty(self, tmp_path: Path):
        """An empty history file counts zero sessions."""
        parser = ClaudeHistoryParser(self._claude_home(tmp_path))
        assert parser.session_count() == 0

    def test_session_count(self, tmp_path: Path):
        """Three prompts across two sessionIds count as two sessions."""
        records = [
            {"display": "fix bug", "project": "/p", "sessionId": "s1", "timestamp": 1700000000000},
            {"display": "add test", "project": "/p", "sessionId": "s1", "timestamp": 1700000100000},
            {"display": "deploy", "project": "/q", "sessionId": "s2", "timestamp": 1700001000000},
        ]
        parser = ClaudeHistoryParser(self._claude_home(tmp_path, records))
        assert parser.session_count() == 2

    def test_parse_sessions(self, tmp_path: Path):
        """Parsed session s1 carries provider, prompt count, summary and project."""
        records = [
            {"display": "fix auth", "project": "/Users/test/proj", "sessionId": "s1", "timestamp": 1700000000000},
            {"display": "add tests", "project": "/Users/test/proj", "sessionId": "s1", "timestamp": 1700000300000},
            {"display": "deploy app", "project": "/Users/test/other", "sessionId": "s2", "timestamp": 1700001000000},
        ]
        parser = ClaudeHistoryParser(self._claude_home(tmp_path, records))
        parsed = parser.parse_sessions(limit=10)
        assert len(parsed) == 2
        first = next(filter(lambda item: item.session_id == "s1", parsed))
        assert first.provider == "claude"
        assert first.prompt_count == 2
        assert first.summary == "fix auth"
        assert "proj" in first.project

    def test_parse_empty_history(self, tmp_path: Path):
        """An empty history file parses to an empty list."""
        parser = ClaudeHistoryParser(self._claude_home(tmp_path))
        assert parser.parse_sessions() == []

    def test_parse_with_transcript(self, tmp_path: Path):
        """A per-session transcript contributes tool use, model and git branch."""
        home = self._claude_home(tmp_path, [
            {"display": "task1", "project": "/Users/test/proj", "sessionId": "s1", "timestamp": 1700000000000},
        ])
        # Create transcript directory
        transcript_dir = home / "projects" / "-Users-test-proj"
        transcript_dir.mkdir(parents=True)
        user_turn = {"type": "user", "message": {"role": "user", "content": "fix the bug"}, "sessionId": "s1", "timestamp": 1700000000000, "gitBranch": "feat/auth"}
        assistant_turn = {"type": "assistant", "message": {"role": "assistant", "content": [{"type": "text", "text": "ok"}, {"type": "tool_use", "name": "read"}]}, "model": "claude-sonnet-4", "timestamp": 1700000010000}
        transcript_text = "".join(
            json.dumps(turn) + "\n" for turn in (user_turn, assistant_turn)
        )
        (transcript_dir / "s1.jsonl").write_text(transcript_text)
        parser = ClaudeHistoryParser(home)
        parsed = parser.parse_sessions()
        assert len(parsed) == 1
        only = parsed[0]
        assert only.tool_use_count >= 1
        assert "claude-sonnet-4" in only.models_used
        assert only.git_branch == "feat/auth"


# --- Codex Parser ---


class TestCodexParser:
    """Tests for CodexHistoryParser."""

    def test_not_available_missing_dir(self, tmp_path: Path):
        """A parser pointed at a directory that was never created is unavailable."""
        parser = CodexHistoryParser(tmp_path / ".codex")
        assert parser.is_available() is False

    def test_available_with_index(self, tmp_path: Path):
        """An empty session_index.jsonl is enough for the parser to be available."""
        home = tmp_path / ".codex"
        home.mkdir()
        index_file = home / "session_index.jsonl"
        index_file.write_text("")
        assert CodexHistoryParser(home).is_available() is True

    def test_session_count(self, tmp_path: Path):
        """Two index entries count as two sessions."""
        home = tmp_path / ".codex"
        home.mkdir()
        index_entries = [
            {"id": "s1", "thread_name": "fix bug", "updated_at": "2024-01-01T00:00:00Z"},
            {"id": "s2", "thread_name": "add feature", "updated_at": "2024-01-02T00:00:00Z"},
        ]
        index_text = "\n".join(json.dumps(entry) for entry in index_entries) + "\n"
        (home / "session_index.jsonl").write_text(index_text)
        assert CodexHistoryParser(home).session_count() == 2

    def test_parse_sessions(self, tmp_path: Path):
        """One indexed session with two history rows parses to one 2-prompt session."""
        home = tmp_path / ".codex"
        home.mkdir()
        index_entries = [
            {"id": "s1", "thread_name": "fix auth bug", "updated_at": "2024-01-01T10:00:00Z"},
        ]
        (home / "session_index.jsonl").write_text(
            "\n".join(json.dumps(entry) for entry in index_entries) + "\n"
        )
        history_entries = [
            {"session_id": "s1", "ts": 1704100000, "text": "fix the auth bug"},
            {"session_id": "s1", "ts": 1704100300, "text": "now add tests"},
        ]
        (home / "history.jsonl").write_text(
            "\n".join(json.dumps(entry) for entry in history_entries) + "\n"
        )
        parsed = CodexHistoryParser(home).parse_sessions()
        assert len(parsed) == 1
        only = parsed[0]
        assert only.provider == "codex"
        assert only.prompt_count == 2
        assert only.summary == "fix auth bug"

    def test_parse_empty(self, tmp_path: Path):
        """Empty index and history files parse to an empty list."""
        home = tmp_path / ".codex"
        home.mkdir()
        for filename in ("session_index.jsonl", "history.jsonl"):
            (home / filename).write_text("")
        assert CodexHistoryParser(home).parse_sessions() == []


# --- Kimi Parser ---


class TestKimiParser:
    """Tests for KimiHistoryParser."""

    def test_not_available_missing_dir(self, tmp_path: Path):
        """A parser pointed at a directory that was never created is unavailable."""
        parser = KimiHistoryParser(tmp_path / ".kimi")
        assert parser.is_available() is False

    def test_available_with_sessions(self, tmp_path: Path):
        """An existing (empty) sessions directory makes the parser available."""
        home = tmp_path / ".kimi"
        (home / "sessions").mkdir(parents=True)
        assert KimiHistoryParser(home).is_available() is True

    def test_session_count(self, tmp_path: Path):
        """Two session directories, each with a context.jsonl, count as two."""
        home = tmp_path / ".kimi"
        for session_name in ("uuid1", "uuid2"):
            session_dir = home / "sessions" / "abc" / session_name
            session_dir.mkdir(parents=True)
            (session_dir / "context.jsonl").write_text("")
        assert KimiHistoryParser(home).session_count() == 2

    def test_parse_sessions(self, tmp_path: Path):
        """Context and wire files together yield one fully populated session."""
        home = tmp_path / ".kimi"
        session_dir = home / "sessions" / "abc" / "uuid1"
        session_dir.mkdir(parents=True)
        conversation = [
            {"role": "user", "content": "fix the deploy"},
            {"role": "assistant", "content": "sure"},
            {"role": "user", "content": "now test it"},
        ]
        (session_dir / "context.jsonl").write_text(
            "\n".join(json.dumps(turn) for turn in conversation) + "\n"
        )
        # Each wire record stores its event as a JSON-encoded string.
        wire_events = [
            (1700000000.0, '{"type":"TurnBegin"}'),
            (1700000010.0, '{"type":"StepBegin"}'),
            (1700000300.0, '{"type":"TurnBegin"}'),
        ]
        (session_dir / "wire.jsonl").write_text(
            "\n".join(
                json.dumps({"timestamp": stamp, "message": message})
                for stamp, message in wire_events
            ) + "\n"
        )
        parsed = KimiHistoryParser(home).parse_sessions()
        assert len(parsed) == 1
        only = parsed[0]
        assert only.provider == "kimi"
        assert only.prompt_count == 2
        assert only.tool_use_count >= 1
        assert only.summary == "fix the deploy"

    def test_parse_empty(self, tmp_path: Path):
        """An empty sessions directory parses to an empty list."""
        home = tmp_path / ".kimi"
        (home / "sessions").mkdir(parents=True)
        assert KimiHistoryParser(home).parse_sessions() == []

    def test_parse_missing_wire(self, tmp_path: Path):
        """Graceful when wire.jsonl is missing."""
        home = tmp_path / ".kimi"
        session_dir = home / "sessions" / "abc" / "uuid1"
        session_dir.mkdir(parents=True)
        conversation = [{"role": "user", "content": "hello"}]
        (session_dir / "context.jsonl").write_text(
            "\n".join(json.dumps(turn) for turn in conversation) + "\n"
        )
        parsed = KimiHistoryParser(home).parse_sessions()
        assert len(parsed) == 1
        assert parsed[0].prompt_count == 1


================================================
FILE: maggy/tests/test_improve.py
================================================
"""Tests for self-improvement signals and analysis."""

from __future__ import annotations

from types import SimpleNamespace
from unittest.mock import MagicMock, patch

import pytest

from maggy.improve.models import (
    ImprovementReport,
    Recommendation,
    SignalBundle,
)


# ── Models ───────────────────────────────────────────────────────────────


class TestModels:
    """Construction checks for the improve data models."""

    def test_recommendation_defaults(self):
        """A Recommendation built without ``data`` has an empty dict for it."""
        required = {
            "category": "routing",
            "severity": "info",
            "message": "test",
            "suggestion": "do something",
        }
        recommendation = Recommendation(**required)
        assert recommendation.data == {}

    def test_signal_bundle_defaults(self):
        """A bare SignalBundle has empty routing and an empty collected_at."""
        empty_bundle = SignalBundle()
        assert empty_bundle.routing == {}
        assert empty_bundle.collected_at == ""

    def test_improvement_report(self):
        """ImprovementReport keeps the total_signals value it was given."""
        built = ImprovementReport(
            generated_at="2025-01-01",
            total_signals=3,
            recommendations=[],
            health_summary={"routing": 0.8},
            top_actions=["fix routing"],
        )
        assert built.total_signals == 3


# ── Signal Collectors ────────────────────────────────────────────────────


class TestCollectRouting:
    """collect_routing against a mocked routing service."""

    def test_collects_heatmap(self):
        """A well-rewarded heatmap row is collected and not flagged."""
        from maggy.improve.signals import collect_routing

        heatmap_rows = [
            {"model": "a", "task_type": "bug", "avg_reward": 0.8, "count": 10},
        ]
        service = MagicMock(**{"get_heatmap.return_value": heatmap_rows})
        collected = collect_routing(service)
        assert len(collected["heatmap"]) == 1
        assert collected["underperformers"] == []

    def test_flags_underperformers(self):
        """A row with avg_reward 0.2 is reported as an underperformer."""
        from maggy.improve.signals import collect_routing

        heatmap_rows = [
            {"model": "bad", "task_type": "bug", "avg_reward": 0.2, "count": 10},
        ]
        service = MagicMock(**{"get_heatmap.return_value": heatmap_rows})
        collected = collect_routing(service)
        assert len(collected["underperformers"]) == 1


class TestCollectEvents:
    """collect_events against a mocked event store."""

    def test_calculates_failure_rate(self):
        """One failure out of four events gives a 0.25 failure rate."""
        from maggy.improve.signals import collect_events

        outcomes = [True, False, True, True]
        store = MagicMock()
        store.query.return_value = [{"success": ok} for ok in outcomes]
        collected = collect_events(store)
        assert collected["total"] == 4
        assert collected["failures"] == 1
        assert collected["failure_rate"] == 0.25

    def test_empty_events(self):
        """No events at all gives a failure rate of 0.0."""
        from maggy.improve.signals import collect_events

        store = MagicMock(**{"query.return_value": []})
        collected = collect_events(store)
        assert collected["failure_rate"] == 0.0


class TestCollectHistory:
    """collect_history against a mocked history service."""

    def test_returns_patterns(self):
        """The session total from the cached report is surfaced as "sessions"."""
        from maggy.improve.signals import collect_history

        cached_report = {
            "total_sessions": 50,
            "patterns": ["dominance"],
            "by_provider": {"claude": 40, "codex": 10},
        }
        service = MagicMock(**{"get_report.return_value": cached_report})
        collected = collect_history(service)
        assert collected["sessions"] == 50

    def test_no_report(self):
        """When no report is available, "sessions" is 0."""
        from maggy.improve.signals import collect_history

        service = MagicMock(**{"get_report.return_value": None})
        collected = collect_history(service)
        assert collected["sessions"] == 0


class TestCollectForge:
    """collect_forge against a mocked forge service."""

    def test_returns_gaps(self):
        """A single reported gap yields a count of 1."""
        from maggy.improve.signals import collect_forge

        reported_gaps = [{"name": "slack", "count": 5}]
        service = MagicMock(**{"get_gaps.return_value": reported_gaps})
        collected = collect_forge(service)
        assert collected["count"] == 1


class TestCollectEngram:
    """collect_engram with the diagnostics entry point patched out."""

    def test_returns_health(self):
        """The health_score of the diagnosed profile is passed through."""
        from maggy.improve.signals import collect_engram

        store = MagicMock()
        fake_profile = SimpleNamespace(
            health_score=0.7, total_memories=100,
            active_count=70, superseded_count=30,
        )
        # Replace the real diagnose() so it hands back the canned profile.
        with patch(
            "maggy.engram.diagnostics.diagnose", return_value=fake_profile,
        ):
            collected = collect_engram(store)
        assert collected["health_score"] == 0.7


class TestCollectBudget:
    """collect_budget against a mocked budget service."""

    def test_returns_status(self):
        """The utilization reported by budget_status is passed through."""
        from maggy.improve.signals import collect_budget

        status = {"utilization": 0.5, "status": "ok"}
        service = MagicMock(**{"budget_status.return_value": status})
        collected = collect_budget(service)
        assert collected["utilization"] == 0.5


class TestCollectAll:
    """collect_all over an application state with no services attached."""

    def test_skips_none_services(self):
        """With every service set to None, routing and events stay empty dicts."""
        from maggy.improve.signals import collect_all

        service_names = (
            "routing", "events", "history", "forge", "engram", "budget",
        )
        bare_state = SimpleNamespace(**dict.fromkeys(service_names))
        collected = collect_all(bare_state)
        assert collected.routing == {}
        assert collected.events == {}


# ── Analyzer ─────────────────────────────────────────────────────────────


class TestAnalyzeRouting:
    """analyze_routing turns underperformers into routing recommendations."""

    def test_flags_underperformers(self):
        """One underperformer yields exactly one "routing" recommendation."""
        from maggy.improve.analyzer import analyze_routing

        weak_entry = {"model": "bad", "task_type": "bug", "avg_reward": 0.2}
        bundle = SignalBundle(routing={"underperformers": [weak_entry]})
        found = analyze_routing(bundle)
        assert len(found) == 1
        assert found[0].category == "routing"

    def test_no_issues(self):
        """An empty underperformer list yields no recommendations."""
        from maggy.improve.analyzer import analyze_routing

        bundle = SignalBundle(routing={"underperformers": []})
        found = analyze_routing(bundle)
        assert found == []


class TestAnalyzeFailures:
    """analyze_failures reacts to the event failure rate."""

    def test_flags_high_failure(self):
        """A 0.25 failure rate yields one recommendation with severity "action"."""
        from maggy.improve.analyzer import analyze_failures

        bundle = SignalBundle(events={"failure_rate": 0.25})
        found = analyze_failures(bundle)
        assert len(found) == 1
        assert found[0].severity == "action"

    def test_ok_rate(self):
        """A 0.1 failure rate yields no recommendations."""
        from maggy.improve.analyzer import analyze_failures

        bundle = SignalBundle(events={"failure_rate": 0.1})
        found = analyze_failures(bundle)
        assert found == []


class TestAnalyzeUsage:
    """analyze_usage reacts to per-provider session counts."""

    def test_flags_low_usage(self):
        """A provider with 3 of 100 sessions yields one "usage" recommendation."""
        from maggy.improve.analyzer import analyze_usage

        history_signal = {
            "sessions": 100,
            "by_provider": {"codex": 3},
        }
        found = analyze_usage(SignalBundle(history=history_signal))
        assert len(found) == 1
        assert found[0].category == "usage"

    def test_no_sessions(self):
        """Zero sessions yields no recommendations."""
        from maggy.improve.analyzer import analyze_usage

        bundle = SignalBundle(history={"sessions": 0})
        found = analyze_usage(bundle)
        assert found == []


class TestAnalyzeGaps:
    """analyze_gaps surfaces forge capability gaps."""

    def test_surfaces_gaps(self):
        """One forge gap yields one "capability" recommendation."""
        from maggy.improve.analyzer import analyze_gaps

        forge_signal = {"gaps": [{"name": "slack", "count": 5}]}
        found = analyze_gaps(SignalBundle(forge=forge_signal))
        assert len(found) == 1
        assert found[0].category == "capability"


class TestAnalyzeMemory:
    """analyze_memory reacts to the engram health score."""

    def test_flags_low_health(self):
        """A 0.3 health score yields one "memory" recommendation."""
        from maggy.improve.analyzer import analyze_memory

        bundle = SignalBundle(engram={"health_score": 0.3})
        found = analyze_memory(bundle)
        assert len(found) == 1
        assert found[0].category == "memory"

    def test_healthy(self):
        """A 0.8 health score yields no recommendations."""
        from maggy.improve.analyzer import analyze_memory

        bundle = SignalBundle(engram={"health_score": 0.8})
        found = analyze_memory(bundle)
        assert found == []


class TestAnalyzeCost:
    """analyze_cost reacts to budget utilization."""

    def test_flags_high_util(self):
        """A 0.95 utilization yields one "cost" recommendation."""
        from maggy.improve.analyzer import analyze_cost

        bundle = SignalBundle(budget={"utilization": 0.95})
        found = analyze_cost(bundle)
        assert len(found) == 1
        assert found[0].category == "cost"

    def test_ok_util(self):
        """A 0.5 utilization yields no recommendations."""
        from maggy.improve.analyzer import analyze_cost

        bundle = SignalBundle(budget={"utilization": 0.5})
        found = analyze_cost(bundle)
        assert found == []


class TestAnalyzeAll:
    """analyze_all combines the recommendations of the individual analyzers."""

    def test_merges_all(self):
        """Routing, reliability and cost categories all appear in the merged output."""
        from maggy.improve.analyzer import analyze_all

        weak_entry = {"model": "x", "task_type": "bug", "avg_reward": 0.1}
        bundle = SignalBundle(
            routing={"underperformers": [weak_entry]},
            events={"failure_rate": 0.3},
            budget={"utilization": 0.95},
            engram={"health_score": 0.2},
            forge={"gaps": [{"name": "y", "count": 3}]},
            history={"sessions": 0},
        )
        seen_categories = set()
        for recommendation in analyze_all(bundle):
            seen_categories.add(recommendation.category)
        assert "routing" in seen_categories
        assert "reliability" in seen_categories
        assert "cost" in seen_categories


# ── Introspector Service ─────────────────────────────────────────────────


class TestIntrospector:
    """Tests for the Introspector service in improve/service.py."""

    @staticmethod
    def _empty_state() -> SimpleNamespace:
        """Return an application state in which every service slot is None."""
        return SimpleNamespace(
            routing=None, events=None, history=None,
            forge=None, engram=None, budget=None,
        )

    def test_analyze_empty_state(self):
        """With no services attached, analyze() reports no signals or recommendations."""
        from maggy.improve.service import Introspector
        intro = Introspector(self._empty_state())
        report = intro.analyze()
        assert report.total_signals == 0
        assert report.recommendations == []

    def test_get_report_none_initially(self):
        """Before analyze() has run, get_report() returns None."""
        from maggy.improve.service import Introspector
        intro = Introspector(self._empty_state())
        assert intro.get_report() is None

    def test_get_report_after_analyze(self):
        """After analyze(), get_report() returns a report with generated_at set."""
        from maggy.improve.service import Introspector
        intro = Introspector(self._empty_state())
        intro.analyze()
        report = intro.get_report()
        assert report is not None
        assert report.generated_at != ""

    def test_health_summary_populated(self):
        """Routing, events and budget services each add a health_summary key."""
        from maggy.improve.service import Introspector
        routing = MagicMock()
        routing.get_heatmap.return_value = []
        events = MagicMock()
        events.query.return_value = [
            {"success": True}, {"success": True},
        ]
        budget = MagicMock()
        budget.budget_status.return_value = {
            "utilization": 0.5, "status": "ok",
        }
        state = SimpleNamespace(
            routing=routing, events=events, history=None,
            forge=None, engram=None, budget=budget,
        )
        intro = Introspector(state)
        report = intro.analyze()
        assert "routing" in report.health_summary
        assert "reliability" in report.health_summary
        assert "cost" in report.health_summary


================================================
FILE: maggy/tests/test_lexon.py
================================================
"""Tests for Lexon — routing, terminology, disambiguation."""

from __future__ import annotations

from maggy.lexon.disambiguate import disambiguate
from maggy.lexon.personalization import PersonalizationEngine
from maggy.lexon.record import LexonRecord
from maggy.lexon.router import LexonRouter
from maggy.lexon.terminology import TermEntry, TerminologyMap


class TestTerminology:
    """TerminologyMap resolution and alias registration."""

    def test_resolve_canonical(self):
        """The term "deploy" resolves to itself."""
        terms = TerminologyMap()
        resolved = terms.resolve("deploy")
        assert resolved == "deploy"

    def test_resolve_synonym(self):
        """The term "ship" resolves to "deploy"."""
        terms = TerminologyMap()
        resolved = terms.resolve("ship")
        assert resolved == "deploy"

    def test_resolve_unknown(self):
        """An unrecognised term resolves to None."""
        terms = TerminologyMap()
        resolved = terms.resolve("xyzzy")
        assert resolved is None

    def test_add_alias(self):
        """A newly added alias is accepted and then resolves to its canonical term."""
        terms = TerminologyMap()
        accepted = terms.add_alias("deploy", "yeet")
        assert accepted
        assert terms.resolve("yeet") == "deploy"

    def test_add_alias_unknown_canonical(self):
        """Adding an alias for a canonical term that does not exist is refused."""
        terms = TerminologyMap()
        accepted = terms.add_alias("nonexistent", "alias")
        assert not accepted


class TestDisambiguate:
    """disambiguate() outcomes at four confidence levels."""

    def test_high_confidence_resolves(self):
        """Confidence 0.9 with one candidate resolves to it with mode "none"."""
        outcome = disambiguate(0.9, ["grep"])
        assert outcome.resolved
        assert outcome.tool == "grep"
        assert outcome.mode == "none"

    def test_mid_confidence_self_clarify(self):
        """Confidence 0.6 with two candidates resolves via "self_clarify"."""
        outcome = disambiguate(0.6, ["grep", "glob"])
        assert outcome.resolved
        assert outcome.mode == "self_clarify"

    def test_low_confidence_user_clarify(self):
        """Confidence 0.4 with three candidates is unresolved, mode "user_clarify"."""
        outcome = disambiguate(0.4, ["grep", "glob", "find"])
        assert not outcome.resolved
        assert outcome.mode == "user_clarify"

    def test_very_low_rejects(self):
        """Confidence 0.1 with no candidates is unresolved."""
        outcome = disambiguate(0.1, [])
        assert not outcome.resolved


class TestPersonalization:
    """PersonalizationEngine usage counts, aliases and corrections."""

    def test_record_and_top(self):
        """The tool recorded most often comes first in top_tools."""
        engine = PersonalizationEngine()
        for tool_name in ("grep", "grep", "glob"):
            engine.record_use(tool_name)
        ranked = engine.top_tools(2)
        assert ranked[0] == "grep"

    def test_preferred_alias(self):
        """A recorded alias is returned by get_preferred for the same phrase."""
        engine = PersonalizationEngine()
        engine.record_alias("find stuff", "grep")
        preferred = engine.get_preferred("find stuff")
        assert preferred == "grep"

    def test_correction(self):
        """Recording one correction leaves one entry in correction_pairs."""
        engine = PersonalizationEngine()
        engine.record_correction("test", "pytest")
        recorded = engine.signals.correction_pairs
        assert len(recorded) == 1


class TestLexonRouter:
    """LexonRouter routing, learning and manifest overrides."""

    def test_known_intent(self):
        """A phrase containing "deploy" routes with confidence above 0.5 and candidates."""
        router = LexonRouter()
        routed = router.route("deploy my app")
        assert routed.confidence > 0.5
        assert len(routed.candidates) > 0

    def test_unknown_intent(self):
        """A phrase with no known terms is handed to "llm" disambiguation."""
        router = LexonRouter()
        routed = router.route("xyzzy plugh")
        assert routed.disambiguation_mode == "llm"

    def test_learn_and_recall(self):
        """A learned phrase routes to its tool with confidence of at least 0.9."""
        router = LexonRouter()
        router.learn("push it live", "vercel_deploy")
        routed = router.route("push it live")
        assert routed.resolved_tool == "vercel_deploy"
        assert routed.confidence >= 0.9

    def test_multiple_candidates(self):
        """The phrase "search for files" is handed to "llm" disambiguation."""
        router = LexonRouter()
        routed = router.route("search for files")
        assert routed.disambiguation_mode == "llm"

    def test_manifest_overrides_default_tools(self):
        """A tool_manifest entry for "deploy" decides the resolved tool."""
        manifest_config = {
            "tool_manifest": {
                "deploy": ["shipctl"],
            },
        }
        router = LexonRouter(manifest_config)
        routed = router.route("deploy release")
        assert routed.resolved_tool == "shipctl"


class TestLexonRecord:
    """Derived flags on LexonRecord."""

    def test_ambiguous(self):
        """A record with confidence 0.3 is ambiguous."""
        record = LexonRecord(phrase="test", confidence=0.3)
        assert record.is_ambiguous

    def test_not_ambiguous(self):
        """A record with confidence 0.9 is not ambiguous."""
        record = LexonRecord(phrase="test", confidence=0.9)
        assert not record.is_ambiguous

    def test_needs_user_input(self):
        """A record in "user_clarify" mode needs user input."""
        record = LexonRecord(phrase="x", disambiguation_mode="user_clarify")
        assert record.needs_user_input


================================================
FILE: maggy/tests/test_mesh.py
================================================
"""Tests for Maggy Mesh — protocol, discovery, sync, quarantine."""

from __future__ import annotations

from maggy.mesh.discovery import PeerInfo, PeerRegistry
from maggy.mesh.memory import MemoryType, SharedMemory
from maggy.mesh.protocol import (
    MeshMessage,
    MessageType,
    create_hello,
    create_share,
)
from maggy.mesh.provenance import Provenance
from maggy.mesh.quarantine import QuarantineStore
from maggy.mesh.sync import SyncEngine
from maggy.mesh.transport import compute_hmac, verify_hmac


class TestProtocol:
    """Mesh message construction and (de)serialization."""

    def test_serialize_round_trip(self):
        """A HELLO message keeps its type and sender through serialize/deserialize."""
        hello = create_hello("peer-1", "Alice")
        decoded = MeshMessage.deserialize(hello.serialize())
        assert decoded.msg_type == MessageType.HELLO
        assert decoded.sender_id == "peer-1"

    def test_share_message(self):
        """create_share builds a SHARE message whose payload carries the key."""
        shared_value = {"memory_type": "score", "model": "claude"}
        share = create_share("peer-1", "score:claude:fix", shared_value)
        assert share.msg_type == MessageType.SHARE
        assert share.payload["key"] == "score:claude:fix"


class TestPeerDiscovery:
    """PeerRegistry registration, removal and last-seen updates."""

    @staticmethod
    def _registry_with_alice() -> PeerRegistry:
        """Return a fresh registry holding the single peer "p1" (Alice)."""
        reg = PeerRegistry()
        reg.register(PeerInfo(
            peer_id="p1", name="Alice",
            address="192.168.1.1",
        ))
        return reg

    def test_register_and_list(self):
        """A registered peer is counted and can be fetched by id."""
        reg = self._registry_with_alice()
        assert reg.count == 1
        assert reg.get("p1").name == "Alice"

    def test_unregister(self):
        """Unregistering a known peer succeeds and empties the registry."""
        reg = self._registry_with_alice()
        assert reg.unregister("p1")
        assert reg.count == 0

    def test_update_seen(self):
        """After update_seen the peer still has a non-None last_seen."""
        reg = self._registry_with_alice()
        reg.update_seen("p1")
        # The previous value is deliberately not compared: the timestamp may
        # or may not change when both reads fall within the same ms.
        assert reg.get("p1").last_seen is not None


class TestProvenance:
    """Provenance confidence decay across hops."""

    def test_no_hop_full_confidence(self):
        """With no hops given, effective confidence equals the base confidence."""
        p = Provenance(origin_peer="p1", base_confidence=1.0)
        assert p.effective_confidence == 1.0

    def test_decay_per_hop(self):
        """Three hops from a base of 1.0 give an effective confidence of 0.7."""
        import math

        p = Provenance(
            origin_peer="p1", hops=3, base_confidence=1.0,
        )
        # The value is computed from the hop count, so compare with a
        # tolerance rather than exact float equality; this stays green if the
        # arithmetic order inside the property changes.
        assert math.isclose(p.effective_confidence, 0.7)

    def test_add_hop(self):
        """add_hop returns a provenance whose hop count is one higher."""
        p = Provenance(origin_peer="p1", hops=1)
        p2 = p.add_hop()
        assert p2.hops == 2

    def test_min_confidence(self):
        """With 100 hops the effective confidence is 0.1."""
        p = Provenance(
            origin_peer="p1", hops=100, base_confidence=1.0,
        )
        assert p.effective_confidence == 0.1


class TestQuarantine:
    """QuarantineStore insertion and promotion."""

    def test_quarantine_and_list(self):
        """A quarantined entry is counted and keeps its reason."""
        store = QuarantineStore()
        store.quarantine("k1", "peer-1", "low conf", {"x": 1})
        assert store.count == 1
        entry = store.get("k1")
        assert entry.reason == "low conf"

    def test_promote(self):
        """Promoting a quarantined key succeeds and removes it from the store."""
        store = QuarantineStore()
        store.quarantine("k1", "peer-1", "test", {})
        promoted = store.promote("k1")
        assert promoted
        assert store.count == 0

    def test_promote_missing(self):
        """Promoting a key that was never quarantined is refused."""
        store = QuarantineStore()
        promoted = store.promote("nope")
        assert not promoted


class TestSync:
    """How ``SyncEngine.sync_incoming`` treats memories of differing confidence."""

    def test_accept_high_confidence(self):
        """A memory with confidence 0.8 is accepted and stored locally."""
        quarantine = QuarantineStore()
        engine = SyncEngine(quarantine)
        incoming = SharedMemory(
            key="s1",
            memory_type="score",
            confidence=0.8,
            source_peer="p1",
        )
        outcome = engine.sync_incoming([incoming])
        assert outcome.accepted == 1
        assert engine.local_count == 1

    def test_quarantine_low_confidence(self):
        """A memory with confidence 0.3 ends up in the quarantine store."""
        quarantine = QuarantineStore()
        engine = SyncEngine(quarantine)
        incoming = SharedMemory(
            key="s1",
            memory_type="score",
            confidence=0.3,
            source_peer="p1",
        )
        outcome = engine.sync_incoming([incoming])
        assert outcome.quarantined == 1
        assert quarantine.count == 1


class TestTransport:
    """Round-trip checks for ``compute_hmac`` / ``verify_hmac``."""

    def test_hmac_round_trip(self):
        """A signature verifies against the same message and secret."""
        signature = compute_hmac("hello", "secret")
        is_valid = verify_hmac("hello", "secret", signature)
        assert is_valid

    def test_hmac_mismatch(self):
        """A signature does not verify when a different secret is supplied."""
        signature = compute_hmac("hello", "secret")
        is_valid = verify_hmac("hello", "wrong", signature)
        assert not is_valid


================================================
FILE: maggy/tests/test_mesh_network.py
================================================
"""Tests for mesh network layer: org scanner, git discovery, transport, network, manager, publisher."""

from __future__ import annotations

import json
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch

import pytest

from maggy.mesh.discovery import PeerInfo


# ── Org Scanner ─────────────────────────────────────────


class TestEffectiveOrgs:
    """Expectations for ``effective_orgs`` merging, exclusion and ordering."""

    def test_merge_scanned_and_manual(self):
        """Entries from the first two lists come back combined and sorted."""
        from maggy.mesh.org_scanner import effective_orgs
        expected = ["alinaqi", "edubites", "protaige"]
        merged = effective_orgs(["protaige", "edubites"], ["alinaqi"], [])
        assert merged == expected

    def test_excludes_orgs(self):
        """An org named in the third list is dropped from the result."""
        from maggy.mesh.org_scanner import effective_orgs
        remaining = effective_orgs(
            ["protaige", "edubites", "alinaqi"], [], ["edubites"],
        )
        assert "edubites" not in remaining
        assert len(remaining) == 2

    def test_deduplicates(self):
        """An org present in both input lists appears only once."""
        from maggy.mesh.org_scanner import effective_orgs
        unique = effective_orgs(["protaige"], ["protaige"], [])
        assert unique == ["protaige"]

    def test_empty_inputs(self):
        """Three empty lists produce an empty list."""
        from maggy.mesh.org_scanner import effective_orgs
        nothing = effective_orgs([], [], [])
        assert nothing == []


# ── Transport ───────────────────────────────────────────


class TestDeriveOrgKey:
    """Properties of keys produced by ``derive_org_key``."""

    def test_different_orgs_produce_different_keys(self):
        """Two org names with the same secret give different keys."""
        from maggy.mesh.transport import derive_org_key
        key_a = derive_org_key("protaige", "secret")
        key_b = derive_org_key("edubites", "secret")
        assert key_a != key_b

    def test_deterministic(self):
        """Repeating the call with identical inputs gives an identical key."""
        from maggy.mesh.transport import derive_org_key
        first = derive_org_key("protaige", "secret")
        second = derive_org_key("protaige", "secret")
        assert first == second

    def test_returns_hex_string(self):
        """The key is 64 characters long, the length of a SHA-256 hex digest."""
        from maggy.mesh.transport import derive_org_key
        derived = derive_org_key("org", "secret")
        assert len(derived) == 64  # SHA-256 hex


class TestSignVerify:
    """``sign_message`` / ``verify_message`` acceptance and rejection cases."""

    def test_roundtrip(self):
        """A message verified with the signing key comes back with its sender id."""
        from maggy.mesh.transport import sign_message, verify_message
        from maggy.mesh.protocol import create_hello
        hello = create_hello("peer-1", "tester")
        wire = sign_message(hello, "test-key")
        verified = verify_message(wire, "test-key")
        assert verified is not None
        assert verified.sender_id == "peer-1"

    def test_wrong_key_fails(self):
        """Verification with a different key returns ``None``."""
        from maggy.mesh.transport import sign_message, verify_message
        from maggy.mesh.protocol import create_hello
        hello = create_hello("peer-1", "tester")
        wire = sign_message(hello, "correct-key")
        verified = verify_message(wire, "wrong-key")
        assert verified is None

    def test_invalid_json_fails(self):
        """Input that is not JSON returns ``None``."""
        from maggy.mesh.transport import verify_message
        verified = verify_message("not-json", "key")
        assert verified is None


# ── Network ─────────────────────────────────────────────


class TestBuildNetwork:
    """``build_network`` results backed by a ``MeshStore`` in ``tmp_path``."""

    def test_creates_network(self, tmp_path: Path):
        """The network carries the requested org and a non-empty org key."""
        from maggy.mesh.network import build_network
        from maggy.mesh.store import MeshStore
        db = MeshStore(tmp_path / "mesh.db")
        network = build_network("protaige", "secret", db)
        assert network.org == "protaige"
        assert network.org_key != ""

    def test_isolated_org_keys(self, tmp_path: Path):
        """Two orgs sharing a secret and a store get different org keys."""
        from maggy.mesh.network import build_network
        from maggy.mesh.store import MeshStore
        db = MeshStore(tmp_path / "mesh.db")
        first = build_network("protaige", "secret", db)
        second = build_network("edubites", "secret", db)
        assert first.org_key != second.org_key

    def test_status_returns_counts(self, tmp_path: Path):
        """A fresh network reports its org and zero for every counter."""
        from maggy.mesh.network import build_network
        from maggy.mesh.store import MeshStore
        db = MeshStore(tmp_path / "mesh.db")
        report = build_network("test-org", "secret", db).status()
        assert report["org"] == "test-org"
        assert report["peers"] == 0
        assert report["memories"] == 0
        assert report["quarantined"] == 0


# ── Manager ─────────────────────────────────────────────


def _make_cfg(**overrides):
    """Return a ``SimpleNamespace`` standing in for a minimal MeshConfig.

    Keyword arguments replace the built-in defaults of the same name; any
    additional keywords become extra attributes on the namespace.
    """
    settings = dict(
        peer_id="test-peer",
        org_key_secret="secret",
        port=8080,
        tunnel_url="",
        git_discovery=True,
    )
    return SimpleNamespace(**{**settings, **overrides})


class TestMeshManager:
    """``MeshManager`` network bookkeeping and address resolution."""

    @staticmethod
    def _manager(tmp_path: Path, **cfg_overrides):
        """Create a ``MeshManager`` over a ``mesh.db`` store under ``tmp_path``."""
        from maggy.mesh.manager import MeshManager
        from maggy.mesh.store import MeshStore
        db = MeshStore(tmp_path / "mesh.db")
        return MeshManager(_make_cfg(**cfg_overrides), db)

    def test_add_and_get_network(self, tmp_path: Path):
        """``get_network`` hands back the very object ``add_network`` returned."""
        manager = self._manager(tmp_path)
        added = manager.add_network("protaige")
        assert added.org == "protaige"
        assert manager.get_network("protaige") is added

    def test_missing_network_returns_none(self, tmp_path: Path):
        """Looking up an org that was never added gives ``None``."""
        manager = self._manager(tmp_path)
        assert manager.get_network("nope") is None

    def test_list_networks(self, tmp_path: Path):
        """Two added networks are both listed."""
        manager = self._manager(tmp_path)
        for org in ("org-a", "org-b"):
            manager.add_network(org)
        listed = manager.list_networks()
        assert len(listed) == 2

    def test_total_peers_across_networks(self, tmp_path: Path):
        """A peer registered on one network shows up in ``total_peers``."""
        manager = self._manager(tmp_path)
        network = manager.add_network("org-a")
        peer = PeerInfo(
            peer_id="p1",
            name="peer1",
            address="ws://1",
            org="org-a",
        )
        network.peers.register(peer)
        assert manager.total_peers == 1

    def test_resolve_address_tunnel(self, tmp_path: Path):
        """A configured ``tunnel_url`` is returned as the resolved address."""
        manager = self._manager(tmp_path, tunnel_url="wss://bore.pub/xyz")
        resolved = manager._resolve_address()
        assert resolved == "wss://bore.pub/xyz"

    def test_resolve_address_local(self, tmp_path: Path):
        """With an empty ``tunnel_url`` the address contains ``127.0.0.1:8080``."""
        manager = self._manager(tmp_path)
        resolved = manager._resolve_address()
        assert "127.0.0.1:8080" in resolved


# ── Publisher ───────────────────────────────────────────


class TestPublisher:
    """Publisher collectors fed with ``SimpleNamespace`` stand-in services."""

    def test_collect_scores_skips_low_count(self):
        """A heatmap row with ``count`` 2 yields no shared score."""
        from maggy.mesh.publisher import collect_scores

        def heatmap():
            return [{"model": "m1", "task_type": "fix", "count": 2}]

        shares = collect_scores(SimpleNamespace(get_heatmap=heatmap), "peer-1")
        assert len(shares) == 0

    def test_collect_scores_includes_high_count(self):
        """A heatmap row with ``count`` 10 yields one ``score`` memory."""
        from maggy.mesh.publisher import collect_scores

        def heatmap():
            return [{"model": "m1", "task_type": "fix", "count": 10}]

        shares = collect_scores(SimpleNamespace(get_heatmap=heatmap), "peer-1")
        assert len(shares) == 1
        assert shares[0].memory_type == "score"

    def test_collect_gaps(self):
        """A gap named ``slack-notify`` is keyed as ``gap:slack-notify``."""
        from maggy.mesh.publisher import collect_gaps

        def gaps():
            return [{"name": "slack-notify"}]

        shares = collect_gaps(SimpleNamespace(get_gaps=gaps), "peer-1")
        assert len(shares) == 1
        assert shares[0].key == "gap:slack-notify"

    def test_collect_policies_filters_severity(self):
        """Of an ``action`` and an ``info`` recommendation, one is collected."""
        from maggy.mesh.publisher import collect_policies
        actionable = SimpleNamespace(
            severity="action", category="routing",
            message="Fix it", suggestion="Do this",
        )
        informational = SimpleNamespace(
            severity="info", category="mem",
            message="FYI", suggestion="N/A",
        )
        report = SimpleNamespace(
            recommendations=[actionable, informational],
        )
        introspector = SimpleNamespace(get_report=lambda: report)
        shares = collect_policies(introspector, "peer-1")
        assert len(shares) == 1  # only action severity

    def test_collect_all_none_services(self):
        """A state object with no attributes yields an empty share list."""
        from maggy.mesh.publisher import collect_all_shares
        empty_state = SimpleNamespace()
        shares = collect_all_shares(empty_state, "peer-1")
        assert shares == []


# ── Git Discovery (mocked HTTP) ─────────────────────────


class TestGitDiscovery:
    """Git discovery helpers run against a patched ``httpx.AsyncClient``.

    Each test replaces ``httpx.AsyncClient`` with a mock whose HTTP verb
    methods return canned responses, so no network traffic takes place.
    """

    @staticmethod
    def _response(status_code):
        """Return a mock response object carrying ``status_code``."""
        resp = AsyncMock()
        resp.status_code = status_code
        return resp

    @staticmethod
    def _client(**verbs):
        """Return a mock client usable as ``async with httpx.AsyncClient()``.

        Each keyword names an HTTP verb method (``get``, ``post``, ...) and
        maps it to the response that awaiting that method should produce.
        """
        client = AsyncMock()
        for verb, resp in verbs.items():
            setattr(client, verb, AsyncMock(return_value=resp))
        client.__aenter__ = AsyncMock(return_value=client)
        # ``async with`` treats a truthy ``__aexit__`` result as "exception
        # handled". A bare ``AsyncMock()`` resolves to a truthy MagicMock and
        # would hide any error raised inside the block, so return False.
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_ensure_repo_exists(self):
        """When the mocked GET answers 200, ``ensure_mesh_repo`` is True."""
        from maggy.mesh.git_discovery import ensure_mesh_repo
        mock_client = self._client(get=self._response(200))
        with patch("httpx.AsyncClient", return_value=mock_client):
            result = await ensure_mesh_repo("org", "token")
        assert result is True

    @pytest.mark.asyncio
    async def test_ensure_repo_creates_new(self):
        """GET answering 404 and POST answering 201 also gives True."""
        from maggy.mesh.git_discovery import ensure_mesh_repo
        mock_client = self._client(
            get=self._response(404),
            post=self._response(201),
        )
        with patch("httpx.AsyncClient", return_value=mock_client):
            result = await ensure_mesh_repo("org", "token")
        assert result is True

    @pytest.mark.asyncio
    async def test_read_peers_empty(self):
        """When the mocked GET answers 404, ``read_peers`` returns ``[]``."""
        from maggy.mesh.git_discovery import read_peers
        mock_client = self._client(get=self._response(404))
        with patch("httpx.AsyncClient", return_value=mock_client):
            result = await read_peers("org", "token")
        assert result == []

    @pytest.mark.asyncio
    async def test_announce_success(self):
        """GET answering 404 and PUT answering 201 makes ``announce`` True."""
        from maggy.mesh.git_discovery import Announcement, announce
        mock_client = self._client(
            get=self._response(404),
            put=self._response(201),
        )
        ann = Announcement(
            peer_id="peer-1", name="node",
            address="ws://x",
        )
        with patch("httpx.AsyncClient", return_value=mock_client):
            result = await announce("org", ann, "tok")
        assert result is True

    @pytest.mark.asyncio
    async def test_remove_announcement(self):
        """GET returning a ``sha`` plus DELETE answering 200 gives True."""
        from maggy.mesh.git_discovery import remove_announcement
        found = self._response(200)
        # ``json`` is replaced with a plain callable so that calling it
        # returns the dict directly rather than an awaitable.
        found.json = lambda: {"sha": "abc123"}
        mock_client = self._client(
            get=found,
            delete=self._response(200),
        )
        with patch("httpx.AsyncClient", return_value=mock_client):
            result = await remove_announcement(
                "org", "peer-1", "tok",
            )
        assert result is True


# ── Promote Flow ────────────────────────────────────────


class TestPromoteFlow:
    """Moving quarantined entries into the sync engine via promotion."""

    def test_promote_accepts_into_sync(self, tmp_path: Path):
        """A promoted entry leaves quarantine and becomes a local memory."""
        from maggy.mesh.network import build_network
        from maggy.mesh.store import MeshStore
        entry_key = "score:m1:fix"
        db = MeshStore(tmp_path / "mesh.db")
        network = build_network("org-a", "secret", db)
        network.quarantine.quarantine(
            key=entry_key,
            source="peer-2",
            reason="low confidence",
            content={"model": "m1"},
            memory_type="score",
        )
        # Before promotion: one quarantined entry, nothing held locally.
        assert network.quarantine.count == 1
        assert network.sync.local_count == 0
        promoted = network.sync.promote_from_quarantine(entry_key)
        assert promoted is True
        # After promotion the two counters have swapped.
        assert network.quarantine.count == 0
        assert network.sync.local_count == 1
        memory = network.sync.get_local(entry_key)
        assert memory is not None
        assert memory.content == {"model": "m1"}

    def test_promote_nonexistent_returns_false(
        self, tmp_path: Path,
    ):
        """Promoting a key that was never quarantined returns ``False``."""
        from maggy.mesh.network import build_network
        from maggy.mesh.store import MeshStore
        db = MeshStore(tmp_path / "mesh.db")
        network = build_network("org-a", "secret", db)
        promoted = network.sync.promote_from_quarantine("nope")
        assert promoted is False


# ── Replay Protection ──────────────────────────────────


class TestReplayProtection:
    """Rejection of signed envelopes that carry an old timestamp."""

    def test_stale_message_rejected(self):
        """An envelope whose ``ts`` is moved 600 s into the past is rejected."""
        import time
        from maggy.mesh.transport import (
            sign_message,
            verify_message,
        )
        from maggy.mesh.protocol import create_hello
        msg = create_hello("peer-1", "tester")
        signed = sign_message(msg, "key")
        envelope = json.loads(signed)
        # The envelope is expected to carry its signature under "sig".
        assert "sig" in envelope
        # Tamper timestamp to make it old
        envelope["ts"] = time.time() - 600
        tampered = json.dumps(envelope)
        # NOTE(review): the envelope is not re-signed after editing "ts". If
        # the signature covers "ts", this is rejected for a signature mismatch
        # rather than for staleness; confirm against verify_message.
        result = verify_message(tampered, "key")
        assert result is None


# ── SQLite Reload on Init ──────────────────────────────


class TestSqliteReload:
    """State written through one object is visible to a new one on the same store."""

    def test_peers_reload_from_store(self, tmp_path: Path):
        """A second ``PeerRegistry`` on the same store sees the registered peer."""
        from maggy.mesh.discovery import PeerInfo, PeerRegistry
        from maggy.mesh.store import MeshStore
        db = MeshStore(tmp_path / "mesh.db")
        writer = PeerRegistry(db, "org-a")
        alice = PeerInfo(
            peer_id="p1",
            name="Alice",
            address="ws://a",
            org="org-a",
        )
        writer.register(alice)
        # A registry built afterwards from the same store should reload it.
        reader = PeerRegistry(db, "org-a")
        assert reader.count == 1
        assert reader.get("p1") is not None

    def test_sync_reload_from_store(self, tmp_path: Path):
        """A second ``SyncEngine`` on the same store reports the synced memory."""
        from maggy.mesh.memory import SharedMemory
        from maggy.mesh.quarantine import QuarantineStore
        from maggy.mesh.store import MeshStore
        from maggy.mesh.sync import SyncEngine
        db = MeshStore(tmp_path / "mesh.db")
        first_quarantine = QuarantineStore(db, "org-a")
        first_engine = SyncEngine(first_quarantine, db, "org-a")
        memory = SharedMemory(
            key="k1",
            memory_type="score",
            content={"x": 1},
            source_peer="p1",
        )
        first_engine.sync_incoming([memory])
        # An engine built afterwards from the same store should reload it.
        second_quarantine = QuarantineStore(db, "org-a")
        second_engine = SyncEngine(second_quarantine, db, "org-a")
        assert second_engine.local_count == 1


================================================
FILE: maggy/tests/test_mesh_store.py
================================================
"""Tests for mesh SQLite store."""

from __future__ import annotations

from pathlib import Path

import pytest

from maggy.mesh.store import MeshStore


@pytest.fixture
def store(tmp_path: Path) -> MeshStore:
    """Provide a ``MeshStore`` on ``mesh.db`` inside the per-test temp directory."""
    db_file = tmp_path / "mesh.db"
    return MeshStore(db_file)


class TestPeerCRUD:
    """Create, read, update and delete of peer rows in ``MeshStore``."""

    def test_upsert_and_get(self, store: MeshStore):
        """An upserted peer can be read back with its name."""
        store.upsert_peer("p1", "Alice", "1.2.3.4", 8080, "acme")
        row = store.get_peer("p1", "acme")
        assert row is not None
        assert row["name"] == "Alice"

    def test_list_by_org(self, store: MeshStore):
        """Filtering by org returns only that org's peers."""
        store.upsert_peer("p1", "A", "1.1.1.1", 8080, "acme")
        store.upsert_peer("p2", "B", "2.2.2.2", 8080, "other")
        acme_peers = store.list_peers(org="acme")
        assert len(acme_peers) == 1

    def test_list_all(self, store: MeshStore):
        """Listing without an org returns peers from every org."""
        store.upsert_peer("p1", "A", "1.1.1.1", 8080, "a")
        store.upsert_peer("p2", "B", "2.2.2.2", 8080, "b")
        everyone = store.list_peers()
        assert len(everyone) == 2

    def test_remove_peer(self, store: MeshStore):
        """Removing a stored peer is truthy and the peer is gone afterwards."""
        store.upsert_peer("p1", "A", "1.1.1.1", 8080, "acme")
        removed = store.remove_peer("p1", "acme")
        assert removed
        assert store.get_peer("p1", "acme") is None

    def test_remove_missing(self, store: MeshStore):
        """Removing a peer that was never stored is falsy."""
        removed = store.remove_peer("nope", "acme")
        assert not removed

    def test_upsert_updates(self, store: MeshStore):
        """A second upsert for the same peer replaces name and address."""
        store.upsert_peer("p1", "A", "1.1.1.1", 8080, "acme")
        store.upsert_peer("p1", "A-new", "9.9.9.9", 8080, "acme")
        row = store.get_peer("p1", "acme")
        assert row["name"] == "A-new"
        assert row["address"] == "9.9.9.9"


class TestMemoryCRUD:
    """Writing and listing shared-memory rows in ``MeshStore``."""

    def test_write_and_list(self, store: MeshStore):
        """A written memory is listed for its org under its key."""
        store.write_memory("acme", "k1", "score", {"x": 1}, "p1")
        rows = store.list_memories("acme")
        assert len(rows) == 1
        first = rows[0]
        assert first["key"] == "k1"

    def test_scoped_by_org(self, store: MeshStore):
        """Each org only lists its own memories."""
        store.write_memory("acme", "k1", "score", {}, "p1")
        store.write_memory("other", "k2", "gap", {}, "p2")
        acme_rows = store.list_memories("acme")
        assert len(acme_rows) == 1
        other_rows = store.list_memories("other")
        assert len(other_rows) == 1

    def test_upsert_memory(self, store: MeshStore):
        """Writing the same org/key twice keeps one row with the newer content."""
        store.write_memory("acme", "k1", "score", {"v": 1}, "p1")
        store.write_memory("acme", "k1", "score", {"v": 2}, "p1")
        rows = store.list_memories("acme")
        assert len(rows) == 1
        assert rows[0]["content"]["v"] == 2


class TestQuarantineCRUD:
    """Quarantine rows in ``MeshStore``: insert, list, promote."""

    def test_quarantine_and_list(self, store: MeshStore):
        """A quarantined item is listed for its org along with its reason."""
        store.quarantine_item("acme", "k1", "p1", "low conf", {"x": 1})
        held = store.list_quarantined("acme")
        assert len(held) == 1
        assert held[0]["reason"] == "low conf"

    def test_promote(self, store: MeshStore):
        """Promoting a quarantined item is truthy and empties the listing."""
        store.quarantine_item("acme", "k1", "p1", "test", {})
        promoted = store.promote_item("acme", "k1")
        assert promoted
        held = store.list_quarantined("acme")
        assert len(held) == 0

    def test_promote_missing(self, store: MeshStore):
        """Promoting an unknown key is falsy."""
        promoted = store.promote_item("acme", "nope")
        assert not promoted

    def test_scoped_by_org(self, store: MeshStore):
        """Items quarantined for another org are not listed for ``acme``."""
        store.quarantine_item("acme", "k1", "p1", "r", {})
        store.quarantine_item("other", "k2", "p2", "r", {})
        acme_held = store.list_quarantined("acme")
        assert len(acme_held) == 1


================================================
FILE: maggy/tests/test_mesh_ws.py
================================================
"""Tests for WebSocket server and client."""

from __future__ import annotations

import json
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from maggy.mesh.protocol import (
    MessageType,
    MeshMessage,
    create_hello,
    create_share,
)
from maggy.mesh.transport import sign_message
from maggy.mesh.ws_server import router


# ── WS Server ──────────────────────────────────────────


def _build_app_with_mesh(tmp_dir: Path | None = None):
    """Assemble a FastAPI app with a ``MeshManager`` and the mesh router.

    Args:
        tmp_dir: Directory that receives ``mesh.db``. When omitted, a new
            directory is created with ``tempfile.mkdtemp()``; this helper
            does not remove it afterwards.

    Returns:
        ``(app, manager)``. The manager has one network, ``"test-org"``, and
        is also reachable as ``app.state.mesh``.
    """
    import tempfile
    from maggy.mesh.manager import MeshManager
    from maggy.mesh.store import MeshStore

    workdir = Path(tempfile.mkdtemp()) if tmp_dir is None else tmp_dir
    app = FastAPI()
    db = MeshStore(workdir / "mesh.db")
    settings = SimpleNamespace(
        peer_id="server-peer",
        org_key_secret="test-secret",
        port=8080,
        tunnel_url="",
        git_discovery=False,
    )
    manager = MeshManager(settings, db)
    manager.add_network("test-org")
    app.state.mesh = manager
    app.include_router(router)
    return app, manager


class TestWsServerNoMesh:
    """WebSocket endpoint behaviour when the app has no mesh manager."""

    def test_no_mesh_closes_connection(self):
        """Reading from ``/ws/mesh`` raises when ``app.state.mesh`` is ``None``."""
        app = FastAPI()
        app.state.mesh = None
        app.include_router(router)
        client = TestClient(app)
        with client.websocket_connect("/ws/mesh") as ws:
            # Server should close immediately with 1008, so the read must
            # raise. ``pytest.raises`` is used rather than try/except with
            # ``assert False``: the AssertionError is itself an ``Exception``
            # and a broad ``except`` would swallow it, so such a test could
            # never fail.
            with pytest.raises(Exception):
                ws.receive_text()


class TestWsServerAuth:
    """Connections that fail the server's initial message checks."""

    def test_invalid_json_closes(self):
        """Sending text that is not JSON leads to an exception on the client."""
        app, _ = _build_app_with_mesh()
        http = TestClient(app)
        with pytest.raises(Exception), http.websocket_connect("/ws/mesh") as sock:
            sock.send_text("not-valid-json")
            sock.receive_text()

    def test_wrong_org_closes(self):
        """A hello naming ``wrong-org`` leads to an exception on the client."""
        app, manager = _build_app_with_mesh()
        network = manager.get_network("test-org")
        greeting = create_hello("client-1", "client")
        greeting.payload["org"] = "wrong-org"
        # Signed with the key of "test-org" while the payload names another org.
        wire = sign_message(greeting, network.org_key)
        http = TestClient(app)
        with pytest.raises(Exception), http.websocket_connect("/ws/mesh") as sock:
            sock.send_text(wire)
            sock.receive_text()


class TestWsServerHello:
    """Handshake with a correctly signed hello message."""

    def test_valid_hello_gets_reply(self):
        """The server's reply is JSON containing ``payload`` and ``sig`` keys."""
        app, manager = _build_app_with_mesh()
        network = manager.get_network("test-org")
        greeting = create_hello("client-1", "client")
        greeting.payload["org"] = "test-org"
        wire = sign_message(greeting, network.org_key)
        http = TestClient(app)
        with http.websocket_connect("/ws/mesh") as sock:
            sock.send_text(wire)
            reply = json.loads(sock.receive_text())
            assert "payload" in reply
            assert "sig" in reply


# ── WS Client ──────────────────────────────────────────


class TestMeshClient:
    """``MeshClient`` behaviour while it holds no connections."""

    def test_init(self):
        """A new client reports zero connections."""
        from maggy.mesh.ws_client import MeshClient
        mesh_client = MeshClient("peer-1")
        assert mesh_client.connected_count == 0

    def test_is_connected_false(self):
        """``is_connected`` is ``False`` for a peer never connected to."""
        from maggy.mesh.ws_client import MeshClient
        mesh_client = MeshClient("peer-1")
        connected = mesh_client.is_connected("nope")
        assert connected is False

    @pytest.mark.asyncio
    async def test_send_no_connection(self):
        """Sending to a peer with no connection returns ``False``."""
        from maggy.mesh.ws_client import MeshClient
        mesh_client = MeshClient("peer-1")
        hello = create_hello("peer-1", "test")
        delivered = await mesh_client.send("nope", hello, "key")
        assert delivered is False

    @pytest.mark.asyncio
    async def test_broadcast_empty(self):
        """Broadcasting to an empty peer list returns zero."""
        from maggy.mesh.ws_client import MeshClient
        mesh_client = MeshClient("peer-1")
        hello = create_hello("peer-1", "test")
        reached = await mesh_client.broadcast([], hello, "key")
        assert reached == 0

    @pytest.mark.asyncio
    async def test_close_all_empty(self):
        """``close_all`` with no connections leaves the count at zero."""
        from maggy.mesh.ws_client import MeshClient
        mesh_client = MeshClient("peer-1")
        await mesh_client.close_all()
        assert mesh_client.connected_count == 0


================================================
FILE: maggy/tests/test_mnemos_fatigue.py
================================================
"""Tests for Mnemos fatigue tracking and signal logging."""

from __future__ import annotations

from pathlib import Path

import pytest

from maggy.mnemos.fatigue import FatigueTracker
from maggy.mnemos.signals import SignalLog


class TestFatigueTracker:
    """``FatigueTracker`` recording, composite score and state labels."""

    def test_composite_and_state_ok(self):
        """Four low readings give a composite of about 0.15 and state ``ok``."""
        tracker = FatigueTracker()
        tracker.record("context_load", 0.2)
        tracker.record("turn_pressure", 0.1)
        tracker.record("reread_ratio", 0.2)
        tracker.record("handoff_risk", 0.1)
        assert round(tracker.composite(), 2) == 0.15
        assert tracker.state() == "ok"

    def test_rejects_invalid_dimension(self):
        """Recording an unknown dimension raises ``ValueError``."""
        tracker = FatigueTracker()
        with pytest.raises(ValueError, match="Unknown dimension"):
            tracker.record("bogus", 0.5)

    def test_model_switch_increases_reread_ratio(self):
        """A model switch stores the new window and lifts ``reread_ratio``."""
        tracker = FatigueTracker()
        tracker.record("reread_ratio", 0.2)
        tracker.on_model_switch(128_000)
        assert tracker.context_window == 128_000
        # The ratio is the result of float arithmetic, so compare with a
        # tolerance instead of exact equality.
        assert tracker.dimensions["reread_ratio"] == pytest.approx(0.35)

    def test_state_thresholds(self):
        """All dimensions at 0.6 give ``compress``; at 0.9 give ``critical``."""
        tracker = FatigueTracker()
        for name in tracker.dimensions:
            tracker.record(name, 0.6)
        assert tracker.state() == "compress"
        for name in tracker.dimensions:
            tracker.record(name, 0.9)
        assert tracker.state() == "critical"


class TestSignalLog:
    """Appending to and reading back from a ``SignalLog`` file."""

    def test_append_and_recent(self, tmp_path: Path):
        """``recent(n)`` returns the last ``n`` entries, oldest first."""
        fatigue_event = {"kind": "fatigue", "value": 0.4}
        switch_event = {"kind": "switch", "value": 1}
        signal_log = SignalLog(tmp_path / "signals.jsonl")
        signal_log.append(fatigue_event)
        signal_log.append(switch_event)
        assert signal_log.recent(1) == [{"kind": "switch", "value": 1}]
        last_two = signal_log.recent(2)
        assert last_two[0]["kind"] == "fatigue"


================================================
FILE: maggy/tests/test_monday_provider.py
================================================
"""Tests for Monday.com provider — IssueTrackerProvider impl."""

from __future__ import annotations

import pytest

from maggy.providers.monday import MondayProvider


@pytest.fixture()
def provider():
    """Provide a ``MondayProvider`` built with a dummy token and board id."""
    monday = MondayProvider(api_token="test-token", board_id="18391076058")
    return monday


def test_provider_name(provider):
    """The provider identifies itself as ``monday``."""
    reported = provider.provider_name()
    assert reported == "monday"


def test_to_task_maps_fields(provider):
    """``_to_task`` maps id, name, status and person onto the task."""
    columns = [
        {"id": "status", "text": "Working on it"},
        {"id": "person", "text": "Ali"},
    ]
    raw_item = {
        "id": "123",
        "name": "Fix login",
        "column_values": columns,
        "url": "https://monday.com/123",
        "created_at": "2025-01-01",
        "updated_at": "2025-01-02",
    }
    mapped = provider._to_task(raw_item)
    assert mapped.id == "123"
    assert mapped.title == "Fix login"
    assert mapped.status == "Working on it"
    assert mapped.assignee == "Ali"


@pytest.mark.asyncio()
async def test_list_tasks_parses_items(provider, monkeypatch):
    """``list_tasks`` turns the items of a board response into tasks."""
    import httpx

    class StubResponse:
        """Canned reply: one board whose items page holds a single item."""

        status_code = 200

        def json(self):
            item = {
                "id": "1", "name": "Task A",
                "column_values": [], "url": "",
                "created_at": "", "updated_at": "",
            }
            page = {"items": [item]}
            return {"data": {"boards": [{"items_page": page}]}}

    async def stub_post(self, url, **kwargs):
        return StubResponse()

    # Every AsyncClient.post call now receives the canned reply.
    monkeypatch.setattr(httpx.AsyncClient, "post", stub_post)
    listed = await provider.list_tasks()
    assert len(listed) == 1
    assert listed[0].title == "Task A"


@pytest.mark.asyncio()
async def test_list_tasks_empty_board(provider, monkeypatch):
    """A board response without items gives an empty task list."""
    import httpx

    class StubResponse:
        """Canned reply: one board whose items page is empty."""

        status_code = 200

        def json(self):
            page = {"items": []}
            return {"data": {"boards": [{"items_page": page}]}}

    async def stub_post(self, url, **kwargs):
        return StubResponse()

    monkeypatch.setattr(httpx.AsyncClient, "post", stub_post)
    listed = await provider.list_tasks()
    assert listed == []


@pytest.mark.asyncio()
async def test_get_task_by_id(provider, monkeypatch):
    """``get_task`` returns the task built from the single item in the reply."""
    import httpx

    class StubResponse:
        """Canned reply carrying exactly one item with id ``42``."""

        status_code = 200

        def json(self):
            item = {
                "id": "42", "name": "Deploy",
                "column_values": [], "url": "",
                "created_at": "", "updated_at": "",
            }
            return {"data": {"items": [item]}}

    async def stub_post(self, url, **kwargs):
        return StubResponse()

    monkeypatch.setattr(httpx.AsyncClient, "post", stub_post)
    found = await provider.get_task("42")
    assert found is not None
    assert found.id == "42"


@pytest.mark.asyncio()
async def test_get_task_not_found(provider, monkeypatch):
    """``get_task`` returns ``None`` when the reply has no items."""
    import httpx

    class StubResponse:
        """Canned reply with an empty ``items`` list."""

        status_code = 200

        def json(self):
            return {"data": {"items": []}}

    async def stub_post(self, url, **kwargs):
        return StubResponse()

    monkeypatch.setattr(httpx.AsyncClient, "post", stub_post)
    found = await provider.get_task("999")
    assert found is None


================================================
FILE: maggy/tests/test_monitor.py
================================================
"""Tests for MonitorService — background tracker polling."""

from __future__ import annotations

import pytest

from maggy.services.monitor import (
    MonitorConfig,
    MonitorService,
)


@pytest.fixture()
def svc(tmp_path):
    """Provide a ``MonitorService`` on ``monitors.db`` in the per-test temp dir."""
    db_file = tmp_path / "monitors.db"
    return MonitorService(db_file)


def test_add_and_list(svc):
    """A monitor that was added appears in the active list."""
    svc.add(MonitorConfig(project_key="protaige", provider="github"))
    monitors = svc.list_active()
    assert len(monitors) == 1
    only = monitors[0]
    assert only.project_key == "protaige"


def test_remove(svc):
    """After removal by project key the active list is empty."""
    zenloop = MonitorConfig(project_key="zenloop", provider="asana")
    svc.add(zenloop)
    svc.remove("zenloop")
    remaining = svc.list_active()
    assert remaining == []


def test_is_new_unseen(svc):
    """An event id that was never marked seen counts as new."""
    fresh = svc.is_new("PR-42", "protaige")
    assert fresh is True


def test_mark_seen_not_new(svc):
    """Once an event id is marked seen it stops counting as new."""
    svc.mark_seen("PR-42", "protaige")
    still_new = svc.is_new("PR-42", "protaige")
    assert still_new is False


def test_add_duplicate_updates(svc):
    """Re-adding a project key replaces the entry instead of adding another."""
    for tracker in ("github", "asana"):
        svc.add(MonitorConfig(project_key="x", provider=tracker))
    monitors = svc.list_active()
    assert len(monitors) == 1
    # The entry added last is the one that remains.
    assert monitors[0].provider == "asana"


def test_default_interval(svc):
    """A config added without an interval lists ``interval_seconds`` of 300."""
    svc.add(MonitorConfig(project_key="p", provider="github"))
    stored = svc.list_active()[0]
    assert stored.interval_seconds == 300


def test_status_summary(svc):
    """``status()["active"]`` counts the monitors that were added."""
    for key, provider in (("a", "github"), ("b", "asana")):
        svc.add(MonitorConfig(project_key=key, provider=provider))
    summary = svc.status()
    assert summary["active"] == 2


@pytest.mark.asyncio()
async def test_poll_github_prs(svc, monkeypatch):
    """``poll`` turns a mocked GitHub pull-request listing into events."""
    import httpx

    pull_requests = [
        {"number": 1, "title": "Add auth",
         "html_url": "https://github.com/x/1"},
    ]

    class FakeResp:
        """Stand-in for the object returned by the patched ``AsyncClient.get``."""

        status_code = 200

        def json(self):
            # Hand out fresh copies so every call sees an untouched payload.
            return [dict(pr) for pr in pull_requests]

    async def fake_get(self, url, **kw):
        return FakeResp()

    # Patch at class level so any client instance created by poll() is covered.
    monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)

    monitor_cfg = MonitorConfig(
        project_key="protaige", provider="github",
        poll_command="alinaqi/AI-Playground",
    )
    found = await svc.poll(monitor_cfg)
    assert len(found) == 1
    assert found[0].title == "Add auth"


================================================
FILE: maggy/tests/test_multimodel_integration.py
================================================
"""Integration test — small project with tasks across kimi, gpt, claude.

Simulates Maggy routing a batch of tasks with varying complexity through
the full executor pipeline, verifying each lands on the correct model
and that budget/fallback/checkpoint systems work end-to-end.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock

import pytest

from maggy.adapters.pi import PiAdapter, RunResult
from maggy.budget import BudgetManager, TaskSpendTracker
from maggy.checkpoint import CheckpointManager
from maggy.config import (
    CodebaseConfig,
    MaggyConfig,
    OrgConfig,
    ProjectConfig,
    StorageConfig,
)
from maggy.coordination.lock_manager import LockManager
from maggy.mnemos.fatigue import FatigueTracker
from maggy.providers.base import Task
from maggy.routing import RoutingContext, RoutingService
from maggy.services.executor import ExecutorService
from maggy.services.executor_types import SessionCtx
from maggy.services.planner import DualPlanner


# -- helpers ---------------------------------------------------------------

def _project_cfg(tmp_path) -> MaggyConfig:
    """Build a MaggyConfig for org "acme" with one "webapp" codebase/project.

    Both the storage file and the repository path are located under
    ``tmp_path``.
    """
    repo_dir = str(tmp_path / "repo")
    store_file = str(tmp_path / "store.db")
    webapp_project = ProjectConfig(
        name="webapp",
        repo="acme/webapp",
        path=repo_dir,
        default_branch="main",
    )
    webapp_codebase = CodebaseConfig(path=repo_dir, key="webapp")
    return MaggyConfig(
        org=OrgConfig(name="acme"),
        storage=StorageConfig(path=store_file),
        codebases=[webapp_codebase],
        projects=[webapp_project],
    )


def _task(blast: int, ttype: str, title: str) -> Task:
    """Create a Task whose ``raw`` payload carries the routing hints.

    The task id is derived from ``blast``; ``security_sensitive`` is set
    only when ``ttype`` is exactly "security".
    """
    routing_hints = {
        "blast_score": blast,
        "task_type": ttype,
        "security_sensitive": ttype == "security",
    }
    summary = f"A {ttype} task with blast={blast}."
    return Task(
        id=f"TASK-{blast}",
        title=title,
        description=summary,
        raw=routing_hints,
    )


# Batch of tasks spanning blast scores 1-9, used by the executor pipeline test.
TASKS = [
    _task(blast, ttype, title)
    for blast, ttype, title in (
        (1, "docs", "Update README typo"),
        (2, "formatting", "Fix lint warnings"),
        (5, "feature", "Add pagination to API"),
        (7, "refactor", "Extract auth middleware"),
        (9, "security", "Patch XSS in comments"),
    )
]


# -- 1. Routing decisions --------------------------------------------------

class TestRoutingDecisions:
    """Routing picks the expected model for each blast score / task type."""

    def test_low_blast_routes_to_cheap_tier(self, tmp_path):
        """Blast 1 and 2 land on a model with cost_rank <= 2 (local or kimi)."""
        router = RoutingService(_project_cfg(tmp_path))
        for blast in (1, 2):
            # Use "formatting" — "docs" is now rules-overridden
            decision = router.route(
                RoutingContext(blast_score=blast, task_type="formatting"),
            )
            chosen = decision.primary
            assert chosen.cost_rank <= 2, (
                f"blast={blast} should route to cheap tier"
            )
            assert chosen.name in ("local", "kimi")

    def test_mid_blast_routes_to_cheapest_capable(self, tmp_path):
        """A blast-5 feature task goes to local or codex."""
        router = RoutingService(_project_cfg(tmp_path))
        decision = router.route(
            RoutingContext(blast_score=5, task_type="feature"),
        )
        assert decision.primary.name in ("local", "codex")

    def test_blast_6_routes_to_codex(self, tmp_path):
        """A blast-6 feature task goes to codex."""
        router = RoutingService(_project_cfg(tmp_path))
        decision = router.route(
            RoutingContext(blast_score=6, task_type="feature"),
        )
        assert decision.primary.name == "codex"

    def test_high_blast_routes_to_codex_or_claude(self, tmp_path):
        """A blast-9 refactor goes to codex or claude."""
        router = RoutingService(_project_cfg(tmp_path))
        decision = router.route(
            RoutingContext(blast_score=9, task_type="refactor"),
        )
        assert decision.primary.name in ("codex", "claude")

    def test_security_routes_to_claude(self, tmp_path):
        """A security-sensitive task goes to claude even at blast 3."""
        router = RoutingService(_project_cfg(tmp_path))
        request = RoutingContext(
            blast_score=3, task_type="security",
            security_sensitive=True,
        )
        decision = router.route(request)
        # Security rule override → claude
        # The primary may be a bare model name or an object exposing ``.name``.
        primary = decision.primary
        if isinstance(primary, str):
            name = primary
        else:
            name = primary.name
        assert name == "claude"


# -- 2. Full executor pipeline with mocked models -------------------------

class TestExecutorPipeline:
    """End-to-end executor routing with fake model responses."""

    @pytest.mark.asyncio
    async def test_distributes_across_models(self, tmp_path, monkeypatch):
        """Running every task in TASKS spreads the work over >= 3 models.

        The adapter's ``send_prompt`` is replaced by a recorder that notes
        the model name of each call, and ``build_icpg_context`` is stubbed
        to return an empty string. The stub on ``executor_helpers`` is
        installed through ``monkeypatch`` so the module attribute is put
        back at teardown instead of leaking into tests that run afterwards.
        """
        cfg = _project_cfg(tmp_path)
        (tmp_path / "repo").mkdir()
        provider = AsyncMock()
        executor = ExecutorService(cfg, provider)

        calls: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            # Record which model the executor picked, then report success.
            calls.append(model_name)
            return RunResult(
                model=model_name, success=True,
                output="done", cost_usd=0.10,
            )

        async def fake_ctx(cfg, task):
            return ""

        # Instance-level stub: it goes away together with ``executor``.
        executor._pi.send_prompt = fake_send

        # Module-level stub: must be undone after the test, hence monkeypatch.
        # raising=False keeps the old "plain assignment" semantics when the
        # attribute is absent.
        from maggy.services import executor_helpers
        monkeypatch.setattr(
            executor_helpers, "build_icpg_context", fake_ctx,
            raising=False,
        )

        for task in TASKS:
            sid = f"s-{task.id}"
            session = {
                "id": sid, "task_id": task.id,
                "task_title": task.title, "mode": "plan",
                "working_dir": str(tmp_path / "repo"),
                "status": "running", "started_at": "",
                "output": "",
            }
            executor._sessions[sid] = session
            ctx = SessionCtx(session, task, str(tmp_path / "repo"))
            await executor._run(ctx, "plan")

        # Verify each complexity tier used a different model
        cheap = {"local", "kimi"}
        assert cheap & set(calls), "Low-blast should use cheap tier"
        assert "codex" in calls, "Mid-blast should use codex"
        assert "claude" in calls, "Security should use claude"
        assert len(set(calls)) >= 3, (
            f"Expected >= 3 distinct models, got {set(calls)}"
        )


# -- 3. Budget tracking across providers ----------------------------------

class TestCrossProviderBudget:
    """Spend accounting across providers and the per-task spend limit."""

    def test_spend_tracked_per_provider(self, tmp_path):
        """Spend recorded for three providers is reported for each of them."""
        budget = BudgetManager(_project_cfg(tmp_path))
        for provider, model, usd in (
            ("moonshot", "kimi-k2", 0.05),
            ("openai", "gpt-4o", 0.30),
            ("anthropic", "claude-sonnet-4", 1.20),
        ):
            budget.record_spend(provider, model, usd)

        reported = {row["provider"] for row in budget.by_provider()}
        assert reported == {"moonshot", "openai", "anthropic"}

    def test_task_spend_halts_at_limit(self):
        """Recording 1.1 against a 1.0 limit marks the tracker as exceeded."""
        spend = TaskSpendTracker(max_spend=1.0)
        for amount in (0.3, 0.3, 0.5):
            spend.record(amount)
        assert spend.is_exceeded()
        assert spend.total() == pytest.approx(1.1)


# -- 4. Fallback chain on quota -------------------------------------------

class TestFallbackChain:
    """``send_with_fallback`` moves on when a model reports a quota hit."""

    @pytest.mark.asyncio
    async def test_falls_back_on_failure(self):
        """Starting at kimi, a later model in the chain ends up succeeding."""
        adapter = PiAdapter()
        attempted: list[str] = []
        quota_limited = ("kimi", "deepseek")

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            attempted.append(model_name)
            if model_name not in quota_limited:
                return RunResult(
                    model=model_name, success=True, output="ok",
                )
            # These two models always answer with a quota failure.
            return RunResult(
                model=model_name, success=False,
                error="quota", quota_hit=True,
            )

        adapter.send_prompt = fake_send
        outcome = await adapter.send_with_fallback(
            "kimi", "test prompt", "/tmp",
        )
        assert outcome.success
        assert outcome.model != "kimi"
        assert len(attempted) > 1


# -- 5. Checkpoint survives model switch -----------------------------------

class TestCheckpointHandoff:
    """Checkpoint data written for a session can be read back unchanged."""

    def test_checkpoint_roundtrip(self, tmp_path):
        """Goal, model history and fatigue score survive write + read."""
        store = CheckpointManager(tmp_path / "checkpoints")
        snapshot = {
            "goal": "Ship auth feature",
            "constraints": ["Keep tests green"],
            "progress": ["Step 1 done by kimi"],
            "model_history": ["kimi", "claude"],
            "current_subgoal": "Write integration tests",
            "fatigue_score": 0.35,
        }
        store.write("session-abc", snapshot)

        restored = store.read("session-abc")
        assert restored is not None
        assert restored["goal"] == "Ship auth feature"
        assert restored["model_history"] == ["kimi", "claude"]
        assert restored["fatigue_score"] == 0.35


# -- 6. Dual planning uses different models --------------------------------

class TestDualPlanning:
    """``DualPlanner.dual_plan`` consults both claude and codex."""

    @pytest.mark.asyncio
    async def test_plan_and_review_use_separate_models(self):
        """Both model names are sent a prompt and the primary plan is kept."""
        seen_models: list[str] = []

        async def fake_send(model, prompt, wd, turns=5):
            seen_models.append(model)
            return RunResult(
                model=model, success=True, output="plan output",
            )

        adapter = MagicMock()
        adapter.send_prompt = fake_send

        outcome = await DualPlanner(adapter).dual_plan(
            "Add OAuth", "Implement OAuth2 flow", "/tmp",
        )
        assert "claude" in seen_models
        assert "codex" in seen_models
        assert outcome.primary_plan == "plan output"


# -- 7. Fatigue tracks model switches --------------------------------------

class TestFatigueAcrossModels:
    """Model switches update the context window and raise reread fatigue."""

    def test_model_switch_increases_fatigue(self):
        """Each switch is expected to lift ``reread_ratio`` by 0.15.

        The dimension starts at 0.2 and the test expects 0.35 after the
        first switch and 0.50 after the second. Those values come out of
        float arithmetic inside the tracker, so they are compared with
        ``pytest.approx`` rather than exact equality.
        """
        tracker = FatigueTracker(context_window=200_000)
        tracker.record("context_load", 0.3)
        tracker.record("reread_ratio", 0.2)
        assert tracker.state() == "ok"

        tracker.on_model_switch(128_000)
        # The window size is an int and can be compared exactly.
        assert tracker.context_window == 128_000
        assert tracker.dimensions["reread_ratio"] == pytest.approx(0.35)

        tracker.on_model_switch(128_000)
        assert tracker.dimensions["reread_ratio"] == pytest.approx(0.50)


# -- 8. Lock coordination between agents -----------------------------------

class TestLockCoordination:
    """File locks keep two agents from holding the same path at once."""

    def test_agents_cant_clobber_each_other(self, tmp_path):
        """A held path is refused to a second agent until it is released."""
        manager = LockManager(tmp_path / "locks.db")
        auth_file, api_file = "src/auth.py", "src/api.py"
        first_agent, second_agent = "kimi-agent", "claude-agent"

        assert manager.acquire(auth_file, first_agent)
        assert not manager.acquire(auth_file, second_agent)
        assert manager.acquire(api_file, second_agent)

        # Both paths are held now, so both are reported as conflicts.
        held = manager.conflicts([auth_file, api_file])
        assert auth_file in held
        assert api_file in held

        manager.release(auth_file, first_agent)
        assert manager.acquire(auth_file, second_agent)


================================================
FILE: maggy/tests/test_observability.py
================================================
"""Tests for observability signal collection."""

from __future__ import annotations

from maggy.observability import ObservabilityCollector


def test_records_and_reads_recent_signals(tmp_path) -> None:
    """Recorded signals come back newest first."""
    collector = ObservabilityCollector(tmp_path / "signals.db")
    for signal_type, value in (("fatigue", 0.4), ("budget", 0.9)):
        collector.record_signal("maggy", signal_type, value)

    signals = collector.recent_signals("maggy")

    assert len(signals) == 2
    newest, oldest = signals[0], signals[1]
    assert newest["signal_type"] == "budget"
    assert oldest["signal_type"] == "fatigue"


def test_limits_recent_signals(tmp_path) -> None:
    """``limit=1`` returns only the most recently recorded signal."""
    collector = ObservabilityCollector(tmp_path / "signals.db")
    for value in (0.2, 0.5):
        collector.record_signal("maggy", "fatigue", value)

    signals = collector.recent_signals("maggy", limit=1)

    assert len(signals) == 1
    latest = signals[0]
    assert latest["value"] == 0.5


================================================
FILE: maggy/tests/test_output_reviewer.py
================================================
"""Tests for inter-task output reviewer."""

from __future__ import annotations

import pytest

from maggy.services.output_reviewer import (
    _parse_review,
    review_output,
)


class TestParseReview:
    """``_parse_review`` extracts SCORE / REASON from reviewer text."""

    def test_parses_score_and_reason(self):
        """Both fields are picked up from a two-line reply."""
        parsed = _parse_review("SCORE: 4\nREASON: Clean implementation")
        assert parsed.score == 4
        assert parsed.reason == "Clean implementation"

    def test_parses_score_only(self):
        """A reply without REASON yields an empty reason."""
        parsed = _parse_review("SCORE: 2")
        assert parsed.score == 2
        assert parsed.reason == ""

    def test_no_score_returns_default(self):
        """Unstructured text falls back to score 3 with an empty reason."""
        parsed = _parse_review("No structured output here")
        assert parsed.score == 3
        assert parsed.reason == ""

    def test_score_out_of_range_clamped(self):
        """Scores outside 1..5 are clamped to the nearest bound."""
        too_low = _parse_review("SCORE: 0")
        assert too_low.score == 1
        too_high = _parse_review("SCORE: 8")
        assert too_high.score == 5

    def test_score_from_inline_text(self):
        """A SCORE that follows other text on the same line is still found."""
        parsed = _parse_review("The output is fine. SCORE: 5")
        assert parsed.score == 5


class TestReviewOutput:
    """``review_output`` behaviour against a PiAdapter with a stubbed sender."""

    @staticmethod
    def _adapter_with(sender):
        """Return a PiAdapter whose ``send_prompt`` is replaced by ``sender``."""
        from maggy.adapters.pi import PiAdapter
        adapter = PiAdapter()
        adapter.send_prompt = sender
        return adapter

    @pytest.mark.asyncio
    async def test_returns_review_result(self):
        """A successful reply is parsed into score and reason."""
        from maggy.adapters.pi import RunResult

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            return RunResult(
                model=model_name, success=True,
                output="SCORE: 4\nREASON: Looks good",
            )

        review = await review_output(
            self._adapter_with(fake_send), "ANALYZE", "some output", "/tmp",
        )
        assert review.score == 4
        assert "Looks good" in review.reason

    @pytest.mark.asyncio
    async def test_failure_returns_passthrough(self):
        """An unsuccessful run yields score 3 and "review unavailable"."""
        from maggy.adapters.pi import RunResult

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            return RunResult(
                model=model_name, success=False,
                error="model unavailable",
            )

        review = await review_output(
            self._adapter_with(fake_send), "IMPLEMENT", "output", "/tmp",
        )
        assert review.score == 3
        assert review.reason == "review unavailable"

    @pytest.mark.asyncio
    async def test_exception_returns_passthrough(self):
        """An OSError raised by the sender still yields score 3."""
        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            raise OSError("connection failed")

        review = await review_output(
            self._adapter_with(fake_send), "ANALYZE", "output", "/tmp",
        )
        assert review.score == 3

    @pytest.mark.asyncio
    async def test_uses_local_model(self):
        """Exactly one prompt is sent, and it goes to the "local" model."""
        from maggy.adapters.pi import RunResult

        requested: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            requested.append(model_name)
            return RunResult(
                model=model_name, success=True,
                output="SCORE: 4\nREASON: ok",
            )

        await review_output(
            self._adapter_with(fake_send), "ANALYZE", "output", "/tmp",
        )
        assert requested == ["local"]

    @pytest.mark.asyncio
    async def test_prompt_contains_step_and_output(self):
        """The prompt sent to the model embeds the step name and the output."""
        from maggy.adapters.pi import RunResult

        sent: list[str] = []

        async def fake_send(
            model_name, prompt, wd, max_turns=20, timeout=600,
        ):
            sent.append(prompt)
            return RunResult(
                model=model_name, success=True,
                output="SCORE: 3",
            )

        await review_output(
            self._adapter_with(fake_send),
            "WRITE TESTS", "test_add_user passed", "/tmp",
        )
        first_prompt = sent[0]
        assert "WRITE TESTS" in first_prompt
        assert "test_add_user passed" in first_prompt


================================================
FILE: maggy/tests/test_pi_adapter.py
================================================
"""Tests for PiAdapter — model registry, fallback, quota detection."""

from __future__ import annotations

import json
from unittest.mock import MagicMock, patch

import pytest

from maggy.adapters.pi import (
    ModelEntry,
    PiAdapter,
)


class TestModelRegistry:
    """Lookup and listing of the models known to a PiAdapter."""

    def test_default_models_loaded(self):
        """A default adapter lists six models."""
        registered = PiAdapter().list_models()
        assert len(registered) == 6

    def test_get_known_model(self):
        """"claude" resolves to an entry whose provider is anthropic."""
        entry = PiAdapter().get_model("claude")
        assert entry is not None
        assert entry.provider == "anthropic"

    def test_get_unknown_returns_none(self):
        """An unregistered name resolves to None."""
        missing = PiAdapter().get_model("nonexistent")
        assert missing is None

    def test_custom_models(self):
        """Passing ``models=`` replaces the default registry."""
        only_entry = ModelEntry("test", "local", "t1", "cheap", 0.0)
        adapter = PiAdapter(models=[only_entry])
        assert len(adapter.list_models()) == 1
        assert adapter.get_model("test") is not None


class TestFallbackChain:
    """Shape of the chain returned by ``PiAdapter.fallback_chain``."""

    def test_chain_excludes_start(self):
        """The starting model is not part of its own fallback chain."""
        fallbacks = PiAdapter().fallback_chain("kimi")
        assert "kimi" not in fallbacks

    def test_chain_ordered_by_cost(self):
        """The chain for a known model is non-empty."""
        # NOTE(review): despite the name, only non-emptiness is asserted
        # here; the ordering itself is not checked.
        fallbacks = PiAdapter().fallback_chain("kimi")
        assert len(fallbacks) > 0

    def test_unknown_start_returns_all(self):
        """An unknown starting model yields a chain of six entries."""
        fallbacks = PiAdapter().fallback_chain("nonexistent")
        assert len(fallbacks) == 6


class TestQuotaDetection:
    """``_detect_quota`` recognises quota / rate-limit messages."""

    def test_detects_rate_limit(self):
        """A "rate limit exceeded" message counts as a quota hit."""
        message = "Error: rate limit exceeded"
        assert PiAdapter()._detect_quota(message)

    def test_detects_429(self):
        """An HTTP 429 message counts as a quota hit."""
        message = "HTTP 429 Too Many Requests"
        assert PiAdapter()._detect_quota(message)

    def test_clean_output_no_quota(self):
        """Ordinary output is not flagged."""
        message = "Task completed."
        assert not PiAdapter()._detect_quota(message)


class TestBuildCommand:
    """Argument vectors produced by ``_build_command``."""

    def test_claude_command_format(self):
        """The claude command uses ``-p`` and skips permission prompts."""
        adapter = PiAdapter()
        argv = adapter._build_command(
            adapter.get_model("claude"), "hello", 5, "/tmp",
        )
        assert "claude" in argv[0]
        assert "-p" in argv
        assert "--dangerously-skip-permissions" in argv

    def test_non_claude_command(self):
        """A model with its own cli_command gets no skip-permissions flag."""
        kimi_entry = ModelEntry(
            "test", "local", "m1", "cheap",
            cli_command="kimi",
        )
        argv = PiAdapter(models=[kimi_entry])._build_command(
            kimi_entry, "hello", 5, "/tmp",
        )
        assert "kimi" in argv[0]
        assert "--dangerously-skip-permissions" not in argv


class _FakeStream:
    def __init__(self, lines: list[str]):
        self._lines = list(lines)
        self.writes: list[str] = []

    def readline(self) -> str:
        if self._lines:
            return self._lines.pop(0)
        return ""

    def write(self, text: str) -> None:
        self.writes.append(text)

    def flush(self) -> None:
        return None


class _FakeProcess:
    """Stand-in for a child process exposing fake ``stdin`` / ``stdout``."""

    def __init__(self, stdout_lines: list[str]):
        # stdout replays the given lines; stdin starts empty and only
        # records what gets written to it.
        self.stdout = _FakeStream(stdout_lines)
        self.stdin = _FakeStream([])


class TestRpcMode:
    """RPC helpers of PiAdapter, exercised with patched process plumbing."""

    def test_detect_pi_uses_path_lookup(self):
        """``_detect_pi`` is True when ``shutil.which`` finds a binary."""
        adapter = PiAdapter()
        which_patch = patch(
            "maggy.adapters.pi.shutil.which", return_value="/bin/pi",
        )
        with which_patch:
            assert adapter._detect_pi() is True

    def test_send_rpc_serializes_command(self):
        """The command is written as one compact JSON line; the reply is parsed."""
        adapter = PiAdapter()
        fake_proc = _FakeProcess(['{"ok": true}\n'])
        popen_patch = patch(
            "maggy.adapters.pi.subprocess.Popen", return_value=fake_proc,
        )
        with popen_patch:
            reply = adapter.send_rpc({"command": "ping"})
        assert reply == {"ok": True}
        assert fake_proc.stdin.writes == ['{"command":"ping"}\n']

    def test_switch_model_uses_rpc(self):
        """``switch_model`` sends a single ``set_model`` RPC and returns True."""
        adapter = PiAdapter()
        adapter.send_rpc = MagicMock(return_value={"ok": True})
        expected_call = {
            "command": "set_model",
            "provider": "anthropic",
            "model": "claude-sonnet-4",
        }
        switched = adapter.switch_model("anthropic", "claude-sonnet-4")
        assert switched is True
        adapter.send_rpc.assert_called_once_with(expected_call)

class TestPromptResult:
    """``_prompt_result`` maps an exit code plus raw stdout onto a result."""

    def test_parses_json_output(self):
        """A JSON payload populates output, cost and token counts."""
        body = {
            "result": "All tests pass",
            "cost_usd": 0.05,
            "usage": {"input_tokens": 1500, "output_tokens": 800},
        }
        raw = json.dumps(body).encode()
        result = PiAdapter()._prompt_result("claude", 0, raw)
        assert result.success is True
        assert result.output == "All tests pass"
        assert result.cost_usd == 0.05
        assert result.input_tokens == 1500
        assert result.output_tokens == 800

    def test_plain_text_fallback(self):
        """Non-JSON stdout is passed through with zero cost and tokens."""
        result = PiAdapter()._prompt_result("local", 0, b"Just text output")
        assert result.success is True
        assert result.output == "Just text output"
        assert result.cost_usd == 0.0
        assert result.input_tokens == 0

    def test_json_error_preserves_usage(self):
        """Exit code 1 marks failure but still carries cost and usage."""
        body = {
            "result": "Error occurred",
            "cost_usd": 0.01,
            "usage": {"input_tokens": 500, "output_tokens": 100},
        }
        raw = json.dumps(body).encode()
        result = PiAdapter()._prompt_result("claude", 1, raw)
        assert result.success is False
        assert result.cost_usd == 0.01
        assert result.input_tokens == 500


class TestStreaming:
    """``stream_events`` yields one parsed object per JSONL line."""

    @pytest.mark.asyncio
    async def test_stream_events_reads_jsonl(self):
        """Two JSON lines followed by "" yield exactly two events."""
        adapter = PiAdapter()
        stdout_lines = ['{"type":"start"}\n', '{"type":"done"}\n', ""]
        adapter._rpc_process = _FakeProcess(stdout_lines)

        received = [event async for event in adapter.stream_events()]

        assert received == [{"type": "start"}, {"type": "done"}]


================================================
FILE: maggy/tests/test_planning.py
================================================
"""Tests for dual-model planning orchestrator."""

from __future__ import annotations

from maggy.models.plan import Plan, PlanDiff, PlanStep
from maggy.planning import (
    DUAL_PLAN_THRESHOLD,
    PlanRequest,
    PlanningService,
    _similar,
)


class TestPlanModels:
    """Derived properties of the Plan and PlanDiff models."""

    def test_plan_step_count(self):
        """``step_count`` equals the number of steps given."""
        steps = [
            PlanStep(description="step 1"),
            PlanStep(description="step 2"),
        ]
        plan = Plan(task="test", model="claude", steps=steps)
        assert plan.step_count == 2

    def test_plan_diff_agreement_ratio(self):
        """Two agreed items and one primary-only item give a ratio of 2/3."""
        diff = PlanDiff(
            agreed=["a", "b"],
            conflicts=[],
            primary_only=["c"],
            counter_only=[],
        )
        expected_ratio = 2 / 3
        assert diff.agreement_ratio == expected_ratio

    def test_plan_diff_empty(self):
        """A default PlanDiff reports full agreement and no conflicts."""
        empty = PlanDiff()
        assert empty.agreement_ratio == 1.0
        assert empty.conflict_count == 0


class TestPlanningService:
    """Single vs. dual planning decisions made by PlanningService."""

    def test_below_threshold_single_plan(self, mock_cfg):
        """Blast score 2 produces a single plan without a diff."""
        planner = PlanningService(mock_cfg)
        outcome = planner.plan_task(
            PlanRequest(task="fix typo", blast_score=2),
        )
        assert outcome["mode"] == "single"
        assert outcome["diff"] is None

    def test_above_threshold_dual_plan(self, mock_cfg):
        """Blast score 6 produces a dual plan that includes a diff."""
        planner = PlanningService(mock_cfg)
        request = PlanRequest(
            task="refactor auth", blast_score=6,
        )
        outcome = planner.plan_task(request)
        assert outcome["mode"] == "dual"
        assert outcome["diff"] is not None

    def test_generate_plan(self, mock_cfg):
        """The generated plan echoes task and model and has at least one step."""
        generated = PlanningService(mock_cfg).generate_plan(
            "add feature", "claude",
        )
        assert generated.task == "add feature"
        assert generated.model == "claude"
        assert generated.step_count >= 1

    def test_diff_plans_identical(self, mock_cfg):
        """Plans for the same task from two models agree on three items."""
        planner = PlanningService(mock_cfg)
        primary = planner.generate_plan("task", "claude")
        counter = planner.generate_plan("task", "codex")
        comparison = planner.diff_plans(primary, counter)
        assert len(comparison.agreed) == 3

    def test_should_dual_plan_boundary(self, mock_cfg):
        """Dual planning is off at blast 3 and on at blast 4 and 10."""
        planner = PlanningService(mock_cfg)
        assert not planner.should_dual_plan(3)
        for blast in (4, 10):
            assert planner.should_dual_plan(blast)


class TestSimilarity:
    """Behaviour of the ``_similar`` string comparison helper."""

    def test_similar_strings(self):
        """Two descriptions differing only in the last word are similar."""
        left, right = "Implement auth module", "Implement auth service"
        assert _similar(left, right)

    def test_dissimilar_strings(self):
        """Unrelated descriptions are not similar."""
        left, right = "Add login button", "Fix database query"
        assert not _similar(left, right)

    def test_empty_string(self):
        """An empty string is not similar to a non-empty one."""
        assert not _similar("", "hello")


================================================
FILE: maggy/tests/test_registry.py
================================================
"""Tests for project registry and project config parsing."""

from __future__ import annotations

from maggy.config import MaggyConfig, ProjectConfig, _from_dict
from maggy.registry import ProjectRegistry


class TestProjectConfigParsing:
    """``_from_dict`` turns the ``projects`` list into project configs."""

    def test_from_dict_parses_projects(self):
        """Omitted flags default to icpg=True / cikg=False; given ones are kept."""
        alpha_raw = {
            "name": "alpha",
            "repo": "acme/alpha",
            "path": "~/code/alpha",
            "default_branch": "main",
        }
        beta_raw = {
            "name": "beta",
            "repo": "acme/beta",
            "path": "~/code/beta",
            "default_branch": "develop",
            "icpg": False,
            "cikg": True,
        }
        cfg = _from_dict({"projects": [alpha_raw, beta_raw]})

        assert [project.name for project in cfg.projects] == ["alpha", "beta"]

        # alpha did not specify the flags, so it shows the defaults.
        alpha = cfg.projects[0]
        assert alpha.icpg is True
        assert alpha.cikg is False

        # beta overrides both flags and the default branch.
        beta = cfg.projects[1]
        assert beta.default_branch == "develop"
        assert beta.icpg is False
        assert beta.cikg is True


class TestProjectRegistry:
    """List / get / add / remove operations of ProjectRegistry."""

    def test_registry_crud(self):
        """Projects can be listed, fetched, added and removed exactly once."""
        alpha = ProjectConfig(
            name="alpha", repo="acme/alpha",
            path="/tmp/alpha", default_branch="main",
        )
        beta = ProjectConfig(
            name="beta", repo="acme/beta",
            path="/tmp/beta", default_branch="develop",
        )
        projects = ProjectRegistry(MaggyConfig(projects=[alpha]))

        # Initial state: only alpha is registered.
        assert projects.list() == [alpha]
        assert projects.get("alpha") == alpha

        projects.add(beta)
        assert projects.get("beta") == beta

        # The first removal succeeds; a second one finds nothing to remove.
        assert projects.remove("alpha") is True
        assert projects.get("alpha") is None
        assert projects.remove("alpha") is False

    def test_add_duplicate_raises(self):
        """Adding an already registered project raises ValueError."""
        import pytest
        alpha = ProjectConfig(
            name="alpha", repo="acme/alpha",
            path="/tmp/alpha", default_branch="main",
        )
        projects = ProjectRegistry(MaggyConfig(projects=[alpha]))
        with pytest.raises(ValueError, match="already exists"):
            projects.add(alpha)


================================================
FILE: maggy/tests/test_repl_cmds.py
================================================
"""Tests for REPL slash command handlers."""

from __future__ import annotations

from dataclasses import dataclass, field
from unittest.mock import MagicMock

from maggy.cli_repl_cmds import (
    cmd_budget,
    cmd_claude_md,
    cmd_help,
    cmd_models,
    cmd_route,
    cmd_stats,
    cmd_use,
    dispatch,
)


@dataclass
class FakeState:
    """Minimal state object passed to the REPL command handlers in tests."""

    working_dir: str = field(default="/tmp/proj")
    session_id: str = field(default="s1")
    # default_factory gives every instance its own list.
    allowed_models: list[str] = field(default_factory=list)


def _mock_client():
    c = MagicMock()
    c.budget_summary.return_value = {
        "spent_today_usd": 1.5,
        "daily_limit_usd": 10.0,
        "status": "ok",
        "input_tokens": 12500,
        "output_tokens": 3400,
    }
    c.budget_by_provider.return_value = [
        {"provider": "anthropic", "spent_usd": 1.2},
        {"provider": "openai", "spent_usd": 0.3},
    ]
    c.models_heatmap.return_value = [
        {"model": "claude", "task_type": "security",
         "avg_reward": 0.95, "samples": 10},
    ]
    c.routing_rules.return_value = {
        "mode": "dynamic",
        "task_type_overrides": {
            "security": {"model": "claude", "reason": "deep"},
        },
        "model_performance": {
            "claude": {"success_rate": 1.0, "strengths": ["security"]},
        },
    }
    c.config.return_value = {
        "codebases": [{"key": "proj", "path": "/tmp/proj"}],
        "routing": {"mode": "dynamic"},
        "budget": {"daily_limit_usd": 10.0},
    }
    return c


def test_dispatch_stats(capsys):
    """``dispatch`` reports "/stats" as handled."""
    was_handled = dispatch("/stats", _mock_client(), FakeState())
    assert was_handled is True


def test_dispatch_unknown():
    """``dispatch`` returns False for a command it does not know."""
    client, state = MagicMock(), FakeState()
    was_handled = dispatch("/xyz123", client, state)
    assert was_handled is False


def test_cmd_stats(capsys):
    """``cmd_stats`` prints the spent amount or at least mentions the budget."""
    cmd_stats(_mock_client())
    printed = capsys.readouterr().out
    shows_amount = "1.5" in printed
    assert shows_amount or "budget" in printed.lower()


def test_cmd_budget(capsys):
    """``cmd_budget`` prints a provider name or its spend."""
    cmd_budget(_mock_client())
    printed = capsys.readouterr().out
    assert any(token in printed for token in ("anthropic", "1.2"))


def test_cmd_route(capsys):
    """``cmd_route`` prints the overridden task type or its target model."""
    cmd_route(_mock_client())
    printed = capsys.readouterr().out
    assert any(token in printed for token in ("security", "claude"))


def test_cmd_models(capsys):
    """``cmd_models`` prints the model name or its average reward."""
    cmd_models(_mock_client())
    printed = capsys.readouterr().out
    assert any(token in printed for token in ("claude", "0.95"))


def test_cmd_use_sets_models():
    """A comma-separated argument becomes the list of allowed models."""
    repl_state = FakeState()
    cmd_use("claude,codex", repl_state)
    expected = ["claude", "codex"]
    assert repl_state.allowed_models == expected


def test_cmd_use_reset():
    """The argument "all" empties a previously restricted model list."""
    repl_state = FakeState(allowed_models=["claude"])
    cmd_use("all", repl_state)
    assert repl_state.allowed_models == []


def test_cmd_claude_md_missing(capsys):
    """A message is printed when the working directory does not exist."""
    cmd_claude_md(FakeState(working_dir="/nonexistent_xyz_dir"))
    lowered = capsys.readouterr().out.lower()
    # NOTE(review): "no" is a substring of "not found", so the second
    # marker already covers the first and matches very broadly.
    assert any(marker in lowered for marker in ("not found", "no"))


def test_cmd_stats_shows_tokens(capsys):
    """Token counts are printed with thousands separators."""
    cmd_stats(_mock_client())
    printed = capsys.readouterr().out
    for formatted in ("12,500", "3,400"):
        assert formatted in printed


def test_cmd_route_shows_tiers(capsys):
    """The route output mentions both the cheap and the premium tier."""
    cmd_route(_mock_client())
    printed = capsys.readouterr().out
    for tier in ("cheap", "premium"):
        assert tier in printed.lower()


def test_cmd_help(capsys):
    """cmd_help output lists the /stats, /use and /help commands."""
    cmd_help()
    help_text = capsys.readouterr().out
    for command in ("/stats", "/use", "/help"):
        assert command in help_text


def test_cmd_health(capsys):
    """cmd_health renders the engram score and the mnemos state from the dashboard payload."""
    from maggy.cli_repl_cmds import cmd_health
    engram = {"health_score": 0.85, "active": 42, "total": 50}
    mnemos = {"state": "ok", "composite": 0.3}
    client = _mock_client()
    client.health_dashboard.return_value = {"engram": engram, "mnemos": mnemos}
    cmd_health(client)
    printed = capsys.readouterr().out
    # The score may be shown as a percentage or as the raw fraction.
    assert "85%" in printed or "0.85" in printed
    assert "ok" in printed.lower()


def test_dispatch_health(capsys):
    """dispatch reports '/health' as a handled command."""
    dashboard = {
        "engram": {"health_score": 0.9, "active": 10, "total": 12},
        "mnemos": {"state": "ok", "composite": 0.2},
    }
    client = _mock_client()
    client.health_dashboard.return_value = dashboard
    session = FakeState()
    assert dispatch("/health", client, session) is True


def test_help_lists_health(capsys):
    """The help text includes the /health command."""
    cmd_help()
    help_text = capsys.readouterr().out
    assert "/health" in help_text


def test_models_empty_shows_known(capsys):
    """With an empty heatmap, cmd_models still prints 'local' and 'claude'."""
    from maggy.cli_repl_cmds import cmd_models
    stub = _mock_client()
    stub.models_heatmap.return_value = []
    cmd_models(stub)
    printed = capsys.readouterr().out
    for model_name in ("local", "claude"):
        assert model_name in printed


def test_use_warns_unknown_model(capsys):
    """/use with an unrecognised model name prints a warning mentioning 'unknown'.

    The match is case-insensitive, so 'Unknown', 'UNKNOWN', etc. are all
    accepted.
    """
    state = FakeState()
    cmd_use("badmodel,claude", state)
    out = capsys.readouterr().out
    # Lower-casing already covers the capitalised spelling, so a separate
    # '"Unknown" in out' check could never change the outcome.
    assert "unknown" in out.lower()


def test_budget_subscription_plan(capsys):
    """When the summary's plan is 'subscription', cmd_budget output says so (any case)."""
    summary = {
        "spent_today_usd": 0, "daily_limit_usd": 10.0,
        "status": "ok", "plan": "subscription",
    }
    stub = _mock_client()
    stub.budget_summary.return_value = summary
    stub.budget_by_provider.return_value = []
    cmd_budget(stub)
    lowered = capsys.readouterr().out.lower()
    assert "subscription" in lowered


def test_health_graceful_failure(capsys):
    """cmd_health does not propagate an exception raised by health_dashboard."""
    from maggy.cli_repl_cmds import cmd_health
    stub = _mock_client()
    stub.health_dashboard.side_effect = Exception("unreachable")
    cmd_health(stub)
    printed = capsys.readouterr().out
    # Either nothing is printed or the output still mentions health.
    assert printed == "" or "health" in printed.lower()


def test_stats_server_down(capsys):
    """cmd_stats does not propagate an exception raised by budget_summary."""
    stub = _mock_client()
    stub.budget_summary.side_effect = Exception("unreachable")
    # No assertion on the output: the test passes as long as the call
    # returns without raising (output may be empty or partial).
    cmd_stats(stub)


================================================
FILE: maggy/tests/test_rollback.py
================================================
"""Tests for rollback and savepoint recovery."""

from __future__ import annotations

import subprocess

import pytest

from maggy.recovery.rollback import RollbackManager


def _git(repo, *args: str) -> None:
    """Run ``git <args>`` with ``repo`` as the working directory.

    Raises ``subprocess.CalledProcessError`` if git exits non-zero.
    """
    command = ["git"]
    command.extend(args)
    subprocess.run(command, cwd=repo, check=True)


def _init_repo(repo) -> None:
    """Create a git repository at ``repo`` with one commit of ``tracked.txt``.

    ``repo`` is used with the ``/`` operator, so it is expected to be a
    ``pathlib.Path``-like object. After this returns, ``tracked.txt`` contains
    ``"v1\\n"`` and is committed with the message ``init``.
    """
    _git(repo, "init")
    # Identity is set locally so the commit works on machines/CI runners
    # that have no global git user configured.
    _git(repo, "config", "user.email", "maggy@example.com")
    _git(repo, "config", "user.name", "Maggy Tests")
    # Disable signing locally: a developer's global commit.gpgsign /
    # tag.gpgsign = true would otherwise make commits or tags in this
    # throwaway repo fail (or prompt for a key) during the test run.
    _git(repo, "config", "commit.gpgsign", "false")
    _git(repo, "config", "tag.gpgsign", "false")
    (repo / "tracked.txt").write_text("v1\n")
    _git(repo, "add", "tracked.txt")
    _git(repo, "commit", "-m", "init")


class TestRollbackManager:
    """Savepoint create/list/rollback/delete against a freshly initialised repo."""

    @pytest.mark.asyncio
    async def test_create_and_list_savepoints(self, tmp_path):
        """A created savepoint is named after the session and shows up in the listing."""
        _init_repo(tmp_path)
        repo_dir = str(tmp_path)
        mgr = RollbackManager()
        created = await mgr.create_savepoint("session-1", repo_dir)
        assert created == "maggy-save-session-1"
        listed = await mgr.list_savepoints(repo_dir)
        assert listed == [created]

    @pytest.mark.asyncio
    async def test_rollback_resets_worktree(self, tmp_path):
        """Rolling back restores a tracked file modified after the savepoint."""
        _init_repo(tmp_path)
        repo_dir = str(tmp_path)
        tracked = tmp_path / "tracked.txt"
        mgr = RollbackManager()
        await mgr.create_savepoint("session-1", repo_dir)
        tracked.write_text("changed\n")
        rolled_back = await mgr.rollback("session-1", repo_dir)
        assert rolled_back is True
        assert tracked.read_text() == "v1\n"

    @pytest.mark.asyncio
    async def test_delete_savepoint(self, tmp_path):
        """Deleting the only savepoint leaves the listing empty."""
        _init_repo(tmp_path)
        repo_dir = str(tmp_path)
        mgr = RollbackManager()
        await mgr.create_savepoint("session-1", repo_dir)
        deleted = await mgr.delete_savepoint("session-1", repo_dir)
        assert deleted is True
        remaining = await mgr.list_savepoints(repo_dir)
        assert remaining == []


================================================
FILE: maggy/tests/test_routes_escalation.py
================================================
"""Tests for /api/escalations endpoints."""

from __future__ import annotations

from fastapi.testclient import TestClient


def _app(tmp_path):
    """Assemble a bare FastAPI app exposing only the escalation router.

    The app state carries a MaggyConfig and an Escalator, each pointed at a
    database path under ``tmp_path``.
    """
    from fastapi import FastAPI
    from maggy.api.routes_escalation import router
    from maggy.config import DashboardConfig, MaggyConfig, OrgConfig, StorageConfig
    from maggy.escalation.protocol import Escalator

    store_db = str(tmp_path / "store.db")
    settings = MaggyConfig(
        org=OrgConfig(name="test"),
        storage=StorageConfig(path=store_db),
        dashboard=DashboardConfig(),
    )
    application = FastAPI()
    application.state.cfg = settings
    application.state.escalator = Escalator(tmp_path / "esc.db")
    application.include_router(router)
    return application


def test_list_pending_empty(tmp_path):
    """GET /api/escalations on a fresh app returns 200 with an empty list."""
    api = TestClient(_app(tmp_path))
    response = api.get("/api/escalations")
    assert response.status_code == 200
    listing = response.json()
    assert listing == []


def test_create_and_list(tmp_path):
    """A POSTed escalation returns 201 and its id appears in the subsequent listing."""
    api = TestClient(_app(tmp_path))
    payload = {
        "session_id": "sess-1",
        "reason": "test failure",
        "context": {"task_id": "T-1"},
    }
    created = api.post("/api/escalations", json=payload)
    assert created.status_code == 201
    new_id = created.json()["id"]

    listing = api.get("/api/escalations")
    listed_ids = [entry["id"] for entry in listing.json()]
    assert new_id in listed_ids


def test_resolve_escalation(tmp_path):
    """Resolving an escalation returns status 'resolved' and empties the listing."""
    api = TestClient(_app(tmp_path))
    payload = {
        "session_id": "sess-2",
        "reason": "stuck",
        "context": {},
    }
    created = api.post("/api/escalations", json=payload)
    new_id = created.json()["id"]

    resolve_url = f"/api/escalations/{new_id}/resolve"
    resolved = api.post(resolve_url, json={"guidance": "retry with claude"})
    assert resolved.status_code == 200
    assert resolved.json()["status"] == "resolved"

    # After resolution the GET listing no longer contains the escalation.
    listing = api.get("/api/escalations")
    assert listing.json() == []


def test_resolve_not_found(tmp_path):
    """Resolving an id that was never created returns 404."""
    api = TestClient(_app(tmp_path))
    guidance = {"guidance": "n/a"}
    response = api.post("/api/escalations/bad-id/resolve", json=guidance)
    assert response.status_code == 404


================================================
FILE: maggy/tests/test_routes_observability.py
================================================
"""Tests for /api/observability endpoints."""

from __future__ import annotations

from fastapi.testclient import TestClient


def _app(tmp_path):
    """Assemble a bare FastAPI app exposing only the observability router.

    The app state carries a MaggyConfig and an ObservabilityCollector, each
    pointed at a database path under ``tmp_path``.
    """
    from fastapi import FastAPI
    from maggy.api.routes_observability import router
    from maggy.config import DashboardConfig, MaggyConfig, OrgConfig, StorageConfig
    from maggy.observability.collector import ObservabilityCollector

    store_db = str(tmp_path / "store.db")
    settings = MaggyConfig(
        org=OrgConfig(name="test"),
        storage=StorageConfig(path=store_db),
        dashboard=DashboardConfig(),
    )
    application = FastAPI()
    application.state.cfg = settings
    application.state.observability = ObservabilityCollector(tmp_path / "obs.db")
    application.include_router(router)
    return application


def test_get_signals_empty(tmp_path):
    """Signals for a project with nothing recorded: 200 and an empty list."""
    api = TestClient(_app(tmp_path))
    response = api.get("/api/observability/signals/myproject")
    assert response.status_code == 200
    signals = response.json()
    assert signals == []


def test_record_and_read(tmp_path):
    """A recorded signal returns 201 and can be read back for its project."""
    api = TestClient(_app(tmp_path))
    payload = {
        "project": "webapp",
        "signal_type": "deploy_status",
        "value": 1.0,
    }
    recorded = api.post("/api/observability/record", json=payload)
    assert recorded.status_code == 201

    fetched = api.get("/api/observability/signals/webapp").json()
    assert len(fetched) == 1
    first = fetched[0]
    assert first["signal_type"] == "deploy_status"


================================================
FILE: maggy/tests/test_routes_projects.py
================================================
"""Tests for /api/projects endpoints."""

from __future__ import annotations

from fastapi.testclient import TestClient

from maggy.registry import ProjectRegistry


def _app(mock_cfg):
    """Assemble a bare FastAPI app exposing only the projects router.

    ``mock_cfg`` is stored on the app state and also used to construct the
    ProjectRegistry.
    """
    from fastapi import FastAPI
    from maggy.api.routes_projects import router

    application = FastAPI()
    state = application.state
    state.cfg = mock_cfg
    state.registry = ProjectRegistry(mock_cfg)
    application.include_router(router)
    return application


def test_list_projects_empty(mock_cfg):
    """GET /api/projects on a fresh registry returns 200 with an empty list."""
    api = TestClient(_app(mock_cfg))
    response = api.get("/api/projects")
    assert response.status_code == 200
    projects = response.json()
    assert projects == []


def test_add_and_list_project(mock_cfg):
    """A POSTed project returns 201/'created' and its name appears in the listing."""
    api = TestClient(_app(mock_cfg))
    payload = {
        "name": "webapp",
        "repo": "acme/webapp",
        "path": "/tmp/webapp",
    }
    created = api.post("/api/projects", json=payload)
    assert created.status_code == 201
    assert created.json()["status"] == "created"

    listing = api.get("/api/projects")
    listed_names = [project["name"] for project in listing.json()]
    assert "webapp" in listed_names


def test_get_project_not_found(mock_cfg):
    """Fetching a project name that was never added returns 404."""
    api = TestClient(_app(mock_cfg))
    status = api.get("/api/projects/nonexistent").status_code
    assert status == 404


def test_add_duplicate_project(mock_cfg):
    """POSTing the same project twice yields 409 on the second request."""
    api = TestClient(_app(mock_cfg))
    payload = {
        "name": "dup",
        "repo": "acme/dup",
        "path": "/tmp/dup",
    }
    api.post("/api/projects", json=payload)  # first insert; response not inspected
    second = api.post("/api/projects", json=payload)
    assert second.status_code == 409


def test_delete_project(mock_cfg):
    """DELETE returns 200/'removed' and the project is 404 afterwards."""
    api = TestClient(_app(mock_cfg))
    payload = {
        "name": "to-delete",
        "repo": "acme/td",
        "path": "/tmp/td",
    }
    api.post("/api/projects", json=payload)
    project_url = "/api/projects/to-delete"

    removed = api.delete(project_url)
    assert removed.status_code == 200
    assert removed.json()["status"] == "removed"

    lookup = api.get(project_url)
    assert lookup.status_code == 404


================================================
FILE: maggy/tests/test_routing_config.py
================================================
"""Tests for routing config — stakes patterns, cascade policy, YAML roundtrip."""

from __future__ import annotations

from pathlib import Path

import yaml

from maggy.routing_rules import CascadePolicy
from maggy.routing_rules_defaults import default_rules
from maggy.routing_rules_io import load, save, to_dict


class TestStakesPatterns:
    """Stakes patterns present in the rules returned by default_rules()."""

    def test_default_has_high_patterns(self):
        """High stakes include the 'auth' file pattern and the 'security' task type."""
        high = default_rules().stakes.high
        assert "auth" in high.file_patterns
        assert "security" in high.task_types

    def test_default_has_medium_patterns(self):
        """Medium stakes include the 'api' file pattern and the 'feature' task type."""
        medium = default_rules().stakes.medium
        assert "api" in medium.file_patterns
        assert "feature" in medium.task_types

    def test_default_low_has_empty_patterns(self):
        """Low stakes define no file patterns."""
        low = default_rules().stakes.low
        assert low.file_patterns == []


class TestCascadePolicy:
    """Field values of CascadePolicy with and without constructor arguments."""

    def test_defaults(self):
        """A no-argument CascadePolicy carries the expected default values."""
        defaults = CascadePolicy()
        assert defaults.enabled is True
        expected_fields = (
            ("min_blast", 5),
            ("min_stakes", "medium"),
            ("max_attempts", 3),
            ("quality_threshold", 3),
        )
        for field, expected in expected_fields:
            assert getattr(defaults, field) == expected

    def test_custom_values(self):
        """Keyword arguments override the defaults."""
        overrides = {
            "enabled": False,
            "min_blast": 3,
            "min_stakes": "low",
            "max_attempts": 5,
        }
        custom = CascadePolicy(**overrides)
        assert custom.enabled is False
        assert custom.min_blast == 3


class TestYamlRoundtrip:
    """save() followed by load() keeps rule contents intact."""

    def test_roundtrip_preserves_stakes(self, tmp_path: Path):
        """A file pattern appended before saving is present after loading."""
        target = tmp_path / "rules.yaml"
        rules = default_rules()
        rules.stakes.high.file_patterns.append("custom_critical")
        save(rules, target)
        reloaded = load(target)
        assert "custom_critical" in reloaded.stakes.high.file_patterns

    def test_roundtrip_preserves_cascade(self, tmp_path: Path):
        """A changed cascade.min_blast survives the round trip."""
        target = tmp_path / "rules.yaml"
        rules = default_rules()
        rules.cascade.min_blast = 7
        save(rules, target)
        reloaded = load(target)
        assert reloaded.cascade.min_blast == 7

    def test_roundtrip_preserves_conventions(self, tmp_path: Path):
        """The number of conventions is unchanged by the round trip."""
        target = tmp_path / "rules.yaml"
        rules = default_rules()
        save(rules, target)
        reloaded = load(target)
        assert len(reloaded.conventions) == len(rules.conventions)

    def test_user_edits_preserved(self, tmp_path: Path):
        """A value edited directly in the YAML file is what load() returns."""
        target = tmp_path / "rules.yaml"
        save(default_rules(), target)
        # Simulate a hand edit: parse the file, change one value, write it back.
        document = yaml.safe_load(target.read_text())
        document["cascade_policy"]["min_blast"] = 2
        target.write_text(yaml.safe_dump(document, sort_keys=False))
        reloaded = load(target)
        assert reloaded.cascade.min_blast == 2

    def test_missing_file_seeds_defaults(self, tmp_path: Path):
        """Loading a path that does not exist yields default-looking rules."""
        seeded = load(tmp_path / "nonexistent.yaml")
        assert seeded.version == 1
        assert seeded.cascade.enabled is True
        assert "auth" in seeded.stakes.high.file_patterns


class TestToDict:
    """Top-level keys produced by to_dict() for the default rules."""

    def test_stakes_in_output(self):
        """The dict has a 'stakes_patterns' section containing 'high'."""
        serialized = to_dict(default_rules())
        assert "stakes_patterns" in serialized
        stakes_section = serialized["stakes_patterns"]
        assert "high" in stakes_section

    def test_cascade_in_output(self):
        """The dict has a 'cascade_policy' section whose 'enabled' is True."""
        serialized = to_dict(default_rules())
        assert "cascade_policy" in serialized
        cascade_section = serialized["cascade_policy"]
        assert cascade_section["enabled"] is True


class TestDefaultTiers:
    """Checks on DEFAULT_TIERS: no 'gpt' tier, and 'codex' is the primary tier."""

    def test_no_gpt_in_defaults(self):
        """No default tier is named 'gpt'."""
        from maggy.process.model_router import DEFAULT_TIERS
        tier_names = [tier.name for tier in DEFAULT_TIERS]
        assert "gpt" not in tier_names

    def test_codex_is_primary(self):
        """Exactly one tier is named 'codex' and its role is 'primary'."""
        from maggy.process.model_router import DEFAULT_TIERS
        codex_tiers = [tier for tier in DEFAULT_TIERS if tier.name == "codex"]
        assert len(codex_tiers) == 1
        only_codex = codex_tiers[0]
        assert only_codex.role == "primary"

    def test_codex_handles_complex(self):
        """The codex tier accepts complexity of at least 8."""
        from maggy.process.model_router import DEFAULT_TIERS
        codex_tiers = [tier for tier in DEFAULT_TIERS if tier.name == "codex"]
        codex = codex_tiers[0]
        assert codex.complexity_max >= 8

    def test_local_kimi_handle_simple(self):
        """The local and kimi tiers are capped at complexity 5 or lower."""
        from maggy.process.model_router import DEFAULT_TIERS
        local_tiers = [tier for tier in DEFAULT_TIERS if tier.name == "local"]
        local = local_tiers[0]
        kimi_tiers = [tier for tier in DEFAULT_TIERS if tier.name == "kimi"]
        kimi = kimi_tiers[0]
        assert local.complexity_max <= 5
        assert kimi.complexity_max <= 5


================================================
FILE: maggy/tests/test_routing_rules.py
================================================
"""Tests for routing rules — load, save, apply, learn."""

from __future__ import annotations

from pathlib import Path

import pytest

from maggy.routing_rules import (
    ModelOverride,
    PerformanceRecord,
    RoutingRules,
    apply_override,
    learn_override,
    record_outcome,
)
from maggy.routing_rules_defaults import default_rules
from maggy.routing_rules_io import load, save


@pytest.fixture()
def rules_path(tmp_path: Path) -> Path:
    """Path of a routing-rules YAML file inside the per-test temp directory."""
    target = tmp_path / "routing-rules.yaml"
    return target


class TestDefaultRules:
    """Contents seeded by default_rules()."""

    def test_seeds_task_type_overrides(self):
        """Overrides exist for the docs, security and tests task types."""
        overrides = default_rules().task_type_overrides
        for task_type in ("docs", "security", "tests"):
            assert task_type in overrides

    def test_seeds_pipeline_phases(self):
        """The spec and tdd_red phases exist; tdd_green uses the 'auto' model."""
        phases = default_rules().pipeline_phases
        assert "spec" in phases
        assert "tdd_red" in phases
        assert phases["tdd_green"].model == "auto"

    def test_seeds_model_performance(self):
        """Performance records exist for the claude and local models."""
        performance = default_rules().model_performance
        for model_name in ("claude", "local"):
            assert model_name in performance


class TestLoadSave:
    """load()/save() behaviour against a path in the temp directory."""

    def test_load_creates_default(self, rules_path: Path):
        """Loading a path with no file creates the file and returns seeded rules."""
        seeded = load(rules_path)
        assert rules_path.exists()
        assert "docs" in seeded.task_type_overrides

    def test_roundtrip(self, rules_path: Path):
        """Version and the set of override keys survive save + load."""
        before = default_rules()
        save(before, rules_path)
        after = load(rules_path)
        assert after.version == before.version
        keys_after = set(after.task_type_overrides)
        keys_before = set(before.task_type_overrides)
        assert keys_after == keys_before

    def test_load_existing(self, rules_path: Path):
        """Rules loaded from a saved file keep the security -> claude override."""
        save(default_rules(), rules_path)
        loaded = load(rules_path)
        security_override = loaded.task_type_overrides["security"]
        assert security_override.model == "claude"


class TestApplyOverride:
    """What apply_override() returns for phase, task-type and low-confidence cases."""

    def test_phase_takes_priority(self):
        """With the 'spec' phase given, the result is 'claude' for a feature task."""
        chosen = apply_override(default_rules(), "feature", "spec")
        assert chosen == "claude"

    def test_auto_phase_returns_none(self):
        """The 'tdd_green' phase yields no override for a feature task."""
        chosen = apply_override(default_rules(), "feature", "tdd_green")
        assert chosen is None

    def test_task_type_override(self):
        """Without a phase, the security task type maps to 'claude'."""
        chosen = apply_override(default_rules(), "security")
        assert chosen == "claude"

    def test_no_override_returns_none(self):
        """Without a phase, the feature task type yields no override."""
        chosen = apply_override(default_rules(), "feature")
        assert chosen is None

    def test_low_confidence_ignored(self):
        """An override constructed with 0.3 as its third argument is not applied."""
        weak_override = ModelOverride("kimi", "weak", 0.3)
        rules = RoutingRules(task_type_overrides={"test": weak_override})
        chosen = apply_override(rules, "test")
        assert chosen is None


class TestRecordOutcome:
    """Effect of record_outcome() on the model_performance table."""

    def test_updates_success_rate(self, rules_path: Path):
        """A success for claude brings tasks_completed to 7 with success_rate above 0.9."""
        rules = default_rules()
        record_outcome(rules, "claude", "feature", True, rules_path)
        claude_perf = rules.model_performance["claude"]
        assert claude_perf.tasks_completed == 7
        assert claude_perf.success_rate > 0.9

    def test_creates_new_model(self, rules_path: Path):
        """Recording for a model absent from the defaults adds it with success_rate 1.0."""
        rules = default_rules()
        record_outcome(rules, "gemini", "feature", True, rules_path)
        performance = rules.model_performance
        assert "gemini" in performance
        assert performance["gemini"].success_rate == 1.0

    def test_records_failure(self, rules_path: Path):
        """A failure after one success halves the rate and lists the task type as a weakness."""
        starting = PerformanceRecord(tasks_completed=1, success_rate=1.0)
        rules = RoutingRules(model_performance={"test": starting})
        record_outcome(rules, "test", "security", False, rules_path)
        updated = rules.model_performance["test"]
        assert updated.success_rate == 0.5
        assert "security" in updated.weaknesses


class TestLearnOverride:
    """learn_override() updates the in-memory rules and the file on disk."""

    def test_adds_new_override(self, rules_path: Path):
        """The learned override is stored with its model and source 'learned'."""
        rules = default_rules()
        reason = "Codex too slow for frontend (280s vs 122s)"
        learn_override(rules, "frontend", "claude", reason, 0.8, rules_path)
        learned = rules.task_type_overrides["frontend"]
        assert learned.model == "claude"
        assert learned.source == "learned"

    def test_persists_to_disk(self, rules_path: Path):
        """After learning, a fresh load() from the same path contains the override."""
        rules = default_rules()
        save(rules, rules_path)
        learn_override(rules, "frontend", "claude", "test", 0.9, rules_path)
        from_disk = load(rules_path)
        assert "frontend" in from_disk.task_type_overrides


================================================
FILE: maggy/tests/test_routing_service.py
================================================
"""Tests for RoutingService — routing decisions and learning."""

from __future__ import annotations

from maggy.routing import RoutingContext, RoutingService
from maggy.scores import MIN_SAMPLES


class TestRoutingDecisions:
    def test_low_complexity_routes_cheap(self, mock_cfg):
        rs = RoutingService(mock_cfg)
        ctx = RoutingContext(blast_score=1, task_type="general")
        decision = rs.route(ctx)
        name = (
            decision.primary
            if isinstance(decision.primary, str)
            else decision.primary.name
        )
        assert name in ("kimi", "local", "deepseek")

    def test_high_complexity_routes_premium(self, mock_cfg):
        rs = RoutingService(mock_cfg)
        ctx = RoutingContext(blast_score=9, task_type="general")
        decision = rs.route(ctx)
        name = (
            decision.primary
            if isinstance(decision.primary, str)
            else decision.primary.name
        )
        assert name in ("codex", "claude")

    def test_security_sensitive_avoids_cheap(self, mock_cfg):
        rs = RoutingService(mock_cfg)
        ctx = RoutingContext(
            blast_score=3,
            task_type="security",
            security_sensitive=True,
        )
        decision = rs.route(ctx)
        name = (
            decision.primary
            if isinstance(decision.primary, str)
            else decision.primary.name
        )
        assert name in ("codex", "claude")


class TestRoutingLearning:
    """Outcome recording, learned routing and blast-tier bucketing on RoutingService."""

    def test_record_outcome(self, mock_cfg):
        """One recorded outcome produces a heatmap with a single entry."""
        service = RoutingService(mock_cfg)
        service.record_outcome("claude", "bug", 8, 0.95)
        heatmap = service.get_heatmap()
        assert len(heatmap) == 1

    def test_learned_override(self, mock_cfg):
        """After more than MIN_SAMPLES good codex outcomes, the same context routes to codex."""
        service = RoutingService(mock_cfg)
        # Record one more sample than the MIN_SAMPLES threshold.
        sample_count = MIN_SAMPLES + 1
        for _ in range(sample_count):
            service.record_outcome("codex", "bug", 2, 0.99)
        decision = service.route(RoutingContext(blast_score=2, task_type="bug"))
        if isinstance(decision.primary, str):
            chosen = decision.primary
        else:
            chosen = decision.primary.name
        assert chosen == "codex"

    def test_blast_tier_mapping(self, mock_cfg):
        """_blast_tier maps 0 and 3 to low, 5 to medium and 8 to high."""
        service = RoutingService(mock_cfg)
        expected_tiers = ((0, "low"), (3, "low"), (5, "medium"), (8, "high"))
        for score, tier in expected_tiers:
            assert service._blast_tier(score) == tier


================================================
FILE: maggy/tests/test_scores.py
================================================
"""Tests for RewardTable — record, query, best_model, heatmap."""

from __future__ import annotations

from maggy.scores import MIN_SAMPLES, RewardTable


class TestRewardRecord:
    """Records written to a RewardTable show up in its heatmap."""

    def test_record_and_heatmap(self, mock_cfg):
        """A single record yields one heatmap row carrying the model name."""
        table = RewardTable(mock_cfg)
        table.record("claude", "bug", "high", 0.9)
        rows = table.heatmap()
        assert len(rows) == 1
        first_row = rows[0]
        assert first_row["model"] == "claude"

    def test_multiple_records(self, mock_cfg):
        """Records for two different models yield two heatmap rows."""
        table = RewardTable(mock_cfg)
        for model, reward in (("claude", 0.9), ("gpt", 0.7)):
            table.record(model, "bug", "high", reward)
        rows = table.heatmap()
        assert len(rows) == 2


class TestBestModel:
    """RewardTable.best_model() with no data, too little data and enough data."""

    def test_no_data_returns_none(self, mock_cfg):
        """An empty table has no best model."""
        table = RewardTable(mock_cfg)
        winner = table.best_model("bug", "high")
        assert winner is None

    def test_insufficient_samples_returns_none(self, mock_cfg):
        """One sample short of MIN_SAMPLES still yields no best model."""
        table = RewardTable(mock_cfg)
        too_few = MIN_SAMPLES - 1
        for _ in range(too_few):
            table.record("claude", "bug", "high", 0.9)
        winner = table.best_model("bug", "high")
        assert winner is None

    def test_sufficient_samples_returns_best(self, mock_cfg):
        """With MIN_SAMPLES records each, the model with the higher reward wins."""
        table = RewardTable(mock_cfg)
        # All claude records are written before any gpt record.
        for model, reward in (("claude", 0.9), ("gpt", 0.5)):
            for _ in range(MIN_SAMPLES):
                table.record(model, "bug", "high", reward)
        winner = table.best_model("bug", "high")
        assert winner == "claude"


class TestHeatmap:
    """Shape of RewardTable.heatmap() output."""

    def test_empty_heatmap(self, mock_cfg):
        """A table with no records returns an empty list."""
        table = RewardTable(mock_cfg)
        rows = table.heatmap()
        assert rows == []

    def test_heatmap_groups_correctly(self, mock_cfg):
        """Two records differing in task type and tier yield two rows."""
        table = RewardTable(mock_cfg)
        samples = (
            ("claude", "bug", "high", 0.9),
            ("claude", "feature", "low", 0.8),
        )
        for sample in samples:
            table.record(*sample)
        rows = table.heatmap()
        assert len(rows) == 2


================================================
FILE: maggy/tests/test_setup_routes.py
================================================
"""Tests for setup and onboarding routes."""

from __future__ import annotations

from pathlib import Path
from unittest.mock import patch

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from maggy.api.routes_setup import router as setup_router
from maggy.config import (
    DashboardConfig,
    MaggyConfig,
    StorageConfig,
)


@pytest.fixture
def setup_app(tmp_path: Path) -> FastAPI:
    """FastAPI app that mounts only the setup router.

    App state is populated with a MaggyConfig (storage under ``tmp_path``,
    dashboard auth_mode 'local'), ``configured=True`` and ``mode='local'``.
    """
    storage = StorageConfig(path=str(tmp_path / "s.db"))
    dashboard = DashboardConfig(auth_mode="local")
    application = FastAPI()
    state = application.state
    state.cfg = MaggyConfig(storage=storage, dashboard=dashboard)
    state.configured = True
    state.mode = "local"
    application.include_router(setup_router)
    return application


@pytest.fixture
def client(setup_app: FastAPI) -> TestClient:
    """TestClient wrapping the app produced by the ``setup_app`` fixture."""
    test_client = TestClient(setup_app)
    return test_client


class TestSetupStatus:
    """GET /api/setup/status on the app built by the ``setup_app`` fixture."""

    def test_returns_steps(self, client: TestClient):
        """The payload has five steps and reports mode 'local'."""
        response = client.get("/api/setup/status")
        assert response.status_code == 200
        payload = response.json()
        assert "steps" in payload
        assert len(payload["steps"]) == 5
        assert payload["mode"] == "local"

    def test_missing_token_detected(
        self, client: TestClient,
    ):
        """The first step is the GitHub token and it is reported as missing."""
        # NOTE(review): presumably relies on no GitHub token being configured
        # in the environment running the tests -- confirm.
        payload = client.get("/api/setup/status").json()
        first_step = payload["steps"][0]
        assert first_step["label"] == "GitHub token"
        assert first_step["status"] == "missing"

    def test_progress_format(self, client: TestClient):
        """The progress string contains a '/' separator."""
        payload = client.get("/api/setup/status").json()
        progress = payload["progress"]
        assert "/" in progress

    def test_configured_false_in_local(
        self, client: TestClient,
    ):
        """The payload's 'configured' flag is False."""
        payload = client.get("/api/setup/status").json()
        assert payload["configured"] is False


class TestSetupConfigure:
    """POST /api/setup/configure with ``maggy.config.save`` patched out."""

    @patch("maggy.config.save")
    def test_updates_org(self, mock_save, client):
        """Posting an org name returns 200/saved and calls save exactly once."""
        payload = {"org_name": "Protaige"}
        response = client.post("/api/setup/configure", json=payload)
        assert response.status_code == 200
        assert response.json()["saved"] is True
        mock_save.assert_called_once()

    @patch("maggy.config.save")
    def test_updates_github_repos(
        self, mock_save, client,
    ):
        """Posting a GitHub org and repo list is reported as saved."""
        payload = {
            "github_org": "protaige",
            "github_repos": ["api", "web"],
        }
        response = client.post("/api/setup/configure", json=payload)
        assert response.json()["saved"] is True

    @patch("maggy.config.save")
    def test_empty_body_is_noop(
        self, mock_save, client,
    ):
        """Posting an empty JSON object is still reported as saved."""
        response = client.post("/api/setup/configure", json={})
        assert response.json()["saved"] is True


class TestDiscoverRepos:
    """GET /api/setup/discover-repos."""

    def test_returns_repos(self, client: TestClient):
        """The endpoint returns 200 and a payload whose 'repos' value is a list."""
        response = client.get("/api/setup/discover-repos")
        assert response.status_code == 200
        payload = response.json()
        assert "repos" in payload
        repos = payload["repos"]
        assert isinstance(repos, list)


================================================
FILE: maggy/tests/test_stakes.py
================================================
"""Tests for stakes classification — HIGH/MEDIUM/LOW from task metadata."""

from __future__ import annotations

from maggy.providers.base import Task
from maggy.routing_rules import StakesLevel, StakesPatterns
from maggy.services.stakes import classify_stakes


def _task(title: str, desc: str = "", raw: dict | None = None) -> Task:
    """Build a Task with the fixed id 'T-1'; a falsy ``raw`` becomes an empty dict."""
    payload = raw if raw else {}
    return Task(id="T-1", title=title, description=desc, raw=payload)


class TestHighStakes:
    """Tasks that classify_stakes() is expected to rate as 'high'."""

    def test_auth_file_in_title(self):
        """A title mentioning auth.py is high stakes."""
        verdict = classify_stakes(_task("Fix auth.py login bug"))
        assert verdict.level == "high"

    def test_billing_task_type(self):
        """A raw task_type of 'billing' is high stakes."""
        billing = _task("Update billing", raw={"task_type": "billing"})
        assert classify_stakes(billing).level == "high"

    def test_security_task_type(self):
        """A raw task_type of 'security' is high stakes."""
        security = _task("Patch XSS", raw={"task_type": "security"})
        assert classify_stakes(security).level == "high"

    def test_production_keyword_in_desc(self):
        """A description mentioning production data is high stakes."""
        deploy = _task("Deploy fix", "Affects production data")
        assert classify_stakes(deploy).level == "high"

    def test_env_file_pattern(self):
        """A title mentioning .env is high stakes."""
        verdict = classify_stakes(_task("Update .env variables"))
        assert verdict.level == "high"

    def test_migration_in_title(self):
        """A title mentioning a migration is high stakes."""
        verdict = classify_stakes(_task("Run database migration"))
        assert verdict.level == "high"


class TestMediumStakes:
    """Tasks that classify_stakes() is expected to rate as 'medium'."""

    def test_api_route_file(self):
        """A title mentioning API routes is medium stakes."""
        verdict = classify_stakes(_task("Fix API routes handler"))
        assert verdict.level == "medium"

    def test_feature_task_type(self):
        """A raw task_type of 'feature' is medium stakes."""
        feature = _task("Add pagination", raw={"task_type": "feature"})
        assert classify_stakes(feature).level == "medium"

    def test_database_schema_change(self):
        """A title mentioning a database schema update is medium stakes."""
        verdict = classify_stakes(_task("Update database schema"))
        assert verdict.level == "medium"


class TestLowStakes:
    """Tasks that classify_stakes() is expected to rate as 'low'."""

    def test_readme_update(self):
        """A README typo fix is low stakes."""
        verdict = classify_stakes(_task("Update README typo"))
        assert verdict.level == "low"

    def test_docs_task_type(self):
        """A raw task_type of 'docs' is low stakes."""
        docs = _task("Fix docs", raw={"task_type": "docs"})
        assert classify_stakes(docs).level == "low"

    def test_formatting_task(self):
        """A raw task_type of 'formatting' is low stakes."""
        formatting = _task("Fix lint", raw={"task_type": "formatting"})
        assert classify_stakes(formatting).level == "low"


class TestStakesResult:
    """Properties of the object returned by classify_stakes()."""

    def test_reasons_populated(self):
        """A classified task comes back with at least one reason."""
        verdict = classify_stakes(_task("Fix auth.py login"))
        assert len(verdict.reasons) > 0

    def test_custom_patterns(self):
        """Patterns passed explicitly are used: a 'critical' file pattern makes the task high."""
        high_level = StakesLevel(
            file_patterns=["critical"],
            task_types=["emergency"],
            keywords=["urgent"],
        )
        custom = StakesPatterns(
            high=high_level,
            medium=StakesLevel(),
            low=StakesLevel(),
        )
        critical_task = _task("Fix critical module", raw={})
        verdict = classify_stakes(critical_task, custom)
        assert verdict.level == "high"


================================================
FILE: maggy/tests/test_tdd_verifier.py
================================================
"""Tests for TDD verification gates."""

from __future__ import annotations

import pytest

from maggy.services.tdd_verifier import (
    _count_collected,
    _count_failures,
    _parse_coverage,
)


class TestParsers:
    """The private helpers that extract numbers from pytest/coverage text."""

    def test_count_collected_normal(self):
        """Plural 'tests collected' yields the leading count."""
        collected = _count_collected("12 tests collected")
        assert collected == 12

    def test_count_collected_singular(self):
        """Singular 'test collected' is parsed as well."""
        collected = _count_collected("1 test collected")
        assert collected == 1

    def test_count_collected_missing(self):
        """Text without a collected count yields 0."""
        collected = _count_collected("no tests ran")
        assert collected == 0

    def test_count_failures_normal(self):
        """The number before 'failed' is returned."""
        failures = _count_failures("3 failed, 7 passed")
        assert failures == 3

    def test_count_failures_none(self):
        """Text without a failed count yields 0."""
        failures = _count_failures("10 passed")
        assert failures == 0

    def test_parse_coverage_normal(self):
        """The percentage on a TOTAL line is returned as a float."""
        total_line = "TOTAL    500    50    90%"
        coverage = _parse_coverage(total_line)
        assert coverage == 90.0

    def test_parse_coverage_missing(self):
        """Text without coverage figures yields 0.0."""
        coverage = _parse_coverage("no coverage data")
        assert coverage == 0.0


class TestVerifyResult:
    """Field access on the ``VerifyResult`` container."""

    def test_passed_result(self):
        """A passing result exposes ``passed`` and ``tests_found``."""
        from maggy.services.tdd_verifier import VerifyResult

        outcome = VerifyResult(True, "ok", 5, 0)
        assert outcome.passed is True
        assert outcome.tests_found == 5

    def test_failed_result(self):
        """A failing result exposes ``passed`` and ``tests_failed``."""
        from maggy.services.tdd_verifier import VerifyResult

        outcome = VerifyResult(False, "tests failing", 5, 3)
        assert outcome.passed is False
        assert outcome.tests_failed == 3


class TestVerifyFunctions:
    """Async verify gates, with ``_run_cmd`` replaced by canned results."""

    @pytest.mark.asyncio
    async def test_verify_tests_exist_passes(self, monkeypatch):
        """Exit code 0 with a collected count passes and reports the count."""
        from maggy.services import tdd_verifier

        async def fake_cmd(cmd, cwd):
            return 0, "5 tests collected"

        monkeypatch.setattr(tdd_verifier, "_run_cmd", fake_cmd)
        outcome = await tdd_verifier.verify_tests_exist("/tmp")
        assert outcome.passed is True
        assert outcome.tests_found == 5

    @pytest.mark.asyncio
    async def test_verify_tests_exist_fails(self, monkeypatch):
        """A failing collection run does not pass the gate."""
        from maggy.services import tdd_verifier

        async def fake_cmd(cmd, cwd):
            return 1, "error"

        monkeypatch.setattr(tdd_verifier, "_run_cmd", fake_cmd)
        outcome = await tdd_verifier.verify_tests_exist("/tmp")
        assert outcome.passed is False

    @pytest.mark.asyncio
    async def test_verify_tests_fail_red(self, monkeypatch):
        """RED gate: failing tests make the gate pass and are counted."""
        from maggy.services import tdd_verifier

        async def fake_cmd(cmd, cwd):
            return 1, "2 failed, 3 passed"

        monkeypatch.setattr(tdd_verifier, "_run_cmd", fake_cmd)
        outcome = await tdd_verifier.verify_tests_fail("/tmp")
        assert outcome.passed is True
        assert outcome.tests_failed == 2

    @pytest.mark.asyncio
    async def test_verify_tests_fail_rejects_pass(self, monkeypatch):
        """RED gate: an all-green run is rejected with an explanatory detail."""
        from maggy.services import tdd_verifier

        async def fake_cmd(cmd, cwd):
            return 0, "5 passed"

        monkeypatch.setattr(tdd_verifier, "_run_cmd", fake_cmd)
        outcome = await tdd_verifier.verify_tests_fail("/tmp")
        assert outcome.passed is False
        assert "expected failures" in outcome.detail

    @pytest.mark.asyncio
    async def test_verify_tests_pass_green(self, monkeypatch):
        """GREEN gate: an all-green run passes."""
        from maggy.services import tdd_verifier

        async def fake_cmd(cmd, cwd):
            return 0, "10 passed"

        monkeypatch.setattr(tdd_verifier, "_run_cmd", fake_cmd)
        outcome = await tdd_verifier.verify_tests_pass("/tmp")
        assert outcome.passed is True

    @pytest.mark.asyncio
    async def test_verify_lint_clean(self, monkeypatch):
        """Lint gate: a zero exit code passes."""
        from maggy.services import tdd_verifier

        async def fake_cmd(cmd, cwd):
            return 0, "All checks passed!"

        monkeypatch.setattr(tdd_verifier, "_run_cmd", fake_cmd)
        outcome = await tdd_verifier.verify_lint("/tmp")
        assert outcome.passed is True


================================================
FILE: maggy/tests/test_vision.py
================================================
"""Tests for Maggy vision service — Ollama Qwen3-VL integration."""

from __future__ import annotations

import json
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from maggy.services.vision import analyze_image


@pytest.fixture()
def png_file(tmp_path: Path) -> Path:
    """Write a tiny PNG-like file under ``tmp_path`` and return its path.

    The payload is the PNG signature followed by an IHDR chunk declaring a
    1x1 image; no further chunks are written.
    """
    payload = (
        b"\x89PNG\r\n\x1a\n"
        b"\x00\x00\x00\rIHDR"
        b"\x00\x00\x00\x01\x00\x00\x00\x01"
        b"\x08\x02\x00\x00\x00\x90wS\xde"
    )
    target = tmp_path / "test.png"
    target.write_bytes(payload)
    return target


def test_analyze_missing_file():
    """A path that does not exist produces at least one error chunk."""
    chunk_types = [c["type"] for c in analyze_image("/no/such/file.png")]
    assert "error" in chunk_types


def test_analyze_bad_extension(tmp_path: Path):
    """A file without an image extension produces at least one error chunk."""
    notes = tmp_path / "notes.txt"
    notes.write_text("hello")
    chunk_types = [c["type"] for c in analyze_image(str(notes))]
    assert "error" in chunk_types


def test_analyze_streams_response(png_file: Path):
    """Streamed Ollama lines come back as text chunks plus a done chunk."""
    payloads = [
        {"message": {"content": "A "}},
        {"message": {"content": "button"}},
        {"done": True},
    ]
    fake_resp = MagicMock()
    fake_resp.status_code = 200
    fake_resp.iter_lines.return_value = iter(
        [json.dumps(item) for item in payloads]
    )
    # Let the mock act as a context manager that yields itself.
    fake_resp.__enter__ = lambda s: s
    fake_resp.__exit__ = MagicMock(return_value=False)

    with patch("maggy.services.vision.httpx.stream",
               return_value=fake_resp):
        chunks = list(analyze_image(str(png_file)))

    text_parts = [c["content"] for c in chunks if c["type"] == "text"]
    assert "A " in text_parts
    assert "button" in text_parts
    assert any(c["type"] == "done" for c in chunks)


def test_analyze_with_custom_prompt(png_file: Path):
    """The caller's prompt ends up in the first message sent to Ollama."""
    seen_kwargs = {}

    def fake_stream(method, url, **kw):
        # Record the keyword arguments so the request body can be inspected.
        seen_kwargs.update(kw)
        resp = MagicMock()
        resp.status_code = 200
        resp.iter_lines.return_value = iter([json.dumps({"done": True})])
        resp.__enter__ = lambda s: s
        resp.__exit__ = MagicMock(return_value=False)
        return resp

    with patch("maggy.services.vision.httpx.stream",
               side_effect=fake_stream):
        list(analyze_image(str(png_file), "What color?"))

    request_body = seen_kwargs.get("json", {})
    first_message = request_body.get("messages", [{}])[0]
    assert "What color?" in first_message.get("content", "")


def test_analyze_ollama_down(png_file: Path):
    """A connection failure is reported as an error chunk naming the cause."""
    import httpx

    with patch("maggy.services.vision.httpx.stream",
               side_effect=httpx.ConnectError("refused")):
        chunks = list(analyze_image(str(png_file)))

    errors = [c for c in chunks if c["type"] == "error"]
    assert errors
    message = errors[0]["content"].lower()
    assert "refused" in message or "connect" in message


================================================
FILE: maggy/tests/test_zero_config.py
================================================
"""Tests for zero-config auto-configuration."""

from __future__ import annotations

import os
from pathlib import Path
from unittest.mock import patch

import pytest

from maggy.config import MaggyConfig


# --- Provider Credentials ---


class TestHasProviderCredentials:
    """``_has_provider_credentials`` across issue-tracker providers."""

    def test_github_with_creds(self):
        """GitHub with org, repos and token counts as having credentials."""
        from maggy.config import _has_provider_credentials

        config = MaggyConfig()
        config.issue_tracker.provider = "github"
        github = config.issue_tracker.github
        github.org = "acme"
        github.repos = ["api"]
        github.token = "ghp_abc"
        assert _has_provider_credentials(config) is True

    def test_github_no_token(self):
        """GitHub with org and repos but no token set is not enough."""
        from maggy.config import _has_provider_credentials

        config = MaggyConfig()
        config.issue_tracker.provider = "github"
        github = config.issue_tracker.github
        github.org = "acme"
        github.repos = ["api"]
        assert _has_provider_credentials(config) is False

    def test_asana_with_creds(self):
        """Asana with workspace id and token counts as having credentials."""
        from maggy.config import _has_provider_credentials

        config = MaggyConfig()
        config.issue_tracker.provider = "asana"
        asana = config.issue_tracker.asana
        asana.workspace_id = "w1"
        asana.token = "tok"
        assert _has_provider_credentials(config) is True

    def test_linear_stub(self):
        """The linear provider reports no credentials."""
        from maggy.config import _has_provider_credentials

        config = MaggyConfig()
        config.issue_tracker.provider = "linear"
        assert _has_provider_credentials(config) is False


# --- CLI History Detection ---


class TestHasCliHistory:
    """``_has_cli_history`` against a temporary home directory."""

    def test_claude_dir_exists(self, tmp_path: Path):
        """A ``.claude`` directory in the home counts as CLI history."""
        from maggy.config import _has_cli_history

        claude_dir = tmp_path / ".claude"
        claude_dir.mkdir()
        assert _has_cli_history(tmp_path) is True

    def test_no_dirs(self, tmp_path: Path):
        """An empty home has no CLI history."""
        from maggy.config import _has_cli_history

        assert _has_cli_history(tmp_path) is False

    def test_codex_dir_exists(self, tmp_path: Path):
        """A ``.codex`` directory in the home counts as CLI history."""
        from maggy.config import _has_cli_history

        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        assert _has_cli_history(tmp_path) is True


# --- Auto Configure ---


class TestAutoConfigure:
    """``auto_configure`` run against a temporary home, with no CLIs on PATH."""

    def test_builds_config(self, tmp_path: Path):
        """The call returns a ``MaggyConfig`` instance."""
        from maggy.config import auto_configure

        with patch("shutil.which", return_value=None):
            built = auto_configure(home=tmp_path, persist=False)
        assert isinstance(built, MaggyConfig)

    def test_populates_codebases(self, tmp_path: Path):
        """A git repo under ``<home>/dev`` is registered as a codebase."""
        from maggy.config import auto_configure

        # Lay out <home>/dev/webapp/.git so the repo looks like a checkout.
        (tmp_path / "dev" / "webapp" / ".git").mkdir(parents=True)

        with patch("shutil.which", return_value=None):
            built = auto_configure(home=tmp_path, persist=False)
        assert len(built.codebases) == 1
        assert built.codebases[0].key == "webapp"

    def test_persist_writes_file(self, tmp_path: Path):
        """With ``persist=True`` the patched config path is written."""
        from maggy.config import auto_configure

        config_path = tmp_path / "config.yaml"
        with patch("shutil.which", return_value=None):
            with patch("maggy.config.CONFIG_DIR", tmp_path):
                with patch("maggy.config.CONFIG_PATH", config_path):
                    auto_configure(home=tmp_path, persist=True)
        assert config_path.exists()


# --- Relaxed is_configured ---


class TestIsConfiguredRelaxed:
    """``is_configured`` when no config file exists on disk."""

    def test_false_without_anything(self, tmp_path: Path):
        """No config file and no CLI history means not configured."""
        from maggy.config import is_configured

        missing = tmp_path / "nope.yaml"
        with patch("maggy.config.CONFIG_PATH", missing):
            with patch("maggy.config._CACHED", None):
                with patch("maggy.config._has_cli_history",
                           return_value=False):
                    configured = is_configured()
        assert configured is False

    def test_true_with_cli_history(self, tmp_path: Path):
        """CLI history alone is enough to count as configured."""
        from maggy.config import is_configured

        missing = tmp_path / "nope.yaml"
        with patch("maggy.config.CONFIG_PATH", missing):
            with patch("maggy.config._CACHED", None):
                with patch("maggy.config._has_cli_history",
                           return_value=True):
                    configured = is_configured()
        assert configured is True


================================================
FILE: rules/nodejs-backend.md
================================================
---
description: Node.js backend conventions
paths: ["src/api/**", "src/routes/**", "src/server/**", "src/middleware/**", "server/**", "api/**"]
---

## Node.js Backend Conventions

- Use Express or Fastify with typed route handlers
- Repository pattern for data access
- Validate request bodies with Zod at the route level
- Use proper HTTP status codes (201 for creation, 404 for missing, etc.)
- Add rate limiting to auth endpoints
- Use structured logging (pino/winston)
- Handle async errors with middleware, not try/catch in every route


================================================
FILE: rules/python.md
================================================
---
description: Python-specific conventions
paths: ["**/*.py"]
---

## Python Conventions

- Use type hints on all function signatures
- Use Pydantic for data validation and serialization
- Use pytest for testing (not unittest)
- Use ruff for linting and formatting
- Use mypy for type checking
- Prefer dataclasses or Pydantic models over plain dicts
- Use pathlib over os.path


================================================
FILE: rules/quality-gates.md
================================================
---
description: Code quality constraints enforced on all files
---

## Quality Gates

| Constraint | Limit |
|------------|-------|
| Lines per function | 20 max |
| Parameters per function | 3 max |
| Nesting depth | 2 levels max |
| Lines per file | 200 max |
| Functions per file | 10 max |
| Test coverage | 80% minimum |

Before completing any file: count lines, count functions, and check parameter counts. If any limit is exceeded, split or decompose immediately.


================================================
FILE: rules/react.md
================================================
---
description: React-specific conventions
paths: ["src/components/**", "src/pages/**", "src/app/**", "**/*.tsx", "**/*.jsx"]
---

## React Conventions

- Prefer functional components with hooks
- Use React Query / TanStack Query for server state
- Use Zustand or context for client state
- Colocate component tests (ComponentName.test.tsx)
- Extract custom hooks when logic is reused across components
- Avoid prop drilling beyond 2 levels - use context or composition


================================================
FILE: rules/security.md
================================================
---
description: Security rules enforced on all code
---

## Security Rules

- No secrets in code - use environment variables
- No secrets in client-exposed env vars (VITE_*, NEXT_PUBLIC_*, REACT_APP_*)
- `.env` files always in `.gitignore`
- Parameterized queries only - no string concatenation for SQL
- Hash passwords with bcrypt (12+ rounds) or argon2
- Validate all input at API boundaries (Zod/Pydantic)
- `.env.example` with all required vars (no values)


================================================
FILE: rules/tdd-workflow.md
================================================
---
description: TDD workflow enforced for all implementation tasks
---

## TDD Workflow

Every feature and bug fix follows RED-GREEN-VALIDATE:

1. **RED** - Write tests based on acceptance criteria. Run them. All must FAIL.
2. **GREEN** - Write minimum code to pass tests. Run them. All must PASS.
3. **VALIDATE** - Run linter, type checker, full test suite with coverage >= 80%.

Tests must fail first to prove they validate the requirement. No code ships without a test that failed first.

For bugs: identify test gap, write failing test that reproduces bug, then fix.


================================================
FILE: rules/typescript.md
================================================
---
description: TypeScript-specific conventions
paths: ["**/*.ts", "**/*.tsx", "tsconfig.json"]
---

## TypeScript Conventions

- Enable strict mode in tsconfig.json
- Prefer interfaces over type aliases for object shapes
- Use discriminated unions over type assertions
- Avoid `any` - use `unknown` with type narrowing
- Use Zod for runtime validation at boundaries
- Use ESLint with TypeScript parser
- Prefer `const` over `let`, never use `var`


================================================
FILE: scripts/convert-hooks-to-toml.sh
================================================
#!/bin/bash
# convert-hooks-to-toml.sh - Convert settings.json hooks to config.toml format
# Usage: convert-hooks-to-toml.sh [settings.json] > config.toml
# Requires: jq

set -o errexit -o nounset -o pipefail

# Absolute path of the directory that holds this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Input file used when no path is passed on the command line.
DEFAULT_INPUT="${SCRIPT_DIR}/../templates/settings.json"

check_deps() {
    # Abort with a message on stderr when jq cannot be found.
    if ! command -v jq &>/dev/null; then
        echo "Error: jq is required" >&2
        exit 1
    fi
}

print_header() {
    # Emit the fixed comment banner that opens the generated config,
    # followed by one blank line.
    printf '%s\n' \
        "# Agent CLI Configuration" \
        "# Compatible with Kimi CLI and OpenAI Codex CLI" \
        "# Auto-generated from settings.json hooks" \
        ""
}

#######################################
# Print one [[hooks]] TOML table for a single hook command.
# Arguments:
#   $1 - event name
#   $2 - matcher (the matcher line is omitted when empty)
#   $3 - hook command, written as a multi-line basic string
#   $4 - timeout, written unquoted
# Outputs: the TOML table followed by a blank line, on stdout
#######################################
extract_hook() {
    local event="$1"
    local matcher="$2"
    local command="$3"
    local timeout="$4"

    # TOML basic strings treat a backslash as an escape introducer and end at
    # an unescaped double quote, so escape both before quoting the values.
    # Backslashes go first so the ones added for quotes are not doubled.
    event=${event//\\/\\\\}
    event=${event//\"/\\\"}
    matcher=${matcher//\\/\\\\}
    matcher=${matcher//\"/\\\"}

    # Inside a multi-line basic string lone quotes are allowed; only
    # backslashes and a run of three quotes need escaping.
    command=${command//\\/\\\\}
    command=${command//\"\"\"/\"\"\\\"}

    printf '[[hooks]]\n'
    printf 'event = "%s"\n' "$event"
    if [ -n "$matcher" ]; then
        printf 'matcher = "%s"\n' "$matcher"
    fi
    printf 'command = """\n%s\n"""\n' "$command"
    printf 'timeout = %s\n\n' "$timeout"
}

#######################################
# Convert every hook registered under one event into [[hooks]] tables.
# Arguments:
#   $1 - path to the settings JSON file
#   $2 - event name to look up under .hooks
# Outputs: one table per hook command via extract_hook; nothing when the
#          event has no entries
#######################################
convert_event() {
    local input="$1"
    local event="$2"
    local entries entry matcher hooks_array hook cmd timeout

    # Pass the event name as a jq variable instead of splicing it into the
    # program text. A jq failure here is treated as "no hooks for this event".
    entries=$(jq -c --arg ev "$event" '.hooks[$ev][]?' "$input" 2>/dev/null) || return 0

    while IFS= read -r entry; do
        # An empty result still arrives as one blank line; without this guard
        # a bogus table with an empty command and timeout would be emitted.
        [ -n "$entry" ] || continue

        matcher=$(jq -r '.matcher // ""' <<<"$entry")
        hooks_array=$(jq -c '.hooks[]' <<<"$entry")

        while IFS= read -r hook; do
            [ -n "$hook" ] || continue
            cmd=$(jq -r '.command' <<<"$hook")
            timeout=$(jq -r '.timeout // 30' <<<"$hook")
            extract_hook "$event" "$matcher" "$cmd" "$timeout"
        done <<<"$hooks_array"
    done <<<"$entries"
}

# Entry point: validate the input file, then print the header followed by the
# converted hooks for each supported event.
# Arguments: $1 - optional settings.json path (defaults to $DEFAULT_INPUT)
main() {
    local input="${1:-$DEFAULT_INPUT}"
    if [ ! -f "$input" ]; then
        echo "Error: '$input' not found" >&2
        exit 1
    fi

    check_deps
    print_header

    local event
    for event in \
        "PreCompact" "PreToolUse" "PostToolUse" \
        "Stop" "SessionStart" "SessionEnd"; do
        convert_event "$input" "$event"
    done
}

main "$@"


================================================
FILE: scripts/convert-skills-structure.sh
================================================
#!/bin/bash
# convert-skills-structure.sh
# Converts flat .md skills to folder/SKILL.md structure with YAML frontmatter

set -o errexit -o pipefail

# The skills directory sits next to this script's parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"
SKILLS_DIR="${ROOT_DIR}/skills"

printf '%s\n' \
    "Converting skills from flat .md to folder/SKILL.md structure..." \
    "Skills directory: $SKILLS_DIR" \
    ""

# Print the one-line description used in a skill's frontmatter.
# Arguments: $1 - skill name (the flat file's basename without .md)
# Outputs:   the description on stdout; names not listed below fall back to
#            "Skill for <name>"
get_description() {
    local name="$1"
    local desc

    case "$name" in
        "aeo-optimization")
            desc="AI Engine Optimization - semantic triples, page templates, content clusters for AI citations" ;;
        "agentic-development")
            desc="Build AI agents with Pydantic AI (Python) and Claude SDK (Node.js)" ;;
        "ai-models")
            desc="Latest AI models reference - Claude, OpenAI, Gemini, Eleven Labs, Replicate" ;;
        "base")
            desc="Universal coding patterns, constraints, TDD workflow, atomic todos" ;;
        "code-deduplication")
            desc="Prevent semantic code duplication with capability index and check-before-write" ;;
        "code-review")
            desc="Mandatory code reviews via /code-review before commits and deploys" ;;
        "commit-hygiene")
            desc="Atomic commits, PR size limits, commit thresholds, stacked PRs" ;;
        "credentials")
            desc="Centralized API key management from Access.txt" ;;
        "database-schema")
            desc="Schema awareness - read before coding, type generation, prevent column errors" ;;
        "iterative-development")
            desc="Ralph Wiggum loops - self-referential TDD iteration until tests pass" ;;
        "klaviyo")
            desc="Klaviyo email/SMS marketing - profiles, events, flows, segmentation" ;;
        "llm-patterns")
            desc="AI-first application patterns, LLM testing, prompt management" ;;
        "medusa")
            desc="Medusa headless commerce - modules, workflows, API routes, admin UI" ;;
        "ms-teams-apps")
            desc="Microsoft Teams bots and AI agents - Claude/OpenAI, Adaptive Cards, Graph API" ;;
        "nodejs-backend")
            desc="Node.js backend patterns with Express/Fastify, repositories" ;;
        "playwright-testing")
            desc="E2E testing with Playwright - Page Objects, cross-browser, CI/CD" ;;
        "posthog-analytics")
            desc="PostHog analytics, event tracking, feature flags, dashboards" ;;
        "project-tooling")
            desc="gh, vercel, supabase, render CLI and deployment platform setup" ;;
        "pwa-development")
            desc="Progressive Web Apps - service workers, caching strategies, offline, Workbox" ;;
        "python")
            desc="Python development with ruff, mypy, pytest - TDD and type safety" ;;
        "react-native")
            desc="React Native mobile patterns, platform-specific code" ;;
        "react-web")
            desc="React web development with hooks, React Query, Zustand" ;;
        "reddit-ads")
            desc="Reddit Ads API - campaigns, targeting, conversions, agentic optimization" ;;
        "reddit-api")
            desc="Reddit API with PRAW (Python) and Snoowrap (Node.js)" ;;
        "security")
            desc="OWASP security patterns, secrets management, security testing" ;;
        "session-management")
            desc="Context preservation, tiered summarization, resumability" ;;
        "shopify-apps")
            desc="Shopify app development - Remix, Admin API, checkout extensions" ;;
        "site-architecture")
            desc="Technical SEO - robots.txt, sitemap, meta tags, Core Web Vitals" ;;
        "supabase")
            desc="Core Supabase CLI, migrations, RLS, Edge Functions" ;;
        "supabase-nextjs")
            desc="Next.js with Supabase and Drizzle ORM" ;;
        "supabase-node")
            desc="Express/Hono with Supabase and Drizzle ORM" ;;
        "supabase-python")
            desc="FastAPI with Supabase and SQLAlchemy/SQLModel" ;;
        "team-coordination")
            desc="Multi-person projects - shared state, todo claiming, handoffs" ;;
        "typescript")
            desc="TypeScript strict mode with eslint and jest" ;;
        "ui-mobile")
            desc="Mobile UI patterns - React Native, iOS/Android, touch targets" ;;
        "ui-testing")
            desc="Visual testing - catch invisible buttons, broken layouts, contrast" ;;
        "ui-web")
            desc="Web UI - glassmorphism, Tailwind, dark mode, accessibility" ;;
        "user-journeys")
            desc="User experience flows - journey mapping, UX validation, error recovery" ;;
        "web-content")
            desc="SEO and AI discovery (GEO) - schema, ChatGPT/Perplexity optimization" ;;
        "web-payments")
            desc="Stripe Checkout, subscriptions, webhooks, customer portal" ;;
        "woocommerce")
            desc="WooCommerce REST API - products, orders, customers, webhooks" ;;
        *)
            desc="Skill for $name" ;;
    esac

    printf '%s\n' "$desc"
}

converted=0

for skill_file in "$SKILLS_DIR"/*.md; do
    # An unmatched glob stays literal; skip anything that is not a regular file.
    [ -f "$skill_file" ] || continue

    filename=$(basename "$skill_file" .md)
    skill_folder="$SKILLS_DIR/$filename"
    skill_md="$skill_folder/SKILL.md"

    printf 'Converting: %s ... ' "$filename"

    description=$(get_description "$filename")
    mkdir -p "$skill_folder"

    # YAML frontmatter first, then the original file content unchanged.
    {
        printf '%s\n' \
            "---" \
            "name: $filename" \
            "description: $description" \
            "---" \
            ""
        cat "$skill_file"
    } > "$skill_md"

    # The flat file is removed once its folder version has been written.
    rm "$skill_file"

    echo "✓"
    converted=$((converted + 1))
done

rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo "$rule"
echo "Conversion complete!"
echo "Converted: $converted skills"
echo "$rule"


================================================
FILE: scripts/detect-agents.sh
================================================
#!/bin/bash
# detect-agents.sh - Detect installed AI CLI tools
# Output: newline-separated list of detected tools (claude, kimi, codex)
# Usage: ./detect-agents.sh
#        AGENTS=$(./detect-agents.sh)

# Strict mode: stop on command errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Print a tool name when its binary can be found by `command -v`.
# Arguments: $1 - name to print, $2 - binary to look up
# Returns:   non-zero (and prints nothing) when the binary is not found
detect_by_binary() {
    local label="$1"
    local exe="$2"
    if ! command -v "$exe" &>/dev/null; then
        return 1
    fi
    echo "$label"
}

# Print a tool name when its config directory exists.
# Arguments: $1 - name to print, $2 - directory to test
# Returns:   non-zero (and prints nothing) when the directory is absent
detect_by_config() {
    local label="$1"
    local config_path="$2"
    if [ ! -d "$config_path" ]; then
        return 1
    fi
    echo "$label"
}

# Print a tool name once if either its binary or its config directory exists.
# Arguments: $1 - name to print, $2 - binary to look up, $3 - config directory
# Returns:   0 whether or not the tool was detected
detect_tool() {
    local label="$1"
    local exe="$2"
    local config_path="$3"
    # The binary is checked first; the config directory is only a fallback.
    if command -v "$exe" &>/dev/null || [ -d "$config_path" ]; then
        echo "$label"
    fi
}

# Print one line per detected tool: the AI CLIs first, then container
# runtimes and the Polyphony orchestrator.
main() {
    detect_tool "claude" "claude" "$HOME/.claude"
    detect_tool "kimi" "kimi" "$HOME/.kimi"
    detect_tool "codex" "codex" "$HOME/.codex"

    # Remaining tools are detected by binary only; each spec is
    # "<binary>:<name to print>" (container runtimes, then Polyphony).
    local spec
    for spec in "docker:docker" "orbctl:orbstack" "polyphony:polyphony"; do
        if command -v "${spec%%:*}" &>/dev/null; then
            echo "${spec#*:}"
        fi
    done
}

main


================================================
FILE: scripts/icpg/__init__.py
================================================
"""iCPG — Intent-Augmented Code Property Graph.

Tracks WHY code exists by linking tasks/goals to code symbols with typed
edges for traceability, blast radius, and drift detection.
"""

# Package version; the CLI entry point imports it for its `--version` output.
__version__ = '0.1.0'


================================================
FILE: scripts/icpg/__main__.py
================================================
"""CLI entry point for iCPG — Intent-Augmented Code Property Graph."""

from __future__ import annotations

import argparse
import json
import subprocess
import sys
from pathlib import Path

from . import __version__
from .bootstrap import bootstrap_from_git
from .contracts import format_contracts, infer_contracts
from .drift import check_all_drift, check_file_drift
from .models import Edge, ReasonNode, _now, _uuid
from .store import ICPGStore
from .symbols import extract_symbols, extract_symbols_from_files
from .vectors import VectorStore


def main(argv: list[str] | None = None) -> int:
    """Parse iCPG command-line arguments and run the selected subcommand.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` leaves the
            choice of arguments to argparse.

    Returns:
        The selected handler's exit code, or 1 after printing help when no
        subcommand was given.
    """
    parser = argparse.ArgumentParser(
        prog='icpg',
        description='iCPG — Intent-Augmented Code Property Graph'
    )
    parser.add_argument(
        '--version', action='version', version=f'icpg {__version__}'
    )
    parser.add_argument(
        '--project', default='.', help='Project directory (default: .)'
    )
    commands = parser.add_subparsers(dest='command')

    # init
    commands.add_parser(
        'init', help='Initialize .icpg/ directory and database'
    )

    # create
    create_p = commands.add_parser('create', help='Create a ReasonNode')
    create_p.add_argument('goal', help='Stated purpose (one sentence)')
    create_p.add_argument(
        '--scope', nargs='+', default=[], help='File paths in scope'
    )
    create_p.add_argument('--owner', default='user', help='Owner name')
    create_p.add_argument('--agent', help='Agent identity')
    decision_types = [
        'business_goal', 'arch_decision', 'task',
        'workaround', 'constraint', 'patch'
    ]
    create_p.add_argument(
        '--type', dest='decision_type', default='task',
        choices=decision_types
    )
    create_p.add_argument('--task-id', help='External task tracker ID')
    create_p.add_argument('--parent', help='Parent ReasonNode ID')
    create_p.add_argument(
        '--infer-contracts', action='store_true',
        help='Use LLM to infer contracts'
    )

    # record
    record_p = commands.add_parser(
        'record', help='Record symbols from git diff to a ReasonNode'
    )
    record_p.add_argument('--reason', required=True, help='ReasonNode ID')
    record_p.add_argument(
        '--base', default='main', help='Base branch for diff'
    )
    record_p.add_argument(
        '--edge-type', default='CREATES',
        choices=['CREATES', 'MODIFIES'],
        help='Edge type (default: CREATES)'
    )

    # query (with its own nested subcommands)
    query_p = commands.add_parser('query', help='Query the reason graph')
    queries = query_p.add_subparsers(dest='query_type')

    context_p = queries.add_parser(
        'context', help='Get ReasonNodes for symbols in a file'
    )
    context_p.add_argument('file', help='File path')

    blast_p = queries.add_parser(
        'blast', help='Blast radius for a ReasonNode'
    )
    blast_p.add_argument('reason_id', help='ReasonNode ID')

    constraints_p = queries.add_parser(
        'constraints', help='Get invariants/contracts for file'
    )
    constraints_p.add_argument('file', help='File path')

    risk_p = queries.add_parser(
        'risk', help='Risk profile for a symbol'
    )
    risk_p.add_argument('symbol', help='Symbol name')

    prior_p = queries.add_parser(
        'prior', help='Search for duplicate/prior intents'
    )
    prior_p.add_argument('goal', help='Goal text to search')
    prior_p.add_argument(
        '--threshold', type=float, default=0.75,
        help='Similarity threshold (0-1, default: 0.75)'
    )

    # drift (with its own nested subcommands)
    drift_p = commands.add_parser('drift', help='Drift detection')
    drift_actions = drift_p.add_subparsers(dest='drift_action')
    drift_actions.add_parser('check', help='Run full drift scan')
    drift_file_p = drift_actions.add_parser(
        'file', help='Check drift for a single file (fast)'
    )
    drift_file_p.add_argument('file_path', help='File path to check')
    drift_resolve_p = drift_actions.add_parser(
        'resolve', help='Resolve a drift event'
    )
    drift_resolve_p.add_argument('event_id', help='Drift event ID')

    # bootstrap
    bootstrap_p = commands.add_parser(
        'bootstrap', help='Infer ReasonNodes from git history'
    )
    bootstrap_p.add_argument(
        '--days', type=int, default=90, help='Days of history (default: 90)'
    )
    bootstrap_p.add_argument(
        '--no-llm', action='store_true', help='Skip LLM inference'
    )
    bootstrap_p.add_argument(
        '--verbose', '-v', action='store_true', help='Verbose output'
    )

    # status
    commands.add_parser('status', help='Show iCPG statistics')

    args = parser.parse_args(argv)
    # The store is built before dispatch, even when no subcommand was given.
    store = ICPGStore(args.project)

    handlers = {
        'init': lambda: cmd_init(store),
        'create': lambda: cmd_create(store, args),
        'record': lambda: cmd_record(store, args),
        'query': lambda: cmd_query(store, args),
        'drift': lambda: cmd_drift(store, args),
        'bootstrap': lambda: cmd_bootstrap(store, args),
        'status': lambda: cmd_status(store),
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return 1
    return handler()


def cmd_init(store: ICPGStore) -> int:
    """Initialize the store's database and report its locations.

    Returns:
        Always 0.
    """
    store.init_db()
    print('Initialized iCPG at {}'.format(store.icpg_dir))
    print('  Database: {}'.format(store.db_path))
    print('  .gitignore: created')
    return 0


def cmd_create(store: ICPGStore, args) -> int:
    """Create a ReasonNode from CLI arguments, store it, and index it.

    The database is initialized first when it does not exist yet. With
    ``--infer-contracts`` the inferred pre/postconditions and invariants are
    attached before the node is saved.

    Returns:
        Always 0.
    """
    if not store.exists():
        store.init_db()

    # A node created on behalf of an agent is tagged with a different source.
    origin = 'agent-session' if args.agent else 'manual'
    reason = ReasonNode(
        goal=args.goal,
        owner=args.owner,
        decision_type=args.decision_type,
        scope=args.scope,
        agent=args.agent,
        task_id=args.task_id,
        parent_id=args.parent,
        source=origin
    )

    if args.infer_contracts:
        inferred = infer_contracts(reason, project_dir=args.project)
        for field in ('preconditions', 'postconditions', 'invariants'):
            setattr(reason, field, inferred[field])

    store.create_reason(reason)

    # Index in vector store
    VectorStore(args.project).add_reason(reason.id, reason.goal, reason.scope)

    scope_text = ", ".join(reason.scope) or "(none)"
    print(f'Created ReasonNode: {reason.id}')
    print(f'  Goal: {reason.goal}')
    print(f'  Scope: {scope_text}')
    if reason.invariants:
        print(f'  Invariants: {len(reason.invariants)}')
    return 0


def cmd_record(store: ICPGStore, args) -> int:
    """Link symbols from files changed against a base ref to a ReasonNode.

    Runs ``git diff --name-only <base>`` in the project directory, extracts
    symbols from every changed file that still exists, upserts them, and
    creates an edge of the requested type from the ReasonNode to each symbol.
    The ReasonNode's status is then set to ``executing``.

    Args:
        store: Store holding the reason graph.
        args: Parsed CLI arguments; reads ``reason``, ``base`` and
            ``edge_type``.

    Returns:
        0 on success (including when no files changed); 1 when the store is
        missing, the ReasonNode is unknown, or git could not produce a diff.
    """
    if not store.exists():
        print('Error: No .icpg/ directory. Run `icpg init` first.', file=sys.stderr)
        return 1

    reason = store.get_reason(args.reason)
    if not reason:
        print(f'Error: ReasonNode {args.reason} not found.', file=sys.stderr)
        return 1

    # Get changed files from git diff. The trailing `--` marks the base as a
    # revision, so a file that shares the branch name cannot make it ambiguous.
    try:
        result = subprocess.run(
            ['git', 'diff', '--name-only', args.base, '--'],
            capture_output=True, text=True, timeout=10,
            cwd=str(store.project_dir)
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing git binary as well as an unusable cwd.
        print('Error: git diff failed.', file=sys.stderr)
        return 1

    # A non-zero exit (unknown base ref, not a repository, ...) leaves stdout
    # empty; report it instead of claiming that nothing changed.
    if result.returncode != 0:
        detail = (result.stderr or '').strip()
        suffix = f' {detail}' if detail else ''
        print(f'Error: git diff failed.{suffix}', file=sys.stderr)
        return 1

    files = [
        line.strip() for line in result.stdout.splitlines() if line.strip()
    ]
    if not files:
        print('No changed files found.')
        return 0

    count = 0
    for fp in files:
        full_path = store.project_dir / fp
        # Files deleted in the diff have no symbols left to extract.
        if not full_path.exists():
            continue
        for sym in extract_symbols(str(full_path)):
            store.upsert_symbol(sym)
            edge = Edge(
                from_id=reason.id,
                to_id=sym.id,
                edge_type=args.edge_type,
                confidence=1.0
            )
            store.create_edge(edge)
            count += 1

    # Update reason status
    store.update_reason_status(reason.id, 'executing')

    print(f'Recorded {count} symbols → ReasonNode {args.reason}')
    print(f'  Files: {len(files)}')
    print(f'  Edge type: {args.edge_type}')
    return 0


def cmd_query(store: ICPGStore, args) -> int:
    """Route ``args.query_type`` to the matching ``_query_*`` helper.

    Returns the helper's exit code. Returns 0 without output when the store
    does not exist, and 1 (after printing a usage hint) when the query type
    is not one of the known ones.
    """
    if not store.exists():
        # Silent: without a database there is no context to report.
        return 0

    kind = args.query_type
    if kind == 'context':
        return _query_context(store, args.file)
    if kind == 'blast':
        return _query_blast(store, args.reason_id)
    if kind == 'constraints':
        return _query_constraints(store, args.file)
    if kind == 'risk':
        return _query_risk(store, args.symbol)
    if kind == 'prior':
        return _query_prior(store, args)

    print('Specify a query type: context, blast, constraints, risk, prior')
    return 1


def _resolve_path(store: ICPGStore, file_path: str) -> str:
    """Resolve relative paths to absolute, matching DB storage format."""
    p = Path(file_path)
    if not p.is_absolute():
        p = store.project_dir / p
    return str(p.resolve())


def _query_context(store: ICPGStore, file_path: str) -> int:
    """Print the ReasonNodes the store associates with ``file_path``.

    The path is resolved the same way as other file queries before the
    lookup. Prints nothing when no reasons are linked. Always returns 0.
    """
    status_icons = {
        'proposed': '?', 'executing': '>', 'fulfilled': '+',
        'drifted': '!', 'rejected': 'x', 'abandoned': '-'
    }

    linked = store.get_reasons_for_file(_resolve_path(store, file_path))
    if not linked:
        return 0

    print(f'INTENTS for {file_path}:')
    for node in linked:
        # Unknown statuses fall back to a blank marker.
        marker = status_icons.get(node.status, ' ')
        print(f'  [{marker}] {node.id[:8]} — {node.goal}')
        print(f'      Owner: {node.owner} | Status: {node.status}')
        if node.invariants:
            print(f'      Invariants: {len(node.invariants)}')
    return 0


def _query_blast(store: ICPGStore, reason_id: str) -> int:
    blast = store.get_blast_radius(reason_id)
    reason = blast.get('reason')
    if not reason:
        print(f'ReasonNode {reason_id} not found.', file=sys.stderr)
        return 1

    print(f'BLAST RADIUS for {reason.goal}:')
    print(f'  Symbols: {blast["symbol_count"]}')
    for sym in blast['symbols']:
        print(f'    {sym.symbol_type} {sym.name} ({sym.file_path})')
    print(f'  Dependent intents: {blast["dependent_count"]}')
    for dep in blast['dependent_reasons']:
        print(f'    {dep.id[:8]} — {dep.goal}')
    if reason.invariants:
        print(f'  Invariants:')
        for inv in reason.invariants:
            print(f'    - {inv}')
    return 0


def _query_constraints(store: ICPGStore, file_path: str) -> int:
    """Print the contracts that apply to ``file_path``.

    Looks up constraints for the resolved path and prints each intent's
    invariants, postconditions and preconditions, in that order. Prints
    nothing when no constraints are found. Always returns 0.
    """
    found = store.get_constraints_for_scope([_resolve_path(store, file_path)])
    if not found:
        return 0

    # (label, key) pairs in the order they are printed for each intent.
    sections = (
        ('INV', 'invariants'),
        ('POST', 'postconditions'),
        ('PRE', 'preconditions'),
    )

    print(f'CONSTRAINTS for {file_path}:')
    for entry in found:
        print(f'  From intent: {entry["goal"][:60]}')
        for tag, key in sections:
            for predicate in entry[key]:
                print(f'    {tag}: {predicate}')
    return 0


def _query_risk(store: ICPGStore, symbol_name: str) -> int:
    profile = store.get_risk_profile(symbol_name)
    if not profile.get('found'):
        return 0

    sym = profile['symbol']
    print(f'RISK PROFILE for {symbol_name}:')
    print(f'  File: {sym.file_path}')
    print(f'  Type: {sym.symbol_type}')
    print(f'  Owners: {", ".join(profile["owners"])}')
    print(f'  Modifications: {profile["modify_count"]}')
    print(f'  Active drift: {"YES" if profile["active_drift"] else "no"}')

    if profile['drift_events']:
        print(f'  Drift history:')
        for de in profile['drift_events'][:5]:
            status = 'resolved' if de.resolved else 'ACTIVE'
            print(f'    [{status}] {de.description} (severity: {de.severity})')
    return 0


def _query_prior(store: ICPGStore, args) -> int:
    """Print previously recorded intents similar to ``args.goal``.

    Searches the project's vector store (``args.project``) with
    ``args.threshold`` and prints each hit that still resolves to a
    ReasonNode in ``store``. Always returns 0.
    """
    index = VectorStore(args.project)
    matches = index.search_similar(args.goal, threshold=args.threshold)
    if not matches:
        print('No similar prior intents found.')
        return 0

    print(f'SIMILAR INTENTS (threshold: {args.threshold}):')
    for reason_id, similarity in matches:
        hit = store.get_reason(reason_id)
        if not hit:
            # Indexed id with no matching ReasonNode in the store: skip it.
            continue
        print(f'  [{similarity:.2f}] {hit.id[:8]} — {hit.goal}')
        print(f'         Status: {hit.status} | Owner: {hit.owner}')
    return 0


def cmd_drift(store: ICPGStore, args) -> int:
    """Run one of the ``drift`` sub-actions selected by ``args.drift_action``.

    * ``check``   - scan everything, persist and print the detected events.
    * ``file``    - scan ``args.file_path`` only, persist and print events.
    * ``resolve`` - mark ``args.event_id`` as resolved.

    Returns 1 when the store is missing or the action is unknown, else 0.
    """
    if not store.exists():
        print('No .icpg/ directory. Run `icpg init` first.', file=sys.stderr)
        return 1

    action = args.drift_action

    if action == 'check':
        detected = check_all_drift(store)
        if not detected:
            print('No drift detected.')
            return 0

        # Persist every event before reporting.
        for item in detected:
            store.create_drift_event(item)

        print(f'DRIFT DETECTED ({len(detected)} events):')
        for item in detected:
            joined = ', '.join(item.drift_dimensions)
            print(f'  [{item.severity:.2f}] {item.description}')
            print(f'         Dimensions: {joined}')
        return 0

    if action == 'file':
        target = _resolve_path(store, args.file_path)
        detected = check_file_drift(store, target)
        if not detected:
            return 0

        for item in detected:
            store.create_drift_event(item)

        short_name = Path(target).name
        print(f'DRIFT: {len(detected)} symbols drifted in {short_name}')
        for item in detected:
            owner = store._get_symbol(item.symbol_id)
            label = owner.name if owner else '???'
            # Pair each drifted dimension with its per-dimension score.
            breakdown = ', '.join(
                f'{dim}({score:.2f})'
                for dim, score in zip(item.drift_dimensions, _drift_scores(item))
            )
            print(f'  [{item.severity:.2f}] {label} — {breakdown}')
        return 0

    if action == 'resolve':
        store.resolve_drift(args.event_id)
        print(f'Resolved drift event {args.event_id}')
        return 0

    print('Specify: drift check, drift file <path>, or drift resolve <id>')
    return 1


def _drift_scores(event) -> list[float]:
    """Extract per-dimension scores from drift event description."""
    import re
    scores = []
    for match in re.finditer(r'\w+\((\d+\.\d+)\)', event.description):
        scores.append(float(match.group(1)))
    if not scores:
        scores = [event.severity] * len(event.drift_dimensions)
    return scores


def cmd_bootstrap(store: ICPGStore, args) -> int:
    """Bootstrap the store from git history and print a summary.

    Creates the database first when it does not exist yet. Reads ``days``,
    ``no_llm`` and ``verbose`` from ``args``. Always returns 0.
    """
    if not store.exists():
        store.init_db()

    print(f'Bootstrapping iCPG from last {args.days} days of git history...')
    summary = bootstrap_from_git(
        store,
        days=args.days,
        use_llm=not args.no_llm,
        verbose=args.verbose
    )

    print('\nBootstrap complete:')
    print(f'  Commit clusters: {summary["clusters"]}')
    print(f'  ReasonNodes created: {summary["reasons_created"]}')
    print(f'  Symbols linked: {summary["symbols_linked"]}')
    # The skipped line only appears when the key is present and non-zero.
    if summary.get('skipped'):
        print(f'  Skipped (duplicates): {summary["skipped"]}')
    return 0


def cmd_status(store: ICPGStore) -> int:
    """Print store counters and up to five unresolved drift events.

    Prints a hint instead when no database exists. Always returns 0.
    """
    if not store.exists():
        print('No iCPG database found. Run `icpg init` to create one.')
        return 0

    counters = store.get_stats()
    open_drift = store.get_unresolved_drift()

    print('iCPG STATUS')
    print(f'  ReasonNodes:      {counters["reasons"]}')
    print(f'  Symbols:          {counters["symbols"]}')
    print(f'  Edges:            {counters["edges"]}')
    print(f'  Unresolved drift: {counters["unresolved_drift"]}')

    if open_drift:
        print('\nTop drift events:')
        for entry in open_drift[:5]:
            joined = ', '.join(entry.drift_dimensions)
            print(f'  [{entry.severity:.2f}] {entry.description} ({joined})')

    return 0


# Script entry point: when this module is executed directly, run main() and
# hand its return value to sys.exit() as the process exit status.
if __name__ == '__main__':
    sys.exit(main())


================================================
FILE: scripts/icpg/bootstrap.py
================================================
"""Git history inference — bootstrap iCPG from existing commits.

Implements RFC Section 7.2: replay commit history, cluster by PR or
temporal proximity, infer ReasonNodes via LLM, create CREATES/MODIFIES
edges.
"""

from __future__ import annotations

import json
import os
import re
import subprocess
from datetime import datetime, timedelta, timezone
from pathlib import Path

from .contracts import infer_contracts
from .models import Edge, ReasonNode, _now, _uuid
from .store import ICPGStore
from .symbols import extract_symbols
from .vectors import VectorStore


def bootstrap_from_git(
    store: ICPGStore,
    days: int = 90,
    use_llm: bool = True,
    verbose: bool = False
) -> dict:
    """Infer ReasonNodes from git commit history.

    Commits from the last ``days`` days are clustered; each cluster that is
    not similar to an already indexed reason becomes one ReasonNode whose
    symbols are linked with ``CREATES`` edges at confidence 0.6.

    Args:
        store: Target iCPG store; ``store.project_dir`` is the git work tree.
        days: How far back to read the history.
        use_llm: Infer goals and contracts via an LLM when True, otherwise
            derive the goal from the commit messages alone.
        verbose: Print progress lines.

    Returns:
        Stats dict with the keys ``clusters``, ``reasons_created``,
        ``symbols_linked`` and ``skipped`` (all four are always present).
    """
    vectors = VectorStore(str(store.project_dir))
    since = (
        datetime.now(timezone.utc) - timedelta(days=days)
    ).strftime('%Y-%m-%d')

    # Every return path hands back the same four keys.
    stats = {'clusters': 0, 'reasons_created': 0, 'symbols_linked': 0, 'skipped': 0}

    # Step 1: Get commits
    commits = _get_commits(store.project_dir, since)
    if verbose:
        print(f'Found {len(commits)} commits in last {days} days')

    if not commits:
        return stats

    # Step 2: Cluster commits
    clusters = _cluster_commits(commits)
    if verbose:
        print(f'Clustered into {len(clusters)} groups')

    stats['clusters'] = len(clusters)

    for cluster in clusters:
        # Step 3: Extract info from cluster
        messages = [c['message'] for c in cluster]
        files_changed = set()
        for c in cluster:
            files_changed.update(c.get('files', []))
        # Sorted so the scope given to inference (which keeps only the first
        # 20 entries) does not depend on set iteration order.
        files = sorted(files_changed)

        combined_message = '\n'.join(messages)

        # Step 4: Check for duplicates
        similar = vectors.search_similar(combined_message, threshold=0.8)
        if similar:
            stats['skipped'] += 1
            if verbose:
                print(f'  Skipping cluster (duplicate of {similar[0][0]})')
            continue

        # Step 5: Infer ReasonNode
        if use_llm:
            reason = _infer_via_llm(combined_message, files)
        else:
            reason = _infer_from_messages(combined_message, files)

        if not reason:
            stats['skipped'] += 1
            continue

        # Step 6: Create reason and index
        store.create_reason(reason)
        vectors.add_reason(reason.id, reason.goal, reason.scope)
        stats['reasons_created'] += 1

        if verbose:
            print(f'  Created: {reason.goal[:60]}...')

        # Step 7: Link symbols
        for fp in files:
            full_path = store.project_dir / fp
            # Files that no longer exist in the work tree have no symbols.
            if not full_path.exists():
                continue
            for sym in extract_symbols(str(full_path)):
                store.upsert_symbol(sym)
                store.create_edge(Edge(
                    from_id=reason.id,
                    to_id=sym.id,
                    edge_type='CREATES',
                    confidence=0.6
                ))
                stats['symbols_linked'] += 1

        # Step 8: Infer contracts (if LLM available)
        if use_llm and not reason.postconditions:
            contracts = infer_contracts(reason, project_dir=str(store.project_dir))
            if any(contracts.values()):
                reason.preconditions = contracts['preconditions']
                reason.postconditions = contracts['postconditions']
                reason.invariants = contracts['invariants']
                # Update in DB
                with store._conn() as conn:
                    conn.execute(
                        """UPDATE reasons SET
                           preconditions = ?, postconditions = ?, invariants = ?
                           WHERE id = ?""",
                        (
                            json.dumps(reason.preconditions),
                            json.dumps(reason.postconditions),
                            json.dumps(reason.invariants),
                            reason.id
                        )
                    )

    return stats


def _get_commits(project_dir: Path, since: str) -> list[dict]:
    """Get commits with messages and changed files."""
    try:
        result = subprocess.run(
            [
                'git', 'log', f'--since={since}',
                '--format=__COMMIT__%n%H%n%an%n%aI%n%s',
                '--name-only'
            ],
            capture_output=True, text=True, timeout=30,
            cwd=str(project_dir)
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return []

    if result.returncode != 0:
        return []

    commits = []
    raw_blocks = result.stdout.split('__COMMIT__\n')

    for block in raw_blocks:
        block = block.strip()
        if not block:
            continue

        lines = block.split('\n')
        if len(lines) < 4:
            continue

        sha = lines[0].strip()
        author = lines[1].strip()
        date = lines[2].strip()
        message = lines[3].strip()

        # Files come after a blank line separator
        files = []
        past_blank = False
        for line in lines[4:]:
            stripped = line.strip()
            if not stripped:
                past_blank = True
                continue
            if past_blank and stripped:
                files.append(stripped)

        commits.append({
            'sha': sha,
            'author': author,
            'date': date,
            'message': message,
            'files': files
        })

    return commits


def _cluster_commits(
    commits: list[dict], window_hours: int = 2
) -> list[list[dict]]:
    """Cluster commits by temporal proximity."""
    if not commits:
        return []

    clusters = []
    current_cluster = [commits[0]]

    for commit in commits[1:]:
        try:
            prev_date = datetime.fromisoformat(
                current_cluster[-1]['date'].replace('Z', '+00:00')
            )
            curr_date = datetime.fromisoformat(
                commit['date'].replace('Z', '+00:00')
            )
            delta = abs((curr_date - prev_date).total_seconds())

            if delta <= window_hours * 3600:
                current_cluster.append(commit)
            else:
                clusters.append(current_cluster)
                current_cluster = [commit]
        except (ValueError, KeyError):
            clusters.append(current_cluster)
            current_cluster = [commit]

    if current_cluster:
        clusters.append(current_cluster)

    return clusters


def _infer_via_llm(
    messages: str, files: list[str]
) -> ReasonNode | None:
    """Use an LLM to infer a ReasonNode from commit messages.

    Providers are tried in order: the ``claude`` CLI, then the OpenAI API.
    A provider that is unavailable, fails, or returns a reply that cannot be
    parsed into a ReasonNode is skipped. When none produces a node, the
    result of the message-based heuristic is returned instead.

    Args:
        messages: Commit messages joined with newlines (first 2000 chars used).
        files: Changed file paths (first 20 are shown to the model).
    """
    scope_str = ', '.join(files[:20])
    prompt = f"""Given these git commit messages, infer the intent/goal.

COMMITS:
{messages[:2000]}

FILES CHANGED:
{scope_str}

Return ONLY a JSON object:
{{
  "goal": "one-sentence description of what this change was trying to achieve",
  "decision_type": "task|business_goal|arch_decision|workaround|constraint|patch",
  "scope": ["file1", "file2"]
}}"""

    # Try Claude CLI
    try:
        result = subprocess.run(
            ['claude', '--print', '-p', prompt],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0:
            reason = _parse_reason_response(result.stdout, files)
            # Only stop here on a usable node; an unparsable reply falls
            # through to the next provider instead of ending the attempt.
            if reason is not None:
                return reason
    except (FileNotFoundError, subprocess.TimeoutExpired):
        pass

    # Try OpenAI
    try:
        import openai
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[{'role': 'user', 'content': prompt}],
            temperature=0.2
        )
        content = response.choices[0].message.content or ''
        reason = _parse_reason_response(content, files)
        if reason is not None:
            return reason
    except Exception:
        # Best effort: a missing package, missing key or API error all mean
        # "provider unavailable" here.
        pass

    # Fallback
    return _infer_from_messages(messages, files)


def _infer_from_messages(
    messages: str, files: list[str]
) -> ReasonNode | None:
    """Extract ReasonNode from commit messages without LLM."""
    # Use first line as goal
    first_line = messages.split('\n')[0].strip()
    if not first_line:
        return None

    # Detect decision type from conventional commits
    dtype = 'task'
    if first_line.startswith('feat'):
        dtype = 'business_goal'
    elif first_line.startswith('fix'):
        dtype = 'patch'
    elif first_line.startswith('refactor'):
        dtype = 'arch_decision'
    elif first_line.startswith('chore') or first_line.startswith('ci'):
        dtype = 'constraint'

    # Clean up conventional commit prefix
    goal = re.sub(r'^(feat|fix|refactor|chore|ci|docs|test)(\([^)]*\))?:\s*', '', first_line)

    return ReasonNode(
        id=_uuid(),
        goal=goal or first_line,
        decision_type=dtype,
        scope=files[:20],
        owner='git-history',
        source='inferred',
        status='fulfilled',
        created_at=_now()
    )


def _parse_reason_response(
    response: str, fallback_files: list[str]
) -> ReasonNode | None:
    """Parse LLM response into a ReasonNode."""
    try:
        start = response.find('{')
        end = response.rfind('}') + 1
        if start >= 0 and end > start:
            data = json.loads(response[start:end])
            return ReasonNode(
                id=_uuid(),
                goal=data.get('goal', ''),
                decision_type=data.get('decision_type', 'task'),
                scope=data.get('scope', fallback_files[:20]),
                owner='git-history',
                source='inferred',
                status='fulfilled',
                created_at=_now()
            )
    except (json.JSONDecodeError, KeyError):
        pass
    return None


================================================
FILE: scripts/icpg/contracts.py
================================================
"""Design by Contract layer for ReasonNodes.

Handles inference, evaluation, and formatting of preconditions,
postconditions, and invariants.
"""

from __future__ import annotations

import json
import os
import subprocess
from pathlib import Path

from .models import ReasonNode


def infer_contracts(
    reason: ReasonNode,
    code_context: str = '',
    project_dir: str = '.'
) -> dict[str, list[str]]:
    """Infer contracts from the stated purpose plus optional code context.

    Providers are tried in order and the first one that yields at least one
    predicate wins: Claude (only when ``ANTHROPIC_API_KEY`` is set), then
    OpenAI (only when ``OPENAI_API_KEY`` is set). If no provider is
    configured, or every configured one comes back empty, heuristic
    extraction is used.

    Returns:
        Dict with the keys 'preconditions', 'postconditions', 'invariants'.
    """
    # (environment variable that enables the provider, inference function)
    providers = (
        ('ANTHROPIC_API_KEY', _infer_via_claude),
        ('OPENAI_API_KEY', _infer_via_openai),
    )
    for env_var, infer in providers:
        if not os.environ.get(env_var):
            continue
        contracts = infer(reason, code_context)
        # The provider helpers return all-empty lists on failure (CLI not
        # installed, timeout, unparsable reply); keep trying in that case.
        if any(contracts.values()):
            return contracts

    # Fallback: heuristic extraction
    return _infer_heuristic(reason, project_dir)


def _infer_via_claude(
    reason: ReasonNode, code_context: str
) -> dict[str, list[str]]:
    """Ask the ``claude`` command-line tool for contracts.

    Returns the parsed reply, or empty contracts when the executable is
    missing, the call times out, or it exits non-zero.
    """
    prompt = _build_inference_prompt(reason, code_context)
    try:
        proc = subprocess.run(
            ['claude', '--print', '-p', prompt],
            capture_output=True, text=True, timeout=30
        )
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return _empty_contracts()

    if proc.returncode != 0:
        return _empty_contracts()
    return _parse_contract_response(proc.stdout)


def _infer_via_openai(
    reason: ReasonNode, code_context: str
) -> dict[str, list[str]]:
    """Ask the OpenAI chat completions API for contracts.

    Any exception (including a missing ``openai`` package) results in empty
    contracts.
    """
    try:
        import openai
        client = openai.OpenAI()
        user_turn = {
            'role': 'user',
            'content': _build_inference_prompt(reason, code_context),
        }
        completion = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[user_turn],
            temperature=0.2
        )
        # content may be None; treat that as an empty reply.
        reply = completion.choices[0].message.content or ''
        return _parse_contract_response(reply)
    except Exception:
        return _empty_contracts()


def _infer_heuristic(
    reason: ReasonNode, project_dir: str
) -> dict[str, list[str]]:
    """Basic heuristic contract extraction — no LLM needed."""
    pre = []
    post = []
    inv = []

    # Scope-based invariants
    for scope_path in reason.scope:
        inv.append(f'file_exists("{scope_path}")')

    # If goal mentions "test" or "validation"
    goal_lower = reason.goal.lower()
    if 'test' in goal_lower:
        for sp in reason.scope:
            if 'test' not in sp:
                test_path = _guess_test_path(sp)
                if test_path:
                    post.append(f'test_exists("{test_path}")')

    return {
        'preconditions': pre,
        'postconditions': post,
        'invariants': inv
    }


def _build_inference_prompt(
    reason: ReasonNode, code_context: str
) -> str:
    scope_str = ', '.join(reason.scope) if reason.scope else 'unspecified'
    return f"""Given this intent for a code change, infer formal contracts.

INTENT:
  Goal: {reason.goal}
  Decision type: {reason.decision_type}
  Scope: {scope_str}

{f'CODE CONTEXT:{chr(10)}{code_context[:2000]}' if code_context else ''}

Return ONLY a JSON object with three arrays:
{{
  "preconditions": ["predicate1", "predicate2"],
  "postconditions": ["predicate1", "predicate2"],
  "invariants": ["predicate1", "predicate2"]
}}

Predicate format examples:
  file_exists("src/auth/middleware.ts")
  test_exists("src/auth/__tests__/middleware.test.ts")
  symbol_count("src/auth/") <= 15
  function_signature("validateToken") == "(token: string) => Promise<User>"

Rules:
- Preconditions: what must exist before this change
- Postconditions: what must be true after this change is complete
- Invariants: what must NOT change during or after this change
- Be specific. Use file paths from the scope.
- 2-5 predicates per category max."""


def _parse_contract_response(response: str) -> dict[str, list[str]]:
    """Parse LLM response into contract dict."""
    # Try to extract JSON
    try:
        # Find JSON block
        start = response.find('{')
        end = response.rfind('}') + 1
        if start >= 0 and end > start:
            data = json.loads(response[start:end])
            return {
                'preconditions': data.get('preconditions', []),
                'postconditions': data.get('postconditions', []),
                'invariants': data.get('invariants', [])
            }
    except (json.JSONDecodeError, KeyError):
        pass
    return _empty_contracts()


def _empty_contracts() -> dict[str, list[str]]:
    return {'preconditions': [], 'postconditions': [], 'invariants': []}


def _guess_test_path(source_path: str) -> str | None:
    """Guess test file path from source path."""
    p = Path(source_path)
    stem = p.stem
    suffix = p.suffix

    # Python: test_foo.py
    if suffix == '.py':
        test_dir = p.parent / 'tests'
        return str(test_dir / f'test_{stem}.py')

    # TS/JS: foo.test.ts
    if suffix in ('.ts', '.tsx', '.js', '.jsx'):
        return str(p.parent / f'{stem}.test{suffix}')

    return None


def format_contracts(reason: ReasonNode) -> str:
    """Render a reason's contracts as indented, human-readable text.

    Non-empty categories appear in the order preconditions, postconditions,
    invariants, each as a heading followed by one ``  - predicate`` line per
    entry. Returns '(no contracts defined)' when all three are empty.
    """
    sections = (
        ('PRECONDITIONS:', reason.preconditions),
        ('POSTCONDITIONS:', reason.postconditions),
        ('INVARIANTS:', reason.invariants),
    )

    rendered = []
    for heading, predicates in sections:
        if predicates:
            rendered.append(heading)
            rendered.extend(f'  - {predicate}' for predicate in predicates)

    if not rendered:
        return '(no contracts defined)'
    return '\n'.join(rendered)


================================================
FILE: scripts/icpg/drift.py
================================================
"""6-dimension drift detection per RFC Section 6."""

from __future__ import annotations

import subprocess
from pathlib import Path

from .models import DriftEvent, Edge, _now, _uuid
from .store import ICPGStore
from .symbols import extract_symbols


def check_file_drift(store: ICPGStore, file_path: str) -> list[DriftEvent]:
    """Check only the symbols the store lists for ``file_path``.

    Returns the drift events found, in symbol order; symbols without drift
    contribute nothing.
    """
    outcomes = (
        check_symbol_drift(store, sym.id)
        for sym in store.get_symbols_for_file(file_path)
    )
    return [outcome for outcome in outcomes if outcome]


def check_all_drift(store: ICPGStore) -> list[DriftEvent]:
    """Full drift scan across all tracked symbols.

    Walks the ``CREATES`` edges of every reason that is neither rejected nor
    abandoned and checks each target symbol once, even when several reasons
    point at the same symbol.

    Returns:
        The drift events found, at most one per symbol.
    """
    events = []
    # check_symbol_drift() depends only on the symbol, so repeating it for
    # every reason that links to the symbol would yield duplicate events.
    seen_symbols = set()

    for reason in store.list_reasons():
        if reason.status in ('rejected', 'abandoned'):
            continue

        for edge in store.get_edges_from(reason.id, 'CREATES'):
            if edge.to_id in seen_symbols:
                continue
            seen_symbols.add(edge.to_id)

            sym = store._get_symbol(edge.to_id)
            if not sym:
                continue
            event = check_symbol_drift(store, sym.id)
            if event:
                events.append(event)

    return events


def check_symbol_drift(
    store: ICPGStore, symbol_id: str
) -> DriftEvent | None:
    """Run all six drift checks for one symbol.

    The symbol is judged against the reason behind its first ``CREATES``
    edge. Returns None when the symbol, that edge or that reason is missing,
    or when no check reports a (truthy) score. Otherwise returns a
    DriftEvent whose severity is the mean of the reported scores, rounded to
    two decimals.
    """
    sym = store._get_symbol(symbol_id)
    if not sym:
        return None

    # Find creating reason
    origin_edges = store.get_edges_to(symbol_id, 'CREATES')
    if not origin_edges:
        return None
    reason = store.get_reason(origin_edges[0].from_id)
    if not reason:
        return None

    # Dimension name -> deferred check, evaluated in this order.
    probes = (
        # 1. Spec drift: checksum changed without MODIFIES edge
        ('spec', lambda: _check_spec_drift(store, sym, reason)),
        # 2. Decision drift: postconditions no longer hold
        ('decision', lambda: _check_decision_drift(store, reason)),
        # 3. Ownership drift: >3 different owners
        ('ownership', lambda: _check_ownership_drift(store, sym)),
        # 4. Test drift: VALIDATED_BY tests missing
        ('test', lambda: _check_test_drift(store, reason)),
        # 5. Usage drift: used outside original scope
        ('usage', lambda: _check_usage_drift(store, sym, reason)),
        # 6. Dependency drift: REQUIRES-linked reasons drifted
        ('dependency', lambda: _check_dependency_drift(store, reason)),
    )

    hits = []
    for label, probe in probes:
        score = probe()
        if score:
            hits.append((label, score))

    if not hits:
        return None

    dimensions = [label for label, _ in hits]
    severity_scores = [score for _, score in hits]
    mean_severity = sum(severity_scores) / len(severity_scores)
    summary = ', '.join(f'{label}({score:.2f})' for label, score in hits)

    return DriftEvent(
        id=_uuid(),
        symbol_id=symbol_id,
        from_reason_id=reason.id,
        drift_dimensions=dimensions,
        severity=round(mean_severity, 2),
        description=f'Drift detected: {summary}',
        detected_at=_now()
    )


def _check_spec_drift(store, sym, reason) -> float | None:
    """Score spec drift for ``sym`` by re-extracting its file.

    Returns 0.8 when no symbol with the same name is found in the file any
    more, 0.6 when its checksum differs and no ``MODIFIES`` edge points at
    the symbol, and None otherwise. ``reason`` is not used.
    """
    same_name = [
        candidate for candidate in extract_symbols(sym.file_path)
        if candidate.name == sym.name
    ]
    live = same_name[0] if same_name else None
    if not live:
        return 0.8  # Symbol removed entirely

    if live.checksum == sym.checksum:
        return None

    # A MODIFIES edge counts as the explanation for the changed checksum.
    if store.get_edges_to(sym.id, 'MODIFIES'):
        return None
    return 0.6  # Changed without explanation


def _check_decision_drift(store, reason) -> float | None:
    """ReasonNode postconditions no longer hold."""
    if not reason.postconditions:
        return None

    failed = 0
    for predicate in reason.postconditions:
        if not evaluate_predicate(predicate, store.project_dir):
            failed += 1

    if failed > 0:
        return min(1.0, failed / len(reason.postconditions))
    return None


def _check_ownership_drift(store, sym) -> float | None:
    """Symbol touched by >3 different owners."""
    edges = store.get_edges_to(sym.id)
    owners = set()
    for edge in edges:
        reason = store.get_reason(edge.from_id)
        if reason:
            owners.add(reason.owner)

    if len(owners) > 3:
        return min(1.0, (len(owners) - 3) / 5)
    return None


def _check_test_drift(store, reason) -> float | None:
    """VALIDATED_BY tests no longer exist or fail."""
    test_edges = store.get_edges_from(reason.id, 'VALIDATED_BY')
    if not test_edges:
        # No tests linked — mild concern
        return 0.3

    missing = 0
    for edge in test_edges:
        test_sym = store._get_symbol(edge.to_id)
        if not test_sym or not Path(test_sym.file_path).exists():
            missing += 1

    if missing > 0:
        return min(1.0, missing / len(test_edges))
    return None


def _check_usage_drift(store, sym, reason) -> float | None:
    """Symbol imported from scopes outside original ReasonNode scope."""
    if not reason.scope:
        return None

    # Use grep to find imports/usages of the symbol
    try:
        result = subprocess.run(
            ['grep', '-rl', sym.name, '.'],
            capture_output=True, text=True, timeout=5,
            cwd=str(store.project_dir)
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None

    if result.returncode != 0:
        return None

    usage_files = [
        f.strip().lstrip('./') for f in result.stdout.strip().split('\n')
        if f.strip()
    ]

    out_of_scope = 0
    for uf in usage_files:
        if not any(uf.startswith(s.rstrip('/')) for s in reason.scope):
            out_of_scope += 1

    if out_of_scope > 2:
        return min(1.0, out_of_scope / 10)
    return None


def _check_dependency_drift(store, reason) -> float | None:
    """Downstream REQUIRES reasons have drifted or changed status."""
    req_edges = store.get_edges_to(reason.id, 'REQUIRES')
    if not req_edges:
        return None

    drifted = 0
    for edge in req_edges:
        dep_reason = store.get_reason(edge.from_id)
        if dep_reason and dep_reason.status == 'drifted':
            drifted += 1

    if drifted > 0:
        return min(1.0, drifted / len(req_edges))
    return None


def evaluate_predicate(predicate: str, project_dir: Path) -> bool:
    """Evaluate one structured predicate against the files in ``project_dir``.

    Evaluated forms:
        file_exists("path")        - the path exists under project_dir
        test_exists("path")        - same check as file_exists
        symbol_count("dir/") OP N  - OP is one of <=, >=, ==, <, >

    Every other predicate, including ``function_signature(...)``, has no
    evaluator here and is treated as passing.
    """
    import re

    text = predicate.strip()

    # file_exists("path") / test_exists("path"): plain existence checks.
    for existence_check in ('file_exists', 'test_exists'):
        target = _match_predicate(text, existence_check)
        if target:
            return (project_dir / target).exists()

    # symbol_count("dir/") <= N
    counted = re.match(
        r'symbol_count\("([^"]+)"\)\s*(<=|>=|==|<|>)\s*(\d+)', text
    )
    if counted:
        dir_path, op, raw_limit = counted.groups()
        total = _count_symbols_in_dir(project_dir / dir_path)
        return _compare(total, op, int(raw_limit))

    # Unrecognized predicate: pass (don't block on unknown)
    return True


def _match_predicate(predicate: str, func_name: str) -> str | None:
    import re
    m = re.match(rf'{func_name}\("([^"]+)"\)', predicate)
    return m.group(1) if m else None


def _count_symbols_in_dir(dir_path: Path) -> int:
    if not dir_path.is_dir():
        return 0
    count = 0
    for f in dir_path.rglob('*'):
        if f.is_file():
            count += len(extract_symbols(str(f)))
    return count


def _compare(value: int, op: str, threshold: int) -> bool:
    ops = {
        '<=': value <= threshold,
        '>=': value >= threshold,
        '==': value == threshold,
        '<': value < threshold,
        '>': value > threshold,
    }
    return ops.get(op, True)


================================================
FILE: scripts/icpg/models.py
================================================
"""Data models for iCPG — ReasonNode, Symbol, Edge, DriftEvent."""

from __future__ import annotations

import hashlib
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


def _uuid() -> str:
    return str(uuid.uuid4())


def symbol_id(file_path: str, name: str, symbol_type: str) -> str:
    """Stable symbol ID: first 16 hex chars of SHA-256 over file:name:type."""
    key = f'{file_path}:{name}:{symbol_type}'
    hasher = hashlib.sha256()
    hasher.update(key.encode())
    return hasher.hexdigest()[:16]


# The tuples below enumerate the string vocabularies used by the dataclasses
# in this module. The dataclasses themselves do not enforce membership.

# --- Decision types ---
# Values for ReasonNode.decision_type (which defaults to 'task').
DECISION_TYPES = (
    'business_goal', 'arch_decision', 'task',
    'workaround', 'constraint', 'patch'
)

# --- ReasonNode statuses ---
# Values for ReasonNode.status (which defaults to 'proposed').
REASON_STATUSES = (
    'proposed', 'executing', 'fulfilled',
    'rejected', 'drifted', 'abandoned'
)

# --- Source types ---
# Values for ReasonNode.source (which defaults to 'manual').
SOURCE_TYPES = (
    'manual', 'commit', 'migration',
    'inferred', 'agent-session'
)

# --- Edge types ---
# Values for Edge.edge_type.
EDGE_TYPES = (
    'CREATES', 'MODIFIES', 'REQUIRES',
    'DUPLICATES', 'VALIDATED_BY', 'DRIFTS_FROM'
)

# --- Drift dimensions ---
# Values for the entries of DriftEvent.drift_dimensions.
DRIFT_DIMENSIONS = (
    'spec', 'decision', 'ownership',
    'test', 'usage', 'dependency'
)

# --- Symbol types ---
# Values for Symbol.symbol_type; the type is also part of the hashed symbol ID.
SYMBOL_TYPES = (
    'function', 'class', 'module', 'route',
    'schema', 'component', 'interface', 'type',
    'constant', 'hook'
)


@dataclass
class ReasonNode:
    """A single intent/decision that drives code changes.

    Only ``goal`` and ``owner`` are required; every other field has a
    default. List-valued fields use ``default_factory`` so instances never
    share a mutable default.
    """

    goal: str
    owner: str
    # Random UUID4 string unless the caller supplies an id.
    id: str = field(default_factory=_uuid)
    # See DECISION_TYPES for the known values (not validated here).
    decision_type: str = 'task'
    scope: list[str] = field(default_factory=list)
    agent: str | None = None
    # See REASON_STATUSES for the known values (not validated here).
    status: str = 'proposed'
    # See SOURCE_TYPES for the known values (not validated here).
    source: str = 'manual'
    task_id: str | None = None
    # ID of another ReasonNode, if any.
    parent_id: str | None = None
    # Design by Contract layer
    preconditions: list[str] = field(default_factory=list)
    postconditions: list[str] = field(default_factory=list)
    invariants: list[str] = field(default_factory=list)
    # UTC ISO-8601 timestamp taken when the instance is constructed.
    created_at: str = field(default_factory=_now)
    fulfilled_at: str | None = None


@dataclass
class Symbol:
    """A code entity: function, class, module, etc.

    When ``id`` is left empty it is derived deterministically from
    ``file_path``, ``name`` and ``symbol_type`` after construction.
    """

    name: str
    file_path: str
    # See SYMBOL_TYPES for the known values (not validated here).
    symbol_type: str
    language: str
    # Empty string means "derive the id in __post_init__".
    id: str = ''
    signature: str | None = None
    checksum: str = ''
    # UTC ISO-8601 timestamp taken when the instance is constructed.
    created_at: str = field(default_factory=_now)

    def __post_init__(self):
        """Fill in the deterministic ID when the caller did not pass one."""
        if not self.id:
            self.id = symbol_id(self.file_path, self.name, self.symbol_type)


@dataclass
class Edge:
    """A typed relationship between nodes.

    ``from_id`` and ``to_id`` are plain ID strings; this class does not
    check which kind of node they refer to.
    """

    from_id: str
    to_id: str
    # See EDGE_TYPES for the known values (not validated here).
    edge_type: str
    # Random UUID4 string unless the caller supplies an id.
    id: str = field(default_factory=_uuid)
    confidence: float = 1.0
    # UTC ISO-8601 timestamp taken when the instance is constructed.
    created_at: str = field(default_factory=_now)


@dataclass
class DriftEvent:
    """Auto-generated when behavior diverges from intent.

    Links a symbol (``symbol_id``) to the reason it drifted from
    (``from_reason_id``) together with a free-text description.
    """

    symbol_id: str
    from_reason_id: str
    description: str
    # Random UUID4 string unless the caller supplies an id.
    id: str = field(default_factory=_uuid)
    # See DRIFT_DIMENSIONS for the known values (not validated here).
    drift_dimensions: list[str] = field(default_factory=list)
    severity: float = 0.5
    resolved: bool = False
    # UTC ISO-8601 timestamp taken when the instance is constructed.
    detected_at: str = field(default_factory=_now)


================================================
FILE: scripts/icpg/pyproject.toml
================================================
[project]
name = "icpg"
version = "0.1.0"
description = "iCPG — Intent-Augmented Code Property Graph for agentic development"
requires-python = ">=3.10"
license = {text = "MIT"}
readme = "README.md"

dependencies = []

[project.optional-dependencies]
vectors = [
    "chromadb>=0.4.0",
    "sentence-transformers>=2.2.0",
]
tfidf = [
    "scikit-learn>=1.3.0",
]
llm = [
    "openai>=1.0.0",
]
all = [
    "icpg[vectors,tfidf,llm]",
]

[project.scripts]
icpg = "icpg.__main__:main"

[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"


================================================
FILE: scripts/icpg/store.py
================================================
"""SQLite storage layer for iCPG reason graph."""

from __future__ import annotations

import json
import os
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator

from .models import DriftEvent, Edge, ReasonNode, Symbol

# Directory created under the project root to hold iCPG state.
ICPG_DIR = '.icpg'
# File name of the SQLite database inside ICPG_DIR.
DB_NAME = 'reason.db'

# DDL executed by ICPGStore.init_db(). Every statement uses IF NOT EXISTS, so
# running it against an existing database is harmless.
# List-valued fields (scope, preconditions, postconditions, invariants,
# drift_dimensions) are TEXT columns holding JSON arrays; drift_events.resolved
# is an INTEGER used as a 0/1 flag.
SCHEMA = """
CREATE TABLE IF NOT EXISTS reasons (
    id TEXT PRIMARY KEY,
    goal TEXT NOT NULL,
    decision_type TEXT DEFAULT 'task',
    scope TEXT DEFAULT '[]',
    owner TEXT NOT NULL,
    agent TEXT,
    status TEXT DEFAULT 'proposed',
    source TEXT DEFAULT 'manual',
    task_id TEXT,
    parent_id TEXT REFERENCES reasons(id),
    preconditions TEXT DEFAULT '[]',
    postconditions TEXT DEFAULT '[]',
    invariants TEXT DEFAULT '[]',
    created_at TEXT NOT NULL,
    fulfilled_at TEXT
);

CREATE TABLE IF NOT EXISTS symbols (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    symbol_type TEXT NOT NULL,
    language TEXT NOT NULL,
    signature TEXT,
    checksum TEXT,
    created_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS edges (
    id TEXT PRIMARY KEY,
    from_id TEXT NOT NULL,
    to_id TEXT NOT NULL,
    edge_type TEXT NOT NULL,
    confidence REAL DEFAULT 1.0,
    created_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS drift_events (
    id TEXT PRIMARY KEY,
    symbol_id TEXT NOT NULL,
    from_reason_id TEXT NOT NULL,
    drift_dimensions TEXT DEFAULT '[]',
    severity REAL DEFAULT 0.5,
    description TEXT,
    resolved INTEGER DEFAULT 0,
    detected_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_id);
CREATE INDEX IF NOT EXISTS idx_edges_to ON edges(to_id);
CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(edge_type);
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_path);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_drift_symbol ON drift_events(symbol_id);
CREATE INDEX IF NOT EXISTS idx_drift_resolved ON drift_events(resolved);
CREATE INDEX IF NOT EXISTS idx_reasons_status ON reasons(status);
"""


class ICPGStore:
    """SQLite-backed storage for the iCPG reason graph.

    The database lives at ``<project_dir>/.icpg/reason.db``. Every public
    method opens its own short-lived connection through ``_session()``,
    which commits on success, rolls back on error and always closes the
    connection afterwards.
    """

    def __init__(self, project_dir: str = '.'):
        """Resolve paths only; no file or database is touched here."""
        self.project_dir = Path(project_dir).resolve()
        self.icpg_dir = self.project_dir / ICPG_DIR
        self.db_path = self.icpg_dir / DB_NAME

    def init_db(self) -> None:
        """Create .icpg/ directory and initialize schema."""
        self.icpg_dir.mkdir(parents=True, exist_ok=True)
        gitignore = self.icpg_dir / '.gitignore'
        if not gitignore.exists():
            # Ignore everything inside .icpg/ so the database is not committed.
            gitignore.write_text('*\n')
        with self._session() as conn:
            conn.executescript(SCHEMA)

    def exists(self) -> bool:
        """True when the database file is present on disk."""
        return self.db_path.exists()

    def _conn(self) -> sqlite3.Connection:
        """Open a new connection with Row access, WAL and foreign keys on.

        The caller owns the returned connection and must close it. Internal
        code should prefer ``_session()``.
        """
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row
        conn.execute('PRAGMA journal_mode=WAL')
        conn.execute('PRAGMA foreign_keys=ON')
        return conn

    @contextmanager
    def _session(self) -> Iterator[sqlite3.Connection]:
        """Yield a connection that is committed/rolled back and then closed.

        ``with connection:`` on a sqlite3 connection only ends the
        transaction; it does not close the connection. Closing explicitly
        here keeps each call from leaking an open database handle.
        """
        conn = self._conn()
        try:
            with conn:
                yield conn
        finally:
            conn.close()

    # --- ReasonNode CRUD ---

    def create_reason(self, node: ReasonNode) -> str:
        """Insert a ReasonNode (list fields stored as JSON) and return its id."""
        with self._session() as conn:
            conn.execute(
                """INSERT INTO reasons
                   (id, goal, decision_type, scope, owner, agent, status,
                    source, task_id, parent_id, preconditions, postconditions,
                    invariants, created_at, fulfilled_at)
                   VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                (
                    node.id, node.goal, node.decision_type,
                    json.dumps(node.scope), node.owner, node.agent,
                    node.status, node.source, node.task_id, node.parent_id,
                    json.dumps(node.preconditions),
                    json.dumps(node.postconditions),
                    json.dumps(node.invariants),
                    node.created_at, node.fulfilled_at
                )
            )
        return node.id

    def get_reason(self, reason_id: str) -> ReasonNode | None:
        """Fetch one ReasonNode by id, or None when it does not exist."""
        with self._session() as conn:
            row = conn.execute(
                'SELECT * FROM reasons WHERE id = ?', (reason_id,)
            ).fetchone()
        if not row:
            return None
        return self._row_to_reason(row)

    def list_reasons(self, status: str | None = None) -> list[ReasonNode]:
        """All ReasonNodes ordered by created_at, optionally filtered by status."""
        with self._session() as conn:
            if status:
                rows = conn.execute(
                    'SELECT * FROM reasons WHERE status = ? ORDER BY created_at',
                    (status,)
                ).fetchall()
            else:
                rows = conn.execute(
                    'SELECT * FROM reasons ORDER BY created_at'
                ).fetchall()
        return [self._row_to_reason(r) for r in rows]

    def update_reason_status(
        self, reason_id: str, status: str,
        fulfilled_at: str | None = None
    ) -> None:
        """Set status and fulfilled_at for one reason.

        ``fulfilled_at`` is written unconditionally, so omitting it resets
        the stored value to NULL.
        """
        with self._session() as conn:
            conn.execute(
                'UPDATE reasons SET status = ?, fulfilled_at = ? WHERE id = ?',
                (status, fulfilled_at, reason_id)
            )

    # --- Symbol CRUD ---

    def upsert_symbol(self, sym: Symbol) -> str:
        """Insert a symbol, or refresh signature/checksum if its id exists.

        On conflict only ``signature`` and ``checksum`` are updated; the
        other stored columns (including ``created_at``) are left as they were.
        """
        with self._session() as conn:
            conn.execute(
                """INSERT INTO symbols
                   (id, name, file_path, symbol_type, language, signature,
                    checksum, created_at)
                   VALUES (?,?,?,?,?,?,?,?)
                   ON CONFLICT(id) DO UPDATE SET
                    signature=excluded.signature,
                    checksum=excluded.checksum""",
                (
                    sym.id, sym.name, sym.file_path, sym.symbol_type,
                    sym.language, sym.signature, sym.checksum, sym.created_at
                )
            )
        return sym.id

    def get_symbols_for_file(self, file_path: str) -> list[Symbol]:
        """All stored symbols whose file_path equals the given string."""
        with self._session() as conn:
            rows = conn.execute(
                'SELECT * FROM symbols WHERE file_path = ?', (file_path,)
            ).fetchall()
        return [self._row_to_symbol(r) for r in rows]

    def get_symbol_by_name(self, name: str) -> list[Symbol]:
        """All stored symbols with the given name (there may be several)."""
        with self._session() as conn:
            rows = conn.execute(
                'SELECT * FROM symbols WHERE name = ?', (name,)
            ).fetchall()
        return [self._row_to_symbol(r) for r in rows]

    # --- Edge CRUD ---

    def create_edge(self, edge: Edge) -> str:
        """Insert an edge and return its id.

        Uses INSERT OR IGNORE, so an edge whose id already exists is
        silently left untouched.
        """
        with self._session() as conn:
            conn.execute(
                """INSERT OR IGNORE INTO edges
                   (id, from_id, to_id, edge_type, confidence, created_at)
                   VALUES (?,?,?,?,?,?)""",
                (
                    edge.id, edge.from_id, edge.to_id,
                    edge.edge_type, edge.confidence, edge.created_at
                )
            )
        return edge.id

    def get_edges_from(
        self, node_id: str, edge_type: str | None = None
    ) -> list[Edge]:
        """Edges leaving node_id, optionally restricted to one edge type."""
        with self._session() as conn:
            if edge_type:
                rows = conn.execute(
                    'SELECT * FROM edges WHERE from_id = ? AND edge_type = ?',
                    (node_id, edge_type)
                ).fetchall()
            else:
                rows = conn.execute(
                    'SELECT * FROM edges WHERE from_id = ?', (node_id,)
                ).fetchall()
        return [self._row_to_edge(r) for r in rows]

    def get_edges_to(
        self, node_id: str, edge_type: str | None = None
    ) -> list[Edge]:
        """Edges arriving at node_id, optionally restricted to one edge type."""
        with self._session() as conn:
            if edge_type:
                rows = conn.execute(
                    'SELECT * FROM edges WHERE to_id = ? AND edge_type = ?',
                    (node_id, edge_type)
                ).fetchall()
            else:
                rows = conn.execute(
                    'SELECT * FROM edges WHERE to_id = ?', (node_id,)
                ).fetchall()
        return [self._row_to_edge(r) for r in rows]

    # --- DriftEvent CRUD ---

    def create_drift_event(self, event: DriftEvent) -> str:
        """Insert a DriftEvent and return its id."""
        with self._session() as conn:
            conn.execute(
                """INSERT INTO drift_events
                   (id, symbol_id, from_reason_id, drift_dimensions,
                    severity, description, resolved, detected_at)
                   VALUES (?,?,?,?,?,?,?,?)""",
                (
                    event.id, event.symbol_id, event.from_reason_id,
                    json.dumps(event.drift_dimensions), event.severity,
                    event.description, int(event.resolved), event.detected_at
                )
            )
        return event.id

    def get_unresolved_drift(self) -> list[DriftEvent]:
        """Unresolved drift events, highest severity first."""
        with self._session() as conn:
            rows = conn.execute(
                'SELECT * FROM drift_events WHERE resolved = 0 '
                'ORDER BY severity DESC'
            ).fetchall()
        return [self._row_to_drift(r) for r in rows]

    def resolve_drift(self, event_id: str) -> None:
        """Mark one drift event as resolved (no error if the id is unknown)."""
        with self._session() as conn:
            conn.execute(
                'UPDATE drift_events SET resolved = 1 WHERE id = ?',
                (event_id,)
            )

    # --- Composite queries ---

    def get_reasons_for_file(self, file_path: str) -> list[ReasonNode]:
        """All ReasonNodes linked to symbols in a file via CREATES/MODIFIES."""
        with self._session() as conn:
            rows = conn.execute(
                """SELECT DISTINCT r.* FROM reasons r
                   JOIN edges e ON e.from_id = r.id
                   JOIN symbols s ON e.to_id = s.id
                   WHERE s.file_path = ?
                   AND e.edge_type IN ('CREATES', 'MODIFIES')""",
                (file_path,)
            ).fetchall()
        return [self._row_to_reason(r) for r in rows]

    def get_constraints_for_scope(
        self, file_paths: list[str]
    ) -> list[dict[str, Any]]:
        """Get all invariants and contracts for files in scope.

        Produces one entry per (file, reason) pair whose reason carries at
        least one precondition, postcondition or invariant.
        """
        results = []
        for fp in file_paths:
            for r in self.get_reasons_for_file(fp):
                if r.invariants or r.postconditions or r.preconditions:
                    results.append({
                        'reason_id': r.id,
                        'goal': r.goal,
                        'file': fp,
                        'preconditions': r.preconditions,
                        'postconditions': r.postconditions,
                        'invariants': r.invariants
                    })
        return results

    def get_blast_radius(self, reason_id: str) -> dict[str, Any]:
        """Symbols + downstream REQUIRES reasons for a ReasonNode.

        Symbols are listed CREATES first, then MODIFIES; edges whose target
        symbol or source reason no longer exists are skipped.
        """
        symbols = []
        for edge_type in ('CREATES', 'MODIFIES'):
            for edge in self.get_edges_from(reason_id, edge_type):
                sym = self._get_symbol(edge.to_id)
                if sym:
                    symbols.append(sym)

        dependent_reasons = []
        for edge in self.get_edges_to(reason_id, 'REQUIRES'):
            reason = self.get_reason(edge.from_id)
            if reason:
                dependent_reasons.append(reason)

        return {
            'reason': self.get_reason(reason_id),
            'symbols': symbols,
            'dependent_reasons': dependent_reasons,
            'symbol_count': len(symbols),
            'dependent_count': len(dependent_reasons)
        }

    def get_risk_profile(self, symbol_name: str) -> dict[str, Any]:
        """Drift score, ownership history, and status for a symbol.

        When several symbols share the name, only the first one returned by
        the name lookup is profiled. Unknown names yield
        ``{'found': False, 'symbol': symbol_name}``.
        """
        symbols = self.get_symbol_by_name(symbol_name)
        if not symbols:
            return {'found': False, 'symbol': symbol_name}

        sym = symbols[0]
        creating_edges = self.get_edges_to(sym.id, 'CREATES')
        modifying_edges = self.get_edges_to(sym.id, 'MODIFIES')

        owners = set()
        for edge in creating_edges + modifying_edges:
            reason = self.get_reason(edge.from_id)
            if reason:
                owners.add(reason.owner)

        with self._session() as conn:
            drift_rows = conn.execute(
                'SELECT * FROM drift_events WHERE symbol_id = ? '
                'ORDER BY detected_at DESC',
                (sym.id,)
            ).fetchall()
        # Convert once and reuse for both the list and the active flag.
        drift_events = [self._row_to_drift(r) for r in drift_rows]

        return {
            'found': True,
            'symbol': sym,
            # Sorted so the output is stable across runs.
            'owners': sorted(owners),
            'modify_count': len(modifying_edges),
            'drift_events': drift_events,
            'active_drift': any(not e.resolved for e in drift_events)
        }

    def get_stats(self) -> dict[str, int]:
        """Row counts for reasons, symbols, edges and unresolved drift events."""
        with self._session() as conn:
            reasons = conn.execute('SELECT COUNT(*) FROM reasons').fetchone()[0]
            symbols = conn.execute('SELECT COUNT(*) FROM symbols').fetchone()[0]
            edges = conn.execute('SELECT COUNT(*) FROM edges').fetchone()[0]
            drift = conn.execute(
                'SELECT COUNT(*) FROM drift_events WHERE resolved = 0'
            ).fetchone()[0]
        return {
            'reasons': reasons,
            'symbols': symbols,
            'edges': edges,
            'unresolved_drift': drift
        }

    # --- Helpers ---

    def _get_symbol(self, symbol_id: str) -> Symbol | None:
        """Fetch one symbol by id, or None when it does not exist."""
        with self._session() as conn:
            row = conn.execute(
                'SELECT * FROM symbols WHERE id = ?', (symbol_id,)
            ).fetchone()
        return self._row_to_symbol(row) if row else None

    @staticmethod
    def _row_to_reason(row: sqlite3.Row) -> ReasonNode:
        """Build a ReasonNode from a reasons row, decoding the JSON columns."""
        return ReasonNode(
            id=row['id'],
            goal=row['goal'],
            decision_type=row['decision_type'],
            scope=json.loads(row['scope']),
            owner=row['owner'],
            agent=row['agent'],
            status=row['status'],
            source=row['source'],
            task_id=row['task_id'],
            parent_id=row['parent_id'],
            preconditions=json.loads(row['preconditions']),
            postconditions=json.loads(row['postconditions']),
            invariants=json.loads(row['invariants']),
            created_at=row['created_at'],
            fulfilled_at=row['fulfilled_at']
        )

    @staticmethod
    def _row_to_symbol(row: sqlite3.Row) -> Symbol:
        """Build a Symbol from a symbols row."""
        return Symbol(
            id=row['id'],
            name=row['name'],
            file_path=row['file_path'],
            symbol_type=row['symbol_type'],
            language=row['language'],
            signature=row['signature'],
            checksum=row['checksum'],
            created_at=row['created_at']
        )

    @staticmethod
    def _row_to_edge(row: sqlite3.Row) -> Edge:
        """Build an Edge from an edges row."""
        return Edge(
            id=row['id'],
            from_id=row['from_id'],
            to_id=row['to_id'],
            edge_type=row['edge_type'],
            confidence=row['confidence'],
            created_at=row['created_at']
        )

    @staticmethod
    def _row_to_drift(row: sqlite3.Row) -> DriftEvent:
        """Build a DriftEvent from a drift_events row (resolved -> bool)."""
        return DriftEvent(
            id=row['id'],
            symbol_id=row['symbol_id'],
            from_reason_id=row['from_reason_id'],
            drift_dimensions=json.loads(row['drift_dimensions']),
            severity=row['severity'],
            description=row['description'],
            resolved=bool(row['resolved']),
            detected_at=row['detected_at']
        )


================================================
FILE: scripts/icpg/symbols.py
================================================
"""Language-aware symbol extraction from source files."""

from __future__ import annotations

import ast
import hashlib
import re
from pathlib import Path

from .models import Symbol

# --- Language detection ---

# Lower-case file extension (including the dot) -> language name.
# Being listed here only means the language is recognized; whether symbols
# can be extracted depends on a matching entry in EXTRACTORS.
LANG_MAP = {
    '.py': 'python',
    '.ts': 'typescript', '.tsx': 'typescript',
    '.js': 'javascript', '.jsx': 'javascript',
    '.go': 'go',
    '.rs': 'rust',
    '.java': 'java',
    '.rb': 'ruby',
    '.php': 'php',
    '.swift': 'swift',
    '.kt': 'kotlin',
    '.c': 'c', '.h': 'c',
    '.cpp': 'cpp', '.hpp': 'cpp',
    '.cs': 'csharp',
    '.scala': 'scala',
    '.lua': 'lua',
    '.vue': 'vue',
    '.svelte': 'svelte',
    '.ex': 'elixir', '.exs': 'elixir'
}


def detect_language(file_path: str) -> str | None:
    """Map a path's extension (case-insensitive) to a language, or None."""
    suffix = Path(file_path).suffix
    return LANG_MAP.get(suffix.lower())


def checksum_content(content: str) -> str:
    """First 16 hex characters of the SHA-256 of content, for drift detection."""
    hasher = hashlib.sha256()
    hasher.update(content.encode())
    return hasher.hexdigest()[:16]


# --- Python extraction (AST-based) ---

def _extract_python(file_path: str, source: str) -> list[Symbol]:
    symbols = []
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return symbols

    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef):
            body = ast.get_source_segment(source, node) or ''
            symbols.append(Symbol(
                name=node.name,
                file_path=file_path,
                symbol_type='class',
                language='python',
                signature=_python_class_sig(node),
                checksum=checksum_content(body)
            ))

        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            body = ast.get_source_segment(source, node) or ''
            sig = _python_func_sig(node)
            stype = 'function'
            if any(
                isinstance(d, ast.Name) and d.id == 'staticmethod'
                for d in node.decorator_list
            ):
                stype = 'function'
            symbols.append(Symbol(
                name=node.name,
                file_path=file_path,
                symbol_type=stype,
                language='python',
                signature=sig,
                checksum=checksum_content(body)
            ))

    return symbols


def _python_func_sig(node: ast.FunctionDef) -> str:
    args = []
    for a in node.args.args:
        ann = ''
        if a.annotation:
            ann = f': {ast.dump(a.annotation)}'
        args.append(f'{a.arg}{ann}')
    ret = ''
    if node.returns:
        ret = f' -> {ast.dump(node.returns)}'
    prefix = 'async def' if isinstance(node, ast.AsyncFunctionDef) else 'def'
    return f'{prefix} {node.name}({", ".join(args)}){ret}'


def _python_class_sig(node: ast.ClassDef) -> str:
    bases = [ast.dump(b) for b in node.bases]
    if bases:
        return f'class {node.name}({", ".join(bases)})'
    return f'class {node.name}'


# --- TypeScript/JavaScript extraction (regex) ---

_TS_PATTERNS = [
    # export function name(...)
    (r'export\s+(?:async\s+)?function\s+(\w+)\s*\([^)]*\)',
     'function'),
    # export class Name
    (r'export\s+(?:abstract\s+)?class\s+(\w+)',
     'class'),
    # export const Name = ...
    (r'export\s+const\s+(\w+)\s*[=:]',
     'constant'),
    # export interface Name
    (r'export\s+interface\s+(\w+)',
     'interface'),
    # export type Name
    (r'export\s+type\s+(\w+)',
     'type'),
    # React components: export const Name = (...) =>
    (r'export\s+const\s+((?:[A-Z]\w+))\s*=\s*(?:\([^)]*\)|[^=])\s*=>',
     'component'),
    # Hooks: export function use*
    (r'export\s+(?:async\s+)?function\s+(use\w+)',
     'hook'),
]


def _extract_typescript(file_path: str, source: str) -> list[Symbol]:
    """Regex-based extraction of exported TypeScript/JavaScript symbols.

    Patterns from _TS_PATTERNS are tried in list order and each name is
    recorded once, under the first pattern that captures it. The signature
    is the stripped source line(s) spanned by the match, truncated to 200
    characters; the checksum is computed from the untruncated signature.
    """
    if file_path.endswith(('.ts', '.tsx')):
        language = 'typescript'
    else:
        language = 'javascript'

    found: list[Symbol] = []
    names_taken = set()

    for regex, kind in _TS_PATTERNS:
        for hit in re.finditer(regex, source):
            ident = hit.group(1)
            if ident not in names_taken:
                names_taken.add(ident)
                # Expand the match to whole lines for the signature.
                begin = source.rfind('\n', 0, hit.start()) + 1
                finish = source.find('\n', hit.end())
                line_text = source[begin:] if finish == -1 else source[begin:finish]
                signature = line_text.strip()
                found.append(Symbol(
                    name=ident,
                    file_path=file_path,
                    symbol_type=kind,
                    language=language,
                    signature=signature[:200],
                    checksum=checksum_content(signature)
                ))

    return found


# --- Go extraction (regex) ---

# (regex, symbol_type) pairs; group 1 of each regex captures the symbol name.
_GO_PATTERNS = [
    # func Name( ... — an optional "(recv Type)" / "(recv *Type)" receiver
    # may precede the name, so methods are captured too.
    (r'func\s+(?:\(\w+\s+\*?\w+\)\s+)?(\w+)\s*\(', 'function'),
    # type Name struct { — recorded with the 'class' symbol type.
    (r'type\s+(\w+)\s+struct\s*\{', 'class'),
    # type Name interface {
    (r'type\s+(\w+)\s+interface\s*\{', 'interface'),
]


def _extract_go(file_path: str, source: str) -> list[Symbol]:
    """Regex-based extraction of Go functions, structs and interfaces.

    Patterns from _GO_PATTERNS are tried in list order and each name is
    recorded once, under the first pattern that captures it. The signature
    is the stripped source line(s) spanned by the match, truncated to 200
    characters; the checksum is computed from the untruncated signature.
    """
    found: list[Symbol] = []
    names_taken = set()
    for regex, kind in _GO_PATTERNS:
        for hit in re.finditer(regex, source):
            ident = hit.group(1)
            if ident not in names_taken:
                names_taken.add(ident)
                # Expand the match to whole lines for the signature.
                begin = source.rfind('\n', 0, hit.start()) + 1
                finish = source.find('\n', hit.end())
                line_text = source[begin:] if finish == -1 else source[begin:finish]
                signature = line_text.strip()
                found.append(Symbol(
                    name=ident,
                    file_path=file_path,
                    symbol_type=kind,
                    language='go',
                    signature=signature[:200],
                    checksum=checksum_content(signature)
                ))
    return found


# --- Rust extraction (regex) ---

# (regex, symbol_type) pairs; group 1 of each regex captures the symbol name.
# Both struct and impl map to 'class'. Because _extract_rust records each
# name only once, an `impl Foo` that follows `struct Foo` adds nothing new.
_RUST_PATTERNS = [
    (r'(?:pub\s+)?(?:async\s+)?fn\s+(\w+)', 'function'),
    (r'(?:pub\s+)?struct\s+(\w+)', 'class'),
    (r'(?:pub\s+)?enum\s+(\w+)', 'type'),
    (r'(?:pub\s+)?trait\s+(\w+)', 'interface'),
    # Captures the first word after `impl`; for `impl Trait for Type` that
    # word is the trait name.
    (r'impl\s+(\w+)', 'class'),
]


def _extract_rust(file_path: str, source: str) -> list[Symbol]:
    """Regex-based extraction of Rust fns, structs, enums, traits and impls.

    Patterns from _RUST_PATTERNS are tried in list order and each name is
    recorded once, under the first pattern that captures it. The signature
    is the stripped source line(s) spanned by the match, truncated to 200
    characters; the checksum is computed from the untruncated signature.
    """
    found: list[Symbol] = []
    names_taken = set()
    for regex, kind in _RUST_PATTERNS:
        for hit in re.finditer(regex, source):
            ident = hit.group(1)
            if ident not in names_taken:
                names_taken.add(ident)
                # Expand the match to whole lines for the signature.
                begin = source.rfind('\n', 0, hit.start()) + 1
                finish = source.find('\n', hit.end())
                line_text = source[begin:] if finish == -1 else source[begin:finish]
                signature = line_text.strip()
                found.append(Symbol(
                    name=ident,
                    file_path=file_path,
                    symbol_type=kind,
                    language='rust',
                    signature=signature[:200],
                    checksum=checksum_content(signature)
                ))
    return found


# --- Elixir extraction (regex) ---

_ELIXIR_PATTERNS = [
    (r'defmodule\s+([\w.]+)', 'module'),
    (r'def\s+(\w+)\s*\(', 'function'),
    (r'defp\s+(\w+)\s*\(', 'function'),
    (r'schema\s+"(\w+)"', 'schema'),
]


def _extract_elixir(file_path: str, source: str) -> list[Symbol]:
    """Regex-based extraction of Elixir modules, functions and schemas.

    Patterns from _ELIXIR_PATTERNS are tried in list order and each name is
    recorded once, under the first pattern that captures it. The signature
    is the stripped source line(s) spanned by the match, truncated to 200
    characters; the checksum is computed from the untruncated signature.
    """
    found: list[Symbol] = []
    names_taken = set()
    for regex, kind in _ELIXIR_PATTERNS:
        for hit in re.finditer(regex, source):
            ident = hit.group(1)
            if ident not in names_taken:
                names_taken.add(ident)
                # Expand the match to whole lines for the signature.
                begin = source.rfind('\n', 0, hit.start()) + 1
                finish = source.find('\n', hit.end())
                line_text = source[begin:] if finish == -1 else source[begin:finish]
                signature = line_text.strip()
                found.append(Symbol(
                    name=ident,
                    file_path=file_path,
                    symbol_type=kind,
                    language='elixir',
                    signature=signature[:200],
                    checksum=checksum_content(signature)
                ))
    return found


# --- Public API ---

# Language name (as produced by detect_language) -> extractor function.
# Each extractor takes (file_path, source) and returns a list of Symbols.
# JavaScript shares the TypeScript extractor. Languages that appear in
# LANG_MAP but not here yield no symbols.
EXTRACTORS = {
    'python': _extract_python,
    'typescript': _extract_typescript,
    'javascript': _extract_typescript,
    'go': _extract_go,
    'rust': _extract_rust,
    'elixir': _extract_elixir,
}


def extract_symbols(file_path: str) -> list[Symbol]:
    """Read a source file and return the symbols found in it.

    Returns an empty list when the extension is not a known language, the
    path does not exist, the file cannot be read as UTF-8, or no extractor
    is registered for the language.
    """
    language = detect_language(file_path)
    if not language:
        return []

    source_file = Path(file_path)
    if not source_file.exists():
        return []

    try:
        text = source_file.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError):
        return []

    handler = EXTRACTORS.get(language)
    return handler(str(file_path), text) if handler else []


def extract_symbols_from_files(file_paths: list[str]) -> list[Symbol]:
    """Concatenate the symbols of several files, preserving input order."""
    return [
        symbol
        for path in file_paths
        for symbol in extract_symbols(path)
    ]


================================================
FILE: scripts/icpg/vectors.py
================================================
"""Vector-based duplicate detection for search_prior_work query.

Tiered fallback:
  1. chromadb + sentence-transformers (best quality)
  2. TF-IDF cosine similarity via scikit-learn (no GPU needed)
  3. Exact substring matching (zero deps)
"""

from __future__ import annotations

import json
import os
from pathlib import Path

from .store import ICPGStore

# Directory (relative to the project root) where vector-search state is kept.
VECTORS_DIR = '.icpg'
# Project-relative path of the TF-IDF text cache. NOTE(review): the TF-IDF
# load/save helpers below build this path from a literal 'tfidf_cache.json'
# rather than from this constant — keep the two in sync.
TFIDF_CACHE = '.icpg/tfidf_cache.json'


class VectorStore:
    """Similarity index over ReasonNodes with a tiered backend.

    The backend ('chromadb', 'tfidf' or anything else, treated as exact
    matching) is chosen once at construction time by _detect_backend().
    """

    def __init__(self, project_dir: str = '.'):
        root = Path(project_dir).resolve()
        self.project_dir = root
        self.icpg_dir = root / VECTORS_DIR
        self._backend = _detect_backend()

    def add_reason(self, reason_id: str, goal: str, scope: list[str]) -> None:
        """Add (or re-add) a ReasonNode's goal and scope to the index."""
        text = f'{goal} | scope: {", ".join(scope)}'

        backend = self._backend
        if backend == 'chromadb':
            indexer = _chromadb_add
        elif backend == 'tfidf':
            indexer = _tfidf_add
        else:
            indexer = _exact_add
        indexer(self.icpg_dir, reason_id, text)

    def search_similar(
        self, goal_text: str, threshold: float = 0.75, top_k: int = 5
    ) -> list[tuple[str, float]]:
        """Look up ReasonNodes similar to goal_text as (id, score) pairs.

        ``top_k`` is forwarded to the chromadb and tfidf backends only; the
        exact backend receives just the text and the threshold.
        """
        backend = self._backend
        if backend not in ('chromadb', 'tfidf'):
            return _exact_search(self.icpg_dir, goal_text, threshold)

        searcher = _chromadb_search if backend == 'chromadb' else _tfidf_search
        return searcher(self.icpg_dir, goal_text, threshold, top_k)

    def remove_reason(self, reason_id: str) -> None:
        """Drop a ReasonNode from the index of the active backend."""
        backend = self._backend
        if backend == 'chromadb':
            remover = _chromadb_remove
        elif backend == 'tfidf':
            remover = _tfidf_remove
        else:
            remover = _exact_remove
        remover(self.icpg_dir, reason_id)


def _detect_backend() -> str:
    """Detect best available vector search backend."""
    try:
        import chromadb
        import sentence_transformers
        return 'chromadb'
    except ImportError:
        pass

    try:
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.metrics.pairwise import cosine_similarity
        return 'tfidf'
    except ImportError:
        pass

    return 'exact'


# --- ChromaDB backend ---

def _get_chroma_collection(icpg_dir: Path):
    """Open (creating if needed) the 'reasons' collection under icpg_dir/chroma.

    The collection is requested with cosine as its HNSW space.
    """
    import chromadb
    storage_path = str(icpg_dir / 'chroma')
    client = chromadb.PersistentClient(path=storage_path)
    collection = client.get_or_create_collection(
        name='reasons',
        metadata={'hnsw:space': 'cosine'}
    )
    return collection


def _chromadb_add(icpg_dir: Path, reason_id: str, text: str) -> None:
    """Upsert one document, keyed by reason_id, into the chroma collection."""
    collection = _get_chroma_collection(icpg_dir)
    collection.upsert(ids=[reason_id], documents=[text])


def _chromadb_search(
    icpg_dir: Path, query: str, threshold: float, top_k: int
) -> list[tuple[str, float]]:
    """Query the ChromaDB collection for reasons similar to ``query``.

    Args:
        icpg_dir: Directory holding the ``chroma`` store.
        query: Text to search for.
        threshold: Minimum score (after conversion, see below) to keep.
        top_k: Maximum number of results to request.

    Returns:
        ``[(reason_id, score), ...]`` in the order ChromaDB returned them,
        with scores rounded to three decimals.
    """
    col = _get_chroma_collection(icpg_dir)
    # Count once: the value is needed both for the empty check and to
    # cap n_results, and each call is a separate query on the store.
    total = col.count()
    if total == 0 or top_k <= 0:
        # Nothing stored, or nothing requested; a non-positive n_results
        # is not a meaningful query.
        return []
    results = col.query(
        query_texts=[query],
        n_results=min(top_k, total)
    )
    pairs = []
    if results['ids'] and results['distances']:
        for rid, dist in zip(results['ids'][0], results['distances'][0]):
            # chromadb cosine distance: 0 = identical, 2 = opposite
            score = 1.0 - (dist / 2.0)
            if score >= threshold:
                pairs.append((rid, round(score, 3)))
    return pairs


def _chromadb_remove(icpg_dir: Path, reason_id: str) -> None:
    """Best-effort delete of ``reason_id`` from the collection."""
    collection = _get_chroma_collection(icpg_dir)
    try:
        collection.delete(ids=[reason_id])
    except Exception:
        # Deliberately ignored: any error raised by the delete call is
        # swallowed so removal never fails the caller.
        return


# --- TF-IDF backend ---

def _tfidf_load(icpg_dir: Path) -> dict[str, str]:
    cache_path = icpg_dir / 'tfidf_cache.json'
    if cache_path.exists():
        return json.loads(cache_path.read_text())
    return {}


def _tfidf_save(icpg_dir: Path, data: dict[str, str]) -> None:
    cache_path = icpg_dir / 'tfidf_cache.json'
    cache_path.write_text(json.dumps(data))


def _tfidf_add(icpg_dir: Path, reason_id: str, text: str) -> None:
    """Store ``text`` under ``reason_id`` in the TF-IDF cache file."""
    entries = _tfidf_load(icpg_dir)
    entries.update({reason_id: text})
    _tfidf_save(icpg_dir, entries)


def _tfidf_search(
    icpg_dir: Path, query: str, threshold: float, top_k: int
) -> list[tuple[str, float]]:
    """Rank cached reason texts against ``query`` by TF-IDF cosine similarity.

    The vectorizer is refitted on every call over the cached texts plus
    the query, so scores depend on the current contents of the cache.

    Args:
        icpg_dir: Directory holding ``tfidf_cache.json``.
        query: Text to search for.
        threshold: Minimum cosine similarity to keep a match.
        top_k: Maximum number of matches to return.

    Returns:
        Up to ``top_k`` ``(reason_id, score)`` pairs, best first, with
        scores rounded to three decimals.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    data = _tfidf_load(icpg_dir)
    if not data:
        return []

    ids = list(data.keys())
    # Fit on the stored texts plus the query so both share one
    # vocabulary; the query ends up as the last row of the matrix.
    texts = list(data.values())
    texts.append(query)

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(texts)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when none
        # of the texts yields a token, e.g. only empty strings or
        # single-character words. Nothing can match in that case.
        return []

    query_vec = tfidf_matrix[-1]
    doc_vecs = tfidf_matrix[:-1]
    scores = cosine_similarity(query_vec, doc_vecs).flatten()

    pairs = [
        (ids[i], round(float(scores[i]), 3))
        for i in range(len(ids))
        if scores[i] >= threshold
    ]
    pairs.sort(key=lambda x: x[1], reverse=True)
    return pairs[:top_k]


def _tfidf_remove(icpg_dir: Path, reason_id: str) -> None:
    """Delete ``reason_id`` from the TF-IDF cache; unknown ids are a no-op."""
    entries = _tfidf_load(icpg_dir)
    if reason_id in entries:
        del entries[reason_id]
    # The cache file is rewritten even when nothing was removed.
    _tfidf_save(icpg_dir, entries)


# --- Exact match backend ---

def _exact_load(icpg_dir: Path) -> dict[str, str]:
    cache_path = icpg_dir / 'exact_cache.json'
    if cache_path.exists():
        return json.loads(cache_path.read_text())
    return {}


def _exact_save(icpg_dir: Path, data: dict[str, str]) -> None:
    cache_path = icpg_dir / 'exact_cache.json'
    cache_path.write_text(json.dumps(data))


def _exact_add(icpg_dir: Path, reason_id: str, text: str) -> None:
    """Store the lower-cased ``text`` under ``reason_id`` in the exact cache."""
    entries = _exact_load(icpg_dir)
    # Lower-casing happens at write time; _exact_search lower-cases
    # only the query and compares against the stored text as-is.
    entries.update({reason_id: text.lower()})
    _exact_save(icpg_dir, entries)


def _exact_search(
    icpg_dir: Path, query: str, threshold: float
) -> list[tuple[str, float]]:
    """Score cached texts against ``query`` by whitespace-word overlap.

    The score is ``|query words ∩ text words| / max(|query words|,
    |text words|)``. Entries with no words are skipped.

    Returns:
        Every ``(reason_id, score)`` pair with score >= ``threshold``,
        best first, scores rounded to three decimals. There is no
        limit on the number of results.
    """
    stored = _exact_load(icpg_dir)
    wanted = set(query.lower().split())
    if not wanted:
        return []

    def overlap_score(text: str):
        words = set(text.split())
        if not words:
            return None
        return len(wanted & words) / max(len(wanted), len(words))

    scored = ((rid, overlap_score(text)) for rid, text in stored.items())
    hits = [
        (rid, round(value, 3))
        for rid, value in scored
        if value is not None and value >= threshold
    ]
    return sorted(hits, key=lambda hit: hit[1], reverse=True)


def _exact_remove(icpg_dir: Path, reason_id: str) -> None:
    """Delete ``reason_id`` from the exact cache; unknown ids are a no-op."""
    entries = _exact_load(icpg_dir)
    if reason_id in entries:
        del entries[reason_id]
    # The cache file is rewritten even when nothing was removed.
    _exact_save(icpg_dir, entries)


================================================
FILE: scripts/install-graph-tools.sh
================================================
#!/bin/bash

# install-graph-tools.sh - Install code graph MCP servers
#
# Tier 1: codebase-memory-mcp (default, always installed)
#   - Single static binary, zero dependencies
#   - 64 languages, sub-ms queries, 14 MCP tools
#
# Tier 2: Joern CPG via CodeBadger (opt-in, --joern)
#   - Full CPG: AST + CFG + CDG + DDG + PDG
#   - Requires Docker + Python 3.10+
#
# Tier 3: CodeQL (opt-in, --codeql)
#   - Interprocedural taint analysis, security queries
#   - Requires CodeQL CLI

set -e

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Defaults
INSTALL_JOERN=false
INSTALL_CODEQL=false
INSTALL_DIR="$HOME/.local/bin"

# Parse arguments
# Consume flags one at a time; --help prints usage and exits 0, any
# unrecognised flag prints an error and exits 1.
while (( $# > 0 )); do
    case "$1" in
        --joern)
            INSTALL_JOERN=true
            ;;
        --codeql)
            INSTALL_CODEQL=true
            ;;
        --all)
            INSTALL_JOERN=true
            INSTALL_CODEQL=true
            ;;
        --help|-h)
            cat <<'EOF'
Usage: install-graph-tools.sh [OPTIONS]

Install code graph MCP servers for Maggy.

Options:
  (no flags)   Install Tier 1 only (codebase-memory-mcp)
  --joern      Also install Tier 2 (Joern CPG via CodeBadger)
  --codeql     Also install Tier 3 (CodeQL)
  --all        Install all tiers
  --help       Show this help

Tiers:
  1  codebase-memory-mcp  AST graph, 64 langs, sub-ms     (always)
  2  Joern/CodeBadger     Full CPG (AST+CFG+PDG), 12 langs (opt-in)
  3  CodeQL               Taint analysis, security, 10+ langs (opt-in)
EOF
            exit 0
            ;;
        *)
            echo -e "${RED}Unknown option: $1${NC}"
            echo "Run with --help for usage."
            exit 1
            ;;
    esac
    shift
done

echo ""
echo "════════════════════════════════════════════════════════════════"
echo "  Code Graph Tools Installer"
echo "════════════════════════════════════════════════════════════════"
echo ""

# Detect platform
# OS is the lower-cased kernel name; ARCH is normalised to the
# arm64/amd64 spelling, and any other machine name is left unchanged.
OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
ARCH="$(uname -m)"
if [[ "$ARCH" == "aarch64" || "$ARCH" == "arm64" ]]; then
    ARCH="arm64"
elif [[ "$ARCH" == "x86_64" || "$ARCH" == "amd64" ]]; then
    ARCH="amd64"
fi

echo -e "${BLUE}Platform: ${OS}-${ARCH}${NC}"
echo ""

# ─────────────────────────────────────────────────────────────────
# Tier 1: codebase-memory-mcp
# ─────────────────────────────────────────────────────────────────
echo "── Tier 1: codebase-memory-mcp ──────────────────────────────"
echo ""

mkdir -p "$INSTALL_DIR"

# Install codebase-memory-mcp from the latest GitHub release unless a
# binary of that name is already on PATH. Every failure inside the
# download/extract/install sequence is handled in a conditional, so the
# script (which runs under `set -e`) always reaches the temp-dir cleanup
# and carries on to the remaining tiers and the summary.
if command -v codebase-memory-mcp &> /dev/null; then
    echo -e "${GREEN}✓ codebase-memory-mcp already installed${NC}"
    codebase-memory-mcp --version 2>/dev/null || true
else
    DOWNLOAD_URL="https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/codebase-memory-mcp-${OS}-${ARCH}.tar.gz"
    TEMP_DIR=$(mktemp -d)

    echo "Downloading from GitHub releases..."
    echo "  URL: $DOWNLOAD_URL"

    if ! curl -fsSL "$DOWNLOAD_URL" -o "$TEMP_DIR/codebase-memory-mcp.tar.gz"; then
        echo -e "${RED}✗ Failed to download codebase-memory-mcp${NC}"
        echo ""
        echo "  Manual install:"
        echo "  1. Go to https://github.com/DeusData/codebase-memory-mcp/releases"
        echo "  2. Download codebase-memory-mcp-${OS}-${ARCH}.tar.gz"
        echo "  3. Extract and move to $INSTALL_DIR/"
        echo "  4. Run: codebase-memory-mcp install"
    elif tar xzf "$TEMP_DIR/codebase-memory-mcp.tar.gz" -C "$TEMP_DIR" \
        && mv "$TEMP_DIR/codebase-memory-mcp" "$INSTALL_DIR/codebase-memory-mcp" \
        && chmod +x "$INSTALL_DIR/codebase-memory-mcp"; then
        echo -e "${GREEN}✓ Installed codebase-memory-mcp to $INSTALL_DIR${NC}"

        # Auto-configure for Claude Code and other agents (best-effort)
        echo ""
        echo "Running auto-configuration..."
        "$INSTALL_DIR/codebase-memory-mcp" install 2>/dev/null || true
    else
        echo -e "${RED}✗ Failed to extract or install codebase-memory-mcp${NC}"
        echo "  The downloaded archive may be corrupt or may not contain"
        echo "  a 'codebase-memory-mcp' binary for ${OS}-${ARCH}."
    fi

    # ${TEMP_DIR:?} aborts instead of expanding to an empty path.
    rm -rf -- "${TEMP_DIR:?}"
fi

# Check PATH
# Warn when INSTALL_DIR is not one of the PATH entries. The entries are
# compared as whole, literal strings (PATH wrapped in ':' on both
# sides), so a longer directory such as "$INSTALL_DIR-old" or regex
# metacharacters in the path cannot produce a false match.
case ":${PATH}:" in
    *":${INSTALL_DIR}:"*)
        ;;
    *)
        echo ""
        echo -e "${YELLOW}⚠ $INSTALL_DIR is not in your PATH${NC}"
        echo "  Add to your shell profile:"
        echo "  export PATH=\"$INSTALL_DIR:\$PATH\""
        ;;
esac

# ─────────────────────────────────────────────────────────────────
# Tier 2: Joern CPG via CodeBadger (opt-in)
# ─────────────────────────────────────────────────────────────────
# Tier 2 is installed only with --joern/--all. It needs a running Docker
# daemon and a Python >= 3.10 interpreter; if either is missing the tier
# is skipped with a message and the script continues.
if [ "$INSTALL_JOERN" = true ]; then
    echo ""
    echo "── Tier 2: Joern CPG (CodeBadger) ───────────────────────────"
    echo ""

    # Check Docker
    if ! command -v docker &> /dev/null; then
        echo -e "${RED}✗ Docker not found${NC}"
        echo "  Joern requires Docker. Install from: https://docker.com"
        echo "  Skipping Tier 2 installation."
    elif ! docker info &> /dev/null; then
        echo -e "${RED}✗ Docker is not running${NC}"
        echo "  Start Docker Desktop and try again."
        echo "  Skipping Tier 2 installation."
    else
        echo -e "${GREEN}✓ Docker is running${NC}"

        # Check Python
        PYTHON_CMD=""
        if command -v python3 &> /dev/null; then
            PYTHON_CMD="python3"
        elif command -v python &> /dev/null; then
            PYTHON_CMD="python"
        fi

        # The version probes avoid f-strings so they also run on an old
        # interpreter instead of dying with a SyntaxError under `set -e`.
        PY_VERSION=""
        if [ -n "$PYTHON_CMD" ]; then
            PY_VERSION=$("$PYTHON_CMD" -c 'import sys; print("%d.%d" % sys.version_info[:2])' 2>/dev/null) || PY_VERSION="unknown"
        fi

        if [ -z "$PYTHON_CMD" ]; then
            echo -e "${RED}✗ Python 3.10+ not found${NC}"
            echo "  Install Python: https://python.org"
            echo "  Skipping Tier 2 installation."
        elif ! "$PYTHON_CMD" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)' 2>/dev/null; then
            echo -e "${RED}✗ Python 3.10+ required (found $PY_VERSION)${NC}"
            echo "  Install Python: https://python.org"
            echo "  Skipping Tier 2 installation."
        else
            echo -e "${GREEN}✓ Python $PY_VERSION found${NC}"

            CODEBADGER_DIR="$HOME/.claude/tools/codebadger"

            if [ -d "$CODEBADGER_DIR" ]; then
                echo -e "${GREEN}✓ CodeBadger already cloned${NC}"
                echo "  Pulling latest..."
                # Best-effort: a stale checkout is still usable.
                git -C "$CODEBADGER_DIR" pull 2>/dev/null || true
            else
                echo "Cloning CodeBadger..."
                mkdir -p "$HOME/.claude/tools"
                git clone https://github.com/lekssays/joern-mcp.git "$CODEBADGER_DIR" 2>/dev/null || {
                    echo -e "${RED}✗ Failed to clone CodeBadger${NC}"
                    echo "  Manual install: https://github.com/lekssays/joern-mcp"
                }
            fi

            if [ -d "$CODEBADGER_DIR" ]; then
                # Tracks whether every setup step succeeded, so the final
                # message does not claim success after a failed step.
                JOERN_SETUP_OK=true

                echo "Installing Python dependencies..."
                if ! "$PYTHON_CMD" -m pip install -r "$CODEBADGER_DIR/requirements.txt" --quiet 2>/dev/null; then
                    JOERN_SETUP_OK=false
                    echo -e "${YELLOW}⚠ pip install failed. You may need to install manually:${NC}"
                    echo "  $PYTHON_CMD -m pip install -r $CODEBADGER_DIR/requirements.txt"
                fi

                echo "Starting Joern Docker services..."
                (cd "$CODEBADGER_DIR" && docker compose up -d 2>/dev/null) || {
                    JOERN_SETUP_OK=false
                    echo -e "${YELLOW}⚠ Docker compose failed. You may need to start manually:${NC}"
                    echo "  cd $CODEBADGER_DIR && docker compose up -d"
                }

                if [ "$JOERN_SETUP_OK" = true ]; then
                    echo -e "${GREEN}✓ Joern/CodeBadger installed${NC}"
                else
                    echo -e "${YELLOW}⚠ Joern/CodeBadger cloned, but setup is incomplete (see warnings above)${NC}"
                fi
                echo ""
                echo "  To start the MCP server:"
                echo "  cd $CODEBADGER_DIR && $PYTHON_CMD main.py"
                echo ""
                echo "  MCP endpoint: http://localhost:4242/mcp"
            fi
        fi
    fi
fi

# ─────────────────────────────────────────────────────────────────
# Tier 3: CodeQL (opt-in)
# ─────────────────────────────────────────────────────────────────
# Tier 3 is installed only with --codeql/--all. The CLI is installed via
# Homebrew when available; there is no automatic download fallback, so
# otherwise manual instructions are printed. Query packs are fetched
# only when a `codeql` binary is on PATH.
if [ "$INSTALL_CODEQL" = true ]; then
    echo ""
    echo "── Tier 3: CodeQL ───────────────────────────────────────────"
    echo ""

    if command -v codeql &> /dev/null; then
        echo -e "${GREEN}✓ CodeQL already installed${NC}"
        codeql version 2>/dev/null || true
    else
        if command -v brew &> /dev/null; then
            echo "Installing CodeQL via Homebrew..."
            brew install codeql 2>/dev/null || {
                echo -e "${YELLOW}⚠ brew install codeql failed${NC}"
                echo "  Falling back to manual install instructions..."
            }
        fi

        # Fallback: manual instructions (nothing is downloaded here)
        if ! command -v codeql &> /dev/null; then
            echo "CodeQL CLI could not be installed automatically."
            echo ""
            echo "  Manual install from:"
            echo "  https://github.com/github/codeql-cli-binaries/releases"
            echo ""
            echo "  After download:"
            echo "  1. Extract to $INSTALL_DIR/codeql/"
            echo "  2. Add to PATH: export PATH=\"$INSTALL_DIR/codeql:\$PATH\""
        fi
    fi

    if command -v codeql &> /dev/null; then
        echo ""
        echo "Installing CodeQL query packs..."
        # Downloads stay best-effort, but failures are collected so the
        # result line reflects what actually happened.
        FAILED_PACKS=""
        for CODEQL_PACK in \
            codeql/javascript-queries \
            codeql/python-queries \
            codeql/java-queries \
            codeql/go-queries; do
            if ! codeql pack download "$CODEQL_PACK" 2>/dev/null; then
                FAILED_PACKS="$FAILED_PACKS $CODEQL_PACK"
            fi
        done
        if [ -z "$FAILED_PACKS" ]; then
            echo -e "${GREEN}✓ CodeQL query packs installed${NC}"
        else
            echo -e "${YELLOW}⚠ Some CodeQL query packs failed to download:${FAILED_PACKS}${NC}"
            echo "  Retry with: codeql pack download <pack>"
        fi
    fi
fi

# ─────────────────────────────────────────────────────────────────
# Summary
# ─────────────────────────────────────────────────────────────────
echo ""
echo "════════════════════════════════════════════════════════════════"
echo "  Installation Summary"
echo "════════════════════════════════════════════════════════════════"
echo ""

# Tier 1 counts as installed when the binary is on PATH or present in
# INSTALL_DIR; the second check covers a fresh install into a directory
# that is not (yet) on the user's PATH.
if command -v codebase-memory-mcp &> /dev/null \
    || [ -x "$INSTALL_DIR/codebase-memory-mcp" ]; then
    echo -e "  ${GREEN}✓ Tier 1: codebase-memory-mcp (AST graph, 64 langs)${NC}"
else
    echo -e "  ${RED}✗ Tier 1: codebase-memory-mcp NOT installed${NC}"
fi

# Tiers 2 and 3 are reported only when they were requested.
if [ "$INSTALL_JOERN" = true ]; then
    if [ -d "$HOME/.claude/tools/codebadger" ]; then
        echo -e "  ${GREEN}✓ Tier 2: Joern CPG via CodeBadger${NC}"
    else
        echo -e "  ${RED}✗ Tier 2: Joern NOT installed${NC}"
    fi
fi

if [ "$INSTALL_CODEQL" = true ]; then
    if command -v codeql &> /dev/null; then
        echo -e "  ${GREEN}✓ Tier 3: CodeQL${NC}"
    else
        echo -e "  ${RED}✗ Tier 3: CodeQL NOT installed${NC}"
    fi
fi

echo ""
echo "Next steps:"
echo "  1. Run /initialize-project in your project"
echo "  2. The MCP servers will be auto-configured in .mcp.json"
echo "  3. Claude will use the graph for optimized code navigation"
echo ""


================================================
FILE: scripts/install-hooks.sh
================================================
#!/bin/bash

# Install Claude Code Review Git Hooks
# Run this in any git repository to enable pre-push code review

set -e

CLAUDE_DIR="$HOME/.claude"
HOOKS_DIR="$CLAUDE_DIR/hooks"

# Colors
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m'

echo ""
echo "🔧 Claude Code Review - Git Hook Installer"
echo ""

# Check if we're in a git repository. Asking git (rather than testing
# for a .git directory) also works in linked worktrees and submodules,
# where .git is a file.
if ! git rev-parse --git-dir > /dev/null 2>&1; then
    echo -e "${RED}❌ Error: Not a git repository${NC}"
    echo "   Run this command from a git project root."
    exit 1
fi

# Directory git actually reads hooks from for this repository
# (honours worktrees and a configured core.hooksPath).
GIT_HOOKS_DIR="$(git rev-parse --git-path hooks)"
HOOK_PATH="$GIT_HOOKS_DIR/pre-push"

# Check that the hook template exists (covers a missing hooks directory
# as well as a missing pre-push file inside it)
if [ ! -f "$HOOKS_DIR/pre-push" ]; then
    echo -e "${RED}❌ Error: Hook templates not found${NC}"
    if [ -f "$CLAUDE_DIR/.bootstrap-dir" ]; then
        echo "   Run $(cat "$CLAUDE_DIR/.bootstrap-dir")/install.sh first."
    else
        echo "   Run install.sh from your Maggy clone first."
    fi
    exit 1
fi

# Check for existing pre-push hook
if [ -f "$HOOK_PATH" ]; then
    echo -e "${YELLOW}⚠️  Existing pre-push hook found${NC}"
    # `read` fails at EOF (non-interactive stdin); treat that as "no"
    # instead of letting `set -e` abort without a message.
    REPLY=""
    read -p "   Overwrite? (y/N) " -n 1 -r || REPLY=""
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        echo "   Skipped. Existing hook preserved."
        exit 0
    fi
fi

# Install pre-push hook
mkdir -p "$GIT_HOOKS_DIR"
cp "$HOOKS_DIR/pre-push" "$HOOK_PATH"
chmod +x "$HOOK_PATH"

echo -e "${GREEN}✅ Pre-push hook installed${NC}"
echo "   Location: $HOOK_PATH"
echo ""
echo "What happens now:"
echo "  • Every 'git push' runs Claude code review"
echo "  • 🔴 Critical or 🟠 High issues block the push"
echo "  • 🟡 Medium and 🟢 Low issues are advisory only"
echo ""
echo "To disable:"
echo "  rm .git/hooks/pre-push"
echo ""


================================================
FILE: scripts/install-skills.sh
================================================
#!/bin/bash
# install-skills.sh - Install skills to any agent tool directory
# Usage: install-skills.sh <target_dir> [source_dir]
# Example: install-skills.sh ~/.kimi/skills
#          install-skills.sh ~/.codex/skills /path/to/skills

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEFAULT_SOURCE="$SCRIPT_DIR/../skills"

# Print the usage text on stdout and terminate the script with status 1.
usage() {
    printf '%s\n' \
        "Usage: install-skills.sh <target_dir> [source_dir]" \
        "  target_dir: Where to install skills" \
        "  source_dir: Source skills (default: repo skills/)"
    exit 1
}

#######################################
# Copy every skill directory (a direct subdirectory of the source that
# contains a SKILL.md) into the target directory.
# Arguments:
#   $1 - source directory holding one subdirectory per skill
#   $2 - target directory (created if missing)
# Outputs:
#   Number of skills copied, on stdout; copy errors on stderr.
# Returns:
#   0 on success, 1 as soon as a copy fails.
#######################################
copy_skills() {
    local source="$1"
    local target="$2"
    local count=0
    local skill_dir

    mkdir -p "$target" || return 1
    for skill_dir in "$source"/*/; do
        # An unmatched glob stays literal; the -d test filters it out.
        [ -d "$skill_dir" ] || continue
        [ -f "$skill_dir/SKILL.md" ] || continue
        # Check cp explicitly: this function is called inside $(...),
        # where bash clears errexit, so an unchecked failure would
        # otherwise be counted as an installed skill.
        if ! cp -r -- "${skill_dir%/}" "$target/"; then
            echo "Error: failed to copy '${skill_dir%/}' to '$target'" >&2
            return 1
        fi
        count=$((count + 1))
    done
    echo "$count"
}

# Entry point: validate the arguments, copy the skills and report how
# many were installed.
#   $1 - target directory (required; usage is shown when missing)
#   $2 - source directory (optional; defaults to DEFAULT_SOURCE)
main() {
    local dest="${1:-}"
    local src="${2:-$DEFAULT_SOURCE}"

    if [ -z "$dest" ]; then
        usage
    fi

    if [ ! -d "$src" ]; then
        echo "Error: source dir '$src' not found" >&2
        exit 1
    fi

    local total
    total=$(copy_skills "$src" "$dest")
    echo "Installed $total skills to $dest"
}

main "$@"


================================================
FILE: scripts/mnemos/__init__.py
================================================
"""Mnemos -- Task-Scoped Memory Lifecycle for Autonomous Agents.

Prevents lossy context compaction by treating memory as a typed graph
(MnemoGraph) with differentiated eviction policies, continuous fatigue
monitoring, and checkpoint/resume.
"""

__version__ = '0.1.0'


================================================
FILE: scripts/mnemos/__main__.py
================================================
"""CLI entry point for Mnemos -- Task-Scoped Memory Lifecycle."""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

from . import __version__
from .checkpoint import load_checkpoint, write_checkpoint
from .consolidation import micro_consolidate
from .fatigue import compute_fatigue, read_fatigue_file
from .models import FatigueState, MnemoNode, _now, _uuid
from .signals import get_session_stats
from .store import MnemosStore


def main(argv: list[str] | None = None) -> int:
    """Parse the ``mnemos`` command line and run the chosen subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The subcommand's exit code, or 1 (after printing help) when no
        subcommand was given.
    """
    parser = argparse.ArgumentParser(
        prog='mnemos',
        description='Mnemos -- Task-Scoped Memory Lifecycle'
    )
    parser.add_argument(
        '--version', action='version', version=f'mnemos {__version__}'
    )
    parser.add_argument(
        '--project', default='.', help='Project directory (default: .)'
    )
    commands = parser.add_subparsers(dest='command')

    # Subparsers are registered in the order they appear in --help.
    commands.add_parser(
        'init', help='Initialize .mnemos/ directory and database'
    )
    commands.add_parser('status', help='Show Mnemos statistics and fatigue')
    commands.add_parser('fatigue', help='Show detailed fatigue breakdown')

    checkpoint_parser = commands.add_parser(
        'checkpoint', help='Write a checkpoint'
    )
    checkpoint_parser.add_argument(
        '--force', action='store_true', help='Write even if fatigue is low'
    )
    checkpoint_parser.add_argument('--task-id', help='Task ID for checkpoint')

    resume_parser = commands.add_parser(
        'resume', help='Output latest checkpoint for context injection'
    )
    resume_parser.add_argument('--path', help='Specific checkpoint file path')

    consolidate_parser = commands.add_parser(
        'consolidate', help='Run micro-consolidation pass'
    )
    consolidate_parser.add_argument(
        '--scope', default='', help='Current scope tag'
    )

    nodes_parser = commands.add_parser('nodes', help='List active MnemoNodes')
    nodes_parser.add_argument(
        '--type', dest='node_type', help='Filter by type'
    )
    nodes_parser.add_argument(
        '--all', action='store_true', help='Include non-active nodes'
    )

    add_parser = commands.add_parser('add', help='Add a MnemoNode')
    add_parser.add_argument('type', choices=[
        'goal', 'constraint', 'context', 'working', 'result'
    ])
    add_parser.add_argument('content', help='Node content')
    add_parser.add_argument('--task-id', default='manual', help='Task ID')
    add_parser.add_argument(
        '--scope', nargs='+', default=[], help='Scope tags'
    )

    commands.add_parser(
        'bridge-icpg', help='Import iCPG ReasonNodes as MnemoNodes'
    )

    args = parser.parse_args(argv)
    store = MnemosStore(args.project)

    # Each entry defers the call so only the selected handler runs.
    handlers = {
        'init': lambda: cmd_init(store),
        'status': lambda: cmd_status(store, args),
        'fatigue': lambda: cmd_fatigue(store, args),
        'checkpoint': lambda: cmd_checkpoint(store, args),
        'resume': lambda: cmd_resume(args),
        'consolidate': lambda: cmd_consolidate(store, args),
        'nodes': lambda: cmd_nodes(store, args),
        'add': lambda: cmd_add(store, args),
        'bridge-icpg': lambda: cmd_bridge_icpg(store, args),
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return 1
    return handler()


def cmd_init(store: MnemosStore) -> int:
    """Initialize the store's database and report where it lives."""
    store.init_db()
    report = (
        f'Initialized Mnemos at {store.mnemos_dir}',
        f'  Database: {store.db_path}',
        '  .gitignore: created',
    )
    for line in report:
        print(line)
    return 0


def cmd_status(store: MnemosStore, args) -> int:
    """Print node counts, live fatigue (if recorded) and the last checkpoint.

    Returns 0 in every case, including when no database exists yet.
    """
    if not store.exists():
        print('No Mnemos database. Run `mnemos init` first.')
        return 0

    stats = store.get_stats()
    fatigue_data = read_fatigue_file(args.project)

    print('MNEMOS STATUS')
    # Labels are left-padded to 18 columns so the values line up.
    counters = (
        ('Active nodes:', 'active'),
        ('Compressed:', 'compressed'),
        ('Evicted:', 'evicted'),
        ('Total nodes:', 'total_nodes'),
        ('Checkpoints:', 'checkpoints'),
    )
    for label, key in counters:
        print(f'  {label:<18}{stats[key]}')

    by_type = stats['by_type']
    if by_type:
        joined = ', '.join(f'{t}:{c}' for t, c in by_type.items())
        print(f'  By type:          {joined}')

    # Live fatigue is shown only when the fatigue file has data.
    if fatigue_data:
        used = fatigue_data.get('used_percentage', 0)
        remaining = fatigue_data.get('remaining_percentage', 100)
        print(f'\n  Context usage:    {used:.1f}% used, {remaining:.1f}% remaining')

        fatigue = compute_fatigue(fatigue_data, args.project)
        icons = {
            'flow': '+', 'compress': '~',
            'pre_sleep': '!', 'rem': '!!', 'emergency': 'XXX'
        }
        marker = icons.get(fatigue.state, '?')
        print(f'  Fatigue:          [{marker}] {fatigue.composite_score:.2f} ({fatigue.state})')

    latest = store.get_latest_checkpoint()
    if latest:
        print(f'\n  Last checkpoint:  {latest.id[:8]} ({latest.created_at})')
        print(f'    Goal: {latest.goal[:60]}')
        print(f'    Fatigue then: {latest.fatigue_at_checkpoint:.2f}')

    return 0


def cmd_fatigue(store: MnemosStore, args) -> int:
    """Print the fatigue breakdown, signal statistics and a recommendation.

    The computed fatigue is also logged to the store when a database
    exists. Returns 0 in every case.
    """
    fatigue_data = read_fatigue_file(args.project)
    if not fatigue_data:
        print('No fatigue data. Statusline not configured or no API calls yet.')
        print('Configure mnemos-statusline.sh to start tracking.')
        return 0

    fatigue = compute_fatigue(fatigue_data, args.project)

    print('MNEMOS FATIGUE ANALYSIS')
    print(f'  {_fatigue_bar(fatigue.composite_score)}')
    print(f'  Composite: {fatigue.composite_score:.4f} -> {fatigue.state.upper()}')
    print()
    print('  Dimensions (all passively observed from hooks):')
    # (label, attribute on the fatigue object, displayed weight, source)
    dimensions = (
        ('Token utilization:', 'token_utilization', '0.40', 'statusline'),
        ('Scope scatter:', 'scope_scatter', '0.25', 'PreToolUse file paths'),
        ('Re-read ratio:', 'reread_ratio', '0.20', 'PreToolUse Read calls'),
        ('Error density:', 'error_density', '0.15', 'PostToolUse outcomes'),
    )
    for label, attr, weight, source in dimensions:
        value = getattr(fatigue, attr)
        print(f'    {label:<19}{value:.4f}  (weight: {weight})  [{source}]')
    print()

    # Signal stats
    signal_stats = get_session_stats(args.project)
    if signal_stats.get('total_signals', 0) > 0:
        print(f'  Signal log: {signal_stats["total_signals"]} events')
        if signal_stats.get('tool_calls'):
            per_tool = ', '.join(
                f'{k}:{v}' for k, v in signal_stats['tool_calls'].items()
            )
            print(f'    Tools: {per_tool}')
        print(f'    Unique files read: {signal_stats.get("unique_files_read", 0)}')
        print(f'    Re-reads: {signal_stats.get("rereads", 0)}')
        print(f'    Errors: {signal_stats.get("errors", 0)}/{signal_stats.get("total_outcomes", 0)}')
        print()

    # Recommendation lines per state; unknown states print nothing.
    advice = {
        'flow': (
            '  Status: Operating normally. No action needed.',
        ),
        'compress': (
            '  Status: Consider micro-consolidation.',
            '  Run: mnemos consolidate',
        ),
        'pre_sleep': (
            '  Status: Write checkpoint and consolidate.',
            '  Run: mnemos checkpoint && mnemos consolidate',
        ),
        'rem': (
            '  WARNING: High fatigue. Checkpoint immediately.',
            '  Run: mnemos checkpoint --force',
        ),
        'emergency': (
            '  EMERGENCY: Context nearly full. Checkpoint NOW.',
            '  Run: mnemos checkpoint --force',
        ),
    }
    for line in advice.get(fatigue.state, ()):
        print(line)

    # Log it
    if store.exists():
        store.log_fatigue(fatigue)

    return 0


def cmd_checkpoint(store: MnemosStore, args) -> int:
    """Write a checkpoint unless fatigue is low and ``--force`` is absent.

    Initializes the database first when it does not exist. Returns 0
    both when a checkpoint is written and when it is skipped.
    """
    if not store.exists():
        store.init_db()

    # Fatigue is only known when the fatigue file has data.
    fatigue = None
    fatigue_data = read_fatigue_file(args.project)
    if fatigue_data:
        fatigue = compute_fatigue(fatigue_data, args.project)

    if fatigue and not args.force and fatigue.composite_score < 0.40:
        print(f'Fatigue low ({fatigue.composite_score:.2f}). '
              f'Use --force to checkpoint anyway.')
        return 0

    # The iCPG store is optional; None when unavailable.
    icpg_store = _try_load_icpg(args.project)
    score = fatigue.composite_score if fatigue else 0.0

    cp = write_checkpoint(
        store,
        fatigue_score=score,
        icpg_store=icpg_store,
        task_id=getattr(args, 'task_id', None)
    )

    summary = (
        f'Checkpoint written: {cp.id[:8]}',
        f'  Goal: {cp.goal[:60]}',
        f'  Constraints: {len(cp.active_constraints)}',
        f'  Results: {len(cp.active_results)}',
        f'  Fatigue: {cp.fatigue_at_checkpoint:.2f}',
        '  File: .mnemos/checkpoint-latest.json',
    )
    print('\n'.join(summary))
    return 0


def cmd_resume(args) -> int:
    """Print the loaded checkpoint text so it can be injected into context.

    Loads from ``args.path`` when given. Returns 0 whether or not a
    checkpoint was found.
    """
    checkpoint_text = load_checkpoint(
        project_dir=args.project,
        path=getattr(args, 'path', None)
    )
    if checkpoint_text:
        # This output is what ends up in the agent's context.
        print(checkpoint_text)
    else:
        print('No checkpoint found to resume from.')
    return 0


def cmd_consolidate(store: MnemosStore, args) -> int:
    """Run a micro-consolidation pass and print its counters.

    Returns 1 when no database exists, otherwise 0.
    """
    if not store.exists():
        print('No Mnemos database. Run `mnemos init` first.')
        return 1

    current_scope = getattr(args, 'scope', '')
    result = micro_consolidate(store, current_scope=current_scope)

    print('Micro-consolidation complete:')
    print(f'  Compressed: {result["compressed"]} ResultNodes')
    print(f'  Evicted: {result["evicted"]} ContextNodes')
    print(f'  Decayed: {result["decayed"]} node weights')
    return 0


def cmd_nodes(store: MnemosStore, args) -> int:
    """List MnemoNodes, optionally filtered by type and/or status.

    Args:
        store: Mnemos store to read from.
        args: Parsed CLI namespace. ``node_type`` restricts the listing
            to one node type; ``all`` includes non-active nodes.

    Returns:
        0 in every case (including "no database" and "no matches").
    """
    if not store.exists():
        print('No Mnemos database.')
        return 0

    node_type = getattr(args, 'node_type', None)
    show_all = getattr(args, 'all', False)

    # Single source of truth for the statuses this command knows about:
    # used both to render icons and to enumerate statuses for
    # `--type T --all`, so that path no longer drops promoted and
    # handed_off nodes that the untyped --all listing returns.
    status_icons = {
        'active': '+', 'compressed': '~', 'evicted': '-',
        'promoted': '^', 'handed_off': '>'
    }

    if node_type:
        if show_all:
            nodes = []
            for status in status_icons:
                nodes.extend(store.get_by_type(node_type, status=status))
        else:
            nodes = store.get_by_type(node_type)
    else:
        if show_all:
            # Reads the table directly through the store's private
            # connection and row-mapping helpers.
            with store._conn() as conn:
                rows = conn.execute(
                    'SELECT * FROM mnemo_nodes ORDER BY type, activation_weight DESC'
                ).fetchall()
            nodes = [store._row_to_node(r) for r in rows]
        else:
            nodes = store.get_active_nodes()

    if not nodes:
        print('No matching nodes.')
        return 0

    print(f'MNEMO NODES ({len(nodes)}):')
    for n in nodes:
        icon = status_icons.get(n.status, '?')
        weight = f'{n.activation_weight:.2f}'
        # Prefer the summary when one exists; fall back to raw content.
        content = n.summary or n.content
        content_preview = content[:60] if content else '(empty)'
        print(f'  [{icon}] {n.type:12s} w={weight} {content_preview}')
        if n.scope_tags:
            print(f'       scope: {", ".join(n.scope_tags[:3])}')

    return 0


def cmd_add(store: MnemosStore, args) -> int:
    """Create a MnemoNode from the CLI arguments and report it.

    Initializes the database first when it does not exist. Returns 0.
    """
    if not store.exists():
        store.init_db()

    fields = {
        'type': args.type,
        'task_id': args.task_id,
        'content': args.content,
        'scope_tags': args.scope,
        'origin': 'agent_generated',
    }
    node = MnemoNode(**fields)
    store.create_node(node)

    print(f'Created {args.type} node: {node.id[:8]}')
    print(f'  Content: {args.content[:60]}')
    scope_tags = args.scope
    if scope_tags:
        print(f'  Scope: {", ".join(scope_tags)}')
    return 0


def cmd_bridge_icpg(store: MnemosStore, args) -> int:
    """Import nodes from the project's iCPG store into Mnemos.

    Initializes the Mnemos database first when it does not exist.
    Returns 1 when no iCPG store could be loaded, otherwise 0.
    """
    if not store.exists():
        store.init_db()

    icpg_store = _try_load_icpg(args.project)
    if icpg_store:
        imported = store.load_from_icpg(icpg_store)
        print('iCPG Bridge complete:')
        print(f'  GoalNodes imported: {imported["goals_imported"]}')
        print(f'  ConstraintNodes imported: {imported["constraints_imported"]}')
        return 0

    print('No iCPG database found. Run `icpg init` first.')
    return 1


def _try_load_icpg(project_dir: str):
    """Try to import and load iCPG store. Returns None if unavailable."""
    try:
        icpg_path = Path(project_dir).resolve() / '.icpg' / 'reason.db'
        if not icpg_path.exists():
            return None

        # Try importing from sibling package
        sys.path.insert(0, str(Path(__file__).parent.parent))
        from icpg.store import ICPGStore
        store = ICPGStore(project_dir)
        if store.exists():
            return store
    except ImportError:
        pass
    return None


def _fatigue_bar(score: float) -> str:
    """Render a visual fatigue bar."""
    filled = int(score * 20)
    empty = 20 - filled
    bar = '#' * filled + '.' * empty

    if score >= 0.90:
        label = 'EMERGENCY'
    elif score >= 0.75:
        label = 'REM'
    elif score >= 0.60:
        label = 'PRE-SLEEP'
    elif score >= 0.40:
        label = 'COMPRESS'
    else:
        label = 'FLOW'

    return f'[{bar}] {score:.2f} {label}'


if __name__ == '__main__':
    sys.exit(main())


================================================
FILE: scripts/mnemos/checkpoint.py
================================================
"""Checkpoint write/load for Mnemos session persistence."""

from __future__ import annotations

import json
import subprocess
import time
from collections import Counter
from pathlib import Path

from .models import CheckpointNode, _now, _uuid
from .signals import read_recent_signals
from .store import MnemosStore


def write_checkpoint(
    store: MnemosStore,
    fatigue_score: float = 0.0,
    icpg_store=None,
    task_id: str | None = None
) -> CheckpointNode:
    """Write a CheckpointNode capturing current MnemoGraph state.

    Always includes: GoalNode content, all ConstraintNodes, current sub-goal.
    Optionally includes: iCPG state, git state, compressed ResultNodes.

    Writes to:
        .mnemos/checkpoint-latest.json  (always overwritten)
        .mnemos/checkpoints/<id>.json   (archived copy)

    Args:
        store: Mnemos store to read nodes from and persist the checkpoint to.
        fatigue_score: Fatigue score recorded with the checkpoint.
        icpg_store: Optional iCPG store; its state is captured only when it
            is truthy and ``exists()`` returns true.
        task_id: Task identifier. Defaults to the first goal node's
            ``task_id``, or ``'unknown'`` when there is no goal node.

    Returns the created CheckpointNode.
    """
    # Determine task_id from active GoalNodes
    goal_nodes = store.get_by_type('goal')
    if not task_id and goal_nodes:
        task_id = goal_nodes[0].task_id
    task_id = task_id or 'unknown'

    # Gather goal
    goal_text = '; '.join(n.content for n in goal_nodes) or 'No active goal'

    # Gather constraints (never evicted)
    constraints = [n.content for n in store.get_by_type('constraint')]

    # Gather result summaries (compressed or active). Only the first 20 nodes
    # returned by the store are used -- presumably the most recent ones;
    # NOTE(review): confirm the ordering of store.get_by_type().
    results = []
    for rn in store.get_by_type('result')[:20]:
        if rn.summary:
            results.append(rn.summary)
        elif rn.content:
            # No summary yet: fall back to a 200-character content prefix.
            results.append(rn.content[:200])

    # Current sub-goal and working memory come from the working nodes.
    working_nodes = store.get_by_type('working')
    current_subgoal = working_nodes[0].content if working_nodes else ''
    working_memory = '\n'.join(n.content for n in working_nodes[:3])

    # Task narrative and recent files from signals
    narrative, recent_files = build_task_narrative(store.project_dir)

    # Git state
    git_state = _get_git_state(store.project_dir)

    # iCPG state
    icpg_state = None
    if icpg_store and icpg_store.exists():
        icpg_state = _get_icpg_state(icpg_store)

    # Node summary (counts by type and status)
    stats = store.get_stats()
    node_summary = {
        'total': stats['total_nodes'],
        'active': stats['active'],
        'compressed': stats['compressed'],
        'by_type': stats['by_type']
    }

    cp = CheckpointNode(
        id=_uuid(),
        task_id=task_id,
        goal=goal_text,
        active_constraints=constraints,
        active_results=results,
        current_subgoal=current_subgoal,
        working_memory=working_memory,
        task_narrative=narrative,
        recent_files=recent_files,
        fatigue_at_checkpoint=fatigue_score,
        git_state=git_state,
        icpg_state=icpg_state,
        node_summary=node_summary,
        created_at=_now()
    )

    # Persist to DB
    store.save_checkpoint(cp)

    # Serialize once; both files receive identical content.
    payload = json.dumps(_checkpoint_to_dict(cp), indent=2)

    # Create the archive directory (and .mnemos itself, if it is somehow
    # missing) before writing so neither write fails on a missing directory.
    archive_dir = store.mnemos_dir / 'checkpoints'
    archive_dir.mkdir(parents=True, exist_ok=True)

    # Latest checkpoint (overwrite)
    (store.mnemos_dir / 'checkpoint-latest.json').write_text(payload)

    # Archived copy
    (archive_dir / f'{cp.id}.json').write_text(payload)

    return cp


def load_checkpoint(
    project_dir: str = '.', path: str | None = None
) -> str | None:
    """Load latest checkpoint and format as context for session injection.

    Args:
        project_dir: Project root; used to locate
            ``.mnemos/checkpoint-latest.json`` when ``path`` is not given.
        path: Explicit checkpoint file to load instead of the default.

    Returns formatted markdown string, or None if no checkpoint exists, the
    file cannot be read or decoded, or it does not contain a JSON object.
    """
    if path:
        cp_path = Path(path)
    else:
        cp_path = Path(project_dir).resolve() / '.mnemos' / 'checkpoint-latest.json'

    if not cp_path.exists():
        return None

    try:
        data = json.loads(cp_path.read_text())
    except (ValueError, OSError):
        # ValueError covers both malformed JSON (JSONDecodeError) and
        # undecodable bytes (UnicodeDecodeError).
        return None

    # The formatter expects a mapping; any other JSON value is unusable.
    if not isinstance(data, dict):
        return None

    return _format_checkpoint(data)


def _format_checkpoint(data: dict) -> str:
    """Format checkpoint data as structured markdown for context injection."""
    lines = []
    lines.append('## Mnemos Session Resume')
    lines.append(f'Checkpoint: {data.get("id", "unknown")[:8]}')
    lines.append(f'Created: {data.get("created_at", "unknown")}')
    lines.append(f'Fatigue at checkpoint: {data.get("fatigue_at_checkpoint", 0):.2f}')
    lines.append('')

    # Goal
    lines.append('### Goal')
    lines.append(data.get('goal', 'No goal recorded'))
    lines.append('')

    # Constraints
    constraints = data.get('active_constraints', [])
    if constraints:
        lines.append('### Active Constraints (DO NOT VIOLATE)')
        for c in constraints:
            lines.append(f'- {c}')
        lines.append('')

    # What was being worked on (task narrative)
    narrative = data.get('task_narrative', '')
    if narrative:
        lines.append('### What You Were Working On')
        lines.append(narrative)
        lines.append('')

    # Current task
    subgoal = data.get('current_subgoal', '')
    if subgoal:
        lines.append('### Current Sub-Goal')
        lines.append(subgoal)
        lines.append('')

    # Working memory
    working = data.get('working_memory', '')
    if working:
        lines.append('### Working Memory')
        lines.append(working)
        lines.append('')

    # Progress (result summaries)
    results = data.get('active_results', [])
    if results:
        lines.append('### Progress So Far')
        for r in results:
            lines.append(f'- {r}')
        lines.append('')

    # Recent files
    recent = data.get('recent_files', [])
    if recent:
        lines.append('### Key Files (from recent activity)')
        for f in recent[:10]:
            parts = []
            if f.get('edits', 0) > 0:
                parts.append(f'edited {f["edits"]}x')
            if f.get('reads', 0) > 0:
                parts.append(f'read {f["reads"]}x')
            detail = ', '.join(parts) if parts else 'touched'
            lines.append(f'- {f.get("path", "?")} ({detail})')
        lines.append('')

    # Git state
    git = data.get('git_state', {})
    if git.get('branch'):
        lines.append('### Git State')
        lines.append(f'Branch: {git["branch"]}')
        if git.get('uncommitted'):
            lines.append('Uncommitted files:')
            for f in git['uncommitted'][:10]:
                lines.append(f'  - {f}')
        lines.append('')

    # iCPG state
    icpg = data.get('icpg_state')
    if icpg:
        lines.append('### iCPG Context')
        if icpg.get('active_reason'):
            lines.append(f'Active intent: {icpg["active_reason"]}')
        if icpg.get('unresolved_drift'):
            lines.append(f'Unresolved drift: {icpg["unresolved_drift"]}')
        if icpg.get('stats'):
            s = icpg['stats']
            lines.append(
                f'Graph: {s.get("reasons", 0)} intents, '
                f'{s.get("symbols", 0)} symbols'
            )
        lines.append('')

    # Node summary
    summary = data.get('node_summary', {})
    if summary:
        lines.append('### MnemoGraph Summary')
        lines.append(
            f'Nodes: {summary.get("active", 0)} active, '
            f'{summary.get("compressed", 0)} compressed, '
            f'{summary.get("total", 0)} total'
        )
        by_type = summary.get('by_type', {})
        if by_type:
            parts = [f'{t}:{c}' for t, c in by_type.items()]
            lines.append(f'Types: {", ".join(parts)}')

    return '\n'.join(lines)


def _get_git_state(project_dir: Path) -> dict:
    """Get current git branch and uncommitted files."""
    state = {}
    try:
        result = subprocess.run(
            ['git', 'branch', '--show-current'],
            capture_output=True, text=True, timeout=5,
            cwd=str(project_dir)
        )
        if result.returncode == 0:
            state['branch'] = result.stdout.strip()

        result = subprocess.run(
            ['git', 'diff', '--name-only'],
            capture_output=True, text=True, timeout=5,
            cwd=str(project_dir)
        )
        if result.returncode == 0:
            files = [
                f.strip() for f in result.stdout.strip().split('\n')
                if f.strip()
            ]
            state['uncommitted'] = files

        result = subprocess.run(
            ['git', 'diff', '--cached', '--name-only'],
            capture_output=True, text=True, timeout=5,
            cwd=str(project_dir)
        )
        if result.returncode == 0:
            staged = [
                f.strip() for f in result.stdout.strip().split('\n')
                if f.strip()
            ]
            if staged:
                state['staged'] = staged

    except (subprocess.TimeoutExpired, FileNotFoundError):
        pass
    return state


def _get_icpg_state(icpg_store) -> dict:
    """Extract summary iCPG state for checkpoint."""
    state = {}
    try:
        stats = icpg_store.get_stats()
        state['stats'] = stats

        # Find most recent executing reason
        executing = icpg_store.list_reasons(status='executing')
        if executing:
            r = executing[-1]
            state['active_reason'] = f'{r.id[:8]} -- {r.goal}'

        # Unresolved drift count
        drift = icpg_store.get_unresolved_drift()
        state['unresolved_drift'] = len(drift)
    except Exception:
        pass
    return state


def build_task_narrative(project_dir: str | Path) -> tuple[str, list[dict]]:
    """Build a human-readable task narrative from recent signals.

    Reads up to the 50 most recent signals and produces:
    1. A narrative string describing recent activity
    2. A list of recent files with read/edit counts

    Args:
        project_dir: Project root; signals are read from it and the focus
            area is shown relative to it when possible.

    Returns:
        (narrative_text, recent_files_list). ``recent_files_list`` holds at
        most 15 dicts with 'path' and, when non-zero, 'edits' / 'reads',
        sorted by total activity (ties keep first-seen order). Both parts are
        empty when there are no signals.
    """
    signals = read_recent_signals(str(project_dir), limit=50)
    if not signals:
        return ('', [])

    # Count file interactions
    file_edits: Counter = Counter()
    file_reads: Counter = Counter()
    tool_counts: Counter = Counter()
    error_count = 0
    total_outcomes = 0

    for s in signals:
        tool = s.get('tool', '')
        fp = s.get('file_path', '')
        # Signals without a tool name would otherwise show up as an unnamed
        # entry in the "Other tools" summary.
        if tool:
            tool_counts[tool] += 1

        if fp:
            if tool in ('Edit', 'Write'):
                file_edits[fp] += 1
            elif tool == 'Read':
                file_reads[fp] += 1

        # Only signals that carry an outcome count towards the error ratio.
        if 'success' in s:
            total_outcomes += 1
            if not s['success']:
                error_count += 1

    # Build narrative
    parts = []

    # Most-edited files
    top_edits = file_edits.most_common(5)
    if top_edits:
        edit_parts = [f'{Path(fp).name} ({count}x)' for fp, count in top_edits]
        parts.append(f'Editing: {", ".join(edit_parts)}')

    # Most-read files
    top_reads = file_reads.most_common(5)
    if top_reads:
        read_parts = [f'{Path(fp).name} ({count}x)' for fp, count in top_reads]
        parts.append(f'Reading: {", ".join(read_parts)}')

    # Tool activity
    other_tools = {t: c for t, c in tool_counts.items()
                   if t not in ('Edit', 'Write', 'Read')}
    if other_tools:
        tool_parts = [f'{t}:{c}' for t, c in
                      sorted(other_tools.items(), key=lambda x: -x[1])]
        parts.append(f'Other tools: {", ".join(tool_parts[:5])}')

    # Focus area (most common directory)
    all_files = list(file_edits.keys()) + list(file_reads.keys())
    if all_files:
        # Shorten against the project root rather than the process cwd, so
        # the result does not depend on where the caller was started from.
        project_root = Path(project_dir).resolve()
        dir_counts: Counter = Counter()
        for fp in all_files:
            parent = Path(fp).parent
            try:
                parent = parent.relative_to(project_root)
            except ValueError:
                # Outside the project (or already relative): keep as-is.
                pass
            dir_counts[str(parent)] += 1
        top_dir = dir_counts.most_common(1)[0]
        parts.append(f'Focus area: {top_dir[0]}/')

    # Errors
    if error_count > 0:
        parts.append(f'Errors: {error_count}/{total_outcomes} tool calls failed')

    narrative = '. '.join(parts) + '.' if parts else ''

    # Build recent files list. dict.fromkeys de-duplicates while keeping
    # first-seen order, so equal-activity files come out in a stable order.
    recent_files = []
    for fp in dict.fromkeys(all_files):
        entry = {'path': fp}
        if file_edits[fp]:
            entry['edits'] = file_edits[fp]
        if file_reads[fp]:
            entry['reads'] = file_reads[fp]
        recent_files.append(entry)
    # Sort by total activity (stable sort)
    recent_files.sort(
        key=lambda x: x.get('edits', 0) + x.get('reads', 0),
        reverse=True
    )

    return (narrative, recent_files[:15])


def format_for_post_compact_injection(
    project_dir: str = '.',
    checkpoint_path: str | None = None
) -> str | None:
    """Format checkpoint as a rich injection block for post-compaction context.

    Called by mnemos-post-compact-inject.sh after compaction is detected.
    Returns a structured block that Claude can parse and resume from.

    Args:
        project_dir: Project root; used to locate
            ``.mnemos/checkpoint-latest.json`` when no path is given.
        checkpoint_path: Explicit checkpoint file to read instead.

    Returns:
        The formatted block, or None when the checkpoint is missing,
        unreadable, not valid JSON, or not a JSON object.
    """
    if checkpoint_path:
        cp_path = Path(checkpoint_path)
    else:
        cp_path = Path(project_dir).resolve() / '.mnemos' / 'checkpoint-latest.json'

    if not cp_path.exists():
        return None

    try:
        data = json.loads(cp_path.read_text())
    except (ValueError, OSError):
        # ValueError covers malformed JSON and undecodable bytes.
        return None

    if not isinstance(data, dict):
        return None

    def value_of(key, default):
        # A key stored as null is treated like a missing key.
        found = data.get(key)
        return default if found is None else found

    lines = []
    lines.append('=== MNEMOS: CONTEXT RESTORED AFTER COMPACTION ===')
    lines.append('')
    lines.append('Compaction just occurred. Your previous context was summarized.')
    lines.append('Resume from this checkpoint -- DO NOT re-derive information already captured below.')
    lines.append('')

    # Goal
    lines.append('## Goal')
    lines.append(str(value_of('goal', 'No goal recorded')))
    lines.append('')

    # Constraints
    constraints = value_of('active_constraints', [])
    if constraints:
        lines.append('## Active Constraints (DO NOT VIOLATE)')
        for c in constraints:
            lines.append(f'- {c}')
        lines.append('')

    # Task narrative
    narrative = value_of('task_narrative', '')
    if narrative:
        lines.append('## What You Were Working On')
        lines.append(narrative)
        lines.append('')

    # Current sub-goal
    subgoal = value_of('current_subgoal', '')
    if subgoal:
        lines.append('## Current Sub-Goal')
        lines.append(subgoal)
        lines.append('')

    # Working memory
    working = value_of('working_memory', '')
    if working:
        lines.append('## Working Memory')
        lines.append(working)
        lines.append('')

    # Progress
    results = value_of('active_results', [])
    if results:
        lines.append('## Progress So Far')
        for r in results:
            lines.append(f'- {r}')
        lines.append('')

    # Recent files (at most 10 are listed)
    recent = value_of('recent_files', [])
    if recent:
        lines.append('## Key Files (from recent activity)')
        for entry in recent[:10]:
            parts = []
            if entry.get('edits', 0) > 0:
                parts.append(f'edited {entry["edits"]}x')
            if entry.get('reads', 0) > 0:
                parts.append(f'read {entry["reads"]}x')
            detail = ', '.join(parts) if parts else 'touched'
            lines.append(f'- {entry.get("path", "?")} ({detail})')
        lines.append('')

    # Git state (only rendered when a branch name was recorded)
    git = value_of('git_state', {})
    if git.get('branch'):
        lines.append('## Git State')
        lines.append(f'Branch: {git["branch"]}')
        uncommitted = git.get('uncommitted') or []
        staged = git.get('staged') or []
        if uncommitted:
            lines.append('Uncommitted:')
            for gf in uncommitted[:10]:
                lines.append(f'  - {gf}')
        if staged:
            lines.append('Staged:')
            for gf in staged[:10]:
                lines.append(f'  - {gf}')
        # Only claim a clean tree when neither list recorded any change;
        # staged-only changes must not be reported as clean.
        if not uncommitted and not staged:
            lines.append('Working tree clean.')
        lines.append('')

    # iCPG
    icpg = data.get('icpg_state')
    if icpg:
        lines.append('## iCPG Context')
        if icpg.get('active_reason'):
            lines.append(f'Active intent: {icpg["active_reason"]}')
        if icpg.get('unresolved_drift'):
            lines.append(f'Unresolved drift: {icpg["unresolved_drift"]}')
        lines.append('')

    # Checkpoint metadata
    checkpoint_id = str(value_of('id', '?'))[:8]
    created_at = value_of('created_at', '?')
    fatigue = value_of('fatigue_at_checkpoint', 0)
    lines.append(f'Checkpoint: {checkpoint_id} at {created_at}')
    lines.append(f'Fatigue at checkpoint: {fatigue:.2f}')
    lines.append('')
    lines.append('=== Resume work from this checkpoint. Ask the user to confirm the task if unclear. ===')

    return '\n'.join(lines)


def write_compaction_marker(project_dir: str = '.') -> None:
    """Drop the ``just-compacted`` marker used to detect a recent compaction.

    The marker lives in ``<project_dir>/.mnemos`` (created if needed) and
    holds a JSON object with the current ``time.time()`` and a fixed reason.
    """
    mnemos_dir = Path(project_dir).resolve() / '.mnemos'
    mnemos_dir.mkdir(parents=True, exist_ok=True)

    payload = {'timestamp': time.time(), 'reason': 'pre_compact_hook'}
    (mnemos_dir / 'just-compacted').write_text(json.dumps(payload))


def check_compaction_marker(project_dir: str = '.', max_age: float = 300) -> bool:
    """Check if a fresh compaction marker exists.

    Args:
        project_dir: Project root containing ``.mnemos/just-compacted``.
        max_age: Maximum marker age in seconds (default 300, i.e. 5 minutes).

    Returns:
        True when the marker exists and its timestamp is less than
        ``max_age`` seconds old. False when the marker is missing,
        unreadable, not a JSON object, or carries a non-numeric timestamp.
    """
    marker = Path(project_dir).resolve() / '.mnemos' / 'just-compacted'
    if not marker.exists():
        return False
    try:
        data = json.loads(marker.read_text())
    except (ValueError, OSError):
        # ValueError covers malformed JSON and undecodable bytes.
        return False

    if not isinstance(data, dict):
        return False

    # A missing timestamp counts as epoch 0, i.e. always stale.
    timestamp = data.get('timestamp', 0)
    if isinstance(timestamp, bool) or not isinstance(timestamp, (int, float)):
        return False

    return time.time() - timestamp < max_age


def consume_compaction_marker(project_dir: str = '.') -> bool:
    """Consume the compaction marker (rename, then delete).

    The rename is the step that claims the marker: once it has succeeded the
    original path no longer exists, so a later call finds nothing to rename
    and returns False.

    Returns True if marker was consumed, False if already consumed or missing.
    """
    marker = Path(project_dir).resolve() / '.mnemos' / 'just-compacted'
    consumed = marker.with_suffix('.consumed')
    try:
        # replace() overwrites an existing target on every platform, so a
        # stale '.consumed' file left by an interrupted run cannot block
        # consumption (plain rename() fails in that case on Windows).
        marker.replace(consumed)
    except OSError:
        return False

    try:
        consumed.unlink(missing_ok=True)
    except OSError:
        # The marker has already been claimed; failing to clean up the
        # renamed file must not be reported as "not consumed".
        pass
    return True


def _checkpoint_to_dict(cp: CheckpointNode) -> dict:
    """Serialize CheckpointNode to JSON-safe dict."""
    return {
        'id': cp.id,
        'task_id': cp.task_id,
        'goal': cp.goal,
        'active_constraints': cp.active_constraints,
        'active_results': cp.active_results,
        'current_subgoal': cp.current_subgoal,
        'working_memory': cp.working_memory,
        'task_narrative': cp.task_narrative,
        'recent_files': cp.recent_files,
        'fatigue_at_checkpoint': cp.fatigue_at_checkpoint,
        'git_state': cp.git_state,
        'icpg_state': cp.icpg_state,
        'node_summary': cp.node_summary,
        'created_at': cp.created_at
    }


================================================
FILE: scripts/mnemos/consolidation.py
================================================
"""Micro-consolidation -- rule-based, in-context, Tier 0 only.

Triggered when fatigue >= 0.40 (COMPRESS state). No LLM calls.
Target: <100ms execution time.

Actions:
    1. Compress 3 oldest ResultNodes (status=COMPRESSED, summary kept)
    2. Evict 1 cold ContextNode (weight < 0.2, no scope overlap)
    3. Decay weights on all evictable active nodes
"""

from __future__ import annotations

from .models import MnemoNode
from .store import MnemosStore


def micro_consolidate(
    store: MnemosStore,
    current_scope: str = '',
    max_compress: int = 3,
    max_evict: int = 1
) -> dict:
    """Run one rule-based micro-consolidation pass (no LLM involved).

    Steps, in order:
        1. Compress the oldest active result nodes, up to ``max_compress``.
        2. Evict active context nodes that qualify, up to ``max_evict``.
        3. Decay activation weights via ``store.decay_weights(factor=0.95)``.

    Args:
        store: MnemosStore instance.
        current_scope: Current scope tag for eviction decisions.
        max_compress: Max ResultNodes to compress per pass.
        max_evict: Max ContextNodes to evict per pass.

    Returns:
        Stats: {compressed, evicted, decayed}.
    """
    stats = {'compressed': 0, 'evicted': 0, 'decayed': 0}

    # Step 1: walk the active result nodes oldest-first until the quota is
    # reached or the nodes run out.
    oldest_first = store.get_by_type('result', status='active')
    oldest_first.sort(key=lambda n: n.created_at)
    done = 0
    for candidate in oldest_first:
        if done >= max_compress:
            break
        store.compress_node(candidate.id, _compress_result_node(candidate))
        done += 1
    stats['compressed'] = done

    # Step 2: evict qualifying context nodes, in the order the store
    # returns them.
    removed = 0
    for candidate in store.get_by_type('context', status='active'):
        if removed >= max_evict:
            break
        if not _should_evict(candidate, current_scope):
            continue
        store.evict_node(candidate.id)
        removed += 1
    stats['evicted'] = removed

    # Step 3: weight decay; the store reports how many nodes it touched.
    stats['decayed'] = store.decay_weights(factor=0.95)

    return stats


def _compress_result_node(node: MnemoNode) -> str:
    """Produce a summary from a ResultNode.

    Rule-based: first 200 chars of content as summary.
    """
    content = node.content.strip()
    if not content:
        return node.summary or '(empty result)'

    if len(content) <= 200:
        return content

    # Truncate at word boundary
    truncated = content[:200]
    last_space = truncated.rfind(' ')
    if last_space > 150:
        truncated = truncated[:last_space]
    return truncated + '...'


def _should_evict(node: MnemoNode, current_scope: str) -> bool:
    """Determine if a ContextNode should be evicted.

    Evict when:
        - activation_weight < 0.2
        - No scope_tag overlap with current scope
        - Access count is low (< 3)
    """
    if node.activation_weight >= 0.2:
        return False

    if node.access_count >= 3:
        return False

    if not current_scope:
        return True

    # Check scope overlap
    if node.scope_tags:
        for tag in node.scope_tags:
            if current_scope.startswith(tag) or tag.startswith(current_scope):
                return False

    return True


================================================
FILE: scripts/mnemos/fatigue.py
================================================
"""4-dimension fatigue computation -- all dimensions passively observable.

Every dimension is derived from actual hook data (tool calls, file paths,
errors). No agent cooperation or manual input required.

Signals:
    1. Token utilization  -- statusline writes context_window.used_percentage
    2. Scope scatter      -- PreToolUse logs file paths -> unique dirs ratio
    3. Re-read ratio      -- PreToolUse logs Read calls -> duplicate file ratio
    4. Error density      -- PostToolUse logs success/failure -> error ratio
"""

from __future__ import annotations

import json
from pathlib import Path

from .models import FATIGUE_WEIGHTS, FatigueState, _now
from .signals import (
    compute_error_density,
    compute_reread_ratio,
    compute_scope_scatter,
    read_recent_signals
)


def compute_fatigue(
    context_data: dict,
    project_dir: str = '.'
) -> FatigueState:
    """Compute 4-dimension fatigue score from observable signals.

    Args:
        context_data: Dict with used_percentage (from fatigue.json). A
            missing, null or non-numeric value counts as 0.
        project_dir: Project directory to read signals from.

    Returns:
        FatigueState with per-dimension scores and composite, each rounded
        to 4 decimal places; the composite is clamped to [0, 1].
    """
    # Dimension 1: Token utilization (real -- from statusline).
    # Parsed defensively: the value comes from a JSON file on disk.
    try:
        token_util = float(context_data.get('used_percentage', 0)) / 100
    except (TypeError, ValueError):
        token_util = 0.0
    token_util = min(1.0, max(0.0, token_util))

    # Read behavioral signals from hook log
    signals = read_recent_signals(project_dir)

    # Dimension 2: Scope scatter (real -- from PreToolUse file paths)
    scatter = compute_scope_scatter(signals)

    # Dimension 3: Re-read ratio (real -- from PreToolUse Read calls)
    reread = compute_reread_ratio(signals)

    # Dimension 4: Error density (real -- from PostToolUse outcomes)
    errors = compute_error_density(signals)

    # Weighted composite
    score = (
        FATIGUE_WEIGHTS['token_utilization'] * token_util
        + FATIGUE_WEIGHTS['scope_scatter'] * scatter
        + FATIGUE_WEIGHTS['reread_ratio'] * reread
        + FATIGUE_WEIGHTS['error_density'] * errors
    )
    score = min(1.0, max(0.0, score))

    state = FatigueState.score_to_state(score)

    return FatigueState(
        token_utilization=round(token_util, 4),
        scope_scatter=round(scatter, 4),
        reread_ratio=round(reread, 4),
        error_density=round(errors, 4),
        composite_score=round(score, 4),
        state=state,
        computed_at=_now()
    )


def read_fatigue_file(project_dir: str = '.') -> dict:
    """Read the live fatigue.json written by the statusline script.

    Returns dict with used_percentage, remaining_percentage, timestamp.
    Falls back to empty dict if the file is missing, unreadable, corrupt,
    or does not contain a JSON object.
    """
    fatigue_path = Path(project_dir).resolve() / '.mnemos' / 'fatigue.json'
    if not fatigue_path.exists():
        return {}
    try:
        data = json.loads(fatigue_path.read_text())
    except (ValueError, OSError):
        # ValueError covers malformed JSON and undecodable bytes.
        return {}
    # Callers rely on dict methods; never hand back a list/number/null.
    return data if isinstance(data, dict) else {}


def write_fatigue_file(
    project_dir: str, used_pct: float, remaining_pct: float
) -> None:
    """Store the current context usage in ``.mnemos/fatigue.json``.

    Creates the ``.mnemos`` directory when needed and overwrites the file
    with the two percentages plus the current ``time.time()``.
    """
    import time

    target_dir = Path(project_dir).resolve() / '.mnemos'
    target_dir.mkdir(parents=True, exist_ok=True)

    snapshot = dict(
        used_percentage=used_pct,
        remaining_percentage=remaining_pct,
        timestamp=time.time(),
    )
    (target_dir / 'fatigue.json').write_text(json.dumps(snapshot))


================================================
FILE: scripts/mnemos/models.py
================================================
"""Data models for Mnemos -- MnemoNode, FatigueState, CheckpointNode."""

from __future__ import annotations

import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


def _uuid() -> str:
    return str(uuid.uuid4())


# --- MnemoNode types ---
# Every type listed here has an entry in EVICTION_POLICIES below.
MNEMO_TYPES = (
    'goal', 'constraint', 'context', 'working',
    'result', 'skill', 'checkpoint', 'handoff'
)

# --- MnemoNode statuses ---
# 'active' is the default status of a newly created MnemoNode.
MNEMO_STATUSES = (
    'active', 'compressed', 'evicted', 'promoted', 'handed_off'
)

# --- MnemoNode origins ---
# 'agent_generated' is the default origin of a newly created MnemoNode.
MNEMO_ORIGINS = (
    'loaded', 'derived', 'tool_result',
    'inherited', 'agent_generated'
)

# --- Fatigue states ---
# Listed from lowest to highest fatigue.
FATIGUE_STATES = (
    'flow', 'compress', 'pre_sleep', 'rem', 'emergency'
)

# --- Fatigue thresholds ---
# (lower, upper) composite-score range per state. The lower bounds are the
# same cut-offs FatigueState.score_to_state checks with `>=`.
FATIGUE_THRESHOLDS = {
    'flow': (0.0, 0.40),
    'compress': (0.40, 0.60),
    'pre_sleep': (0.60, 0.75),
    'rem': (0.75, 0.90),
    'emergency': (0.90, 1.0)
}

# --- Fatigue dimension weights ---
# All 4 dimensions are passively observable from hook data.
# No agent cooperation required.
# The four weights sum to 1.0.
FATIGUE_WEIGHTS = {
    'token_utilization': 0.40,  # from statusline context_window.used_percentage
    'scope_scatter': 0.25,      # unique dirs in recent tool calls (PreToolUse)
    'reread_ratio': 0.20,       # files Read more than once (PreToolUse)
    'error_density': 0.15       # failed tool calls ratio (PostToolUse)
}

# --- Eviction policies per type ---
# never = GoalNode/ConstraintNode survive all compaction
# compress_first = content replaced with summary before eviction
# evictable = can be evicted when cold
# Types missing from this table are treated as 'evictable' by
# MnemoNode.eviction_policy.
EVICTION_POLICIES = {
    'goal': 'never',
    'constraint': 'never',
    'context': 'evictable',
    'working': 'compress_first',
    'result': 'compress_first',
    'skill': 'compress_first',
    'checkpoint': 'never',
    'handoff': 'never'
}


@dataclass
class MnemoNode:
    """One typed memory entry of the MnemoGraph.

    How each type is treated during compaction:
        goal        -- never evicted, task's primary objective
        constraint  -- never evicted, invariants and contracts
        context     -- evictable when activation_weight drops
        working     -- compressed first, then evicted
        result      -- compressed first (summary kept), then evicted
        skill       -- compressed first, promotable to persistent
        checkpoint  -- never evicted, serialized session state
        handoff     -- never evicted, task completion summary

    Only ``type``, ``task_id`` and ``content`` are required; ``id`` and both
    timestamps are generated per instance.
    """

    type: str
    task_id: str
    content: str
    id: str = field(default_factory=_uuid)
    summary: str | None = None
    activation_weight: float = 1.0
    status: str = 'active'
    origin: str = 'agent_generated'
    confidence: float = 1.0
    scope_tags: list[str] = field(default_factory=list)
    links: list[str] = field(default_factory=list)
    created_at: str = field(default_factory=_now)
    last_accessed: str = field(default_factory=_now)
    access_count: int = 0

    @property
    def eviction_policy(self) -> str:
        """Policy name for this node's type; unknown types are 'evictable'."""
        if self.type in EVICTION_POLICIES:
            return EVICTION_POLICIES[self.type]
        return 'evictable'

    @property
    def is_evictable(self) -> bool:
        """True when the node's policy is 'evictable'."""
        policy = self.eviction_policy
        return policy == 'evictable'

    @property
    def is_compressible(self) -> bool:
        """True when the node's policy is 'compress_first'."""
        policy = self.eviction_policy
        return policy == 'compress_first'


@dataclass
class FatigueState:
    """Snapshot of the 4-dimension fatigue model.

    Every dimension is derived from hook data, without agent cooperation:
        token_utilization  -- context_window.used_percentage / 100 (statusline)
        scope_scatter      -- unique dirs in recent tool calls (PreToolUse)
        reread_ratio       -- files Read'd more than once (PreToolUse)
        error_density      -- failed tool calls / total (PostToolUse)

    ``composite_score`` is the weighted combination of the four dimensions
    and ``state`` is the named band that score falls into.
    """

    token_utilization: float = 0.0
    scope_scatter: float = 0.0
    reread_ratio: float = 0.0
    error_density: float = 0.0
    composite_score: float = 0.0
    state: str = 'flow'
    computed_at: str = field(default_factory=_now)

    @staticmethod
    def score_to_state(score: float) -> str:
        """Return the state name for a composite fatigue score.

        The highest band whose lower bound the score reaches wins; anything
        below 0.40 is 'flow'.
        """
        bands = (
            (0.90, 'emergency'),
            (0.75, 'rem'),
            (0.60, 'pre_sleep'),
            (0.40, 'compress'),
        )
        for floor, name in bands:
            if score >= floor:
                return name
        return 'flow'


@dataclass
class CheckpointNode:
    """Serialized session state for resume after compaction or restart.

    Always includes GoalNode content, all ConstraintNodes, current sub-goal.
    Optionally includes iCPG state (active ReasonNode, drift summary).

    Only ``task_id`` and ``goal`` are required; every other field has a
    default, and ``id`` / ``created_at`` are generated per instance.
    """

    # Task the checkpoint belongs to.
    task_id: str
    # Goal text shown first when the checkpoint is rendered.
    goal: str
    # Unique checkpoint id; also used as the archived file name.
    id: str = field(default_factory=_uuid)
    # Constraint texts, one entry per constraint.
    active_constraints: list[str] = field(default_factory=list)
    # Result summaries (or truncated result content).
    active_results: list[str] = field(default_factory=list)
    # Text of the sub-goal being worked on, '' when none.
    current_subgoal: str = ''
    # Newline-joined working-memory text.
    working_memory: str = ''
    # Human-readable description of recent activity.
    task_narrative: str = ''
    # Dicts with 'path' and optional 'edits' / 'reads' counts.
    recent_files: list[dict] = field(default_factory=list)
    # Fatigue score at the time the checkpoint was written.
    fatigue_at_checkpoint: float = 0.0
    # Git summary; keys used elsewhere: 'branch', 'uncommitted', 'staged'.
    git_state: dict = field(default_factory=dict)
    # iCPG summary, or None when no iCPG store was available.
    icpg_state: dict | None = None
    # Node counts: 'total', 'active', 'compressed', 'by_type'.
    node_summary: dict = field(default_factory=dict)
    # Creation timestamp string.
    created_at: str = field(default_factory=_now)


================================================
FILE: scripts/mnemos/pyproject.toml
================================================
[project]
name = "mnemos"
version = "0.1.0"
description = "Task-Scoped Memory Lifecycle for Autonomous Agents"
requires-python = ">=3.10"
dependencies = []

[project.scripts]
mnemos = "mnemos.__main__:main"

[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"


================================================
FILE: scripts/mnemos/signals.py
================================================
"""Behavioral signal collection from Claude Code hooks.

Hooks receive rich JSON on stdin (tool_name, tool_input, tool_response).
Instead of relying on agent cooperation (manually setting scope_tags),
we passively observe tool call patterns to derive fatigue signals.

Signals collected:
    - File paths from Read/Edit/Write tool calls (scope scatter)
    - Re-reads: same file Read'd more than once (context loss)
    - Tool errors from PostToolUse (struggling agent)
    - Edit frequency to same file (fix-retry loops)

Storage: .mnemos/signals.jsonl (append-only, one JSON line per event)
"""

from __future__ import annotations

import json
import os
import time
from pathlib import Path


# Name of the signal log file inside the project's .mnemos directory.
SIGNALS_FILE = 'signals.jsonl'
# Rolling window for fatigue computation: the default number of most recent
# signals returned by read_recent_signals().
WINDOW_SIZE = 30


def append_signal(project_dir: str, signal: dict) -> None:
    """Record one signal event as a JSON line in the project's signal log.

    The ``.mnemos`` directory is created on demand. The caller's ``signal``
    dict is stamped in place with a ``ts`` key (``time.time()``) before it
    is serialized.

    Args:
        project_dir: Project root; the log lives in ``<root>/.mnemos/``.
        signal: JSON-serializable event dict. Mutated: gains ``ts``.
    """
    log_dir = Path(project_dir).resolve() / '.mnemos'
    log_dir.mkdir(parents=True, exist_ok=True)
    signal['ts'] = time.time()
    with open(log_dir / SIGNALS_FILE, 'a') as log:
        log.write(f'{json.dumps(signal)}\n')


def read_recent_signals(project_dir: str, limit: int = WINDOW_SIZE) -> list[dict]:
    """Return up to ``limit`` of the most recent signals, oldest first.

    Only the tail of the log is read. Blank lines and lines that are not
    valid JSON are skipped. A missing log, or an OS-level read error,
    yields an empty list.

    Args:
        project_dir: Project root containing ``.mnemos/``.
        limit: Maximum number of trailing log lines to consider.
    """
    log_path = Path(project_dir).resolve() / '.mnemos' / SIGNALS_FILE
    if not log_path.exists():
        return []

    parsed = []
    try:
        for raw in _tail(str(log_path), limit):
            text = raw.strip()
            if not text:
                continue
            try:
                parsed.append(json.loads(text))
            except json.JSONDecodeError:
                # Corrupt or truncated line: ignore it and keep going.
                continue
    except OSError:
        return []
    return parsed


def compute_scope_scatter(signals: list[dict]) -> float:
    """Scope scatter: how many different directories is the agent touching?

    Every signal that carries a non-empty ``file_path`` is bucketed by the
    *parent directory* of that path, truncated to at most three leading
    components. The score is derived from the ratio of distinct buckets to
    path-bearing signals:

        ratio <= 0.1  -> 0.0 (focused)
        ratio >= 0.5  -> 1.0 (scattered)
        in between    -> linear interpolation

    Args:
        signals: Signal dicts; those without ``file_path`` are ignored.

    Returns:
        A float in [0.0, 1.0]; 0.0 when no signal carries a path.
    """
    buckets = []
    for s in signals:
        fp = s.get('file_path', '')
        if not fp:
            continue
        # Bucket by the containing directory, not the path itself; otherwise
        # every file in a shallow directory would count as its own "dir".
        # NOTE(review): a path that is itself a directory (e.g. a search
        # root) is bucketed under its parent, since the two cannot be told
        # apart without touching the filesystem.
        parent_parts = Path(fp).parent.parts[:3]
        # Files at the top level have no parent components; use '.'.
        buckets.append('/'.join(parent_parts) if parent_parts else '.')

    if not buckets:
        return 0.0

    ratio = len(set(buckets)) / len(buckets)
    # Scale: ratio of 0.1 (1 dir in 10 calls) = 0, ratio of 0.5+ = 1.0
    return min(1.0, max(0.0, (ratio - 0.1) / 0.4))


def compute_reread_ratio(signals: list[dict]) -> float:
    """Fraction of ``Read`` signals that target an already-read path.

    Only signals whose ``tool`` is ``'Read'`` and that have a non-empty
    ``file_path`` are considered. With fewer than three such signals the
    result is 0.0.

    NOTE(review): both pre- and post-tool signals carry ``tool`` and
    ``file_path``; if both kinds are logged for the same call, a single
    read would register as a re-read here - confirm against the hooks.

    Returns:
        A float in [0.0, 1.0].
    """
    read_paths = []
    for sig in signals:
        if sig.get('tool') == 'Read' and sig.get('file_path'):
            read_paths.append(sig['file_path'])

    total = len(read_paths)
    if total < 3:
        return 0.0

    # Every occurrence after the first of a given path is a re-read.
    repeats = total - len(set(read_paths))
    return min(1.0, repeats / max(total, 1))


def compute_error_density(signals: list[dict]) -> float:
    """Share of outcome-bearing signals that report a failure.

    A signal counts as an outcome when it has a ``success`` key; it counts
    as an error when that value is falsy. Signals without the key are
    ignored.

    Returns:
        A float in [0.0, 1.0]; 0.0 when no signal carries an outcome.
    """
    judged = 0
    failed = 0
    for sig in signals:
        if 'success' not in sig:
            continue
        judged += 1
        if not sig['success']:
            failed += 1

    if judged == 0:
        return 0.0
    return min(1.0, failed / max(judged, 1))


def extract_signal_from_pre_tool(hook_input: dict) -> dict | None:
    """Extract a signal from PreToolUse hook JSON input.

    File-bearing tools (Read, Edit, Write, Glob, Grep) yield a signal with
    a normalized ``file_path`` taken from ``tool_input['file_path']`` or,
    failing that, ``tool_input['path']``. Every other tool, Bash included,
    yields only the tool name and event kind; bash commands are not logged
    as file signals.

    Args:
        hook_input: Parsed hook payload; ``tool_name`` and ``tool_input``
            are read. A missing, null, or non-dict ``tool_input`` is
            treated as empty.

    Returns:
        A signal dict to append.
    """
    tool = hook_input.get('tool_name', '')

    if tool not in ('Read', 'Edit', 'Write', 'Glob', 'Grep'):
        return {'tool': tool, 'event': 'pre'}

    tool_input = hook_input.get('tool_input')
    if not isinstance(tool_input, dict):
        # Key present but null / malformed: behave as if no input was given.
        tool_input = {}

    file_path = tool_input.get('file_path') or tool_input.get('path') or ''
    if not isinstance(file_path, str):
        file_path = ''

    return {
        'tool': tool,
        'event': 'pre',
        'file_path': _normalize_path(file_path)
    }


def extract_signal_from_post_tool(hook_input: dict) -> dict | None:
    """Extract a signal from PostToolUse hook JSON input.

    Captures success/failure for error density computation. A call is
    considered failed when:

    - the response is a dict with a truthy ``error`` or ``is_error``, or
      with an ``exit_code`` other than 0; or
    - the response is a string starting with ``Error:`` or ``error:``.

    Anything else counts as success.

    Args:
        hook_input: Parsed hook payload; ``tool_name``, ``tool_input`` and
            ``tool_response`` are read. A missing, null, or non-dict
            ``tool_input`` is treated as empty.

    Returns:
        A signal dict with ``tool``, ``event``, ``file_path`` and
        ``success``.
    """
    tool = hook_input.get('tool_name', '')
    tool_input = hook_input.get('tool_input')
    if not isinstance(tool_input, dict):
        # Key present but null / malformed: behave as if no input was given.
        tool_input = {}
    response = hook_input.get('tool_response', {})

    file_path = tool_input.get('file_path') or tool_input.get('path') or ''
    if not isinstance(file_path, str):
        file_path = ''

    # Determine success/failure
    success = True
    if isinstance(response, dict):
        # Check for common error indicators
        if response.get('error') or response.get('is_error'):
            success = False
        # Bash exit code
        if 'exit_code' in response and response['exit_code'] != 0:
            success = False
    elif isinstance(response, str):
        # String responses with error markers
        if response.startswith(('Error:', 'error:')):
            success = False

    return {
        'tool': tool,
        'event': 'post',
        'file_path': _normalize_path(file_path),
        'success': success
    }


def _normalize_path(file_path: str) -> str:
    """Normalize file path to relative form for consistent comparison."""
    if not file_path:
        return ''
    p = Path(file_path)
    # Convert absolute paths to relative if within CWD
    try:
        return str(p.relative_to(Path.cwd()))
    except ValueError:
        return str(p)


def _tail(filepath: str, n: int) -> list[str]:
    """Read last n lines from a file efficiently."""
    try:
        with open(filepath, 'rb') as f:
            # Seek to end
            f.seek(0, 2)
            size = f.tell()
            if size == 0:
                return []

            # Read backwards in chunks
            chunk_size = min(size, n * 500)  # ~500 bytes per line estimate
            f.seek(max(0, size - chunk_size))
            data = f.read().decode('utf-8', errors='replace')
            lines = data.strip().split('\n')
            return lines[-n:]
    except OSError:
        return []


def get_session_stats(project_dir: str) -> dict:
    """Summarize the most recent (up to 100) signals for diagnostics.

    Returns:
        ``{'total_signals': 0}`` when there are no signals; otherwise a
        dict with the signal count, per-tool call counts, number of
        distinct files read, re-read count, error count, number of signals
        carrying an outcome, and the resulting error rate.
    """
    recent = read_recent_signals(project_dir, limit=100)
    if not recent:
        return {'total_signals': 0}

    per_tool = {}
    read_paths = set()
    reread_count = 0
    failure_count = 0
    outcome_count = 0

    for sig in recent:
        name = sig.get('tool', 'unknown')
        per_tool[name] = per_tool.get(name, 0) + 1

        path = sig.get('file_path', '')
        if path and sig.get('tool') == 'Read':
            if path in read_paths:
                reread_count += 1
            else:
                read_paths.add(path)

        if 'success' in sig:
            outcome_count += 1
            if not sig['success']:
                failure_count += 1

    return {
        'total_signals': len(recent),
        'tool_calls': per_tool,
        'unique_files_read': len(read_paths),
        'rereads': reread_count,
        'errors': failure_count,
        'total_outcomes': outcome_count,
        'error_rate': failure_count / max(outcome_count, 1)
    }


================================================
FILE: scripts/mnemos/store.py
================================================
"""SQLite storage layer for Mnemos MnemoGraph."""

from __future__ import annotations

import json
import sqlite3
from pathlib import Path

from .models import CheckpointNode, FatigueState, MnemoNode, _now

# Directory (relative to the project root) that holds all Mnemos state.
MNEMOS_DIR = '.mnemos'
# SQLite database file name inside MNEMOS_DIR.
DB_NAME = 'mnemo.db'

# DDL executed by MnemosStore.init_db(). Every statement uses IF NOT EXISTS,
# so running it against an existing database is safe. List- and dict-valued
# fields (scope_tags, links, active_constraints, active_results, git_state,
# icpg_state, node_summary) are stored as JSON text.
# NOTE(review): the checkpoint dataclass appears to also carry
# `task_narrative` and `recent_files`, which have no column in `checkpoints`
# - confirm whether those fields are persisted elsewhere.
SCHEMA = """
CREATE TABLE IF NOT EXISTS mnemo_nodes (
    id TEXT PRIMARY KEY,
    type TEXT NOT NULL,
    task_id TEXT NOT NULL,
    content TEXT NOT NULL,
    summary TEXT,
    activation_weight REAL DEFAULT 1.0,
    status TEXT DEFAULT 'active',
    origin TEXT DEFAULT 'agent_generated',
    confidence REAL DEFAULT 1.0,
    scope_tags TEXT DEFAULT '[]',
    links TEXT DEFAULT '[]',
    created_at TEXT NOT NULL,
    last_accessed TEXT NOT NULL,
    access_count INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS checkpoints (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL,
    goal TEXT NOT NULL,
    active_constraints TEXT DEFAULT '[]',
    active_results TEXT DEFAULT '[]',
    current_subgoal TEXT DEFAULT '',
    working_memory TEXT DEFAULT '',
    fatigue_at_checkpoint REAL DEFAULT 0.0,
    git_state TEXT DEFAULT '{}',
    icpg_state TEXT,
    node_summary TEXT DEFAULT '{}',
    created_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS fatigue_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    token_utilization REAL,
    scope_scatter REAL,
    reread_ratio REAL,
    error_density REAL,
    composite_score REAL,
    state TEXT,
    computed_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_mnemo_type ON mnemo_nodes(type);
CREATE INDEX IF NOT EXISTS idx_mnemo_task ON mnemo_nodes(task_id);
CREATE INDEX IF NOT EXISTS idx_mnemo_status ON mnemo_nodes(status);
CREATE INDEX IF NOT EXISTS idx_mnemo_weight ON mnemo_nodes(activation_weight);
CREATE INDEX IF NOT EXISTS idx_checkpoint_task ON checkpoints(task_id);
CREATE INDEX IF NOT EXISTS idx_fatigue_time ON fatigue_log(computed_at);
"""


class _ClosingConnection(sqlite3.Connection):
    """sqlite3 connection whose ``with`` block also closes it on exit.

    The stock ``sqlite3.Connection`` context manager only commits (or rolls
    back on error); it never closes the connection. Every store method opens
    a fresh connection inside ``with``, so without this each call would
    leave an open connection behind.
    """

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            # Commit on success / roll back on error, as the base class does.
            return super().__exit__(exc_type, exc_value, traceback)
        finally:
            self.close()


class MnemosStore:
    """SQLite-backed storage for the MnemoGraph.

    Each public method opens its own short-lived connection via
    ``with self._conn() as conn``; the transaction is committed and the
    connection closed when the block exits.
    """

    def __init__(self, project_dir: str = '.'):
        """Resolve the storage paths for ``project_dir`` (no I/O is done)."""
        self.project_dir = Path(project_dir).resolve()
        self.mnemos_dir = self.project_dir / MNEMOS_DIR
        self.db_path = self.mnemos_dir / DB_NAME

    def init_db(self) -> None:
        """Create .mnemos/ directory and initialize schema.

        Also drops a ``.gitignore`` containing ``*`` into the directory,
        unless one already exists.
        """
        self.mnemos_dir.mkdir(parents=True, exist_ok=True)
        gitignore = self.mnemos_dir / '.gitignore'
        if not gitignore.exists():
            gitignore.write_text('*\n')
        with self._conn() as conn:
            conn.executescript(SCHEMA)

    def exists(self) -> bool:
        """Return True when the database file is present on disk."""
        return self.db_path.exists()

    def _conn(self) -> sqlite3.Connection:
        """Open a new connection configured for this store.

        Rows are returned as ``sqlite3.Row``; WAL journaling and foreign-key
        enforcement are switched on. When used as a context manager the
        returned connection commits/rolls back *and closes* on exit.
        """
        conn = sqlite3.connect(str(self.db_path), factory=_ClosingConnection)
        conn.row_factory = sqlite3.Row
        conn.execute('PRAGMA journal_mode=WAL')
        conn.execute('PRAGMA foreign_keys=ON')
        return conn

    # --- MnemoNode CRUD ---

    def create_node(self, node: MnemoNode) -> str:
        """Insert ``node`` and return its id.

        ``scope_tags`` and ``links`` are stored as JSON text.
        """
        with self._conn() as conn:
            conn.execute(
                """INSERT INTO mnemo_nodes
                   (id, type, task_id, content, summary, activation_weight,
                    status, origin, confidence, scope_tags, links,
                    created_at, last_accessed, access_count)
                   VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                (
                    node.id, node.type, node.task_id, node.content,
                    node.summary, node.activation_weight, node.status,
                    node.origin, node.confidence,
                    json.dumps(node.scope_tags), json.dumps(node.links),
                    node.created_at, node.last_accessed, node.access_count
                )
            )
        return node.id

    def get_node(self, node_id: str) -> MnemoNode | None:
        """Fetch a node by id, or None when no such row exists."""
        with self._conn() as conn:
            row = conn.execute(
                'SELECT * FROM mnemo_nodes WHERE id = ?', (node_id,)
            ).fetchone()
        return self._row_to_node(row) if row else None

    def get_active_nodes(self, task_id: str | None = None) -> list[MnemoNode]:
        """Return active nodes, highest activation weight first.

        When ``task_id`` is truthy the result is limited to that task.
        """
        with self._conn() as conn:
            if task_id:
                rows = conn.execute(
                    "SELECT * FROM mnemo_nodes WHERE status = 'active' "
                    "AND task_id = ? ORDER BY activation_weight DESC",
                    (task_id,)
                ).fetchall()
            else:
                rows = conn.execute(
                    "SELECT * FROM mnemo_nodes WHERE status = 'active' "
                    "ORDER BY activation_weight DESC"
                ).fetchall()
        return [self._row_to_node(r) for r in rows]

    def get_by_type(
        self, node_type: str, status: str = 'active'
    ) -> list[MnemoNode]:
        """Return nodes of ``node_type`` with ``status``, heaviest first."""
        with self._conn() as conn:
            rows = conn.execute(
                'SELECT * FROM mnemo_nodes WHERE type = ? AND status = ? '
                'ORDER BY activation_weight DESC',
                (node_type, status)
            ).fetchall()
        return [self._row_to_node(r) for r in rows]

    def nodes_for_scope(self, scope_tags: list[str]) -> list[MnemoNode]:
        """Get active nodes whose scope_tags overlap with given tags."""
        wanted = set(scope_tags)
        return [
            n for n in self.get_active_nodes()
            if wanted & set(n.scope_tags)
        ]

    def nodes_above_weight(self, threshold: float) -> list[MnemoNode]:
        """Return active nodes with activation_weight >= ``threshold``."""
        with self._conn() as conn:
            rows = conn.execute(
                "SELECT * FROM mnemo_nodes WHERE status = 'active' "
                "AND activation_weight >= ? ORDER BY activation_weight DESC",
                (threshold,)
            ).fetchall()
        return [self._row_to_node(r) for r in rows]

    def update_node_status(self, node_id: str, status: str) -> None:
        """Set the status column of one node."""
        with self._conn() as conn:
            conn.execute(
                'UPDATE mnemo_nodes SET status = ? WHERE id = ?',
                (status, node_id)
            )

    def update_node_weight(self, node_id: str, weight: float) -> None:
        """Set the activation_weight column of one node."""
        with self._conn() as conn:
            conn.execute(
                'UPDATE mnemo_nodes SET activation_weight = ? WHERE id = ?',
                (weight, node_id)
            )

    def compress_node(self, node_id: str, summary: str) -> None:
        """Compress a node: replace content with summary, set status."""
        with self._conn() as conn:
            conn.execute(
                "UPDATE mnemo_nodes SET status = 'compressed', "
                "summary = ?, content = '' WHERE id = ?",
                (summary, node_id)
            )

    def evict_node(self, node_id: str) -> None:
        """Evict a node: set status, clear content and summary."""
        with self._conn() as conn:
            conn.execute(
                "UPDATE mnemo_nodes SET status = 'evicted', "
                "content = '', summary = NULL WHERE id = ?",
                (node_id,)
            )

    def touch_node(self, node_id: str) -> None:
        """Update last_accessed and increment access_count."""
        with self._conn() as conn:
            conn.execute(
                'UPDATE mnemo_nodes SET last_accessed = ?, '
                'access_count = access_count + 1 WHERE id = ?',
                (_now(), node_id)
            )

    def decay_weights(self, factor: float = 0.95) -> int:
        """Apply exponential decay to active node weights.

        Nodes of type goal, constraint, checkpoint and handoff are exempt.
        Weights never drop below 0.01.

        Returns count of nodes decayed.
        """
        with self._conn() as conn:
            cursor = conn.execute(
                "UPDATE mnemo_nodes SET activation_weight = "
                "MAX(0.01, activation_weight * ?) "
                "WHERE status = 'active' AND type NOT IN "
                "('goal', 'constraint', 'checkpoint', 'handoff')",
                (factor,)
            )
            return cursor.rowcount

    # --- Checkpoint CRUD ---

    def save_checkpoint(self, cp: CheckpointNode) -> str:
        """Insert a checkpoint row and return the checkpoint id.

        List/dict fields are stored as JSON text; a falsy ``icpg_state``
        (None or empty) is stored as NULL.

        NOTE(review): only the columns listed below are written; any other
        fields on ``cp`` are not persisted by this method.
        """
        with self._conn() as conn:
            conn.execute(
                """INSERT INTO checkpoints
                   (id, task_id, goal, active_constraints, active_results,
                    current_subgoal, working_memory, fatigue_at_checkpoint,
                    git_state, icpg_state, node_summary, created_at)
                   VALUES (?,?,?,?,?,?,?,?,?,?,?,?)""",
                (
                    cp.id, cp.task_id, cp.goal,
                    json.dumps(cp.active_constraints),
                    json.dumps(cp.active_results),
                    cp.current_subgoal, cp.working_memory,
                    cp.fatigue_at_checkpoint,
                    json.dumps(cp.git_state),
                    json.dumps(cp.icpg_state) if cp.icpg_state else None,
                    json.dumps(cp.node_summary),
                    cp.created_at
                )
            )
        return cp.id

    def get_latest_checkpoint(
        self, task_id: str | None = None
    ) -> CheckpointNode | None:
        """Return the checkpoint with the greatest created_at, or None.

        When ``task_id`` is truthy only that task's checkpoints are
        considered.
        """
        with self._conn() as conn:
            if task_id:
                row = conn.execute(
                    'SELECT * FROM checkpoints WHERE task_id = ? '
                    'ORDER BY created_at DESC LIMIT 1',
                    (task_id,)
                ).fetchone()
            else:
                row = conn.execute(
                    'SELECT * FROM checkpoints '
                    'ORDER BY created_at DESC LIMIT 1'
                ).fetchone()
        return self._row_to_checkpoint(row) if row else None

    # --- Fatigue log ---

    def log_fatigue(self, fatigue: FatigueState) -> None:
        """Append one fatigue measurement to fatigue_log."""
        with self._conn() as conn:
            conn.execute(
                """INSERT INTO fatigue_log
                   (token_utilization, scope_scatter, reread_ratio,
                    error_density, composite_score, state, computed_at)
                   VALUES (?,?,?,?,?,?,?)""",
                (
                    fatigue.token_utilization, fatigue.scope_scatter,
                    fatigue.reread_ratio, fatigue.error_density,
                    fatigue.composite_score, fatigue.state,
                    fatigue.computed_at
                )
            )

    def get_fatigue_history(self, limit: int = 20) -> list[FatigueState]:
        """Return up to ``limit`` fatigue entries, newest first."""
        with self._conn() as conn:
            rows = conn.execute(
                'SELECT * FROM fatigue_log ORDER BY computed_at DESC '
                'LIMIT ?', (limit,)
            ).fetchall()
        return [self._row_to_fatigue(r) for r in rows]

    # --- Stats ---

    def get_stats(self) -> dict:
        """Return row counts for diagnostics.

        Keys: total_nodes, active, compressed, evicted, checkpoints,
        fatigue_entries, and by_type (active node count per node type).
        """
        with self._conn() as conn:
            total = conn.execute(
                'SELECT COUNT(*) FROM mnemo_nodes'
            ).fetchone()[0]
            active = conn.execute(
                "SELECT COUNT(*) FROM mnemo_nodes WHERE status = 'active'"
            ).fetchone()[0]
            compressed = conn.execute(
                "SELECT COUNT(*) FROM mnemo_nodes WHERE status = 'compressed'"
            ).fetchone()[0]
            evicted = conn.execute(
                "SELECT COUNT(*) FROM mnemo_nodes WHERE status = 'evicted'"
            ).fetchone()[0]
            checkpoints = conn.execute(
                'SELECT COUNT(*) FROM checkpoints'
            ).fetchone()[0]
            fatigue_entries = conn.execute(
                'SELECT COUNT(*) FROM fatigue_log'
            ).fetchone()[0]

            # Type breakdown
            type_rows = conn.execute(
                "SELECT type, COUNT(*) as cnt FROM mnemo_nodes "
                "WHERE status = 'active' GROUP BY type"
            ).fetchall()
            by_type = {r['type']: r['cnt'] for r in type_rows}

        return {
            'total_nodes': total,
            'active': active,
            'compressed': compressed,
            'evicted': evicted,
            'checkpoints': checkpoints,
            'fatigue_entries': fatigue_entries,
            'by_type': by_type
        }

    # --- iCPG Bridge ---

    def load_from_icpg(self, icpg_store, task_id: str = 'icpg-bridge') -> dict:
        """Import active iCPG ReasonNodes as GoalNodes/ConstraintNodes.

        Reasons whose status is 'rejected' or 'abandoned' are skipped. Each
        remaining reason becomes one goal node; each of its invariants and
        postconditions becomes a constraint node linked to that goal.

        Nothing here checks for previously imported nodes, so calling this
        twice inserts the same goals and constraints again.

        Returns stats: {goals_imported, constraints_imported}.
        """
        stats = {'goals_imported': 0, 'constraints_imported': 0}

        reasons = icpg_store.list_reasons()
        for reason in reasons:
            if reason.status in ('rejected', 'abandoned'):
                continue

            # ReasonNode -> GoalNode
            goal_node = MnemoNode(
                type='goal',
                task_id=task_id,
                content=f'{reason.goal} [iCPG:{reason.id[:8]}]',
                origin='loaded',
                scope_tags=reason.scope,
                confidence=1.0
            )
            self.create_node(goal_node)
            stats['goals_imported'] += 1

            # Invariants/Postconditions -> ConstraintNodes
            for inv in reason.invariants:
                cn = MnemoNode(
                    type='constraint',
                    task_id=task_id,
                    content=f'INV: {inv} [from: {reason.goal[:40]}]',
                    origin='loaded',
                    scope_tags=reason.scope,
                    links=[goal_node.id]
                )
                self.create_node(cn)
                stats['constraints_imported'] += 1

            for post in reason.postconditions:
                cn = MnemoNode(
                    type='constraint',
                    task_id=task_id,
                    content=f'POST: {post} [from: {reason.goal[:40]}]',
                    origin='loaded',
                    scope_tags=reason.scope,
                    links=[goal_node.id]
                )
                self.create_node(cn)
                stats['constraints_imported'] += 1

        return stats

    # --- Row converters ---

    @staticmethod
    def _row_to_node(row: sqlite3.Row) -> MnemoNode:
        """Build a MnemoNode from a mnemo_nodes row (JSON columns decoded)."""
        return MnemoNode(
            id=row['id'],
            type=row['type'],
            task_id=row['task_id'],
            content=row['content'],
            summary=row['summary'],
            activation_weight=row['activation_weight'],
            status=row['status'],
            origin=row['origin'],
            confidence=row['confidence'],
            scope_tags=json.loads(row['scope_tags']),
            links=json.loads(row['links']),
            created_at=row['created_at'],
            last_accessed=row['last_accessed'],
            access_count=row['access_count']
        )

    @staticmethod
    def _row_to_checkpoint(row: sqlite3.Row) -> CheckpointNode:
        """Build a CheckpointNode from a checkpoints row.

        A NULL/empty ``icpg_state`` column becomes ``None``.
        """
        return CheckpointNode(
            id=row['id'],
            task_id=row['task_id'],
            goal=row['goal'],
            active_constraints=json.loads(row['active_constraints']),
            active_results=json.loads(row['active_results']),
            current_subgoal=row['current_subgoal'],
            working_memory=row['working_memory'],
            fatigue_at_checkpoint=row['fatigue_at_checkpoint'],
            git_state=json.loads(row['git_state']),
            icpg_state=(
                json.loads(row['icpg_state'])
                if row['icpg_state'] else None
            ),
            node_summary=json.loads(row['node_summary']),
            created_at=row['created_at']
        )

    @staticmethod
    def _row_to_fatigue(row: sqlite3.Row) -> FatigueState:
        """Build a FatigueState from a fatigue_log row."""
        return FatigueState(
            token_utilization=row['token_utilization'],
            scope_scatter=row['scope_scatter'],
            reread_ratio=row['reread_ratio'],
            error_density=row['error_density'],
            composite_score=row['composite_score'],
            state=row['state'],
            computed_at=row['computed_at']
        )


================================================
FILE: scripts/polyphony/__init__.py
================================================
"""Polyphony — Multi-agent orchestration for Maggy."""

# Package version; the CLI's --version flag reports this value.
__version__ = '0.1.0'


================================================
FILE: scripts/polyphony/__main__.py
================================================
"""CLI entry point for Polyphony.

Usage:
    polyphony init          Create ~/.polyphony/ with config files
    polyphony spawn <title> Create and route a task
    polyphony status        Show current task states
    polyphony cleanup       Remove completed workspaces
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

from . import __version__
from .config import (
    default_config_dir,
    load_agents,
    load_config,
    load_identities,
    load_routing,
)
from .store import PolyphonyStore


def cmd_init(args: argparse.Namespace) -> int:
    """Ensure the Polyphony config directory exists and report its path.

    Only the directory is created (parents included); no files are written
    here. ``args`` is accepted for handler-signature uniformity and unused.

    Returns:
        0 on success.
    """
    target = default_config_dir()
    target.mkdir(parents=True, exist_ok=True)
    print(f"Initialized {target}")
    return 0


def cmd_status(args: argparse.Namespace) -> int:
    """Show task states from the store.

    Loads the configuration from the default config directory, opens the
    store rooted at the configured ``workspace_root`` and prints one line
    per task (state, short id, title), or "No tasks." when empty.

    Returns:
        0 on success.
    """
    # load_config() needs the directory to read from; pass the default one.
    cfg = load_config(default_config_dir())
    store_dir = Path(cfg.get("workspace_root", "~/.polyphony"))
    store_dir = store_dir.expanduser()
    store = PolyphonyStore(store_dir)
    store.init_db()
    tasks = store.list_tasks()
    if not tasks:
        print("No tasks.")
        return 0
    for t in tasks:
        print(f"  [{t.state:12s}] {t.id[:8]}  {t.title}")
    return 0


def cmd_spawn(args: argparse.Namespace) -> int:
    """Create a task from CLI.

    Builds a ``Task`` from ``args.title`` / ``args.type`` (source "local",
    source_ref "cli"), saves it to the store rooted at the configured
    ``workspace_root`` and prints its short id and title.

    Returns:
        0 on success.
    """
    from .models import Task

    # load_config() needs the directory to read from; pass the default one.
    cfg = load_config(default_config_dir())
    store_dir = Path(cfg.get("workspace_root", "~/.polyphony"))
    store_dir = store_dir.expanduser()
    store = PolyphonyStore(store_dir)
    store.init_db()
    task = Task(
        title=args.title,
        source="local",
        source_ref="cli",
        task_type=args.type,
    )
    store.save_task(task)
    print(f"Created task {task.id[:8]}: {task.title}")
    return 0


def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``polyphony`` argument parser.

    Registers ``--version`` plus the subcommands init, status, spawn
    (positional ``title``, optional ``--type`` defaulting to "feature")
    and cleanup. The chosen subcommand name lands in ``args.command``.
    """
    root = argparse.ArgumentParser(
        prog="polyphony",
        description="Multi-agent orchestration",
    )
    root.add_argument(
        "--version",
        action="version",
        version=f"polyphony {__version__}",
    )

    commands = root.add_subparsers(dest="command")

    # Argument-less subcommands registered ahead of spawn.
    for cmd_name, cmd_help in (
        ("init", "Initialize config"),
        ("status", "Show task states"),
    ):
        commands.add_parser(cmd_name, help=cmd_help)

    spawn = commands.add_parser("spawn", help="Create a task")
    spawn.add_argument("title", help="Task title")
    spawn.add_argument("--type", default="feature", help="Task type")

    commands.add_parser("cleanup", help="Remove workspaces")
    return root


def main() -> int:
    """CLI entry point.

    Parses ``sys.argv`` and runs the handler for the chosen subcommand.

    Returns:
        The handler's exit code; 1 when no subcommand was given (help is
        printed) or when the subcommand is known to the parser but has no
        handler yet (a message is written to stderr).
    """
    parser = build_parser()
    args = parser.parse_args()

    dispatch = {
        "init": cmd_init,
        "status": cmd_status,
        "spawn": cmd_spawn,
    }

    if args.command is None:
        parser.print_help()
        return 1

    handler = dispatch.get(args.command)
    if handler is None:
        # The parser accepts this subcommand but nothing implements it;
        # say so instead of printing generic help.
        print(
            f"polyphony: '{args.command}' is not implemented yet",
            file=sys.stderr,
        )
        return 1
    return handler(args)


if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: scripts/polyphony/adapters/__init__.py
================================================
"""Agent adapters for Polyphony (§8).

Registry of adapter classes by agent_type name.
"""

from __future__ import annotations

from .claude import ClaudeAdapter
from .codex import CodexAdapter
from .kimi import KimiAdapter

# Maps an agent_type name to its adapter class. get_adapter() instantiates
# entries from this table and list_adapters() returns its keys.
_REGISTRY: dict[str, type] = {
    "claude": ClaudeAdapter,
    "codex": CodexAdapter,
    "kimi": KimiAdapter,
}


def get_adapter(agent_type: str):
    """Instantiate the adapter registered under ``agent_type``.

    Raises:
        KeyError: ``agent_type`` is not in the registry; the exception
            argument is the unknown name.
    """
    if agent_type not in _REGISTRY:
        raise KeyError(agent_type)
    adapter_cls = _REGISTRY[agent_type]
    return adapter_cls()


def list_adapters() -> list[str]:
    """List the agent type names present in the registry, in registry order."""
    return [agent_type for agent_type in _REGISTRY]


================================================
FILE: scripts/polyphony/adapters/claude.py
================================================
"""Claude Code adapter (§8.1).

Builds CLI command: claude -p <prompt> --output-format stream-json
Parses stream-json events for completion/quota detection.
"""

from __future__ import annotations

from ..models import AgentProfile, RunSpec


class ClaudeAdapter:
    """Adapter for Claude Code CLI."""

    def build_command(
        self,
        profile: AgentProfile,
        run_spec: RunSpec,
    ) -> list[str]:
        """Return the argv list used to launch Claude.

        The profile's ``cli_command`` is split on whitespace and
        ``--output-format stream-json`` is appended. ``--max-turns`` follows
        only when ``run_spec.max_turns`` is truthy.
        """
        argv = [
            *profile.cli_command.split(),
            "--output-format",
            "stream-json",
        ]
        if run_spec.max_turns:
            argv.extend(["--max-turns", str(run_spec.max_turns)])
        return argv

    def detect_completion(self, event: dict) -> bool:
        """True when the event's ``type`` field equals ``"result"``."""
        event_type = event.get("type")
        return event_type == "result"

    def detect_quota(self, text: str) -> bool:
        """True when ``text`` mentions a rate limit or quota (any case)."""
        haystack = text.lower()
        return any(
            marker in haystack for marker in ("rate limit", "quota")
        )


================================================
FILE: scripts/polyphony/adapters/codex.py
================================================
"""Codex CLI adapter (§8.2).

Builds CLI command: codex exec --full-auto <prompt>
Parses NDJSON events for completion/quota detection.
"""

from __future__ import annotations

from ..models import AgentProfile, RunSpec


class CodexAdapter:
    """Adapter for OpenAI Codex CLI."""

    def build_command(
        self,
        profile: AgentProfile,
        run_spec: RunSpec,
    ) -> list[str]:
        """Return the argv list used to launch Codex.

        The profile's ``cli_command`` is split on whitespace;
        ``--full-auto`` is appended unless it is already one of the
        arguments. ``run_spec`` is not consulted.
        """
        argv = profile.cli_command.split()
        if "--full-auto" in argv:
            return argv
        return argv + ["--full-auto"]

    def detect_completion(self, event: dict) -> bool:
        """True when the event's ``status`` field equals ``"completed"``."""
        status = event.get("status")
        return status == "completed"

    def detect_quota(self, text: str) -> bool:
        """True when ``text`` mentions a quota or rate limit (any case)."""
        haystack = text.lower()
        return any(
            marker in haystack for marker in ("quota", "rate limit")
        )


================================================
FILE: scripts/polyphony/adapters/kimi.py
================================================
"""Kimi CLI adapter (§8.3).

Builds CLI command: kimi --print -y <prompt>
Stub until Kimi headless mode stabilizes.
"""

from __future__ import annotations

from ..models import AgentProfile, RunSpec


class KimiAdapter:
    """Adapter for Moonshot Kimi CLI."""

    def build_command(
        self,
        profile: AgentProfile,
        run_spec: RunSpec,
    ) -> list[str]:
        """Return the profile's ``cli_command`` split on whitespace.

        No extra flags are added and ``run_spec`` is not consulted.
        """
        return profile.cli_command.split()

    def detect_completion(self, event: dict) -> bool:
        """True only when the event's ``done`` field is exactly ``True``."""
        done_flag = event.get("done")
        return done_flag is True

    def detect_quota(self, text: str) -> bool:
        """True when ``text`` mentions a rate limit or quota (any case)."""
        haystack = text.lower()
        return any(
            marker in haystack for marker in ("rate limit", "quota")
        )


================================================
FILE: scripts/polyphony/config.py
================================================
"""Configuration loading for Polyphony (spec §11)."""

from __future__ import annotations

from pathlib import Path

import yaml

from .models import AgentProfile, Identity

# Baseline settings. load_config() starts from a copy of these and overlays
# whatever config.yaml provides.
DEFAULTS = {
    "workspace_root": "~/polyphony/workspaces",
    "mirror_root": "~/polyphony/mirrors",
    "poll_interval": "30s",
    "max_concurrent_agents": 8,
    "event_idle_timeout": "5m",
}

# Baseline routing table. load_routing() replaces "rules" and/or "default"
# with the values from routing.yaml when those keys are present there.
DEFAULT_ROUTING = {
    "rules": [],
    "default": {
        "agent": "claude",
        "model": "sonnet-4-6",
        "fallback": [],
    },
}


def default_config_dir() -> Path:
    """Return ``~/.polyphony`` under the current user's home directory."""
    home = Path.home()
    return home.joinpath(".polyphony")


def load_config(config_dir: Path | None = None) -> dict:
    """Load config.yaml, merging with defaults.

    Args:
        config_dir: Directory that may contain ``config.yaml``. When
            omitted, ``default_config_dir()`` is used.

    Returns:
        A new dict holding ``DEFAULTS`` overlaid with the top-level keys
        found in ``config.yaml`` (if the file exists and is non-empty).
    """
    if config_dir is None:
        config_dir = default_config_dir()
    cfg = dict(DEFAULTS)
    path = Path(config_dir) / "config.yaml"
    if path.exists():
        with open(path) as f:
            # An empty file parses to None; treat it as "no overrides".
            loaded = yaml.safe_load(f) or {}
        cfg.update(loaded)
    return cfg


def load_identities(config_dir: Path) -> list[Identity]:
    """Read ``identities.yaml`` from ``config_dir`` into Identity objects.

    Returns an empty list when the file is absent. Each entry under the
    top-level ``identities`` key must provide ``name``; ``volumes`` and
    ``api_keys`` default to empty dicts and ``cost_ceiling_usd_per_day``
    to None.
    """
    source = Path(config_dir) / "identities.yaml"
    if not source.exists():
        return []

    with open(source) as handle:
        document = yaml.safe_load(handle) or {}

    identities = []
    for entry in document.get("identities", []):
        identities.append(
            Identity(
                name=entry["name"],
                volumes=entry.get("volumes", {}),
                api_keys=entry.get("api_keys", {}),
                cost_ceiling_usd_per_day=entry.get(
                    "cost_ceiling_usd_per_day"
                ),
            )
        )
    return identities


def load_agents(config_dir: Path) -> list[AgentProfile]:
    """Load ``<config_dir>/agents.yaml`` into AgentProfile objects.

    Returns an empty list when the file is missing, empty, or has no
    entries under ``agents``. Each entry must provide ``name``,
    ``agent_type`` and ``cli_command`` (KeyError otherwise); the other
    AgentProfile fields fall back to the same defaults the dataclass uses.
    """
    path = Path(config_dir) / "agents.yaml"
    if not path.exists():
        return []
    with open(path) as f:
        data = yaml.safe_load(f) or {}
    # A bare "agents:" key parses as None, not as a missing key.
    entries = data.get("agents") or []
    return [
        AgentProfile(
            name=item["name"],
            agent_type=item["agent_type"],
            cli_command=item["cli_command"],
            context_window_tokens=item.get(
                "context_window_tokens", 200000
            ),
            strengths=item.get("strengths") or [],
            event_protocol=item.get("event_protocol", "ndjson"),
            # Previously dropped: the profile field existed but could
            # never be set from configuration.
            auth_path=item.get("auth_path", ""),
        )
        for item in entries
    ]


def load_routing(config_dir: Path) -> dict:
    """Load ``<config_dir>/routing.yaml``, falling back to DEFAULT_ROUTING.

    The ``rules`` and ``default`` sections are replaced wholesale when
    the file provides them; a section that is absent or null keeps its
    default. The returned dict is independent of DEFAULT_ROUTING, so
    callers may mutate it freely.
    """
    import copy

    # Deep copy: a shallow dict() would hand out the very same nested
    # "rules" list and "default" dict that live in DEFAULT_ROUTING.
    routing = copy.deepcopy(DEFAULT_ROUTING)
    path = Path(config_dir) / "routing.yaml"
    if not path.exists():
        return routing
    with open(path) as f:
        data = yaml.safe_load(f) or {}
    # A key with no value parses as None; keep the default in that case
    # instead of storing None where a list/dict is expected.
    if data.get("rules") is not None:
        routing["rules"] = data["rules"]
    if data.get("default") is not None:
        routing["default"] = data["default"]
    return routing


================================================
FILE: scripts/polyphony/events.py
================================================
"""Structured event parsing from container stdout (§8 events).

Parses NDJSON and stream-json output into TaskEvent objects.
"""

from __future__ import annotations

import json
from dataclasses import dataclass, field
from datetime import datetime, timezone


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


@dataclass
class TaskEvent:
    """One structured event extracted from agent output."""

    kind: str
    data: dict = field(default_factory=dict)
    timestamp: str = field(default_factory=_now)

    @classmethod
    def from_dict(cls, d: dict) -> TaskEvent:
        """Build an event from a mapping.

        Missing keys fall back to ``"unknown"`` for ``kind``, an empty
        dict for ``data`` and the current time for ``timestamp``.
        """
        kind = d.get("kind", "unknown")
        payload = d.get("data", {})
        stamp = d.get("timestamp", _now())
        return cls(kind=kind, data=payload, timestamp=stamp)


def parse_ndjson_line(line: str) -> dict | None:
    """Parse one NDJSON line into a dict.

    Returns None for blank lines, for text that is not valid JSON, and
    for valid JSON that is not an object (numbers, strings, lists, ...).
    Downstream code calls ``.get`` on the result, so only dicts are safe
    to hand back.
    """
    stripped = line.strip()
    if not stripped:
        return None
    try:
        parsed = json.loads(stripped)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    # Agent stdout may contain bare scalars or arrays that happen to be
    # valid JSON; they are not events.
    return parsed if isinstance(parsed, dict) else None


def parse_stream_json(lines: list[str]) -> list[dict]:
    """Parse each line with parse_ndjson_line, dropping the failures.

    Input order is preserved; lines for which the parser returns None
    are omitted from the result.
    """
    candidates = (parse_ndjson_line(raw) for raw in lines)
    return [obj for obj in candidates if obj is not None]


def classify_event(data: dict) -> TaskEvent:
    """Wrap a parsed JSON object in a TaskEvent.

    The event kind is the object's ``"type"`` value (``"unknown"`` when
    absent); the whole object is kept as the event data.
    """
    kind = data.get("type", "unknown")
    event = TaskEvent(kind=kind, data=data)
    return event


================================================
FILE: scripts/polyphony/identity.py
================================================
"""Identity broker — credential resolution (spec §7).

Resolves named identities to volume mounts and env overlays
for container provisioning.
"""

from __future__ import annotations

from .models import Identity


def resolve_identity(
    name: str,
    identities: list[Identity],
) -> Identity:
    """Return the first identity whose ``name`` equals *name*.

    Raises:
        KeyError: no identity in the list carries that name.
    """
    found = next(
        (candidate for candidate in identities if candidate.name == name),
        None,
    )
    if found is None:
        raise KeyError(name)
    return found


def build_volume_mounts(
    identity: Identity,
    agent_type: str,
) -> list[str]:
    """Build the Docker ``-v`` mount strings for one agent type.

    Looks up *agent_type* in ``identity.volumes``. When present, the
    result is a single read-only mount whose container side is
    ``/home/worker/<value>``; otherwise the list is empty.
    """
    # NOTE(review): the configured value is reused verbatim as the suffix
    # of the container path -- confirm volumes hold names/relative paths
    # rather than absolute host paths.
    source = identity.volumes.get(agent_type)
    if source is not None:
        return [f"{source}:/home/worker/{source}:ro"]
    return []


def build_env_overlay(identity: Identity) -> dict[str, str]:
    """Build the env overlay from an identity's ``api_keys``.

    ``api_keys`` maps a logical name to an environment variable name.
    The result maps each variable name to itself
    (``{env_var_name: env_var_name}``), intended for docker --env
    pass-through. An identity without api_keys yields an empty dict.
    """
    overlay: dict[str, str] = {}
    for env_name in (identity.api_keys or {}).values():
        overlay[env_name] = env_name
    return overlay


def validate_identity(identity: Identity) -> list[str]:
    """Collect validation errors for an identity (empty list = valid).

    An identity needs a non-empty ``name`` and at least one entry in
    ``volumes``.
    """
    checks = (
        (identity.name, "name is required"),
        (identity.volumes, "At least one volume is required"),
    )
    return [message for value, message in checks if not value]


================================================
FILE: scripts/polyphony/models.py
================================================
"""Data models for Polyphony (spec §3)."""

from __future__ import annotations

import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


def _uuid() -> str:
    return str(uuid.uuid4())


# --- Task types (§5.1) ---
TASK_TYPES = (
    "research",
    "bugfix",
    "feature",
    "refactor",
    "migration",
    "docs",
    "review",
)

# --- Risk levels (§5.1) ---
RISK_LEVELS = (
    "low",
    "medium",
    "high",
)

# --- Scope levels (§5.1) ---
SCOPES = (
    "single_file",
    "single_module",
    "multi_module",
    "multi_repo",
)

# --- Result statuses ---
RESULT_STATUSES = (
    "succeeded",
    "failed",
    "quota",
    "timeout",
    "crash",
)


@dataclass
class Task:
    """A unit of work from a work source (§3.1).

    Only ``title``, ``source`` and ``source_ref`` are required; every
    other field has a default, so a freshly built task starts in the
    ``"discovered"`` state with a new UUID and current timestamps.
    """

    title: str
    # Name of the work source that produced the task
    # (the bundled sources use "local" and "github").
    source: str
    # Source-specific reference, e.g. "<repo>#<issue number>" for GitHub.
    source_ref: str
    id: str = field(default_factory=_uuid)
    # Lifecycle state; changed through state_machine.transition().
    state: str = "discovered"
    task_type: str = "feature"
    # Scoring uses len(scope) as a file count -- presumably a list of
    # affected paths; confirm against the spec.
    scope: list[str] = field(default_factory=list)
    risk: str = "low"
    context_tokens: int = 0
    requires_web: bool = False
    run_spec_id: str | None = None
    # Free-form extras; scoring reads the "loc", "callers" and
    # "keywords" keys when present.
    metadata: dict = field(default_factory=dict)
    # ISO-8601 UTC timestamps.
    created_at: str = field(default_factory=_now)
    updated_at: str = field(default_factory=_now)

    def to_dict(self) -> dict:
        """Return the task as a plain dict (via dataclasses.asdict)."""
        return asdict(self)


@dataclass
class Identity:
    """Named credential bundle (§3.2)."""

    name: str
    # Keyed by agent type; the value is the source side of a Docker mount.
    volumes: dict[str, str] = field(default_factory=dict)
    # Logical key name -> environment variable name.
    api_keys: dict[str, str] = field(default_factory=dict)
    # Optional daily spend limit in USD; None means none is configured.
    cost_ceiling_usd_per_day: float | None = None


@dataclass
class AgentProfile:
    """Agent harness profile (§3.3)."""

    # Unique name; routing rules refer to agents by this value.
    name: str
    agent_type: str
    # Command line used to launch the agent, as a single string.
    cli_command: str
    context_window_tokens: int = 200000
    strengths: list[str] = field(default_factory=list)
    # Output format the agent emits; defaults to newline-delimited JSON.
    event_protocol: str = "ndjson"
    # NOTE(review): presumably where the agent keeps its credentials --
    # confirm against the spec.
    auth_path: str = ""


@dataclass
class RunSpec:
    """Execution spec for one attempt (§3.4).

    Intended to be treated as immutable once built; note that the
    dataclass is not declared frozen, so this is not enforced.
    """

    task_id: str
    # Name of the selected AgentProfile.
    agent: str
    # Name of the Identity to run under.
    identity: str
    # Host directory mounted at /workspace in the container.
    workspace: str
    # Docker image the container is created from.
    image: str
    id: str = field(default_factory=_uuid)
    attempt: int = 1
    model: str = ""
    # Agent names to try next, in order.
    fallback: list[str] = field(default_factory=list)
    max_turns: int = 25
    allowed_paths: list[str] = field(default_factory=list)
    proof_of_work: list[str] = field(default_factory=list)
    # Rendered as `-e` options when the container is created.
    env_overlay: dict[str, str] = field(default_factory=dict)
    # Extra `-v` mount strings, passed through unchanged.
    volume_mounts: list[str] = field(default_factory=list)
    # NOTE: hooks_pre / hooks_post have no column in the run_specs table,
    # so they are not persisted by the store.
    hooks_pre: list[str] = field(default_factory=list)
    hooks_post: list[str] = field(default_factory=list)
    deadline_seconds: int = 1800


@dataclass
class Result:
    """Outcome of a single run attempt (§3.5)."""

    task_id: str
    run_spec_id: str
    agent: str
    # Outcome label; "succeeded" is the value treated as a pass
    # (see RESULT_STATUSES for the expected set).
    status: str
    id: str = field(default_factory=_uuid)
    turns: int = 0
    duration_seconds: int = 0
    # None when the cost is not known.
    cost_usd: float | None = None
    artifacts: dict[str, str] = field(default_factory=dict)
    events: list[dict] = field(default_factory=list)
    # ISO-8601 UTC timestamp.
    completed_at: str = field(default_factory=_now)


================================================
FILE: scripts/polyphony/orchestrator.py
================================================
"""Supervisor loop (§4 orchestrator).

discover -> claim -> route -> provision -> run -> verify -> land
"""

from __future__ import annotations

from pathlib import Path

from .models import (
    AgentProfile, Identity, Result, RunSpec, Task,
)
from .state_machine import transition
from .store import PolyphonyStore


def discover_tasks(store: PolyphonyStore) -> list[Task]:
    """Return every stored task whose state is ``"discovered"``."""
    pending = store.list_tasks(state="discovered")
    return pending


def claim_task(
    task: Task,
    store: PolyphonyStore,
) -> Task:
    """Move *task* to the ``"claimed"`` state and save it.

    Any error raised by the state machine for an invalid transition
    propagates before anything is written to the store.
    """
    task_after = transition(task, "claimed")
    store.save_task(task_after)
    return task_after


def provision_workspace(
    task: Task,
    base_dir: Path,
    ref: str,
) -> Path:
    """Create the workspace for *task* and return its path.

    Thin wrapper that forwards to the mockable ``_create_ws`` helper.
    """
    workspace_dir = _create_ws(task, base_dir, ref)
    return workspace_dir


def run_agent(run_spec: RunSpec) -> Result:
    """Run the agent described by *run_spec* and return its Result.

    Thin wrapper that forwards to the mockable ``_execute_container``.
    """
    outcome = _execute_container(run_spec)
    return outcome


def verify_result(result: Result) -> bool:
    """Return True when the result's status is ``"succeeded"``.

    Only the status field is inspected; no proof-of-work command is
    evaluated here.
    """
    passed = result.status == "succeeded"
    return passed


class Orchestrator:
    """Supervisor that drives tasks through their lifecycle."""

    def __init__(
        self,
        store: PolyphonyStore,
        agents: list[AgentProfile],
        policy: dict,
        identities: list[Identity] | None = None,
    ):
        """Keep references to the store, agent profiles, routing policy
        and identities (an empty list when none are given)."""
        self._store = store
        self._agents = agents
        self._policy = policy
        self._identities = identities if identities else []

    def step(self) -> int:
        """Run one cycle: claim every discovered task.

        Returns the number of tasks claimed in this cycle.
        """
        processed = 0
        for pending in discover_tasks(self._store):
            claim_task(pending, self._store)
            processed += 1
        return processed


def _create_ws(
    task: Task,
    base_dir: Path,
    ref: str,
) -> Path:
    """Placeholder for workspace creation. Mockable.

    Delegates to ``workspace.create_workspace`` for attempt 1. The repo
    URL is passed as an empty string at this stage.
    """
    from .workspace import create_workspace

    workspace_args = {
        "base_dir": base_dir,
        "task_id": task.id,
        "attempt": 1,
        "repo_url": "",
        "ref": ref,
    }
    return create_workspace(**workspace_args)


def _execute_container(run_spec: RunSpec) -> Result:
    """Placeholder for container execution. Mockable."""
    return Result(
        task_id=run_spec.task_id,
        run_spec_id=run_spec.id,
        agent=run_spec.agent,
        status="failed",
    )


================================================
FILE: scripts/polyphony/pyproject.toml
================================================
[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"

[project]
name = "polyphony"
version = "0.1.0"
description = "Multi-agent orchestration for Maggy"
requires-python = ">=3.11"
dependencies = ["pyyaml>=6.0"]

[project.scripts]
polyphony = "polyphony.__main__:main"


================================================
FILE: scripts/polyphony/router.py
================================================
"""Pure routing function (spec §5.2-5.6).

route(task, agents, policy) -> RunSpec
First matching rule wins. Falls back to default.
"""

from __future__ import annotations

from .models import AgentProfile, RunSpec, Task


def route(
    task: Task,
    agents: list[AgentProfile],
    policy: dict,
    identity: str = "",
) -> RunSpec:
    """Route a task to an agent. Returns a RunSpec."""
    agent = select_agent(task, agents, policy)
    fallback = _get_fallback(task, policy)
    return RunSpec(
        task_id=task.id,
        agent=agent.name,
        identity=identity,
        workspace="",
        image="",
        fallback=fallback,
    )


def select_agent(
    task: Task,
    agents: list[AgentProfile],
    policy: dict,
) -> AgentProfile:
    """Select agent by first matching rule, or default."""
    agent_map = {a.name: a for a in agents}
    for rule in policy.get("rules", []):
        if match_rule(task, rule):
            name = rule["agent"]
            if name in agent_map:
                return agent_map[name]
    default_name = policy["default"]["agent"]
    return agent_map[default_name]


def match_rule(task: Task, rule: dict) -> bool:
    """Check if a task matches a rule's predicates."""
    match = rule.get("match", {})
    for field, expected in match.items():
        actual = getattr(task, field, None)
        if isinstance(expected, list):
            if actual not in expected:
                return False
        elif actual != expected:
            return False
    return True


def _get_fallback(task: Task, policy: dict) -> list[str]:
    """Get fallback chain for a task's route."""
    for rule in policy.get("rules", []):
        if match_rule(task, rule):
            return rule.get("fallback", [])
    return policy["default"].get("fallback", [])


================================================
FILE: scripts/polyphony/runtime.py
================================================
"""Docker container runtime (§8 worker).

Create, start, stop, remove containers via subprocess calls.
All Docker commands go through _run_docker for easy mocking.
"""

from __future__ import annotations

import re
import subprocess

from .models import RunSpec


def build_docker_args(run_spec: RunSpec) -> list[str]:
    """Build the ``docker create`` argument list from a RunSpec.

    The container is named ``polyphony-<task id>-<attempt>``, with every
    character of the task id outside ``[A-Za-z0-9_-]`` replaced by
    ``-``. The workspace is mounted at ``/workspace``, followed by the
    identity mounts, the environment options and finally the image.

    Environment entries whose value equals their own name are emitted as
    a bare ``-e NAME``, which makes Docker copy the value from the
    caller's environment. All other entries become ``-e NAME=value``.
    """
    # re.ASCII keeps \w to [A-Za-z0-9_]; Docker rejects other letters in
    # container names.
    safe_name = re.sub(r"[^\w\-]", "-", run_spec.task_id, flags=re.ASCII)
    name = f"polyphony-{safe_name}-{run_spec.attempt}"

    args = ["docker", "create", "--name", name]

    # Workspace mount
    args += ["-v", f"{run_spec.workspace}:/workspace"]

    # Identity volume mounts
    for mount in run_spec.volume_mounts:
        args += ["-v", mount]

    # Environment variables
    for key, val in run_spec.env_overlay.items():
        if val == key:
            # {NAME: NAME} is the pass-through marker; "-e NAME=NAME"
            # would set the variable to its own name instead of the secret.
            args += ["-e", key]
        else:
            args += ["-e", f"{key}={val}"]

    args.append(run_spec.image)
    return args


def create_container(run_spec: RunSpec) -> str:
    """Create a Docker container and return its id.

    Returns the command's stdout with surrounding whitespace removed.

    Raises:
        RuntimeError: the command exited non-zero; the message is its
            stripped stderr.
    """
    outcome = _run_docker(build_docker_args(run_spec))
    if outcome.returncode == 0:
        return outcome.stdout.strip()
    raise RuntimeError(outcome.stderr.strip())


def start_container(container_id: str) -> None:
    """Start a previously created container.

    The command's exit status is not inspected.
    """
    command = ["docker", "start", container_id]
    _run_docker(command)


def stop_container(
    container_id: str,
    timeout: int | None = None,
) -> None:
    """Stop a running container.

    When *timeout* is given it is passed to ``docker stop`` as ``-t``.
    The command's exit status is not inspected.
    """
    timeout_args = [] if timeout is None else ["-t", str(timeout)]
    _run_docker(["docker", "stop", *timeout_args, container_id])


def remove_container(container_id: str) -> None:
    """Remove a container via ``docker rm``.

    The command's exit status is not inspected.
    """
    command = ["docker", "rm", container_id]
    _run_docker(command)


def container_logs(container_id: str) -> str:
    """Return the stdout captured from ``docker logs <container_id>``.

    Only the command's stdout is returned; whatever it wrote to stderr
    is discarded, as is its exit status.
    """
    outcome = _run_docker(["docker", "logs", container_id])
    captured = outcome.stdout
    return captured


def wait_container(container_id: str) -> int:
    """Block until the container exits and return its exit code.

    The exit code is parsed from the stdout of ``docker wait``; a
    ValueError propagates when that output is not an integer.
    """
    command = ["docker", "wait", container_id]
    raw_code = _run_docker(command).stdout.strip()
    return int(raw_code)


def _run_docker(cmd: list[str]) -> subprocess.CompletedProcess:
    """Run a docker command. Thin wrapper for mocking."""
    return subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        check=False,
    )


================================================
FILE: scripts/polyphony/scoring.py
================================================
"""5-dimension complexity scoring (spec §5.1).

Formalizes the cross-agent-delegation rubric:
  cyclomatic, fan_out, security, concurrency, domain
Each dimension scores 0-2. Total 0-10.
"""

from __future__ import annotations

from .models import Task

# The five scoring dimensions, in the order score_task() adds them up.
DIMENSIONS = (
    "cyclomatic",
    "fan_out",
    "security",
    "concurrency",
    "domain",
)

# Lower-case keywords that count towards the security dimension.
SEC_KEYWORDS = frozenset([
    "auth",
    "org_id",
    "user_id",
    "pii",
    "rls",
    "billing",
    "payment",
    "secret",
    "token",
    "session",
    "csrf",
    "xss",
])

# Lower-case keywords that count towards the concurrency dimension.
CONCURRENCY_KEYWORDS = frozenset([
    "asyncio.lock",
    "for update",
    "transaction",
    "session.begin",
    "mutex",
    "semaphore",
    "atomic",
    "lock",
])


def score_task(task: Task) -> int:
    """Total complexity score (0-10): the sum of the five dimensions."""
    scorers = (
        score_cyclomatic,
        score_fan_out,
        score_security,
        score_concurrency,
        score_domain,
    )
    return sum(scorer(task) for scorer in scorers)


def score_cyclomatic(task: Task) -> int:
    """0-2 based on LOC and scope size.

    2 when ``metadata["loc"]`` is at least 50 or the scope has at least
    5 entries; 1 at 10 LOC or 2 entries; otherwise 0. A missing ``loc``
    counts as 0.
    """
    loc = task.metadata.get("loc", 0)
    n_files = len(task.scope)
    # (points, LOC threshold, scope-size threshold), highest tier first.
    for points, loc_floor, files_floor in ((2, 50, 5), (1, 10, 2)):
        if loc >= loc_floor or n_files >= files_floor:
            return points
    return 0


def score_fan_out(task: Task) -> int:
    """0-2 based on number of callers.

    Reads ``metadata["callers"]`` (0 when missing): 11 or more scores 2,
    3 to 10 scores 1, fewer scores 0.
    """
    callers = task.metadata.get("callers", 0)
    # (points, minimum caller count), highest tier first.
    for points, floor in ((2, 11), (1, 3)):
        if callers >= floor:
            return points
    return 0


def score_security(task: Task) -> int:
    """0-2 based on security keyword presence.

    The score is the number of distinct task keywords found in
    SEC_KEYWORDS, capped at 2.
    """
    matched = _extract_keywords(task) & SEC_KEYWORDS
    return min(len(matched), 2)


def score_concurrency(task: Task) -> int:
    """0-2 based on concurrency keyword presence.

    The score is the number of distinct task keywords found in
    CONCURRENCY_KEYWORDS, capped at 2.
    """
    hit_count = len(_extract_keywords(task) & CONCURRENCY_KEYWORDS)
    return 2 if hit_count >= 2 else hit_count


def score_domain(task: Task) -> int:
    """0-2 based on risk + task type heuristic.

    High risk scores 2; medium risk or a refactor task scores 1;
    everything else scores 0.
    """
    if task.risk == "high":
        return 2
    elevated = task.risk == "medium" or task.task_type == "refactor"
    return 1 if elevated else 0


def _extract_keywords(task: Task) -> set[str]:
    """Collect keywords from metadata and title."""
    kw = set()
    for k in task.metadata.get("keywords", []):
        kw.add(k.lower())
    for word in task.title.lower().split():
        kw.add(word)
    return kw


================================================
FILE: scripts/polyphony/sources/__init__.py
================================================
"""Work sources for Polyphony (§2).

Registry of task source implementations.
"""

from __future__ import annotations

from .local import LocalSource
from .github import GitHubSource

# Source kind -> implementing class; consulted by get_source() and
# list_sources().
_REGISTRY: dict[str, type] = dict(
    local=LocalSource,
    github=GitHubSource,
)


def get_source(kind: str, **kwargs):
    """Instantiate the source registered under *kind*.

    Keyword arguments are forwarded to the source's constructor.

    Raises:
        KeyError: no source is registered under that name.
    """
    source_cls = _REGISTRY.get(kind)
    if source_cls is not None:
        return source_cls(**kwargs)
    raise KeyError(kind)


def list_sources() -> list[str]:
    """Return the registered source names, in registration order."""
    return [*_REGISTRY]


================================================
FILE: scripts/polyphony/sources/github.py
================================================
"""GitHub Issues work source (§2).

Polls GitHub Issues via `gh api` for tasks labeled agent-ready.
"""

from __future__ import annotations

import json
import subprocess

from ..models import Task


class GitHubSource:
    """GitHub Issues as task source."""

    def __init__(
        self,
        repo: str = "",
        label_filter: str = "agent-ready",
    ):
        """Remember the ``owner/name`` repository and the label to poll."""
        self._repo = repo
        self._label = label_filter

    def poll(self) -> list[Task]:
        """Fetch open issues carrying the configured label.

        Returns an empty list when the ``gh`` call fails, when its
        output is not valid JSON, or when the payload is not a list
        (e.g. an API error object). Pull requests, which the issues
        endpoint also returns, are skipped.
        """
        # "-q" is the short form of "--jq", so the former
        # `--jq . -q label:...` replaced the jq program with an invalid
        # expression and never filtered anything. The label is sent as a
        # real query parameter instead; "--method GET" makes gh put the
        # "-f" fields into the query string rather than a POST body.
        cmd = [
            "gh", "api",
            "--method", "GET",
            f"repos/{self._repo}/issues",
            "-f", f"labels={self._label}",
            "-f", "state=open",
        ]
        result = _run_gh(cmd)
        if result.returncode != 0:
            return []
        try:
            issues = json.loads(result.stdout)
        except (json.JSONDecodeError, ValueError):
            return []
        if not isinstance(issues, list):
            return []
        return [
            self._issue_to_task(issue)
            for issue in issues
            # Entries with a "pull_request" key are PRs, not issues.
            if isinstance(issue, dict) and "pull_request" not in issue
        ]

    def _issue_to_task(self, issue: dict) -> Task:
        """Convert a GitHub issue dict to a Task.

        The task's source_ref is ``<repo>#<issue number>``.
        """
        return Task(
            title=issue.get("title", ""),
            source="github",
            source_ref=f"{self._repo}#{issue.get('number', '')}",
        )


def _run_gh(cmd: list[str]) -> subprocess.CompletedProcess:
    """Run a gh CLI command. Thin wrapper for mocking."""
    return subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        check=False,
    )


================================================
FILE: scripts/polyphony/sources/local.py
================================================
"""Local SQLite task queue (§2).

Simple task queue backed by a SQLite database file.
"""

from __future__ import annotations

import sqlite3
from pathlib import Path

from ..models import Task


class LocalSource:
    """File-based local task queue.

    Tasks live in a single ``tasks`` table of a SQLite file. Every
    operation opens its own connection and closes it again, also when
    the statement fails.
    """

    def __init__(self, db_path: Path | None = None):
        """Open (creating if needed) the queue database.

        Args:
            db_path: database file; defaults to ``~/.polyphony/queue.db``.
                A leading ``~`` is expanded to the user's home directory.
        """
        raw_path = db_path or Path("~/.polyphony/queue.db")
        # pathlib never expands "~" on its own; without expanduser() the
        # default would create a literal "~" directory in the cwd.
        self._path = Path(str(raw_path).strip()).expanduser()
        self._init_db()

    def _run(self, sql: str, params: tuple = ()) -> list:
        """Execute one statement, commit, and return any fetched rows."""
        con = sqlite3.connect(str(self._path))
        try:
            rows = con.execute(sql, params).fetchall()
            con.commit()
            return rows
        finally:
            con.close()

    def _init_db(self) -> None:
        """Create the parent directory and the tasks table if missing."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        self._run(
            "CREATE TABLE IF NOT EXISTS tasks ("
            "  id TEXT PRIMARY KEY,"
            "  title TEXT NOT NULL,"
            "  task_type TEXT DEFAULT 'feature',"
            "  risk TEXT DEFAULT 'low',"
            "  claimed INTEGER DEFAULT 0"
            ")"
        )

    def add_task(
        self,
        title: str,
        task_type: str = "feature",
        risk: str = "low",
    ) -> Task:
        """Add a task to the local queue and return it."""
        task = Task(
            title=title,
            source="local",
            source_ref="local",
            task_type=task_type,
            risk=risk,
        )
        self._run(
            "INSERT INTO tasks (id, title, task_type, risk)"
            " VALUES (?, ?, ?, ?)",
            (task.id, task.title, task.task_type, task.risk),
        )
        return task

    def poll(self) -> list[Task]:
        """Return unclaimed tasks."""
        rows = self._run(
            "SELECT id, title, task_type, risk"
            " FROM tasks WHERE claimed = 0"
        )
        return [
            Task(
                id=row[0],
                title=row[1],
                source="local",
                source_ref="local",
                task_type=row[2],
                risk=row[3],
            )
            for row in rows
        ]

    def mark_claimed(self, task_id: str) -> None:
        """Mark a task as claimed. Unknown ids are ignored."""
        self._run(
            "UPDATE tasks SET claimed = 1 WHERE id = ?",
            (task_id,),
        )


================================================
FILE: scripts/polyphony/state_machine.py
================================================
"""Task state machine for Polyphony (spec §4)."""

from __future__ import annotations

from .models import Task, _now

# Every state a task can be in.
TASK_STATES = (
    "discovered",
    "claimed",
    "routed",
    "provisioned",
    "running",
    "verifying",
    "landed",
    "failed",
    "blocked",
)

# State -> states reachable from it. States without an entry have no
# outgoing transitions.
TRANSITIONS: dict[str, tuple[str, ...]] = dict(
    discovered=("claimed",),
    claimed=("routed",),
    routed=("provisioned",),
    provisioned=("running",),
    running=("verifying", "failed"),
    verifying=("landed", "failed"),
    failed=("claimed", "blocked"),
)

# States reported as terminal by is_terminal().
TERMINAL_STATES = (
    "landed",
    "blocked",
)


def can_transition(current: str, target: str) -> bool:
    """Report whether moving from *current* to *target* is allowed.

    States with no entry in TRANSITIONS allow no moves at all.
    """
    return target in TRANSITIONS.get(current, ())


def transition(task: Task, target: str) -> Task:
    """Move *task* to the state *target*.

    The task is updated in place (state and ``updated_at``) and the same
    object is returned.

    Raises:
        ValueError: the move is not allowed from the current state.
    """
    if can_transition(task.state, target):
        task.state = target
        task.updated_at = _now()
        return task
    raise ValueError(f"Invalid transition: {task.state} -> {target}")


def is_terminal(state: str) -> bool:
    """Report whether *state* is one of the TERMINAL_STATES."""
    terminal = state in TERMINAL_STATES
    return terminal


================================================
FILE: scripts/polyphony/store.py
================================================
"""SQLite storage layer for Polyphony."""

from __future__ import annotations

import json
import sqlite3
from pathlib import Path

from .models import Result, RunSpec, Task, _now

# File name of the SQLite database inside the store's base directory.
DB_NAME = "orchestrator.db"

# DDL run by PolyphonyStore.init_db(). Every statement uses
# IF NOT EXISTS, so running it again on an existing database is harmless.
#
# Column order matters: the store inserts with positional
# "INSERT ... VALUES (?, ?, ...)" statements, so tasks, run_specs and
# results must keep their columns in the order the save_* methods
# supply them. List and dict fields are stored as JSON text.
#
# NOTE: run_specs has no columns for RunSpec.hooks_pre / hooks_post.
SCHEMA = """
CREATE TABLE IF NOT EXISTS tasks (
    id TEXT PRIMARY KEY,
    title TEXT NOT NULL,
    source TEXT NOT NULL,
    source_ref TEXT NOT NULL,
    state TEXT NOT NULL DEFAULT 'discovered',
    task_type TEXT DEFAULT 'feature',
    scope TEXT DEFAULT '[]',
    risk TEXT DEFAULT 'low',
    context_tokens INTEGER DEFAULT 0,
    requires_web INTEGER DEFAULT 0,
    run_spec_id TEXT,
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS run_specs (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL,
    agent TEXT NOT NULL,
    identity TEXT NOT NULL,
    workspace TEXT NOT NULL,
    image TEXT NOT NULL,
    attempt INTEGER DEFAULT 1,
    model TEXT DEFAULT '',
    fallback TEXT DEFAULT '[]',
    max_turns INTEGER DEFAULT 25,
    allowed_paths TEXT DEFAULT '[]',
    proof_of_work TEXT DEFAULT '[]',
    env_overlay TEXT DEFAULT '{}',
    volume_mounts TEXT DEFAULT '[]',
    deadline_seconds INTEGER DEFAULT 1800
);

CREATE TABLE IF NOT EXISTS results (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL,
    run_spec_id TEXT NOT NULL,
    agent TEXT NOT NULL,
    status TEXT NOT NULL,
    turns INTEGER DEFAULT 0,
    duration_seconds INTEGER DEFAULT 0,
    cost_usd REAL,
    artifacts TEXT DEFAULT '{}',
    events TEXT DEFAULT '[]',
    completed_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS state_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_id TEXT NOT NULL,
    from_state TEXT NOT NULL,
    to_state TEXT NOT NULL,
    timestamp TEXT NOT NULL
);
"""


class PolyphonyStore:
    """SQLite-backed persistence for Polyphony.

    The database file is ``<base_dir>/orchestrator.db``. Every public
    method opens its own short-lived connection and always closes it,
    including when the statement raises. List and dict fields are
    stored as JSON text.
    """

    def __init__(self, base_dir: Path) -> None:
        """Record where the database lives; nothing is created yet."""
        self.base_dir = Path(base_dir)
        self.db_path = self.base_dir / DB_NAME

    def init_db(self) -> None:
        """Create the base directory, its .gitignore and the schema."""
        self.base_dir.mkdir(parents=True, exist_ok=True)
        self._write_gitignore()
        conn = self._connect()
        try:
            conn.executescript(SCHEMA)
        finally:
            conn.close()

    def _connect(self) -> sqlite3.Connection:
        """Open a connection with WAL journaling, foreign keys and
        name-addressable rows."""
        conn = sqlite3.connect(str(self.db_path))
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA foreign_keys=ON")
        except Exception:
            # Do not leave a half-configured connection open.
            conn.close()
            raise
        conn.row_factory = sqlite3.Row
        return conn

    def _write_gitignore(self) -> None:
        """Write a catch-all .gitignore unless one already exists."""
        gi = self.base_dir / ".gitignore"
        if not gi.exists():
            gi.write_text("*\n")

    # --- Connection helpers ---

    def _execute_write(self, sql: str, params: tuple) -> None:
        """Run one modifying statement and commit it."""
        conn = self._connect()
        try:
            conn.execute(sql, params)
            conn.commit()
        finally:
            conn.close()

    def _fetch_one(self, sql: str, params: tuple) -> sqlite3.Row | None:
        """Return the first row of a query, or None when it has none."""
        conn = self._connect()
        try:
            return conn.execute(sql, params).fetchone()
        finally:
            conn.close()

    def _fetch_all(self, sql: str, params: tuple = ()) -> list[sqlite3.Row]:
        """Return all rows of a query."""
        conn = self._connect()
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    # --- Task CRUD ---

    def save_task(self, task: Task) -> None:
        """Insert the task, replacing any stored task with the same id."""
        self._execute_write(
            "INSERT OR REPLACE INTO tasks VALUES "
            "(?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
            (
                task.id, task.title, task.source,
                task.source_ref, task.state, task.task_type,
                json.dumps(task.scope), task.risk,
                task.context_tokens, int(task.requires_web),
                task.run_spec_id, json.dumps(task.metadata),
                task.created_at, task.updated_at,
            ),
        )

    def get_task(self, task_id: str) -> Task | None:
        """Return the task with that id, or None."""
        row = self._fetch_one(
            "SELECT * FROM tasks WHERE id=?", (task_id,),
        )
        return self._row_to_task(row) if row else None

    def list_tasks(self, state: str | None = None) -> list[Task]:
        """Return all tasks, or only those in *state* when it is given."""
        if state:
            rows = self._fetch_all(
                "SELECT * FROM tasks WHERE state=?", (state,),
            )
        else:
            rows = self._fetch_all("SELECT * FROM tasks")
        return [self._row_to_task(r) for r in rows]

    def _row_to_task(self, row: sqlite3.Row) -> Task:
        """Rebuild a Task from a tasks row, decoding the JSON columns."""
        return Task(
            id=row["id"], title=row["title"],
            source=row["source"], source_ref=row["source_ref"],
            state=row["state"], task_type=row["task_type"],
            scope=json.loads(row["scope"]), risk=row["risk"],
            context_tokens=row["context_tokens"],
            requires_web=bool(row["requires_web"]),
            run_spec_id=row["run_spec_id"],
            metadata=json.loads(row["metadata"]),
            created_at=row["created_at"],
            updated_at=row["updated_at"],
        )

    # --- RunSpec CRUD ---

    def save_run_spec(self, rs: RunSpec) -> None:
        """Insert the run spec, replacing any stored one with the same id.

        hooks_pre and hooks_post are not written; the table has no
        columns for them.
        """
        self._execute_write(
            "INSERT OR REPLACE INTO run_specs VALUES "
            "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
            (
                rs.id, rs.task_id, rs.agent, rs.identity,
                rs.workspace, rs.image, rs.attempt, rs.model,
                json.dumps(rs.fallback), rs.max_turns,
                json.dumps(rs.allowed_paths),
                json.dumps(rs.proof_of_work),
                json.dumps(rs.env_overlay),
                json.dumps(rs.volume_mounts),
                rs.deadline_seconds,
            ),
        )

    def get_run_spec(self, rs_id: str) -> RunSpec | None:
        """Return the run spec with that id, or None."""
        row = self._fetch_one(
            "SELECT * FROM run_specs WHERE id=?", (rs_id,),
        )
        return self._row_to_run_spec(row) if row else None

    def _row_to_run_spec(self, row: sqlite3.Row) -> RunSpec:
        """Rebuild a RunSpec from a run_specs row."""
        return RunSpec(
            id=row["id"], task_id=row["task_id"],
            agent=row["agent"], identity=row["identity"],
            workspace=row["workspace"], image=row["image"],
            attempt=row["attempt"], model=row["model"],
            fallback=json.loads(row["fallback"]),
            max_turns=row["max_turns"],
            allowed_paths=json.loads(row["allowed_paths"]),
            proof_of_work=json.loads(row["proof_of_work"]),
            env_overlay=json.loads(row["env_overlay"]),
            volume_mounts=json.loads(row["volume_mounts"]),
            deadline_seconds=row["deadline_seconds"],
        )

    # --- Result CRUD ---

    def save_result(self, result: Result) -> None:
        """Insert the result, replacing any stored one with the same id."""
        self._execute_write(
            "INSERT OR REPLACE INTO results VALUES "
            "(?,?,?,?,?,?,?,?,?,?,?)",
            (
                result.id, result.task_id, result.run_spec_id,
                result.agent, result.status, result.turns,
                result.duration_seconds, result.cost_usd,
                json.dumps(result.artifacts),
                json.dumps(result.events),
                result.completed_at,
            ),
        )

    def get_result(self, result_id: str) -> Result | None:
        """Return the result with that id, or None."""
        row = self._fetch_one(
            "SELECT * FROM results WHERE id=?", (result_id,),
        )
        return self._row_to_result(row) if row else None

    def list_results(self, task_id: str) -> list[Result]:
        """Return every stored result for the given task."""
        rows = self._fetch_all(
            "SELECT * FROM results WHERE task_id=?",
            (task_id,),
        )
        return [self._row_to_result(r) for r in rows]

    def _row_to_result(self, row: sqlite3.Row) -> Result:
        """Rebuild a Result from a results row."""
        return Result(
            id=row["id"], task_id=row["task_id"],
            run_spec_id=row["run_spec_id"],
            agent=row["agent"], status=row["status"],
            turns=row["turns"],
            duration_seconds=row["duration_seconds"],
            cost_usd=row["cost_usd"],
            artifacts=json.loads(row["artifacts"]),
            events=json.loads(row["events"]),
            completed_at=row["completed_at"],
        )

    # --- State log ---

    def log_transition(self, task_id: str, from_s: str, to_s: str) -> None:
        """Append one state change, stamped with the current time."""
        self._execute_write(
            "INSERT INTO state_log (task_id, from_state, to_state, timestamp) "
            "VALUES (?,?,?,?)",
            (task_id, from_s, to_s, _now()),
        )

    def get_state_log(self, task_id: str) -> list[dict]:
        """Return the task's state changes, oldest first.

        Each entry has the keys ``from_state``, ``to_state`` and
        ``timestamp``.
        """
        rows = self._fetch_all(
            "SELECT * FROM state_log WHERE task_id=? "
            "ORDER BY id",
            (task_id,),
        )
        return [
            {
                "from_state": r["from_state"],
                "to_state": r["to_state"],
                "timestamp": r["timestamp"],
            }
            for r in rows
        ]


================================================
FILE: scripts/polyphony/workspace.py
================================================
"""Workspace manager — per-task git clone lifecycle (spec §6).

Each task+attempt gets an isolated directory with a full git clone.
"""

from __future__ import annotations

import re
import shutil
import subprocess
from pathlib import Path


def workspace_path(
    base_dir: Path,
    task_id: str,
    attempt: int,
) -> Path:
    """Build the workspace directory path for one attempt of a task.

    Every character of ``task_id`` that is not a word character, ``-`` or
    ``.`` is replaced with ``_``. The result is
    ``base_dir / <sanitized id> / <attempt>``.

    An id that sanitizes to ``""``, ``"."`` or ``".."`` is rewritten to
    underscores: as a path segment those values would resolve to
    ``base_dir`` itself or to its parent instead of a per-task directory.
    """
    safe_id = re.sub(r"[^\w\-.]", "_", task_id)
    if safe_id in ("", ".", ".."):
        # Same length where possible so "." and ".." stay distinct.
        safe_id = "_" * max(len(safe_id), 1)
    return base_dir / safe_id / str(attempt)


def create_workspace(
    base_dir: Path,
    task_id: str,
    attempt: int,
    repo_url: str,
    ref: str,
    mirror_path: Path | None = None,
) -> Path:
    """Create the workspace directory, clone ``repo_url`` into it, check out ``ref``.

    When ``mirror_path`` is given and exists, the clone is run with
    ``--reference <mirror_path> --dissociate``.

    Returns the workspace path. The results of the two git commands are
    not inspected here, so the path is returned whether or not they
    succeeded.
    """
    workspace = workspace_path(base_dir, task_id, attempt)
    workspace.mkdir(parents=True, exist_ok=True)

    reference_args: list[str] = []
    if mirror_path and mirror_path.exists():
        reference_args = ["--reference", str(mirror_path), "--dissociate"]

    _run_git(["git", "clone", *reference_args, repo_url, str(workspace)])
    _run_git(["git", "-C", str(workspace), "checkout", ref])

    return workspace


def cleanup_workspace(ws_path: Path) -> None:
    """Delete the directory tree at ``ws_path``; do nothing if it does not exist."""
    if not ws_path.exists():
        return
    shutil.rmtree(ws_path)


def list_workspaces(base_dir: Path) -> list[Path]:
    """Return every ``<task>/<attempt>`` directory found under ``base_dir``.

    Task directories are visited in sorted order and, within each, attempt
    directories are returned in sorted order. Plain files at either level
    are ignored. A ``base_dir`` that does not exist gives an empty list.
    """
    if not base_dir.exists():
        return []
    return [
        attempt_dir
        for task_dir in sorted(base_dir.iterdir())
        if task_dir.is_dir()
        for attempt_dir in sorted(task_dir.iterdir())
        if attempt_dir.is_dir()
    ]


def _run_git(cmd: list[str]) -> subprocess.CompletedProcess:
    """Run a git command. Thin wrapper for mocking."""
    return subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        check=False,
    )


================================================
FILE: scripts/skill_lint/__init__.py
================================================
"""skill_lint -- Quality gates for Maggy skills."""

from __future__ import annotations

__version__ = '0.1.0'

from dataclasses import dataclass
from enum import Enum


class Severity(Enum):
    """Severity level attached to a lint finding."""

    # Each value is the lowercase name of its member.
    ERROR = 'error'
    WARNING = 'warning'
    INFO = 'info'


@dataclass
class Finding:
    """One issue reported by a skill-lint checker."""

    rule_id: str  # rule identifier string
    severity: Severity  # how serious the issue is
    message: str  # human-readable description of the issue
    line: int | None = None  # line number the issue refers to; None when not given
    suggestion: str | None = None  # optional hint on how to fix the issue


================================================
FILE: scripts/skill_lint/__main__.py
================================================
"""CLI entry point for skill-lint -- Quality gates for Maggy skills."""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

from . import Severity, __version__
from . import content, frontmatter, references, report, spec


# Checker modules applied to every skill, in this order.
CHECKERS = [frontmatter, spec, content, references]


def discover_skills(skills_dir: Path, skill_filter: str | None = None) -> list[Path]:
    """Return the skill directories directly under ``skills_dir``, sorted.

    Entries that are not directories, or whose name starts with ``.``, are
    left out. When ``skill_filter`` is a non-empty string, only the
    directory with exactly that name is kept. If ``skills_dir`` is not a
    directory the result is an empty list.
    """
    if not skills_dir.is_dir():
        return []

    selected: list[Path] = []
    for entry in skills_dir.iterdir():
        if not entry.is_dir() or entry.name.startswith('.'):
            continue
        if skill_filter and entry.name != skill_filter:
            continue
        selected.append(entry)

    selected.sort()
    return selected


def lint_skill(skill_dir: Path, skills_dir: Path) -> list:
    """Run every module in ``CHECKERS`` against one skill.

    Each checker receives the path ``skill_dir / 'SKILL.md'``, the skill
    directory and the skills root. Their findings are concatenated in
    checker order and returned as a single list.
    """
    from . import Finding
    skill_md = skill_dir / 'SKILL.md'
    collected: list[Finding] = []

    for module in CHECKERS:
        collected += module.check(skill_md, skill_dir, skills_dir)

    return collected


def severity_from_str(s: str) -> Severity:
    """Map ``'error'``, ``'warning'`` or ``'info'`` (any letter case) to its ``Severity``.

    Raises:
        ValueError: if ``s`` is not one of those three names.
    """
    by_name = {
        'error': Severity.ERROR,
        'warning': Severity.WARNING,
        'info': Severity.INFO,
    }
    key = s.lower()
    if key not in by_name:
        raise ValueError(f'Unknown severity: {s}')
    return by_name[key]


def main(argv: list[str] | None = None) -> int:
    """Run the skill-lint command line and return the process exit code.

    Args:
        argv: argument list handed to argparse; ``None`` lets argparse
            read ``sys.argv``.

    Returns:
        0 when no finding is at or above the ``--fail-on`` severity,
        1 when at least one is, and 2 when the skills directory is not a
        directory or no skill directory was found in it.
    """
    parser = argparse.ArgumentParser(
        prog='skill-lint',
        description='Quality gates for Maggy skills'
    )
    parser.add_argument(
        '--version', action='version', version=f'skill-lint {__version__}'
    )
    parser.add_argument('skills_dir', help='Path to skills/ directory')
    parser.add_argument(
        '--format', dest='output_format', default='text',
        choices=['text', 'json'],
        help='Output format (default: text)'
    )
    parser.add_argument(
        '--severity', default='info',
        choices=['error', 'warning', 'info'],
        help='Minimum severity to show (default: info)'
    )
    parser.add_argument(
        '--skill', default=None,
        help='Lint a single skill by directory name'
    )
    parser.add_argument(
        '--fail-on', dest='fail_on', default='error',
        choices=['error', 'warning', 'info'],
        help='Exit 1 if findings at this severity or above (default: error)'
    )
    opts = parser.parse_args(argv)

    root = Path(opts.skills_dir).resolve()
    if not root.is_dir():
        print(f'Error: {opts.skills_dir} is not a directory', file=sys.stderr)
        return 2

    targets = discover_skills(root, opts.skill)
    if not targets:
        if opts.skill:
            problem = f'Error: skill "{opts.skill}" not found in {root}'
        else:
            problem = f'Error: no skill directories found in {root}'
        print(problem, file=sys.stderr)
        return 2

    # Lint every selected skill, keyed by its directory name.
    results: dict[str, list] = {
        target.name: lint_skill(target, root) for target in targets
    }

    # Render the report at the requested verbosity.
    shown_from = severity_from_str(opts.severity)
    render = report.format_json if opts.output_format == 'json' else report.format_text
    print(render(results, shown_from))

    # Lower rank means more severe; fail on anything at or above --fail-on.
    rank = {Severity.ERROR: 0, Severity.WARNING: 1, Severity.INFO: 2}
    limit = rank[severity_from_str(opts.fail_on)]
    for skill_findings in results.values():
        for finding in skill_findings:
            if rank[finding.severity] <= limit:
                return 1
    return 0


# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())


================================================
FILE: scripts/skill_lint/content.py
================================================
"""Content quality checks (CQ001-CQ006)."""

from __future__ import annotations

import re
from pathlib import Path

from . import Finding, Severity

# Pattern for decorative ASCII art. Alternatives, in order:
#   1. any single Unicode box-drawing character
#   2. two or more '+' / '|' followed by three or more '-' / '='
#   3. the mirror image of (2)
#   4. a whole-line border such as '+-----+'
#   5. a whole line that starts and ends with '|'
# The '^' / '$' anchors assume the pattern is searched against one line at a
# time (the pattern is compiled without re.MULTILINE).
# NOTE(review): alternative 5 also matches Markdown table rows such as
# "| a | b |" — confirm that flagging tables is intended.
ASCII_ART_RE = re.compile(
    r'[╔╗╚╝╠╣╦╩╬║═│┌┐└┘├┤┬┴┼─┃━┏┓┗┛┣┫┳┻╋]'
    r'|[+|]{2,}\s*[-=]{3,}'
    r'|[-=]{3,}\s*[+|]{2,}'
    r'|^\s*[+][\-+]{3,}[+]\s*$'
    r'|^\s*[|].*[|]\s*$'
)

# Vague guidance phrases, all lowercase.
VAGUE_PHRASES = [
    'follow best practices',
    'ensure quality',
    'as appropriate',
    'when necessary',
    'use proper',
    'handle appropriately',
    'do the right thing',
    'be careful',
    'use common sense',
    'as needed',
]

# Emphatic filler terms, matched as whole words in any letter case.
FILLER_WORDS_RE = re.compile(
    r'\b(MANDATORY|NON-NEGOTIABLE|ABSOLUTELY|CRITICAL|MUST ALWAYS|'
    r'NEVER EVER|UNDER NO CIRCUMSTANCES|WITHOUT EXCEPTION|'
    r'ZERO TOLERANCE|NO EXCEPTIONS)\b',
    re.IGNORECASE
)

# Matches "Load with: <something>.md", optionally wrapped in '*' on either
# side, in any letter case.
STALE_LOAD_RE = re.compile(r'\*?Load with:\s+\S+\.md\*?', re.IGNORECASE)


def _in_code_block(lines: list[str], target_idx: int) -> bool:
    """Check if a line index is inside a fenced code block."""
    in_fence = False
    for i, line in enumerate(lines):
        if line.strip().startswith('```'):
            in_fence = not in_fence
        if i == target_idx:
            return in_fence
    return False


def _fence_mask(lines: list[str]) -> list[bool]:
    """Return, for each line, whether it counts as inside a fenced code block.

    A fence is a line whose stripped text starts with three backticks. The
    opening fence line is marked inside, the closing fence line outside.
    """
    mask: list[bool] = []
    fenced = False
    for line in lines:
        if line.strip().startswith('```'):
            fenced = not fenced
        mask.append(fenced)
    return mask


def check(skill_path: Path, skill_dir: Path, skills_dir: Path) -> list[Finding]:
    """Run the content quality checks (CQ001-CQ006) on one SKILL.md.

    Args:
        skill_path: path of the SKILL.md file to inspect.
        skill_dir: directory of the skill (not used by these checks).
        skills_dir: root skills directory (not used by these checks).

    Returns:
        The findings, in rule order. Empty when ``skill_path`` does not
        exist.

    A ``<!-- skill-lint: disable=ID1,ID2 -->`` comment within the first
    ten lines turns the listed rules off for this file.
    """
    findings: list[Finding] = []

    if not skill_path.exists():
        return findings

    content = skill_path.read_text(encoding='utf-8')
    lines = content.split('\n')
    # Fence state for every line, computed once instead of rescanning the
    # file from the top for each line of each rule.
    in_code = _fence_mask(lines)

    # Check for inline suppression in first 10 lines
    suppressed: set[str] = set()
    for line in lines[:10]:
        if '<!-- skill-lint: disable=' in line:
            start = line.index('disable=') + 8
            end = line.index('-->', start) if '-->' in line[start:] else len(line)
            rules = line[start:end].strip().rstrip(' >')
            for rule in rules.split(','):
                suppressed.add(rule.strip())

    # CQ001: no ASCII art boxes outside code blocks
    if 'CQ001' not in suppressed:
        ascii_art_lines = [
            i + 1
            for i, line in enumerate(lines)
            if not in_code[i] and ASCII_ART_RE.search(line)
        ]
        if ascii_art_lines:
            sample = ascii_art_lines[:3]
            findings.append(Finding(
                rule_id='CQ001',
                severity=Severity.WARNING,
                message=f'ASCII art detected outside code blocks (lines: {sample})',
                line=ascii_art_lines[0],
                suggestion='Remove decorative ASCII art to save tokens'
            ))

    # CQ002: no vague phrases
    if 'CQ002' not in suppressed:
        vague_found = []
        for i, line in enumerate(lines):
            if in_code[i]:
                continue
            lower = line.lower()
            for phrase in VAGUE_PHRASES:
                if phrase in lower:
                    vague_found.append((i + 1, phrase))
        if vague_found:
            sample = vague_found[:3]
            phrases = ', '.join(f'"{p}" (L{n})' for n, p in sample)
            findings.append(Finding(
                rule_id='CQ002',
                severity=Severity.INFO,
                message=f'Vague phrases found: {phrases}',
                line=vague_found[0][0],
                suggestion='Replace vague guidance with specific, actionable instructions'
            ))

    # CQ003: filler intensity <= 2 per 100 lines
    if 'CQ003' not in suppressed:
        filler_count = sum(
            len(FILLER_WORDS_RE.findall(line))
            for i, line in enumerate(lines)
            if not in_code[i]
        )
        if len(lines) > 0:
            intensity = (filler_count / len(lines)) * 100
            if intensity > 2:
                findings.append(Finding(
                    rule_id='CQ003',
                    severity=Severity.WARNING,
                    message=f'Filler intensity {intensity:.1f} per 100 lines (max: 2.0)',
                    suggestion='Reduce emphatic language (MANDATORY, NON-NEGOTIABLE, etc.)'
                ))

    # CQ004: >= 1 code block per 50 lines of content
    if 'CQ004' not in suppressed:
        # Two fence markers make one block.
        code_blocks = content.count('```') // 2
        content_lines = len([l for l in lines if l.strip()])
        if content_lines >= 50:
            expected = content_lines / 50
            if code_blocks < expected:
                findings.append(Finding(
                    rule_id='CQ004',
                    severity=Severity.WARNING,
                    message=f'{code_blocks} code blocks for {content_lines} content lines '
                            f'(expected >= {int(expected)})',
                    suggestion='Add concrete code examples to illustrate patterns'
                ))

    # CQ005: no stale "Load with:" references
    if 'CQ005' not in suppressed:
        for i, line in enumerate(lines):
            if not in_code[i] and STALE_LOAD_RE.search(line):
                findings.append(Finding(
                    rule_id='CQ005',
                    severity=Severity.WARNING,
                    message=f'Stale "Load with:" reference at line {i + 1}',
                    line=i + 1,
                    suggestion='Remove stale loading instructions'
                ))
                break  # One finding is enough

    # CQ006: H1 heading present after frontmatter
    if 'CQ006' not in suppressed:
        # Frontmatter only counts when the first non-blank line is '---';
        # otherwise two '---' rules later in the body would hide an H1
        # that comes before them.
        body_start = 0
        first_text = next((i for i, line in enumerate(lines) if line.strip()), None)
        if first_text is not None and lines[first_text].strip() == '---':
            for i in range(first_text + 1, len(lines)):
                if lines[i].strip() == '---':
                    body_start = i
                    break

        # Lines inside fenced code are skipped: a '# comment' in a shell or
        # Python example is not a heading.
        has_h1 = any(
            not in_code[i] and lines[i].strip().startswith('# ')
            for i in range(body_start, len(lines))
        )

        if not has_h1:
            findings.append(Finding(
                rule_id='CQ006',
                severity=Severity.WARNING,
                message='No H1 heading found after frontmatter',
                suggestion='Add a top-level heading: # Skill Name'
            ))

    return findings


================================================
FILE: scripts/skill_lint/frontmatter.py
================================================
"""Frontmatter validation checks (FM001-FM009)."""

from __future__ import annotations

import re
from pathlib import Path

from . import Finding, Severity


def parse_frontmatter(content: str) -> tuple[dict[str, str], int]:
    """Read the ``key: value`` pairs between the leading ``---`` delimiters.

    Returns ``(fields, end_line)`` where ``end_line`` is the 1-based line
    number of the closing ``---``. It is 0 when the text does not start
    with ``---`` or when no closing delimiter is found; in the latter case
    ``fields`` still holds every pair seen up to the end of the text.

    Only flat ``key: value`` lines are understood. Values are kept as
    strings (an inline array such as ``[a, b]`` stays as that text), and a
    value wrapped in one matching pair of quotes has the quotes removed.
    """
    rows = content.split('\n')
    if not rows or rows[0].strip() != '---':
        return {}, 0

    parsed: dict[str, str] = {}
    for line_no, text in enumerate(rows[1:], start=2):
        if text.strip() == '---':
            return parsed, line_no

        found = re.match(r'^(\w[\w-]*)\s*:\s*(.*)', text)
        if found is None:
            continue

        key, raw = found.groups()
        value = raw.strip()
        is_quoted = (
            len(value) >= 2
            and value[0] in ('"', "'")
            and value.endswith(value[0])
        )
        parsed[key.strip()] = value[1:-1] if is_quoted else value

    return parsed, 0


# Lowercase letters and digits in hyphen-separated groups, starting with a
# letter: no leading, trailing or doubled hyphens (e.g. "llm-patterns").
NAME_PATTERN = re.compile(r'^[a-z][a-z0-9]*(-[a-z0-9]+)*$')


def check(skill_path: Path, skill_dir: Path, skills_dir: Path) -> list[Finding]:
    """Run the frontmatter checks (FM001-FM009) on one SKILL.md.

    Args:
        skill_path: path of the SKILL.md file to inspect.
        skill_dir: directory of the skill; its name is what the ``name``
            field must equal.
        skills_dir: root skills directory (not used by these checks).

    Returns:
        The findings, in rule order. Empty when ``skill_path`` does not
        exist. When the frontmatter is missing or not closed, only the
        FM001 finding is returned because the other rules need the parsed
        fields.
    """
    findings: list[Finding] = []

    # A missing SKILL.md is not a frontmatter problem; returning here
    # avoids a FileNotFoundError from read_text below.
    if not skill_path.exists():
        return findings

    content = skill_path.read_text(encoding='utf-8')
    dir_name = skill_dir.name

    # FM001: frontmatter delimiters present
    lines = content.split('\n')
    if not lines or lines[0].strip() != '---':
        findings.append(Finding(
            rule_id='FM001',
            severity=Severity.ERROR,
            message='SKILL.md missing YAML frontmatter (must start with ---)',
            line=1,
            suggestion='Add frontmatter: ---\\nname: ' + dir_name + '\\ndescription: ...\\n---'
        ))
        return findings  # Can't check other rules without frontmatter

    fields, end_line = parse_frontmatter(content)
    if end_line == 0:
        findings.append(Finding(
            rule_id='FM001',
            severity=Severity.ERROR,
            message='YAML frontmatter not closed (missing second ---)',
            line=1,
            suggestion='Add closing --- after frontmatter fields'
        ))
        return findings

    # FM002: name field present
    name = fields.get('name', '').strip()
    if not name:
        findings.append(Finding(
            rule_id='FM002',
            severity=Severity.ERROR,
            message="'name' field missing or empty in frontmatter",
            line=None,
            suggestion=f'Add: name: {dir_name}'
        ))

    # FM003: description field present
    desc = fields.get('description', '').strip()
    if not desc:
        findings.append(Finding(
            rule_id='FM003',
            severity=Severity.ERROR,
            message="'description' field missing or empty in frontmatter",
            line=None,
            suggestion='Add: description: One-line description of what this skill does'
        ))

    # FM004: name matches directory name
    if name and name != dir_name:
        findings.append(Finding(
            rule_id='FM004',
            severity=Severity.ERROR,
            message=f"name '{name}' does not match directory name '{dir_name}'",
            line=None,
            suggestion=f'Change to: name: {dir_name}'
        ))

    # FM005: name format (lowercase, hyphens, 1-64 chars)
    if name:
        if len(name) > 64:
            findings.append(Finding(
                rule_id='FM005',
                severity=Severity.ERROR,
                message=f'name is {len(name)} chars (max 64)',
                line=None
            ))
        elif not NAME_PATTERN.match(name):
            findings.append(Finding(
                rule_id='FM005',
                severity=Severity.ERROR,
                message=f"name '{name}' must be lowercase alphanumeric with hyphens",
                line=None,
                suggestion='Use only lowercase letters, numbers, and hyphens'
            ))

    # FM006: description length
    if desc:
        if len(desc) > 1024:
            findings.append(Finding(
                rule_id='FM006',
                severity=Severity.WARNING,
                message=f'description is {len(desc)} chars (max 1024)',
                line=None,
                suggestion='Shorten description to under 1024 characters'
            ))

    # FM007: when-to-use present
    if 'when-to-use' not in fields:
        findings.append(Finding(
            rule_id='FM007',
            severity=Severity.WARNING,
            message="'when-to-use' field missing",
            line=None,
            suggestion='Add: when-to-use: When to activate this skill'
        ))

    # FM008: user-invocable present
    if 'user-invocable' not in fields:
        findings.append(Finding(
            rule_id='FM008',
            severity=Severity.INFO,
            message="'user-invocable' field missing",
            line=None,
            suggestion='Add: user-invocable: true|false'
        ))

    # FM009: effort field valid
    effort = fields.get('effort', '').strip()
    if effort and effort not in ('low', 'medium', 'high'):
        findings.append(Finding(
            rule_id='FM009',
            severity=Severity.INFO,
            message=f"effort '{effort}' is not one of: low, medium, high",
            line=None
        ))
    elif not effort:
        findings.append(Finding(
            rule_id='FM009',
            severity=Severity.INFO,
            message="'effort' field missing",
            line=None,
            suggestion='Add: effort: low|medium|high'
        ))

    return findings


================================================
FILE: scripts/skill_lint/pyproject.toml
================================================
[project]
name = "skill-lint"
version = "0.1.0"
description = "Quality gates for Maggy skills"
requires-python = ">=3.10"
dependencies = []

[project.optional-dependencies]
skills-ref = ["skills-ref>=0.1.0"]

[project.scripts]
skill-lint = "skill_lint.__main__:main"

[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"


================================================
FILE: scripts/skill_lint/references.py
================================================
"""Cross-reference checks (RI001-RI002)."""

from __future__ import annotations

import re
from pathlib import Path

from . import Finding, Severity

# Match skill references like: skills/base, skills/security, .claude/skills/llm-patterns
# Group 1 is the skill name: a lowercase letter followed by one or more
# lowercase letters, digits or hyphens, so one-character names never match.
SKILL_REF_RE = re.compile(
    r'(?:\.claude/)?skills/([a-z][a-z0-9-]+)'
)


def check(skill_path: Path, skill_dir: Path, skills_dir: Path) -> list[Finding]:
    """Run the cross-reference checks (RI001-RI002) on one SKILL.md.

    RI001 reports ``skills/<name>`` references that do not correspond to a
    non-hidden directory under ``skills_dir``; references to the skill's
    own directory are ignored. RI002 reports a skill whose directory name
    does not occur anywhere in ``README.md`` next to ``skills_dir``; it is
    skipped when that README does not exist.

    Returns the findings, or an empty list when ``skill_path`` is missing.
    """
    if not skill_path.exists():
        return []

    text = skill_path.read_text(encoding='utf-8')
    own_name = skill_dir.name
    results: list[Finding] = []

    # RI001: cross-skill name references resolve to existing dirs
    known: set[str] = set()
    for entry in skills_dir.iterdir():
        if entry.is_dir() and not entry.name.startswith('.'):
            known.add(entry.name)

    mentioned = {hit.group(1) for hit in SKILL_REF_RE.finditer(text)}
    mentioned.discard(own_name)

    unresolved = sorted(mentioned - known)
    if unresolved:
        results.append(Finding(
            rule_id='RI001',
            severity=Severity.WARNING,
            message=f'Broken skill references: {", ".join(unresolved)}',
            suggestion='Fix or remove references to non-existent skills'
        ))

    # RI002: skill listed in README skills table
    readme_file = skills_dir.parent / 'README.md'
    # Plain substring test: any occurrence of the name in the README counts.
    if readme_file.exists() and own_name not in readme_file.read_text(encoding='utf-8'):
        results.append(Finding(
            rule_id='RI002',
            severity=Severity.INFO,
            message=f'Skill "{own_name}" not found in README.md',
            suggestion='Add skill to the skills table in README.md'
        ))

    return results


================================================
FILE: scripts/skill_lint/report.py
================================================
"""Output formatters for skill-lint results."""

from __future__ import annotations

import json
from collections import defaultdict

from . import Finding, Severity


def format_text(
    results: dict[str, list[Finding]],
    min_severity: Severity = Severity.INFO
) -> str:
    """Render findings as plain text, grouped by severity and then by skill.

    Args:
        results: findings per skill name.
        min_severity: least severe level to include; findings below it are
            neither printed nor counted.

    Returns:
        One section per severity that has findings (errors first), followed
        by a summary. "Clean" in the summary counts skills with no findings
        at all, regardless of ``min_severity``.
    """
    ordered = (Severity.ERROR, Severity.WARNING, Severity.INFO)
    rank_of = {level: pos for pos, level in enumerate(ordered)}
    cutoff = rank_of[min_severity]

    # severity -> skill name -> findings that pass the cutoff
    grouped: dict[Severity, dict[str, list[Finding]]] = {}
    shown = {level: 0 for level in ordered}

    for skill in sorted(results):
        for finding in results[skill]:
            if rank_of[finding.severity] > cutoff:
                continue
            grouped.setdefault(finding.severity, {}).setdefault(skill, []).append(finding)
            shown[finding.severity] += 1

    out: list[str] = []
    for level in ordered:
        per_skill = grouped.get(level)
        if not per_skill:
            continue

        out.append(f'\n=== {level.value.upper()} ===')
        for skill in sorted(per_skill):
            out.append(f'\n  {skill}/')
            for finding in per_skill[skill]:
                entry = f'    [{finding.rule_id}] {finding.message}'
                if finding.line:
                    entry = f'{entry} L{finding.line}'
                out.append(entry.rstrip())
                if finding.suggestion:
                    out.append(f'      -> {finding.suggestion}')

    # Summary
    untouched = sum(1 for found in results.values() if not found)
    out.append('\n--- Summary ---')
    out.append(f'Skills scanned: {len(results)}')
    out.append(f'Clean: {untouched}')
    out.append(
        f'Errors: {shown[Severity.ERROR]}  '
        f'Warnings: {shown[Severity.WARNING]}  '
        f'Info: {shown[Severity.INFO]}'
    )

    return '\n'.join(out)


def format_json(
    results: dict[str, list[Finding]],
    min_severity: Severity = Severity.INFO
) -> str:
    """Render findings as a JSON document indented by two spaces.

    The document has a ``summary`` object (skill totals and per-severity
    counts) and a ``skills`` object keyed by skill name in sorted order.
    Findings less severe than ``min_severity`` are left out of both the
    lists and the counts. ``clean_skills`` counts skills with no findings
    at all, regardless of ``min_severity``.
    """
    ordered = [Severity.ERROR, Severity.WARNING, Severity.INFO]
    rank_of = {level: pos for pos, level in enumerate(ordered)}
    cutoff = rank_of[min_severity]

    error_total = warning_total = info_total = 0
    per_skill: dict[str, dict] = {}

    for skill in sorted(results):
        records = []
        skill_errors = skill_warnings = 0

        for finding in results[skill]:
            if rank_of[finding.severity] > cutoff:
                continue

            record = {
                'rule_id': finding.rule_id,
                'severity': finding.severity.value,
                'message': finding.message,
            }
            # Optional keys are only emitted when they carry a value.
            if finding.line is not None:
                record['line'] = finding.line
            if finding.suggestion:
                record['suggestion'] = finding.suggestion
            records.append(record)

            if finding.severity == Severity.ERROR:
                skill_errors += 1
            elif finding.severity == Severity.WARNING:
                skill_warnings += 1
            else:
                info_total += 1

        error_total += skill_errors
        warning_total += skill_warnings
        per_skill[skill] = {
            'findings': records,
            'error_count': skill_errors,
            'warning_count': skill_warnings,
        }

    summary = {
        'total_skills': len(results),
        'clean_skills': sum(1 for found in results.values() if not found),
        'errors': error_total,
        'warnings': warning_total,
        'info': info_total,
    }

    return json.dumps({'summary': summary, 'skills': per_skill}, indent=2)


================================================
FILE: scripts/skill_lint/spec.py
================================================
"""Spec compliance checks (SP001-SP003, SR001)."""

from __future__ import annotations

from pathlib import Path

from . import Finding, Severity


def check(skill_path: Path, skill_dir: Path, skills_dir: Path) -> list[Finding]:
    """Run spec compliance checks on a single skill.

    Rules evaluated:
        SP001 (error):   SKILL.md must exist; when it does not, this is the
                         only finding returned.
        SP002 (warning): SKILL.md is longer than 500 lines.
        SP003 (info):    SKILL.md is longer than 300 lines (but not over 500).
        SR001 (warning): problems reported by the optional ``skills_ref``
                         validator (at most the first five are reported).

    SP002/SP003 can be silenced per file with a marker in the first 10 lines:
    ``<!-- skill-lint: disable=SP002,SP003 -->``.

    Args:
        skill_path: Path to the skill's SKILL.md file.
        skill_dir: Directory of the skill; handed to ``skills_ref.validate``.
        skills_dir: Root skills directory. Not used by these checks.

    Returns:
        The list of findings; empty when no rule fires.
    """
    findings: list[Finding] = []

    # SP001: SKILL.md exists
    if not skill_path.exists():
        findings.append(Finding(
            rule_id='SP001',
            severity=Severity.ERROR,
            message='SKILL.md not found in skill directory',
            suggestion='Create SKILL.md with frontmatter and content'
        ))
        return findings

    content = skill_path.read_text(encoding='utf-8')
    lines = content.split('\n')
    # split('\n') produces a trailing '' when the file ends with a newline.
    # Drop it so a newline-terminated file is not counted one line too long
    # (which would trip SP002/SP003 exactly at the 500/300 limits).
    if lines[-1] == '':
        lines.pop()
    line_count = len(lines)

    # Check for inline suppression in first 10 lines
    suppressed: set[str] = set()
    for line in lines[:10]:
        if '<!-- skill-lint: disable=' in line:
            # Extract rule IDs: <!-- skill-lint: disable=SP002,SP003 -->
            start = line.index('disable=') + 8
            # Tolerate a marker whose closing '-->' is missing on this line.
            end = line.index('-->', start) if '-->' in line[start:] else len(line)
            rules = line[start:end].strip().rstrip(' >')
            for rule in rules.split(','):
                suppressed.add(rule.strip())

    # SP002: under 500 lines
    if line_count > 500 and 'SP002' not in suppressed:
        findings.append(Finding(
            rule_id='SP002',
            severity=Severity.WARNING,
            message=f'SKILL.md is {line_count} lines (limit: 500)',
            suggestion='Split into focused sections; move reference material to companion files'
        ))

    # SP003: under 300 lines (ideal); only reported when SP002's range does not apply
    if 300 < line_count <= 500 and 'SP003' not in suppressed:
        findings.append(Finding(
            rule_id='SP003',
            severity=Severity.INFO,
            message=f'SKILL.md is {line_count} lines (ideal: under 300)',
            suggestion='Consider trimming for better token efficiency'
        ))

    # SR001: skills-ref validate (if installed)
    try:
        from skills_ref import validate as sr_validate
        problems = sr_validate(str(skill_dir))
        if problems:
            # Cap the output at five problems per skill.
            for p in problems[:5]:
                findings.append(Finding(
                    rule_id='SR001',
                    severity=Severity.WARNING,
                    message=f'skills-ref: {p}',
                ))
    except ImportError:
        pass  # skills-ref not installed, skip

    return findings


================================================
FILE: skills/aeo-optimization/SKILL.md
================================================
---
name: aeo-optimization
description: AI Engine Optimization - semantic triples, page templates, content clusters for AI citations
when-to-use: When optimizing content for AI engine discovery and citations
user-invocable: false
effort: medium
---

# AI Engine Optimization (AEO) Skill


**Purpose:** Optimize content for AI engines (ChatGPT, Claude, Perplexity, Google AI Overviews) so your brand gets cited in AI-generated answers.

**Source:** Based on [HubSpot's AEO Guide](https://www.hubspot.com/aeo) and industry best practices.

---

## Why AEO Matters Now

```
┌────────────────────────────────────────────────────────────────┐
│  THE GREAT DECOUPLING                                          │
│  ────────────────────────────────────────────────────────────  │
│  Impressions ≠ Clicks anymore.                                 │
│  AI engines compile answers from multiple sources.             │
│  More buyer journey happens inside chat experiences.           │
│  58% of Google searches = zero clicks (AI overviews).          │
├────────────────────────────────────────────────────────────────┤
│  THE OPPORTUNITY                                               │
│  ────────────────────────────────────────────────────────────  │
│  Shape what AI engines say about your category and product.    │
│  Get cited as the authoritative source.                        │
│  Best answer > Best page ranking.                              │
└────────────────────────────────────────────────────────────────┘
```

**Key Stats:**
- 70% of consumers use ChatGPT for searches
- 47% of Google queries show AI overviews
- Average ChatGPT prompt: 23 words (vs 4.2 for Google)
- AEO market: $886M (2024) → $7.3B (2031)

---

## How AI Engines Choose Answers

AI engines use three main signals to select content for answers:

### 1. Consensus

Facts that appear across multiple credible sources get trusted and reused.

**How to build consensus:**
- Repeat key facts consistently across your own pages
- Use the same terminology as industry leaders
- Link to and from authoritative external sources
- Create internal content clusters that reinforce each other

### 2. Information Gain

Net-new insight beats generic advice. AI engines prefer content that adds value.

**How to add information gain:**
- Original research and data
- Concrete examples with specifics
- Clear point of view (not fence-sitting)
- Expert quotes with credentials
- Case studies with metrics

### 3. Entities & Structure

Clear entities and tidy structure reduce ambiguity and boost quotability.

**How to optimize structure:**
- Use semantic triples (Subject → Verb → Object)
- Clear headings with entity names
- Schema markup (Article, FAQ, Product)
- Short, scannable paragraphs (2-4 sentences)

---

## Semantic Triples (Critical for AEO)

**What they are:** Compact facts that AI engines (and humans) can't misread.

**Pattern:** `[Subject]` `[verb]` `[object]`.

### Examples

```
✅ GOOD (clear triples):
- HubSpot CRM syncs contact and company data.
- Lead Scoring assigns priority based on engagement.
- Workflows trigger email sequences from events.

❌ BAD (vague, no clear entity):
- The system helps with various tasks.
- It can do many things for users.
- This improves overall performance.
```

### Triple Checklist

For every key claim, ask:
- [ ] Is the subject a clear entity (product, feature, brand)?
- [ ] Is the verb specific and active?
- [ ] Is the object concrete and measurable?

---

## Paragraph Pattern (Feature → How → Outcome)

Every substantive paragraph should follow this structure:

```
[Feature] helps [User/Role] with [Job].
It [mechanism/inputs] to [process].
Teams see [metric/result] in [timeframe/context].

Triples:
- [Subject] [verb] [object].
- [Subject] [verb] [object].
```

### Example

```markdown
Lead Scoring helps sales teams prioritize prospects. It combines
page views, email engagement, and firmographic data to assign a
numeric score, then auto-enrolls high scorers into follow-up
sequences. Reps focus on qualified accounts and book 40% more
meetings.

- Lead Scoring assigns scores from engagement data.
- High scorers trigger automated follow-up sequences.
```

---

## Page Templates

### Template 1: Category Explainer

**Goal:** Define the category, tie it to your product, earn citations.

```markdown
# What is [Category]? — [1-2 line value promise]

## What is [Category]? (~80 words)
[Plain definition in everyday language. Name adjacent entities.]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

## Why it matters now (~60 words)
[One paragraph. Mention shift to answers over links; tie to buyer outcomes.]

## How to apply it (3-5 bullets)
- [Action 1]
- [Action 2]
- [Action 3]

## FAQ
**Q: [Question]?**
A: [~1 sentence answer]

**Q: [Question]?**
A: [~1 sentence answer]

**Q: [Question]?**
A: [~1 sentence answer]

---
**Links:** [Category hub] | [Product/Feature] | [Credible source 1] | [Credible source 2]
**CTA:** [Demo / Template / Signup]
**Schema:** Article + FAQ. Author + last updated.
```

---

### Template 2: Product & Feature Page

**Goal:** Clarify capability, fit, and next step; reinforce category linkage.

```markdown
# [Product/Feature] — [Outcome in 3-5 words]

**[Product/Feature] enables [Outcome] for [User/Role].**

## [Feature Area 1]
[2-4 sentences using Feature → How → Outcome]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

## [Feature Area 2]
[2-4 sentences using Feature → How → Outcome]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

## [Feature Area 3]
[2-4 sentences using Feature → How → Outcome]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

## FAQ
**Q: [Question]?**
A: [~1 sentence]

**Q: [Question]?**
A: [~1 sentence]

**Q: [Question]?**
A: [~1 sentence]

---
**Links:** Back to [Category Explainer] | Forward to [Demo/Trial]
**Proof:** [Benchmark/Analyst/Customer proof]
**Notes:** Requirements/limits (pricing tier, integrations)
**Schema:** Article + FAQ. Author + last updated.
```

---

### Template 3: Comparison / Alternatives Page

**Goal:** Help readers decide with clear criteria; earn fair citations.

```markdown
# [Product] vs. [Alternative] — Which fits [Use case]?

## Comparison Table

| Criterion | [Product] | [Alt A] | [Alt B] | Source |
|-----------|-----------|---------|---------|--------|
| [Feature/Limit] | [value] | [value] | [value] | [link] |
| [Requirement] | [value] | [value] | [value] | [link] |
| [Best for] | [value] | [value] | [value] | [link] |

*Source-back all claims in the table or footnotes.*

## Fit Statements

1. **[Product]** suits [Team/Use case] when [Condition].
2. **[Alt A]** fits [Team/Use case] when [Condition].
3. **[Alt B]** works for [Team/Use case] when [Condition].

---
**Links:** [Category Explainer] | [Feature pages]
**CTA:** [Try / Demo / Talk to Sales]
**Schema:** Article. Author + last updated.
```

---

### Template 4: Use Case / Industry Page

**Goal:** Connect product to outcomes in a context readers recognize.

```markdown
# [Industry/Use Case] — [Outcome KPI]

**Teams reduce [Metric] by [Y%] in [Timeframe].**

## Mini Case Study
[Company/Role] used [Product/Feature] to [Action], resulting in
[Metric improvement] within [Timeframe].

## How It Works

### [Feature 1]
[Feature → How → Outcome paragraph]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

### [Feature 2]
[Feature → How → Outcome paragraph]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

## Who Uses This
**Roles:** [Role 1], [Role 2], [Role 3]
**Workflows:** [Workflow 1], [Workflow 2]
**Integrations:** [Integration 1], [Integration 2]

---
**Links:** [Product/Feature pages] | [Supporting blog]
**CTA:** [Industry template / Demo variant]
**Schema:** Article. Author + last updated.
```

---

### Template 5: Supporting Blog Post

**Goal:** Add information gain and support your content cluster.

```markdown
# [Topic] — [Specific promise]

## Opening (~60-80 words)
[State the problem. Align terminology with Category Explainer. Preview outcome.]

## [Section 1 Heading] (~120 words max)
[Feature → How → Outcome]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

**Internal link:** [Related page]
**External citation:** [Credible source]

## [Section 2 Heading] (~120 words max)
[Feature → How → Outcome]

Triples:
1. [Subject] [verb] [object].
2. [Subject] [verb] [object].

**Internal link:** [Related page]
**External citation:** [Credible source]

## Key Takeaway
[1-2 lines summarizing the main point]

**CTA:** [Single primary action]

---
**Schema:** Article. Author + last updated.
```

---

## Site-Wide Trust Signals

### Required on Every Page

| Element | Implementation |
|---------|----------------|
| **Schema markup** | Article + FAQ (if FAQ exists) |
| **Author attribution** | Name, bio, credentials, photo |
| **Last updated date** | Visible, machine-readable |
| **Internal links** | 3-5 per page (upstream/downstream) |
| **External citations** | 1-2 credible sources per section |
| **Single CTA** | Demo, template, or signup (repeated once near end) |

### Schema Implementation

```html
<!-- Article Schema -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "Article",
  "headline": "[Page Title]",
  "author": {
    "@type": "Person",
    "name": "[Author Name]",
    "url": "[Author Bio URL]"
  },
  "datePublished": "[ISO Date]",
  "dateModified": "[ISO Date]",
  "publisher": {
    "@type": "Organization",
    "name": "[Company]",
    "logo": "[Logo URL]"
  }
}
</script>

<!-- FAQ Schema (if FAQ section exists) -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "FAQPage",
  "mainEntity": [
    {
      "@type": "Question",
      "name": "[Question 1]",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "[Answer 1]"
      }
    },
    {
      "@type": "Question",
      "name": "[Question 2]",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "[Answer 2]"
      }
    }
  ]
}
</script>
```

---

## Content Cluster Architecture

```
                    ┌─────────────────────┐
                    │  Category Explainer │
                    │   "What is AEO?"    │
                    └──────────┬──────────┘
                               │
        ┌──────────────────────┼──────────────────────┐
        │                      │                      │
        ▼                      ▼                      ▼
┌───────────────┐    ┌───────────────┐    ┌───────────────┐
│ Product Page  │    │ Product Page  │    │ Product Page  │
│  "Feature A"  │    │  "Feature B"  │    │  "Feature C"  │
└───────┬───────┘    └───────┬───────┘    └───────┬───────┘
        │                    │                    │
        ▼                    ▼                    ▼
┌───────────────┐    ┌───────────────┐    ┌───────────────┐
│  Blog Post    │    │  Use Case     │    │  Comparison   │
│  (supports)   │    │  (industry)   │    │  (vs. alt)    │
└───────────────┘    └───────────────┘    └───────────────┘
```

**Linking Rules:**
- Category Explainer links DOWN to all product pages
- Product pages link UP to Category Explainer
- Product pages link ACROSS to related features
- Blog posts link UP to Product pages
- Comparison pages link to Category Explainer + relevant Product pages

---

## AEO Writing Checklist

### Per-Paragraph Checklist

- [ ] Follows Feature → How → Outcome pattern
- [ ] Contains 2-4 sentences (scannable)
- [ ] Includes 1-2 semantic triples
- [ ] Names specific entities (not vague "it" or "this")
- [ ] Uses active voice verbs

### Per-Section Checklist

- [ ] Has 1 internal link (upstream or downstream)
- [ ] Has 1 external citation (credible source)
- [ ] Section heading names an entity
- [ ] ~120 words max

### Per-Page Checklist

- [ ] H1 contains primary entity + value promise
- [ ] Opening claim is a semantic triple
- [ ] 3-5 internal links total
- [ ] 1-2 external citations per section
- [ ] Mini-FAQ with 3 questions (if applicable)
- [ ] Single primary CTA
- [ ] Schema markup (Article + FAQ)
- [ ] Author name + bio link
- [ ] Last updated date visible

### Site-Wide Checklist

- [ ] Category Explainer exists for each key category
- [ ] Product pages link back to Category Explainer
- [ ] Content cluster architecture documented
- [ ] Author bio pages exist with credentials
- [ ] Consistent terminology across all pages

---

## Measuring AEO Success

### Key Metrics

| Metric | How to Track |
|--------|--------------|
| **AI citations** | Manual checks in ChatGPT, Claude, Perplexity |
| **Brand mentions in AI** | Search "[brand] + [category]" in AI engines |
| **Share of answer** | How often you're cited vs competitors |
| **LLM traffic** | GA4 referral from chatgpt.com, claude.ai, perplexity.ai |
| **Impressions-to-clicks gap** | GSC impressions vs actual clicks |

### Tools

- **HubSpot AEO Grader** - Grade your brand's AI visibility
- **Google Analytics 4** - Track LLM referral traffic
- **Google Search Console** - Monitor impressions vs clicks gap
- **Manual AI queries** - Regularly test your brand in AI engines

---

## Common AEO Mistakes

| Mistake | Fix |
|---------|-----|
| Vague language ("it helps with things") | Use specific entities and triples |
| No clear structure | Use Feature → How → Outcome |
| Missing schema | Add Article + FAQ schema |
| No author attribution | Add author name, bio, credentials |
| Generic content | Add original data, examples, POV |
| Orphan pages | Link into content cluster |
| Fence-sitting ("it depends") | Take a clear position |
| No external citations | Add 1-2 credible sources per section |

---

## AEO vs Traditional SEO

| Aspect | Traditional SEO | AEO |
|--------|-----------------|-----|
| **Goal** | Rank on page 1 | Get cited in AI answers |
| **Success metric** | Click-through rate | Share of answer |
| **Content focus** | Keywords | Entities + facts |
| **Structure** | Headers for scanning | Triples for extraction |
| **Links** | Backlinks for authority | Citations for consensus |
| **Updates** | Periodic refresh | Continuous accuracy |

---

## Quick Reference

### Semantic Triple Pattern
```
[Entity/Product] [active verb] [concrete object/result].
```

### Paragraph Pattern
```
[Feature] helps [User] with [Job].
It [mechanism] to [process].
Teams see [result] in [timeframe].
```

### Page Minimums
- 3-5 internal links
- 1-2 external citations per section
- 3 FAQ questions with schema
- Author + last updated
- Single CTA

### Content Hierarchy
1. Category Explainer (top)
2. Product/Feature pages (middle)
3. Use case / Comparison / Blog (supporting)


================================================
FILE: skills/agent-teams/SKILL.md
================================================
---
name: agent-teams
description: Claude Code Agent Teams - default team-based development with strict TDD pipeline enforcement
when-to-use: When spawning agent teams for parallel feature development with TDD pipeline
user-invocable: false
effort: high
---

# Agent Teams Skill


**Purpose:** Every project initialized with Maggy runs as a coordinated team of AI agents. This is the default workflow, not optional. Teams enforce a strict TDD pipeline where no step can be skipped.

**Setup:** Agent definitions go in `.claude/agents/` with proper frontmatter (name, description, model, tools, disallowedTools, maxTurns, effort). See agent files for the format.

---

## Core Principle

Every feature follows an immutable pipeline enforced by task dependencies:

```
┌─────────────────────────────────────────────────────────────────┐
│  STRICT FEATURE PIPELINE (IMMUTABLE)                            │
│  ──────────────────────────────────────────────────────────────  │
│                                                                  │
│  1. SPEC        Write feature specification                      │
│       ↓         (Feature Agent)                                  │
│  2. REVIEW      Quality Agent reviews spec completeness          │
│       ↓         (Quality Agent)                                  │
│  3. TESTS       Write failing tests for all acceptance criteria  │
│       ↓         (Feature Agent)                                  │
│  4. RED VERIFY  Quality Agent confirms ALL tests FAIL            │
│       ↓         (Quality Agent)                                  │
│  5. IMPLEMENT   Write minimum code to pass tests                 │
│       ↓         (Feature Agent)                                  │
│  6. GREEN VERIFY Quality Agent confirms ALL tests PASS + coverage│
│       ↓         (Quality Agent)                                  │
│  7. VALIDATE    Lint + type check + full test suite              │
│       ↓         (Feature Agent)                                  │
│  8. CODE REVIEW Multi-engine review, block on Critical/High      │
│       ↓         (Code Review Agent)                              │
│  9. SECURITY    OWASP scan, secrets detection, dependency audit  │
│       ↓         (Security Agent)                                 │
│  10. BRANCH+PR  Create feature branch, stage files, create PR    │
│                 (Merger Agent)                                    │
│                                                                  │
│  No step can be skipped. Task dependencies enforce ordering.     │
│  Quality Agent verifies RED/GREEN transitions.                   │
│  Code Review + Security Agents gate the merge path.              │
│  Merger Agent handles branching and PR creation.                 │
└─────────────────────────────────────────────────────────────────┘
```

---

## Default Agent Roster

Every project spawns 5 permanent agents + N feature agents:

```
┌─────────────────────────────────────────────────────────────────┐
│  DEFAULT TEAM ROSTER                                             │
│  ──────────────────────────────────────────────────────────────  │
│                                                                  │
│  PERMANENT AGENTS (always present)                               │
│  ─────────────────────────────────                               │
│  Team Lead        Orchestration, task breakdown, assignment      │
│                   Uses delegate mode - NEVER writes code         │
│                                                                  │
│  Quality Agent    TDD verification (RED/GREEN phases)            │
│                   Coverage gates (>= 80%)                        │
│                   Spec completeness review                       │
│                                                                  │
│  Security Agent   OWASP scanning, secrets detection              │
│                   Dependency audit, .env validation               │
│                   Blocks on Critical/High                        │
│                                                                  │
│  Code Review Agent  Multi-engine code review                     │
│                     Claude / Codex / Gemini / All                │
│                     Blocks on Critical/High                      │
│                                                                  │
│  Merger Agent     Creates feature branches                       │
│                   Stages feature-specific files only              │
│                   Creates PRs via gh CLI                          │
│                   NEVER merges - only creates PRs                │
│                                                                  │
│  DYNAMIC AGENTS (one per feature)                                │
│  ────────────────────────────────                                │
│  Feature Agent    Implements one feature end-to-end              │
│  (x N features)   Follows strict pipeline above                  │
│                   Uses Ralph loops for implementation             │
│                                                                  │
└─────────────────────────────────────────────────────────────────┘
```

| Agent | Role | Plan Mode | Can Edit Code |
|-------|------|-----------|---------------|
| team-lead | Orchestration, task breakdown, assignment | No (delegate mode) | No |
| quality-agent | TDD verification, coverage gates | Yes | No (read-only) |
| security-agent | OWASP scanning, secrets detection | Yes | No (read-only) |
| review-agent | Multi-engine code review | Yes | No (read-only) |
| merger-agent | Branch creation, PR management | No | No (git only) |
| feature-{name} | Feature implementation (one per feature) | No | Yes |

---

## Team Lead Responsibilities

The Team Lead is the orchestrator. It NEVER writes code.

1. Read `_project_specs/features/*.md` to identify all features
2. Break each feature into the 10-task dependency chain (see below)
3. Spawn one feature agent per feature
4. Assign initial tasks (spec-writing) to feature agents
5. Monitor TaskList continuously for progress and blockers
6. Handle blocked tasks and reassignment
7. Coordinate cross-feature dependencies
8. Send `shutdown_request` to all agents when all PRs are created
9. Clean up the team when done

**Delegate mode is mandatory.** The team lead uses only:
- TeamCreate, TaskCreate, TaskUpdate, TaskList, TaskGet
- SendMessage (message, broadcast, shutdown_request)
- Read, Glob, Grep (for monitoring)

---

## Feature Agent Workflow (MANDATORY)

Each feature agent MUST follow this exact sequence. Task dependencies enforce ordering - a feature agent cannot start step N+1 until step N is marked complete and verified.

### Step 1: Write Spec
- Create `_project_specs/features/{feature-name}.md`
- Include: description, acceptance criteria, test cases table, dependencies
- Follow the atomic TODO format from the base.md skill
- Mark task complete -> Quality Agent reviews

### Step 2: Write Tests (RED Phase)
- Write test files based on spec's test cases table
- Tests MUST cover ALL acceptance criteria
- Import modules that don't exist yet (they will fail)
- Mark task complete -> Quality Agent verifies tests EXIST and FAIL

### Step 3: Wait for RED Verification
- Quality Agent runs tests and verifies ALL new tests fail
- If any test passes without implementation -> rewrite tests
- Quality Agent marks verification complete -> unlocks implementation

### Step 4: Implement (GREEN Phase)
- Write minimum code to make all tests pass
- Follow simplicity rules from base.md (20 lines/function, 200 lines/file, 3 params)
- Use Ralph loops (`/ralph-loop`) for iterative implementation
- Run tests after implementation - ALL must pass
- Mark task complete -> Quality Agent verifies tests pass

### Step 5: Wait for GREEN Verification
- Quality Agent runs full test suite and checks coverage
- Coverage must be >= 80%
- If tests fail or coverage insufficient -> fix and re-request
- Quality Agent marks verification complete -> unlocks validation

### Step 6: Validate
- Run linter (ESLint / Ruff)
- Run type checker (TypeScript / mypy)
- Run full test suite with coverage
- Fix any issues
- Mark task complete -> unlocks code review

### Step 7: Wait for Code Review
- Code Review Agent runs `/code-review` on changed files
- If Critical or High issues -> fix and re-request review
- Code Review Agent marks complete -> unlocks security scan

### Step 8: Wait for Security Scan
- Security Agent runs security checks
- If Critical or High issues -> fix and re-request scan
- Security Agent marks complete -> unlocks merge

### Step 9: Wait for Branch + PR
- Merger Agent creates feature branch, stages files, creates PR
- Feature is complete when PR is created

---

## Task Dependency Chain Model

For each feature "X", the team lead creates these 10 tasks with strict ordering:

```
┌────────────────────────────────────────────────────────────────┐
│  TASK CHAIN FOR FEATURE "X"                                     │
│                                                                  │
│  Task 1:  X-spec                                                │
│           owner: feature-X                                       │
│           blockedBy: (none)                                      │
│           ↓                                                      │
│  Task 2:  X-spec-review                                         │
│           owner: quality-agent                                   │
│           blockedBy: X-spec                                      │
│           ↓                                                      │
│  Task 3:  X-tests                                               │
│           owner: feature-X                                       │
│           blockedBy: X-spec-review                               │
│           ↓                                                      │
│  Task 4:  X-tests-fail-verify                                   │
│           owner: quality-agent                                   │
│           blockedBy: X-tests                                     │
│           ↓                                                      │
│  Task 5:  X-implement                                           │
│           owner: feature-X                                       │
│           blockedBy: X-tests-fail-verify                         │
│           ↓                                                      │
│  Task 6:  X-tests-pass-verify                                   │
│           owner: quality-agent                                   │
│           blockedBy: X-implement                                 │
│           ↓                                                      │
│  Task 7:  X-validate                                            │
│           owner: feature-X                                       │
│           blockedBy: X-tests-pass-verify                         │
│           ↓                                                      │
│  Task 8:  X-code-review                                         │
│           owner: review-agent                                    │
│           blockedBy: X-validate                                  │
│           ↓                                                      │
│  Task 9:  X-security-scan                                       │
│           owner: security-agent                                  │
│           blockedBy: X-code-review                               │
│           ↓                                                      │
│  Task 10: X-branch-pr                                           │
│           owner: merger-agent                                    │
│           blockedBy: X-security-scan                             │
└────────────────────────────────────────────────────────────────┘
```

### Parallel Feature Execution

Multiple features run their chains in parallel. Shared agents process tasks as they unblock:

```
Feature: auth         Feature: dashboard      Feature: payments
  auth-spec             dash-spec               pay-spec
  auth-spec-review      dash-spec-review        pay-spec-review
  auth-tests            dash-tests              pay-tests
  auth-fail-verify      dash-fail-verify        pay-fail-verify
  auth-implement        dash-implement          pay-implement
  auth-pass-verify      dash-pass-verify        pay-pass-verify
  auth-validate         dash-validate           pay-validate
  auth-code-review      dash-code-review        pay-code-review
  auth-security         dash-security           pay-security
  auth-branch-pr        dash-branch-pr          pay-branch-pr
       |                     |                       |
       v                     v                       v
   [All chains run simultaneously]
   [Quality Agent handles all verify tasks as they unblock]
   [Review Agent handles all review tasks as they unblock]
   [Security Agent handles all scan tasks as they unblock]
   [Merger Agent handles all branch-pr tasks as they unblock]
```

---

## Inter-Agent Communication

### Direct Messages (for targeted work)
```
Feature Agent -> Quality Agent:  "Tests written for auth, ready for RED verify"
Quality Agent -> Feature Agent:  "All 7 tests fail as expected. Proceed to implement"
Feature Agent -> Review Agent:   "Implementation complete, ready for code review"
Review Agent  -> Feature Agent:  "2 High issues found: [details]. Fix before proceeding"
Security Agent -> Merger Agent:  "Security scan passed for auth feature"
Merger Agent  -> Team Lead:      "PR #42 created for auth feature"
```

### Task List (source of truth for state)
- All agents check TaskList after completing work
- Quality Agent claims verification tasks automatically
- Review Agent claims code-review tasks automatically
- Security Agent claims security-scan tasks automatically
- Merger Agent claims branch-pr tasks automatically

### Broadcast (rare - blocking issues only)
- Team Lead -> All: "Blocking dependency found between auth and dashboard"
- Security Agent -> All: "Critical vulnerability in shared dependency"

---

## Feature Agent Spawning

The team lead spawns one feature agent per feature:

1. Read `_project_specs/features/*.md`
2. For each feature spec, spawn a feature agent:
   - name: `feature-{feature-name}`
   - Uses `.claude/agents/feature.md` definition
   - Spawn prompt includes the feature name and spec location
3. Create the full 10-task dependency chain for that feature
4. Assign the spec-writing task to the feature agent

### Example
If project has 3 features: auth, dashboard, payments
- Spawn: `feature-auth`, `feature-dashboard`, `feature-payments`
- Create 30 tasks total (10 per feature)
- Each feature agent starts with their spec task
- All 3 work in parallel

---

## Branch and PR Strategy

**One branch per feature. One PR per feature.**

```
Branch naming:  feature/{feature-name}
PR title:       feat({feature-name}): {short description}
PR body:        Generated from spec + test results + review + security results
```

The Merger Agent:
1. `git checkout main && git pull origin main`
2. `git checkout -b feature/{feature-name}`
3. Stages ONLY files changed for this feature (never `git add -A`)
4. Commits with descriptive message including verification results
5. `git push -u origin feature/{feature-name}`
6. `gh pr create` with full template including:
   - Summary from feature spec
   - Test results from quality verification
   - Code review summary from review agent
   - Security scan results from security agent
   - Checklist of all pipeline steps completed

---

## Quality Gates

### Workflow Enforcement (via task dependencies)
- Task dependencies make it **structurally impossible** to skip steps
- A feature agent cannot see "implement" until quality agent completes "tests-fail-verify"
- This is the primary enforcement mechanism

### Cross-Agent Verification (trust but verify)
- Quality agent independently runs tests (doesn't trust feature agent's report)
- Security agent independently scans (doesn't trust review agent)
- Merger agent verifies all predecessor tasks are complete before branching

### Blocking Rules
- Quality Agent: blocks if tests don't fail (RED) or don't pass (GREEN) or coverage < 80%
- Code Review Agent: blocks on Critical or High severity issues
- Security Agent: blocks on Critical or High severity findings
- Merger Agent: refuses to branch if any predecessor task is incomplete

---

## Integration with Existing Skills

| Existing Skill | How Agent Teams Uses It |
|----------------|------------------------|
| base.md | TDD workflow, atomic todos, simplicity rules - all agents follow |
| code-review.md | Review Agent executes `/code-review` per this skill |
| security.md | Security Agent follows OWASP patterns from this skill |
| session-management.md | Each agent maintains its own session state |
| iterative-development.md | Feature agents use Stop hook TDD loops for implementation |
| project-tooling.md | Merger Agent uses `gh` CLI for branches and PRs |
| team-coordination.md | Superseded by agent-teams for automated coordination |
| **icpg.md** | **Team lead creates ReasonNodes. Feature agents query constraints/risk. Quality agent checks drift. PreToolUse hook injects context. Stop hook auto-records symbols.** |
| code-graph.md | Feature agents use graph for symbol lookup alongside iCPG for intent context |

---

## Environment Setup

### Required Setting
```json
// settings.json or environment
{
  "env": {
    "CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS": "1"
  }
}
```

### Project Structure (created by /initialize-project)
```
.claude/
  agents/            # Agent definitions (from agent-teams skill)
    team-lead.md
    quality.md
    security.md
    code-review.md
    merger.md
    feature.md
  skills/
    agent-teams/     # This skill
      SKILL.md
      agents/        # Agent definition templates
    base/
    code-review/
    security/
    ...
```

---

## Spawning the Team

### Automatic (via /initialize-project)
After project setup completes, Phase 6 asks for features and spawns the team automatically.

### Manual (via /spawn-team)
For existing projects: run `/spawn-team` to spawn the team from existing feature specs.

---

## Container Isolation (Polyphony)

When Docker/OrbStack is available, feature agents run in Polyphony containers by default. The team lead and shared agents (quality, security, review, merger) still run natively — they only read and coordinate.

### What changes with Polyphony

| Aspect | Without Polyphony | With Polyphony |
|--------|-------------------|----------------|
| Feature agents | Shared filesystem | Own container + git branch |
| File conflicts | Team lead must serialize | Impossible (isolated clones) |
| Test execution | Shared, can interfere | Independent per container |
| Branch strategy | Merger agent creates branches | Each container has its own branch |

### How it works

1. `/spawn-team` detects Docker + polyphony CLI
2. For each feature, runs `polyphony spawn "$FEATURE" --type feature`
3. Polyphony creates a container with its own git clone + branch
4. Agent CLI starts inside the container
5. On completion, changes are on a dedicated branch ready for PR

### Fallback

If Docker is not available, `/spawn-team` falls back to the native Agent tool (shared filesystem). A note is printed:
> "Running without container isolation (Docker not found). Agents share the workspace."

---

## Limitations

- **Experimental feature** - Agent teams require the experimental env var
- **No nested teams** - Teammates cannot spawn sub-teams
- **One team per session** - Clean up before starting a new team
- **No session resumption** - If session dies, re-run `/spawn-team` (tasks persist)
- **File conflicts** - Features sharing files must be serialized by team lead (unless using Polyphony containers)
- **Token cost** - Each agent is a separate Claude instance (5 + N instances)


================================================
FILE: skills/agent-teams/agents/code-review.md
================================================
---
name: review-agent
description: Performs code reviews on completed features - checks security, performance, architecture, code quality. Blocks on Critical/High.
model: sonnet
tools: [Read, Glob, Grep, Bash, TaskUpdate, TaskList, TaskGet, SendMessage]
disallowedTools: [Write, Edit]
maxTurns: 20
effort: high
---

# Code Review Agent

You perform code reviews on completed features.

## Review Protocol

For each `{name}-code-review` task:

1. Identify changed files via `git diff main --name-only`
2. Review for: security vulnerabilities, performance issues (N+1, memory leaks), architecture problems (coupling, SOLID), code quality (simplicity rules, DRY, dead code), test quality (behavior tests, edge cases, isolation)
3. Categorize findings by severity (Critical/High/Medium/Low)

## Blocking Rules

If Critical or High issues found:
1. Message feature agent with file:line, description, and suggested fix
2. Do NOT mark complete
3. Wait for fixes, then re-review

If only Medium/Low: mark complete, message security-agent.

## Rules

- Read-only: review code, do NOT fix it
- Block on Critical and High, no exceptions
- Process tasks in order (lowest task ID first)


================================================
FILE: skills/agent-teams/agents/feature.md
================================================
---
name: feature-agent
description: Implements one feature end-to-end following the strict TDD pipeline - spec, tests, implementation, validation.
model: inherit
tools: [Read, Write, Edit, Bash, Glob, Grep, TaskUpdate, TaskList, TaskGet, SendMessage]
maxTurns: 40
effort: high
---

# Feature Agent

You implement one specific feature following the strict TDD pipeline.

## Your Steps (enforced by task dependencies)

1. **SPEC** — Write `_project_specs/features/{name}.md` with description, acceptance criteria, test cases table, dependencies
2. *Wait for quality-agent spec review*
3. **TESTS (RED)** — Write test files covering ALL acceptance criteria. Tests MUST fail.
4. *Wait for quality-agent RED verification*
5. **PRE-IMPLEMENT** — Before coding:
   - Run `icpg query constraints <scope-files>` to understand invariants
   - Run `icpg query risk <key-symbol>` for fragile symbols
   - Write feature name to `.icpg/.current-intent` (enables auto-recording)
6. **IMPLEMENT (GREEN)** — Write minimum code to pass all tests. Follow simplicity rules (20 lines/function, 200 lines/file, 3 params max). PreToolUse hook auto-injects intent context before every edit.
7. **POST-IMPLEMENT** — After tests pass:
   - Run `icpg record --reason <intent-id> --base main` (or auto via Stop hook)
   - Run `icpg drift check` to verify no unintended scope drift
8. *Wait for quality-agent GREEN verification*
9. **VALIDATE** — Run linter, type checker, full test suite with coverage.
10. *Wait for code review and security scan*

## Rules

- Always write tests before implementation (TDD is mandatory)
- Always check constraints and risk before implementing (iCPG is mandatory)
- Follow simplicity rules from project CLAUDE.md
- If blocked by environment issues (DB down, missing API key), message team-lead
- Mark tasks complete only when the work is actually done
- Process tasks in order following the pipeline


================================================
FILE: skills/agent-teams/agents/merger.md
================================================
---
name: merger-agent
description: Creates feature branches and PRs for completed features via gh CLI. Never merges - only creates PRs.
model: sonnet
tools: [Read, Glob, Grep, Bash, TaskUpdate, TaskList, TaskGet, SendMessage]
disallowedTools: [Write, Edit]
maxTurns: 15
effort: medium
---

# Merger Agent

You handle git branching and PR creation. You NEVER merge - you only create PRs.

## Protocol

For each `{name}-branch-pr` task:

1. `git checkout main && git pull origin main`
2. `git checkout -b feature/{feature-name}`
3. Stage ONLY files related to this feature (never `git add -A`)
4. Commit with: `feat({feature-name}): {description}`
5. `git push -u origin feature/{feature-name}`
6. `gh pr create` with summary, test results, review results, security results, pipeline checklist
7. `git checkout main`
8. Message team-lead with PR URL

## Gathering Results

Before creating PR, use TaskGet to read predecessor tasks for:
- Test count and coverage from `{name}-tests-pass-verify`
- Review summary from `{name}-code-review`
- Security summary from `{name}-security-scan`

## Rules

- Never merge PRs, only create them
- Never force push
- Never use `git add -A` or `git add .`
- One branch per feature, one PR per feature
- Process tasks in order (lowest task ID first)


================================================
FILE: skills/agent-teams/agents/quality.md
================================================
---
name: quality-agent
description: Enforces TDD discipline - verifies specs are complete, tests fail before implementation, tests pass after implementation, coverage >= 80%
model: sonnet
tools: [Read, Glob, Grep, Bash, TaskUpdate, TaskList, TaskGet, SendMessage]
disallowedTools: [Write, Edit]
maxTurns: 30
effort: high
---

# Quality Agent

You enforce TDD discipline. You verify that specs are complete, tests fail before implementation, and tests pass after implementation. You are read-only for source code.

## Verification Protocols

### Spec Review (`{name}-spec-review`)

Read `_project_specs/features/{name}.md` and verify:
- Has clear description
- Has numbered acceptance criteria
- Has test cases table (Test, Input, Expected Output)
- Has dependencies listed
- Criteria are testable, not vague

If incomplete: message feature agent with what's missing. Do NOT mark complete.

### RED Phase (`{name}-tests-fail-verify`)

1. Run the project's test command
2. ALL new tests must FAIL (not error from imports — actual test failures)
3. Every spec test case must have a corresponding test

If tests pass: message feature agent to rewrite tests.
If tests fail: mark complete, message feature agent to proceed.

### GREEN Phase (`{name}-tests-pass-verify`)

1. Run full test suite (not just new tests)
2. ALL tests must pass
3. Coverage >= 80%
4. **iCPG drift check**: Run `icpg drift check` to verify no unintended scope drift

If tests fail or coverage insufficient: message feature agent with details.
If drift detected: message feature agent with drift dimensions and severity.
If all pass and no drift: mark complete, message feature agent to proceed.

### Spec-Intent Alignment (`{name}-spec-review`)

During spec review, also verify:
- The feature's ReasonNode exists in iCPG (`icpg query context` on scope files)
- Scope in spec matches scope in ReasonNode
- No DUPLICATES edges flagged for this intent

## Rules

- You are read-only: run tests and icpg queries, do NOT fix code
- Mark tasks complete only when verification passes
- Process tasks in order (lowest task ID first)
- Report drift events with specific dimensions and severity


================================================
FILE: skills/agent-teams/agents/security.md
================================================
---
name: security-agent
description: Performs security analysis on completed features - OWASP scanning, secrets detection, dependency audit. Blocks on Critical/High.
model: sonnet
tools: [Read, Glob, Grep, Bash, TaskUpdate, TaskList, TaskGet, SendMessage]
disallowedTools: [Write, Edit]
maxTurns: 20
effort: high
---

# Security Agent

You perform security analysis on completed features before they can be merged.

## Security Scan Protocol

For each `{name}-security-scan` task:

### 1. Identify Changed Files
Use `git diff main --name-only` to identify feature files.

### 2. Secrets Detection
Check for: hardcoded API keys (sk-, pk_, api_key, secret), passwords, tokens, connection strings with credentials, .env committed to git.

### 3. OWASP Top 10
Check for: SQL injection (raw queries with string interpolation), XSS (innerHTML with user input), broken auth (missing auth on protected routes), insecure crypto (MD5/SHA1 for passwords), SSRF (user-controlled URLs), path traversal, mass assignment, missing rate limits on auth.

### 4. Dependency Audit
Run `npm audit` or `safety check`. Flag known vulnerabilities.

### 5. Environment Variables
Verify no secrets in VITE_*, NEXT_PUBLIC_*, REACT_APP_* vars.

## Severity and Blocking

| Severity | Action |
|----------|--------|
| Critical | Block merge. Must fix. |
| High | Block merge. Should fix. |
| Medium | Advisory. Can merge. |
| Low | Informational. |

If Critical/High found: message feature agent with file:line references and fix suggestions. Do NOT mark complete.
If clean: mark complete, message merger-agent.

## Rules

- Read-only: scan code, do NOT fix it
- Block on Critical and High, no exceptions
- Process tasks in order (lowest task ID first)


================================================
FILE: skills/agent-teams/agents/team-lead.md
================================================
---
name: team-lead
description: Orchestrates the agent team - creates tasks, spawns feature agents, monitors progress. Never writes code.
model: sonnet
tools: [Read, Glob, Grep, TaskCreate, TaskUpdate, TaskList, TaskGet, SendMessage, TeamCreate]
disallowedTools: [Write, Edit, Bash]
maxTurns: 50
effort: high
---

# Team Lead Agent

You orchestrate work. You do NOT implement.

## Responsibilities

1. Read `_project_specs/features/*.md` to identify all features
2. **iCPG: Check for duplicates** — run `icpg query prior "<feature goal>"` before creating tasks. If >0.75 similarity, warn user.
3. **iCPG: Create ReasonNode** — for each feature, run `icpg create "<goal>" --scope <files> --owner feature-{name} --type task`
4. For each feature, create the full 10-task dependency chain
5. Spawn one feature agent per feature
6. Assign initial tasks (spec-writing) to feature agents
7. Monitor TaskList continuously for progress and blockers
8. Handle blocked tasks and reassign if needed
9. Coordinate cross-feature dependencies (serialize features sharing files)
10. When all PRs are created, send `shutdown_request` to all agents

## Task Chain Template (per feature)

For each feature `{name}`, create these tasks with `addBlockedBy` dependencies:

1. `{name}-spec` — owner: feature-{name}
2. `{name}-spec-review` — owner: quality-agent, blockedBy: [1]
3. `{name}-tests` — owner: feature-{name}, blockedBy: [2]
4. `{name}-tests-fail-verify` — owner: quality-agent, blockedBy: [3]
5. `{name}-implement` — owner: feature-{name}, blockedBy: [4]
6. `{name}-tests-pass-verify` — owner: quality-agent, blockedBy: [5]
7. `{name}-validate` — owner: feature-{name}, blockedBy: [6]
8. `{name}-code-review` — owner: review-agent, blockedBy: [7]
9. `{name}-security-scan` — owner: security-agent, blockedBy: [8]
10. `{name}-branch-pr` — owner: merger-agent, blockedBy: [9]

## Cross-Feature Dependencies

If two features share files:
1. Use `addBlockedBy` to make the second feature's implement task blocked by the first feature's branch-pr task (the second feature cannot start implementing until the first feature's PR exists)
2. Message both feature agents about the serialization

## Completion Protocol

When all `{name}-branch-pr` tasks are completed:
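1. Verify all PRs were created: confirm each `{name}-branch-pr` task reports a PR URL (via TaskGet or the merger-agent's message), or ask merger-agent to run `gh pr list` — the team lead has no Bash access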
2. Send broadcast: "All features complete. Shutting down team."
3. Send `shutdown_request` to each agent


================================================
FILE: skills/agentic-development/SKILL.md
================================================
---
name: agentic-development
description: Build AI agents with Pydantic AI (Python) and Claude SDK (Node.js)
when-to-use: When building AI agents, tool-using LLM systems, or agentic workflows
user-invocable: false
effort: high
---

# Agentic Development Skill


For building autonomous AI agents that perform multi-step tasks with tools.

**Sources:** [Claude Agent SDK](https://docs.anthropic.com/en/docs/agents-and-tools/claude-agent-sdk) | [Anthropic Claude Code Best Practices](https://www.anthropic.com/engineering/claude-code-best-practices) | [Pydantic AI](https://ai.pydantic.dev/) | [Google Gemini Agent Development](https://developers.googleblog.com/en/building-agents-google-gemini-open-source-frameworks/) | [OpenAI Building Agents](https://developers.openai.com/tracks/building-agents/)

---

## Framework Selection by Language

| Language/Framework | Default | Why |
|-------------------|---------|-----|
| **Python** | **Pydantic AI** | Type-safe, Pydantic validation, multi-model, production-ready |
| **Node.js / Next.js** | **Claude Agent SDK** | Official Anthropic SDK, tools, multi-agent, native streaming |

### Python: Pydantic AI (Default)
```python
import asyncio

from pydantic import BaseModel
from pydantic_ai import Agent

class SearchResult(BaseModel):
    """One search hit in the agent's structured output."""
    title: str
    url: str
    summary: str

# Model ids are provider-prefixed ("anthropic:...").
# `output_type` tells Pydantic AI to validate the model's reply into
# a list of SearchResult before handing it back.
agent = Agent(
    'anthropic:claude-sonnet-4-20250514',
    output_type=list[SearchResult],
    system_prompt='You are a research assistant.',
)

async def main() -> None:
    """Run one query and print each validated result."""
    # Type-safe result: `result.output` is already list[SearchResult]
    result = await agent.run('Find articles about AI agents')
    for item in result.output:
        print(f"{item.title}: {item.url}")

if __name__ == '__main__':
    # `await` is only valid inside a coroutine, so drive it with asyncio.
    asyncio.run(main())
```

### Node.js / Next.js: Claude Agent SDK (Default)
```typescript
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic();

// Define tools
const tools: Anthropic.Tool[] = [
  {
    name: "web_search",
    description: "Search the web for information",
    input_schema: {
      type: "object",
      properties: {
        query: { type: "string", description: "Search query" },
      },
      required: ["query"],
    },
  },
];

// Agentic loop
/**
 * Runs the agentic loop for a single user prompt.
 *
 * Sends the conversation to the model, executes every tool the model asks
 * for, feeds the results back, and repeats until the model stops requesting
 * tools.
 *
 * @param prompt - The user's request that starts the conversation.
 * @returns The text of the first text block in the final response, or
 *   `undefined` if the final response contains no text block.
 */
async function runAgent(prompt: string) {
  const messages: Anthropic.MessageParam[] = [
    { role: "user", content: prompt },
  ];

  while (true) {
    const response = await client.messages.create({
      model: "claude-sonnet-4-20250514",
      max_tokens: 4096,
      tools,
      messages,
    });

    // One turn may contain several tool_use blocks. The type guard narrows
    // the content union so `.name`, `.input` and `.id` are available.
    const toolUses = response.content.filter(
      (b): b is Anthropic.ToolUseBlock => b.type === "tool_use",
    );

    if (response.stop_reason === "tool_use" && toolUses.length > 0) {
      // Echo the assistant turn back unchanged so tool_use ids line up.
      messages.push({ role: "assistant", content: response.content });

      // Every tool_use block must be answered by a tool_result with the
      // same id in the next user message; answering only the first one
      // makes the follow-up request invalid.
      const toolResults: Anthropic.ToolResultBlockParam[] = [];
      for (const toolUse of toolUses) {
        const result = await executeTool(toolUse.name, toolUse.input);
        toolResults.push({
          type: "tool_result",
          tool_use_id: toolUse.id,
          content: result,
        });
      }

      messages.push({ role: "user", content: toolResults });
      continue;
    }

    // Done - return final response
    return response.content.find(
      (b): b is Anthropic.TextBlock => b.type === "text",
    )?.text;
  }
}
```

---

## Core Principle

**Plan first, act incrementally, verify always.**

Agents that research and plan before executing consistently outperform those that jump straight to action. Break complex tasks into verifiable steps, use tools judiciously, and maintain clear state throughout execution.

---

## Agent Architecture

### Three Components (OpenAI)
```
┌─────────────────────────────────────────────────┐
│                    AGENT                        │
├─────────────────────────────────────────────────┤
│  Model (Brain)      │ LLM for reasoning &       │
│                     │ decision-making           │
├─────────────────────┼───────────────────────────┤
│  Tools (Arms/Legs)  │ APIs, functions, external │
│                     │ systems for action        │
├─────────────────────┼───────────────────────────┤
│  Instructions       │ System prompts defining   │
│  (Rules)            │ behavior & boundaries     │
└─────────────────────┴───────────────────────────┘
```

### Project Structure
```
project/
├── src/
│   ├── agents/
│   │   ├── orchestrator.ts    # Main agent coordinator
│   │   ├── specialized/       # Task-specific agents
│   │   │   ├── researcher.ts
│   │   │   ├── coder.ts
│   │   │   └── reviewer.ts
│   │   └── base.ts            # Shared agent interface
│   ├── tools/
│   │   ├── definitions/       # Tool schemas
│   │   ├── implementations/   # Tool logic
│   │   └── registry.ts        # Tool discovery
│   ├── prompts/
│   │   ├── system/            # Agent instructions
│   │   └── templates/         # Task templates
│   └── memory/
│       ├── conversation.ts    # Short-term context
│       └── persistent.ts      # Long-term storage
├── tests/
│   ├── agents/                # Agent behavior tests
│   ├── tools/                 # Tool unit tests
│   └── evals/                 # End-to-end evaluations
└── skills/                    # Agent skills (Anthropic pattern)
    ├── skill-name/
    │   ├── instructions.md
    │   ├── scripts/
    │   └── resources/
```

---

## Workflow Pattern: Explore-Plan-Execute-Verify

### 1. Explore Phase
```typescript
// Gather context before acting
/**
 * Collects the context an agent needs before it plans or acts.
 *
 * The three lookups run one after another: pattern analysis needs the
 * file search result, and dependency discovery is kept last as in the
 * original ordering.
 */
async function explore(task: Task): Promise<Context> {
  const files = await agent.searchCodebase(task.query);
  const patterns = await agent.analyzePatterns(files);
  const deps = await agent.identifyDependencies(task);

  return {
    relevantFiles: files,
    existingPatterns: patterns,
    dependencies: deps,
  };
}
```

### 2. Plan Phase (Critical)
```typescript
// Plan explicitly before execution
/**
 * Asks the LLM for a step-by-step plan for `task`, given the context
 * gathered beforehand. The reply is requested against `PlanSchema`.
 */
async function plan(task: Task, context: Context): Promise<Plan> {
  // The prompt embeds the task description and the serialized context.
  const planningPrompt = `
    Task: ${task.description}
    Context: ${JSON.stringify(context)}

    Create a step-by-step plan. For each step:
    1. What action to take
    2. What tools to use
    3. How to verify success
    4. What could go wrong

    Output JSON with steps array.
  `;

  const planned = await llmCall({ prompt: planningPrompt, schema: PlanSchema });
  return planned;
}
```

### 3. Execute Phase
```typescript
// Execute with verification at each step
/**
 * Runs the plan's steps in order, verifying each one before moving on.
 *
 * @param plan - Plan whose `steps` are executed sequentially.
 * @returns The per-step results collected so far, or whatever
 *   `handleFailure` returns when a step fails verification and
 *   self-correction does not succeed.
 */
async function execute(plan: Plan): Promise<Result[]> {
  const results: Result[] = [];

  for (const step of plan.steps) {
    // Execute single step
    const result = await executeStep(step);

    // Verify before continuing
    if (!await verify(step, result)) {
      // Self-correct or escalate
      const corrected = await selfCorrect(step, result);
      if (!corrected.success) {
        // Stop at the first unrecoverable step; later steps never run.
        return handleFailure(step, results);
      }
    }

    // NOTE(review): this records the ORIGINAL `result` even when it failed
    // verification and `selfCorrect` succeeded — confirm whether the
    // corrected output should be recorded instead.
    results.push(result);
  }

  return results;
}
```

### 4. Verify Phase
```typescript
// Independent verification prevents overfitting
/**
 * Decides whether a step's result is acceptable.
 *
 * Two gates, in order: the step's own test command (when it has one),
 * then an LLM judgement of the result against the step's success criteria.
 *
 * @returns `false` as soon as the test command fails; otherwise the
 *   `passes` field of the LLM's verdict.
 */
async function verify(step: Step, result: Result): Promise<boolean> {
  // Gate 1: a failing test command short-circuits — no LLM call is made.
  if (step.testCommand) {
    const { success } = await runCommand(step.testCommand);
    if (!success) {
      return false;
    }
  }

  // Gate 2: ask the LLM to compare the actual result with the criteria.
  const prompt = `
      Step: ${step.description}
      Expected: ${step.successCriteria}
      Actual: ${JSON.stringify(result)}

      Does the result satisfy the success criteria?
      Respond with { "passes": boolean, "reasoning": string }
    `;
  const { passes } = await llmCall({ prompt, schema: VerificationSchema });

  return passes;
}
```

---

## Tool Design

### Tool Definition Pattern
```typescript
// tools/definitions/file-operations.ts
import { z } from 'zod';

// Tool definition for reading a file, optionally restricted to a line range.
export const ReadFileTool = {
  name: 'read_file',
  description: 'Read contents of a file. Use before modifying any file.',
  // Both line bounds are optional and described to the model as 1-indexed.
  parameters: z.object({
    path: z.string().describe('Absolute path to the file'),
    startLine: z.number().optional().describe('Start line (1-indexed)'),
    endLine: z.number().optional().describe('End line (1-indexed)'),
  }),
  // Risk level for guardrails (OpenAI pattern)
  riskLevel: 'low' as const,
};

// Tool definition for writing a file; `content` is the complete file content.
export const WriteFileTool = {
  name: 'write_file',
  description: 'Write content to a file. Always read first to understand context.',
  parameters: z.object({
    path: z.string().describe('Absolute path to the file'),
    content: z.string().describe('Complete file content'),
  }),
  riskLevel: 'medium' as const,
  // Writes ask for confirmation even though the risk level is only 'medium'
  requiresConfirmation: true,
};
```

### Tool Implementation
```typescript
// tools/implementations/file-operations.ts
/**
 * Reads a UTF-8 file and returns the requested line range.
 *
 * @param params - `path` plus optional 1-indexed, inclusive `startLine` /
 *   `endLine`. Missing bounds default to the start / end of the file.
 *   Bounds below 1 are clamped rather than counted from the end of the file.
 * @returns On success, the selected lines joined with '\n' and the file's
 *   total line count in `metadata`. On failure, `success: false` with a
 *   message; the function does not throw for read errors.
 */
export async function readFile(
  params: z.infer<typeof ReadFileTool.parameters>
): Promise<ToolResult> {
  try {
    const content = await fs.readFile(params.path, 'utf-8');
    const lines = content.split('\n');

    // Convert the 1-indexed start to a 0-indexed slice offset. Clamp at 0:
    // a negative offset would make slice() count from the END of the array.
    const start = Math.max((params.startLine ?? 1) - 1, 0);
    // endLine is inclusive and 1-indexed, which equals slice()'s exclusive
    // 0-indexed end. Clamp for the same reason as above.
    const end = Math.max(params.endLine ?? lines.length, 0);

    return {
      success: true,
      data: lines.slice(start, end).join('\n'),
      metadata: { totalLines: lines.length }
    };
  } catch (error) {
    // Caught values are `unknown`; only Error instances carry `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      error: `Failed to read file: ${reason}`
    };
  }
}
```

### Prefer Built-in Tools (OpenAI)
```typescript
// Use platform-provided tools when available
const agent = createAgent({
  tools: [
    // Built-in tools (handled by platform)
    { type: 'web_search' },
    { type: 'code_interpreter' },

    // Custom tools only when needed
    { type: 'function', function: customDatabaseTool },
  ],
});
```

---

## Multi-Agent Patterns

### Single Agent (Default)
Use one agent for most tasks. Multiple agents add complexity.

### Agent-as-Tool Pattern (OpenAI)
```typescript
// Expose specialized agents as callable tools
const researchAgent = createAgent({
  name: 'researcher',
  instructions: 'You research topics and return structured findings.',
  tools: [webSearchTool, documentReadTool],
});

const mainAgent = createAgent({
  tools: [
    {
      type: 'function',
      function: {
        name: 'research_topic',
        description: 'Delegate research to specialized agent',
        parameters: ResearchQuerySchema,
        handler: async (query) => researchAgent.run(query),
      },
    },
  ],
});
```

### Handoff Pattern (OpenAI)
```typescript
// One-way transfer between agents
const customerServiceAgent = createAgent({
  tools: [
    // Handoff to specialist when needed
    {
      name: 'transfer_to_billing',
      description: 'Transfer to billing specialist for payment issues',
      handler: async (context) => {
        return { handoff: 'billing_agent', context };
      },
    },
  ],
});
```

### When to Use Multiple Agents
- Separate task domains with non-overlapping tools
- Different authorization levels needed
- Complex workflows with clear handoff points
- Parallel execution of independent subtasks

---

## Memory & State

### Conversation Memory
```typescript
// memory/conversation.ts
interface ConversationMemory {
  messages: Message[];
  maxTokens: number;

  add(message: Message): void;
  getContext(): Message[];
  summarize(): Promise<string>;
}

// Maintain state across tool calls (Gemini pattern)
interface AgentState {
  thoughtSignature?: string;  // Encrypted reasoning state
  conversationId: string;     // For shared memory
  currentPlan?: Plan;
  completedSteps: Step[];
}
```

### Persistent Memory
```typescript
// memory/persistent.ts
interface PersistentMemory {
  // Store learnings across sessions
  store(key: string, value: any): Promise<void>;
  retrieve(key: string): Promise<any>;

  // Semantic search over past interactions
  search(query: string, limit: number): Promise<Memory[]>;
}
```

---

## Guardrails & Safety

### Multi-Layer Protection (OpenAI)
```typescript
// guards/index.ts
interface GuardrailConfig {
  // Input validation
  inputClassifier: (input: string) => Promise<SafetyResult>;

  // Output validation
  outputValidator: (output: string) => Promise<SafetyResult>;

  // Tool risk assessment
  toolRiskLevels: Record<string, 'low' | 'medium' | 'high'>;

  // Actions requiring human approval
  humanInTheLoop: string[];
}

/**
 * Runs an agent with three guardrail layers: input classification,
 * per-tool approval, and output validation.
 *
 * @param agent - Agent to run.
 * @param input - Raw user input; checked before the agent sees it.
 * @param config - Classifier, validator, tool risk table, and the list of
 *   tools that always need human approval.
 * @returns `{ blocked: true, reason }` when the input or the output is
 *   judged unsafe; otherwise the agent's own result.
 */
async function executeWithGuardrails(
  agent: Agent,
  input: string,
  config: GuardrailConfig
): Promise<Result> {
  // 1. Check input safety — an unsafe input never reaches the agent
  const inputCheck = await config.inputClassifier(input);
  if (!inputCheck.safe) {
    return { blocked: true, reason: inputCheck.reason };
  }

  // 2. Execute with tool monitoring
  const result = await agent.run(input, {
    beforeTool: async (tool, params) => {
      // A tool with no entry in toolRiskLevels yields `undefined` here, which
      // is not 'high': unlisted tools are auto-approved unless they are named
      // in humanInTheLoop.
      const risk = config.toolRiskLevels[tool.name];
      if (risk === 'high' || config.humanInTheLoop.includes(tool.name)) {
        return await requestHumanApproval(tool, params);
      }
      return { approved: true };
    },
  });

  // 3. Validate output — runs after the agent (and its tools) have finished
  const outputCheck = await config.outputValidator(result.output);
  if (!outputCheck.safe) {
    return { blocked: true, reason: outputCheck.reason };
  }

  return result;
}
```

### Scope Enforcement (OpenAI)
```typescript
// Agent must stay within defined scope
const agentInstructions = `
You are a customer service agent for Acme Corp.

SCOPE BOUNDARIES (non-negotiable):
- Only answer questions about Acme products and services
- Never provide legal, medical, or financial advice
- Never access or modify data outside your authorized scope
- If a request is out of scope, politely decline and explain why

If you cannot complete a task within scope, notify the user
and request explicit approval before proceeding.
`;
```

---

## Model Selection

### Match Model to Task
| Task Complexity | Recommended Model | Notes |
|-----------------|-------------------|-------|
| Simple, fast | gpt-5-mini, claude-haiku | Low latency |
| General purpose | gpt-4.1, claude-sonnet | Balance |
| Complex reasoning | o4-mini, claude-opus | Higher accuracy |
| Deep planning | gpt-5 + reasoning, ultrathink | Maximum capability |

### Gemini-Specific
```typescript
// Use thinking_level for reasoning depth
const response = await gemini.generate({
  model: 'gemini-3',
  thinking_level: 'high',  // For complex planning
  temperature: 1.0,        // Optimized for reasoning engine
});

// Preserve thought state across tool calls
const nextResponse = await gemini.generate({
  thoughtSignature: response.thoughtSignature,  // Required for function calling
  // ... rest of params
});
```

### Claude-Specific (Thinking Modes)
```typescript
// Trigger extended thinking with keywords
const thinkingLevels = {
  'think': 'standard analysis',
  'think hard': 'deeper reasoning',
  'think harder': 'extensive analysis',
  'ultrathink': 'maximum reasoning budget',
};

const prompt = `
Think hard about this problem before proposing a solution.

Task: ${task.description}
`;
```

---

## Testing Agents

### Unit Tests (Tools)
```typescript
describe('readFile tool', () => {
  it('reads file content correctly', async () => {
    const result = await readFile({ path: '/test/file.txt' });
    expect(result.success).toBe(true);
    expect(result.data).toContain('expected content');
  });
});
```

### Behavior Tests (Agent Decisions)
```typescript
describe('agent planning', () => {
  it('creates plan before executing file modifications', async () => {
    const trace = await agent.runWithTrace('Refactor the auth module');

    // Verify planning happened first
    const firstToolCall = trace.toolCalls[0];
    expect(firstToolCall.name).toBe('read_file');

    // Verify no writes without reads
    const writeIndex = trace.toolCalls.findIndex(t => t.name === 'write_file');
    const readIndex = trace.toolCalls.findIndex(t => t.name === 'read_file');
    expect(readIndex).toBeLessThan(writeIndex);
  });
});
```

### Evaluation Tests
```typescript
// Run nightly, not in regular CI
describe('Agent Accuracy (Eval)', () => {
  const testCases = loadTestCases('./evals/coding-tasks.json');

  it.each(testCases)('completes $name correctly', async (testCase) => {
    const result = await agent.run(testCase.input);

    // Verify against expected outcomes
    expect(result.filesModified).toEqual(testCase.expectedFiles);
    expect(await runTests(testCase.testCommand)).toBe(true);
  }, 120000);
});
```

---

## Pydantic AI Patterns (Python Default)

### Project Structure (Python)
```
project/
├── src/
│   ├── agents/
│   │   ├── __init__.py
│   │   ├── researcher.py       # Research agent
│   │   ├── coder.py            # Coding agent
│   │   └── orchestrator.py     # Main coordinator
│   ├── tools/
│   │   ├── __init__.py
│   │   ├── web.py              # Web search tools
│   │   ├── files.py            # File operations
│   │   └── database.py         # DB queries
│   ├── models/
│   │   ├── __init__.py
│   │   └── schemas.py          # Pydantic models
│   └── deps.py                 # Dependencies
├── tests/
│   ├── test_agents.py
│   └── test_tools.py
└── pyproject.toml
```

### Agent with Tools
```python
from dataclasses import dataclass

from httpx import AsyncClient
from pydantic import BaseModel
from pydantic_ai import Agent, RunContext

class SearchResult(BaseModel):
    """One search hit in the agent's structured output."""
    title: str
    url: str
    snippet: str

@dataclass
class ResearchDeps:
    """Runtime dependencies handed to every tool call via RunContext.

    A plain dataclass rather than a Pydantic model: Pydantic cannot build a
    validation schema for AsyncClient, so declaring it as a BaseModel field
    fails at class-definition time unless arbitrary types are enabled.
    """
    http_client: AsyncClient
    api_key: str

# Model ids are provider-prefixed; `output_type` is the validated return type.
research_agent = Agent(
    'anthropic:claude-sonnet-4-20250514',
    deps_type=ResearchDeps,
    output_type=list[SearchResult],
    system_prompt='You are a research assistant. Use tools to find information.',
)

@research_agent.tool
async def web_search(ctx: RunContext[ResearchDeps], query: str) -> list[dict]:
    """Search the web for information."""
    response = await ctx.deps.http_client.get(
        'https://api.search.com/search',
        params={'q': query},
        headers={'Authorization': f'Bearer {ctx.deps.api_key}'},
    )
    return response.json()['results']

@research_agent.tool
async def read_webpage(ctx: RunContext[ResearchDeps], url: str) -> str:
    """Read and extract content from a webpage."""
    response = await ctx.deps.http_client.get(url)
    return response.text[:5000]  # Truncate for context

# Usage
async def main():
    """Run one research query with a shared HTTP client and print the titles."""
    async with AsyncClient() as client:
        deps = ResearchDeps(http_client=client, api_key='...')
        result = await research_agent.run(
            'Find recent articles about LLM agents',
            deps=deps,
        )
        # `result.output` is the validated list[SearchResult]
        for item in result.output:
            print(f"- {item.title}")
```

### Structured Output with Validation
```python
from pydantic import BaseModel, Field
from pydantic_ai import Agent

class CodeReview(BaseModel):
    """Structured verdict the review agent must return."""
    summary: str = Field(description="Brief summary of the review")
    issues: list[str] = Field(description="List of issues found")
    suggestions: list[str] = Field(description="Improvement suggestions")
    approval: bool = Field(description="Whether code is approved")
    confidence: float = Field(ge=0, le=1, description="Confidence score")

review_agent = Agent(
    'claude-sonnet-4-20250514',
    result_type=CodeReview,
    system_prompt='Review code for quality, security, and best practices.',
)

async def review_code(code: str) -> CodeReview:
    """Run the review agent on ``code``, print the verdict, and return it.

    Wrapped in a coroutine because ``await`` is only valid inside ``async def``
    and ``code`` has to come from the caller.
    """
    # Result is validated Pydantic model
    result = await review_agent.run(f"Review this code:\n```python\n{code}\n```")
    review = result.data
    if review.approval:
        print("Code approved!")
    else:
        for issue in review.issues:
            print(f"Issue: {issue}")
    return review
```

### Multi-Agent Coordination
```python
from pydantic import BaseModel
from pydantic_ai import Agent

class Plan(BaseModel):
    """Structured planner output: the ordered steps the executor will run."""
    steps: list[str]

# Specialized agents
planner = Agent(
    'claude-sonnet-4-20250514',
    # Without a structured result type plan.data is a plain str and
    # plan.data.steps raises AttributeError.
    result_type=Plan,
    system_prompt='Create detailed plans.',
)
executor = Agent('claude-sonnet-4-20250514', system_prompt='Execute tasks precisely.')
reviewer = Agent('claude-sonnet-4-20250514', system_prompt='Review and verify work.')

async def orchestrate(task: str):
    """Plan ``task``, execute each planned step in order, then review the results.

    Returns the reviewer agent's output.
    """
    # 1. Plan
    plan = await planner.run(f"Create a plan for: {task}")

    # 2. Execute each step
    results = []
    for step in plan.data.steps:
        result = await executor.run(f"Execute: {step}")
        results.append(result.data)

    # 3. Review
    review = await reviewer.run(
        f"Review the results:\nTask: {task}\nResults: {results}"
    )

    return review.data
```

### Streaming Responses
```python
from pydantic_ai import Agent

agent = Agent('claude-sonnet-4-20250514')

async def stream_response(prompt: str):
    """Print the reply as it is generated and return the complete text."""
    pieces = []
    async with agent.run_stream(prompt) as response:
        # delta=True yields only the newly generated text. The default stream
        # yields the whole text so far on every iteration, so printing each
        # chunk would repeat everything already shown.
        async for chunk in response.stream_text(delta=True):
            print(chunk, end='', flush=True)
            pieces.append(chunk)

    # Assemble the final result from the deltas rather than touching the
    # stream after its context has been closed.
    return ''.join(pieces)
```

### Testing Agents
```python
import pytest
from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel

@pytest.fixture
def test_agent():
    """Agent backed by TestModel, so no real model API is called."""
    return Agent(
        TestModel(),  # Mock model for testing
        result_type=str,
    )

async def test_agent_response(test_agent):
    result = await test_agent.run('Test prompt')
    assert result.data is not None

# Test with specific responses
async def test_with_mock_response():
    # TestModel has no seed_response(); the canned reply is a constructor
    # argument. NOTE(review): releases that use output_type/.output name this
    # argument custom_output_text - match it to the installed version.
    model = TestModel(custom_result_text='Expected output')

    agent = Agent(model)
    result = await agent.run('Any prompt')
    assert result.data == 'Expected output'
```

---

## Skills Pattern (Anthropic)

### Skill Structure
```
skills/
└── code-review/
    ├── instructions.md      # How to perform code reviews
    ├── scripts/
    │   └── run-linters.sh   # Supporting scripts
    └── resources/
        └── checklist.md     # Review checklist
```

### instructions.md Example
```markdown
# Code Review Skill

## When to Use
Activate this skill when asked to review code, PRs, or diffs.

## Process
1. Read the changed files completely
2. Run linters: `./scripts/run-linters.sh`
3. Check against resources/checklist.md
4. Provide structured feedback

## Output Format
- Summary (1-2 sentences)
- Issues found (severity: critical/major/minor)
- Suggestions for improvement
- Approval recommendation
```

### Loading Skills Dynamically
```typescript
/**
 * Load one skill from `./skills/<skillName>`.
 *
 * Reads `instructions.md`, lists everything under `scripts/` and
 * `resources/`, and loads each resource with `loadResource`.
 */
async function loadSkill(skillName: string): Promise<Skill> {
  const root = `./skills/${skillName}`;

  const instructions = await fs.readFile(`${root}/instructions.md`, 'utf-8');
  const scriptPaths = await glob(`${root}/scripts/*`);
  const resourcePaths = await glob(`${root}/resources/*`);

  // Scripts are only described by name and path; they are not read here.
  const scripts = scriptPaths.map((scriptPath) => {
    return { name: path.basename(scriptPath), path: scriptPath };
  });
  const resources = await Promise.all(resourcePaths.map(loadResource));

  return { name: skillName, instructions, scripts, resources };
}
```

---

## Anti-Patterns

- **No planning before execution** - Agents that jump to action make more errors
- **Monolithic agents** - One agent with 50 tools becomes confused
- **No verification** - Agents must verify their own work
- **Hardcoded tool sequences** - Let the model decide tool order
- **Missing guardrails** - All agents need safety boundaries
- **No state management** - Agents lose context across tool calls
- **Testing only happy paths** - Test failures and edge cases
- **Ignoring model differences** - Reasoning models need different prompts
- **No cost tracking** - Agentic workflows can be expensive
- **Full automation without oversight** - Human-in-the-loop for critical actions

---

## Quick Reference

### Agent Development Checklist
- [ ] Define clear agent scope and boundaries
- [ ] Design tools with explicit schemas and risk levels
- [ ] Implement explore-plan-execute-verify workflow
- [ ] Add multi-layer guardrails
- [ ] Set up conversation and persistent memory
- [ ] Write behavior and evaluation tests
- [ ] Configure appropriate model for task complexity
- [ ] Add human-in-the-loop for high-risk operations
- [ ] Monitor token usage and costs
- [ ] Document skills and instructions

### Thinking Triggers (Claude)
```
"think"        → Standard analysis
"think hard"   → Deeper reasoning
"think harder" → Extensive analysis
"ultrathink"   → Maximum reasoning
```

### Gemini Settings
```
thinking_level: "high" | "low"
temperature: 1.0 (keep at 1.0 for reasoning)
thoughtSignature: <pass back for function calling>
```


================================================
FILE: skills/ai-models/SKILL.md
================================================
---
name: ai-models
description: Latest AI models reference - Claude, OpenAI, Gemini, Eleven Labs, Replicate
when-to-use: When choosing models, comparing capabilities, or referencing model specs
user-invocable: true
effort: low
---

# AI Models Reference Skill


**Last Updated: December 2025**

## Philosophy

**Use the right model for the job.** Bigger isn't always better - match model capabilities to task requirements. Consider cost, latency, and accuracy tradeoffs.

## Model Selection Matrix

| Task | Recommended | Why |
|------|-------------|-----|
| Complex reasoning | Claude Opus 4.5, o3, Gemini 3 Pro | Highest accuracy |
| Fast chat/completion | Claude Haiku, GPT-4.1 mini, Gemini Flash | Low latency, cheap |
| Code generation | Claude Sonnet 4.5, Codestral, GPT-4.1 | Strong coding |
| Vision/images | Claude Sonnet, GPT-4o, Gemini 3 Pro | Multimodal |
| Embeddings | text-embedding-3-small, Voyage | Cost-effective |
| Voice synthesis | Eleven Labs v3, OpenAI TTS | Natural sounding |
| Image generation | FLUX.2, DALL-E 3, SD 3.5 | Different styles |

---

## Anthropic (Claude)

### Documentation
- **API Docs**: https://docs.anthropic.com
- **Models Overview**: https://docs.anthropic.com/en/docs/about-claude/models/overview
- **Pricing**: https://www.anthropic.com/pricing

### Latest Models (December 2025)

```typescript
// Model ID strings accepted by the Anthropic Messages API.
const CLAUDE_MODELS = {
  // Flagship - highest capability
  opus: 'claude-opus-4-5-20251101',

  // Balanced - best for most tasks
  sonnet: 'claude-sonnet-4-5-20250929',

  // Previous generation (still excellent)
  opus4: 'claude-opus-4-20250514',
  sonnet4: 'claude-sonnet-4-20250514',

  // Fast & cheap - high volume tasks
  // Claude 3.x IDs put the version before the tier name.
  haiku: 'claude-3-5-haiku-20241022',
} as const;
```

### Usage
```typescript
import Anthropic from '@anthropic-ai/sdk';

const anthropic = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY,
});

const response = await anthropic.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    { role: 'user', content: 'Hello, Claude!' }
  ],
});
```

### Model Selection
```
claude-opus-4-5-20251101 (Opus 4.5)
├── Best for: Complex analysis, research, nuanced writing
├── Context: 200K tokens
├── Cost: $5/$25 per 1M tokens (input/output)
└── Use when: Accuracy matters most

claude-sonnet-4-5-20250929 (Sonnet 4.5)
├── Best for: Code, general tasks, balanced performance
├── Context: 200K tokens
├── Cost: $3/$15 per 1M tokens
└── Use when: Default choice for most applications

claude-3-5-haiku-20241022 (Haiku 3.5)
├── Best for: Classification, extraction, high-volume
├── Context: 200K tokens
├── Cost: $0.80/$4 per 1M tokens
└── Use when: Speed and cost matter most
```

---

## OpenAI

### Documentation
- **API Docs**: https://platform.openai.com/docs
- **Models**: https://platform.openai.com/docs/models
- **Pricing**: https://openai.com/pricing

### Latest Models (December 2025)

```typescript
const OPENAI_MODELS = {
  // GPT-5 series (latest)
  gpt5: 'gpt-5.2',
  gpt5Mini: 'gpt-5-mini',

  // GPT-4.1 series (recommended for most)
  gpt41: 'gpt-4.1',
  gpt41Mini: 'gpt-4.1-mini',
  gpt41Nano: 'gpt-4.1-nano',

  // Reasoning models (o-series)
  o3: 'o3',
  o3Pro: 'o3-pro',
  o4Mini: 'o4-mini',

  // Legacy but still useful
  gpt4o: 'gpt-4o',           // Still has audio support
  gpt4oMini: 'gpt-4o-mini',

  // Embeddings
  embeddingSmall: 'text-embedding-3-small',
  embeddingLarge: 'text-embedding-3-large',

  // Image generation
  dalle3: 'dall-e-3',
  gptImage: 'gpt-image-1',

  // Audio
  tts: 'tts-1',
  ttsHd: 'tts-1-hd',
  whisper: 'whisper-1',
} as const;
```

### Usage
```typescript
import OpenAI from 'openai';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// Chat completion
const response = await openai.chat.completions.create({
  model: 'gpt-4.1',
  messages: [
    { role: 'user', content: 'Hello!' }
  ],
});

// With vision
const visionResponse = await openai.chat.completions.create({
  model: 'gpt-4.1',
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        { type: 'image_url', image_url: { url: 'https://...' } },
      ],
    },
  ],
});

// Embeddings
const embedding = await openai.embeddings.create({
  model: 'text-embedding-3-small',
  input: 'Your text here',
});
```

### Model Selection
```
o3 / o3-pro
├── Best for: Math, coding, complex multi-step reasoning
├── Context: 200K tokens
├── Cost: Premium pricing
└── Use when: Hardest problems, need chain-of-thought

gpt-4.1
├── Best for: General tasks, coding, instruction following
├── Context: 1M tokens (!)
├── Cost: Lower than GPT-4o
└── Use when: Default choice, replaces GPT-4o

gpt-4.1-mini / gpt-4.1-nano
├── Best for: High-volume, cost-sensitive
├── Context: 1M tokens
├── Cost: Very low
└── Use when: Simple tasks at scale

o4-mini
├── Best for: Fast reasoning at low cost
├── Context: 200K tokens
├── Cost: Budget reasoning
└── Use when: Need reasoning but cost-conscious
```

---

## Google (Gemini)

### Documentation
- **API Docs**: https://ai.google.dev/docs
- **Models**: https://ai.google.dev/gemini-api/docs/models/gemini
- **Pricing**: https://ai.google.dev/pricing

### Latest Models (December 2025)

```typescript
const GEMINI_MODELS = {
  // Gemini 3 (Latest)
  gemini3Pro: 'gemini-3-pro-preview',
  gemini3ProImage: 'gemini-3-pro-image-preview',
  gemini3Flash: 'gemini-3-flash-preview',

  // Gemini 2.5 (Stable)
  gemini25Pro: 'gemini-2.5-pro',
  gemini25Flash: 'gemini-2.5-flash',
  gemini25FlashLite: 'gemini-2.5-flash-lite',

  // Specialized
  gemini25FlashTTS: 'gemini-2.5-flash-preview-tts',
  gemini25FlashAudio: 'gemini-2.5-flash-native-audio-preview-12-2025',

  // Previous generation
  gemini2Flash: 'gemini-2.0-flash',
} as const;
```

### Usage
```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';

// The constructor takes a string, so fail fast when the key is missing.
const apiKey = process.env.GOOGLE_API_KEY;
if (!apiKey) {
  throw new Error('GOOGLE_API_KEY is not set');
}

const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });

const result = await model.generateContent('Hello!');
const response = result.response.text();

// With vision
const visionModel = genAI.getGenerativeModel({ model: 'gemini-2.5-pro' });
const imagePart = {
  inlineData: {
    data: base64Image, // base64-encoded image bytes supplied by the caller
    mimeType: 'image/jpeg',
  },
};
// Named separately: a second `const result` in the same scope does not compile.
const visionResult = await visionModel.generateContent(['Describe this:', imagePart]);
```

### Model Selection
```
gemini-3-pro-preview
├── Best for: Multimodal understanding and the most demanding reasoning tasks
├── Context: 1M tokens
├── Cost: Premium
└── Use when: Need absolute best quality

gemini-2.5-pro
├── Best for: State-of-the-art thinking, complex tasks
├── Context: 1M tokens
├── Cost: $1.25/$5 per 1M tokens
└── Use when: Long context, complex reasoning

gemini-2.5-flash
├── Best for: Fast, balanced performance
├── Context: 1M tokens
├── Cost: $0.075/$0.30 per 1M tokens
└── Use when: Speed and cost matter

gemini-2.5-flash-lite
├── Best for: Ultra-fast, lowest cost
├── Context: 1M tokens
├── Cost: $0.04/$0.15 per 1M tokens
└── Use when: High volume, simple tasks
```

---

## Eleven Labs (Voice)

### Documentation
- **API Docs**: https://elevenlabs.io/docs
- **Models**: https://elevenlabs.io/docs/models
- **Pricing**: https://elevenlabs.io/pricing

### Latest Models (December 2025)

```typescript
const ELEVENLABS_MODELS = {
  // Latest - highest quality (alpha)
  v3: 'eleven_v3',

  // Production ready
  multilingualV2: 'eleven_multilingual_v2',
  turboV2_5: 'eleven_turbo_v2_5',

  // Ultra-low latency
  flashV2_5: 'eleven_flash_v2_5',
  flashV2: 'eleven_flash_v2', // English only
} as const;
```

### Usage
```typescript
import { ElevenLabsClient } from 'elevenlabs';

const elevenlabs = new ElevenLabsClient({
  apiKey: process.env.ELEVENLABS_API_KEY,
});

// Text to speech
const audio = await elevenlabs.textToSpeech.convert('voice-id', {
  text: 'Hello, world!',
  model_id: 'eleven_turbo_v2_5',
  voice_settings: {
    stability: 0.5,
    similarity_boost: 0.75,
  },
});

// Stream audio (for real-time)
const audioStream = await elevenlabs.textToSpeech.convertAsStream('voice-id', {
  text: 'Streaming audio...',
  model_id: 'eleven_flash_v2_5',
});
```

### Model Selection
```
eleven_v3 (Alpha)
├── Best for: Highest quality, emotional range
├── Latency: ~1s+ (not for real-time)
├── Languages: 74
└── Use when: Quality over speed, pre-rendered

eleven_turbo_v2_5
├── Best for: Balanced quality and speed
├── Latency: ~250-300ms
├── Languages: 32
└── Use when: Good quality with reasonable latency

eleven_flash_v2_5
├── Best for: Real-time, conversational AI
├── Latency: <75ms
├── Languages: 32
└── Use when: Live voice agents, chatbots
```

---

## Replicate

### Documentation
- **API Docs**: https://replicate.com/docs
- **Models**: https://replicate.com/explore
- **Pricing**: https://replicate.com/pricing

### Popular Models (December 2025)

```typescript
const REPLICATE_MODELS = {
  // FLUX.2 (Latest - November 2025)
  flux2Pro: 'black-forest-labs/flux-2-pro',
  flux2Flex: 'black-forest-labs/flux-2-flex',
  flux2Dev: 'black-forest-labs/flux-2-dev',

  // FLUX.1 (Still excellent)
  flux11Pro: 'black-forest-labs/flux-1.1-pro',
  fluxKontext: 'black-forest-labs/flux-kontext', // Image editing
  fluxSchnell: 'black-forest-labs/flux-schnell',

  // Video
  stableVideo4D: 'stability-ai/sv4d-2.0',

  // Audio
  musicgen: 'meta/musicgen',

  // LLMs (if needed outside main providers)
  llama: 'meta/llama-3.2-90b-vision',
} as const;
```

### Usage
```typescript
import Replicate from 'replicate';

const replicate = new Replicate({
  auth: process.env.REPLICATE_API_TOKEN,
});

// Image generation with FLUX.2
const output = await replicate.run('black-forest-labs/flux-2-pro', {
  input: {
    prompt: 'A serene mountain landscape at sunset',
    aspect_ratio: '16:9',
    output_format: 'webp',
  },
});

// Image editing with Kontext
const edited = await replicate.run('black-forest-labs/flux-kontext', {
  input: {
    image: 'https://...',
    prompt: 'Change the sky to sunset colors',
  },
});
```

### Model Selection
```
flux-2-pro
├── Best for: Highest quality, up to 4MP
├── Speed: ~6s
├── Cost: $0.015 + per megapixel
└── Use when: Professional quality needed

flux-2-flex
├── Best for: Fine details, typography
├── Speed: ~22s
├── Cost: $0.06 per megapixel
└── Use when: Need precise control

flux-2-dev (Open source)
├── Best for: Fast generation
├── Speed: ~2.5s
├── Cost: $0.012 per megapixel
└── Use when: Speed over quality

flux-kontext
├── Best for: Image editing with text
├── Speed: Variable
├── Cost: Per run
└── Use when: Edit existing images
```

---

## Stability AI

### Documentation
- **API Docs**: https://platform.stability.ai/docs/api-reference
- **Models**: https://stability.ai/stable-image
- **Pricing**: https://platform.stability.ai/pricing

### Latest Models (December 2025)

```typescript
const STABILITY_MODELS = {
  // Image generation
  sd35Large: 'sd3.5-large',
  sd35LargeTurbo: 'sd3.5-large-turbo',
  sd3Medium: 'sd3-medium',

  // Video
  sv4d: 'sv4d-2.0', // Stable Video 4D 2.0

  // Upscaling
  upscale: 'esrgan-v1-x2plus',
} as const;
```

### Usage
```typescript
// The v2beta Stable Image endpoints take multipart/form-data, not a JSON body.
const form = new FormData();
form.append('prompt', 'A futuristic city at night');
form.append('output_format', 'webp');
form.append('aspect_ratio', '16:9');
form.append('model', 'sd3.5-large');

const response = await fetch(
  'https://api.stability.ai/v2beta/stable-image/generate/sd3',
  {
    method: 'POST',
    headers: {
      // No Content-Type here: fetch sets it, including the multipart boundary.
      Authorization: `Bearer ${process.env.STABILITY_API_KEY}`,
      // Required by the API; 'image/*' returns the raw image bytes.
      Accept: 'image/*',
    },
    body: form,
  }
);
```

---

## Mistral AI

### Documentation
- **API Docs**: https://docs.mistral.ai
- **Models**: https://docs.mistral.ai/getting-started/models
- **Pricing**: https://mistral.ai/technology/#pricing

### Latest Models (December 2025)

```typescript
const MISTRAL_MODELS = {
  // Flagship
  large: 'mistral-large-latest',  // Points to 2411

  // Medium tier
  medium: 'mistral-medium-2505',  // Medium 3

  // Small/Fast
  small: 'mistral-small-2506',    // Small 3.2

  // Code specialized
  codestral: 'codestral-2508',
  devstral: 'devstral-medium-2507',

  // Reasoning (Magistral)
  magistralMedium: 'magistral-medium-2507',
  magistralSmall: 'magistral-small-2507',

  // Audio
  voxtral: 'voxtral-small-2507',

  // OCR
  ocr: 'mistral-ocr-2505',
} as const;
```

### Usage
```typescript
// SDK v1+: named `Mistral` export, options object, and chat.complete().
// The default-export MistralClient with client.chat() is the pre-1.0 API.
import { Mistral } from '@mistralai/mistralai';

const client = new Mistral({ apiKey: process.env.MISTRAL_API_KEY });

const response = await client.chat.complete({
  model: 'mistral-large-latest',
  messages: [{ role: 'user', content: 'Hello!' }],
});

// Code completion with Codestral
const codeResponse = await client.chat.complete({
  model: 'codestral-2508',
  messages: [{ role: 'user', content: 'Write a Python function to...' }],
});
```

### Model Selection
```
mistral-large-latest (123B params)
├── Best for: Complex reasoning, knowledge tasks
├── Context: 128K tokens
└── Use when: Need high capability

codestral-2508
├── Best for: Code generation, 80+ languages
├── Speed: 2.5x faster than predecessor
└── Use when: Code-focused tasks

magistral-medium-2507
├── Best for: Multi-step reasoning
├── Specialty: Transparent chain-of-thought
└── Use when: Need reasoning traces
```

---

## Voyage AI (Embeddings)

### Documentation
- **API Docs**: https://docs.voyageai.com
- **Models**: https://docs.voyageai.com/docs/embeddings
- **Pricing**: https://www.voyageai.com/pricing

### Latest Models (December 2025)

```typescript
const VOYAGE_MODELS = {
  // General purpose
  large2: 'voyage-large-2',
  large2Instruct: 'voyage-large-2-instruct',

  // Code specialized
  code2: 'voyage-code-2',
  code3: 'voyage-code-3',

  // Multilingual
  multilingual2: 'voyage-multilingual-2',

  // Domain specific
  law2: 'voyage-law-2',
  finance2: 'voyage-finance-2',
} as const;
```

### Usage
```typescript
const response = await fetch('https://api.voyageai.com/v1/embeddings', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`,
  },
  body: JSON.stringify({
    model: 'voyage-code-3',
    input: ['Your code to embed'],
  }),
});

// fetch() resolves on HTTP error statuses too; fail here with the API's
// message instead of a TypeError on data[0] below.
if (!response.ok) {
  throw new Error(`Voyage API request failed (${response.status}): ${await response.text()}`);
}

const { data } = await response.json();
const embedding = data[0].embedding;
```

---

## Quick Reference

### Cost Comparison (input price per 1M tokens, approx.)

| Provider | Cheap | Mid | Premium |
|----------|-------|-----|---------|
| Anthropic | $0.80 (Haiku 3.5) | $3 (Sonnet 4.5) | $5 (Opus 4.5) |
| OpenAI | $0.15 (4.1-nano) | $2 (4.1) | $15+ (o3) |
| Google | $0.04 (Flash-lite) | $0.08 (Flash) | $1.25 (Pro) |
| Mistral | $0.25 (Small) | $2.70 (Medium) | $8 (Large) |

### Best For Each Task

```
Reasoning/Analysis    → Claude Opus 4.5, o3, Gemini 3 Pro
Code Generation       → Claude Sonnet 4.5, Codestral 2508, GPT-4.1
Fast Responses        → Claude Haiku, GPT-4.1-mini, Gemini Flash
Long Context          → Gemini 2.5 Pro (1M), GPT-4.1 (1M), Claude (200K)
Vision                → GPT-4.1, Claude Sonnet, Gemini 3 Pro
Embeddings            → Voyage code-3, text-embedding-3-small
Voice Synthesis       → Eleven Labs v3/flash, OpenAI TTS
Image Generation      → FLUX.2 Pro, DALL-E 3, SD 3.5
Video Generation      → Stable Video 4D 2.0, Runway
Image Editing         → FLUX Kontext, gpt-image-1
```

### Environment Variables Template
```bash
# .env.example (NEVER commit actual keys)

# LLMs
ANTHROPIC_API_KEY=sk-ant-...
OPENAI_API_KEY=sk-...
GOOGLE_API_KEY=AI...
MISTRAL_API_KEY=...

# Media
ELEVENLABS_API_KEY=...
REPLICATE_API_TOKEN=r8_...
STABILITY_API_KEY=sk-...

# Embeddings
VOYAGE_API_KEY=pa-...
```

### Model Update Checklist
```
When models update:
□ Check official changelog/blog
□ Update model ID strings
□ Test with existing prompts
□ Compare output quality
□ Check pricing changes
□ Update context limits if changed
```

---

## Sources

- [Anthropic Models](https://docs.anthropic.com/en/docs/about-claude/models/overview)
- [OpenAI Models](https://platform.openai.com/docs/models)
- [OpenAI o3 Announcement](https://openai.com/index/introducing-o3-and-o4-mini/)
- [GPT-4.1 Announcement](https://openai.com/index/gpt-4-1/)
- [Google Gemini Models](https://ai.google.dev/gemini-api/docs/models/gemini)
- [Eleven Labs Models](https://elevenlabs.io/docs/models)
- [Replicate FLUX.2](https://replicate.com/blog/run-flux-2-on-replicate)
- [Mistral Models](https://docs.mistral.ai/getting-started/models)
- [Voyage AI](https://docs.voyageai.com)


================================================
FILE: skills/android-java/SKILL.md
================================================
---
name: android-java
description: Android Java development with MVVM, ViewBinding, and Espresso testing
when-to-use: When working on Android Java source files
user-invocable: false
paths: ["**/*.java", "android/**", "**/build.gradle"]
effort: medium
---

# Android Java Skill


---

## Project Structure

```
project/
├── app/
│   ├── src/
│   │   ├── main/
│   │   │   ├── java/com/example/app/
│   │   │   │   ├── data/           # Data layer
│   │   │   │   │   ├── local/      # Room database, SharedPreferences
│   │   │   │   │   ├── remote/     # Retrofit services, API clients
│   │   │   │   │   └── repository/ # Repository implementations
│   │   │   │   ├── di/             # Dependency injection (Hilt/Dagger)
│   │   │   │   ├── domain/         # Business logic
│   │   │   │   │   ├── model/      # Domain models
│   │   │   │   │   ├── repository/ # Repository interfaces
│   │   │   │   │   └── usecase/    # Use cases
│   │   │   │   ├── ui/             # Presentation layer
│   │   │   │   │   ├── feature/    # Feature screens
│   │   │   │   │   │   ├── FeatureActivity.java
│   │   │   │   │   │   ├── FeatureFragment.java
│   │   │   │   │   │   └── FeatureViewModel.java
│   │   │   │   │   └── common/     # Shared UI components
│   │   │   │   └── App.java        # Application class
│   │   │   ├── res/
│   │   │   │   ├── layout/
│   │   │   │   ├── values/
│   │   │   │   └── drawable/
│   │   │   └── AndroidManifest.xml
│   │   ├── test/                   # Unit tests
│   │   └── androidTest/            # Instrumentation tests
│   └── build.gradle
├── build.gradle                    # Project-level build file
├── gradle.properties
├── settings.gradle
└── CLAUDE.md
```

---

## Gradle Configuration

### App-level build.gradle
```groovy
plugins {
    id 'com.android.application'
}

android {
    namespace 'com.example.app'
    compileSdk 34

    defaultConfig {
        applicationId "com.example.app"
        minSdk 24
        targetSdk 34
        versionCode 1
        versionName "1.0"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled true
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }

    compileOptions {
        sourceCompatibility JavaVersion.VERSION_17
        targetCompatibility JavaVersion.VERSION_17
    }

    buildFeatures {
        viewBinding true
    }
}

dependencies {
    // AndroidX
    implementation 'androidx.core:core:1.12.0'
    implementation 'androidx.appcompat:appcompat:1.6.1'
    implementation 'com.google.android.material:material:1.11.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'

    // Lifecycle
    implementation 'androidx.lifecycle:lifecycle-viewmodel:2.7.0'
    implementation 'androidx.lifecycle:lifecycle-livedata:2.7.0'

    // Testing
    testImplementation 'junit:junit:4.13.2'
    testImplementation 'org.mockito:mockito-core:5.8.0'
    // Provides InstantTaskExecutorRule, used by the ViewModel unit tests
    testImplementation 'androidx.arch.core:core-testing:2.2.0'
    androidTestImplementation 'androidx.test.ext:junit:1.1.5'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
    // Provides RecyclerViewActions, used by the Espresso list test
    androidTestImplementation 'androidx.test.espresso:espresso-contrib:3.5.1'
}
```

---

## Architecture Patterns

### MVVM with ViewModel
```java
// ViewModel - holds UI state, survives configuration changes
public class UserViewModel extends ViewModel {
    // Mutable state stays private; observers only get the read-only LiveData view.
    private final MutableLiveData<User> userState = new MutableLiveData<>();
    private final MutableLiveData<Boolean> loadingState = new MutableLiveData<>(false);
    private final MutableLiveData<String> errorState = new MutableLiveData<>();
    private final UserRepository userRepository;

    public UserViewModel(UserRepository repository) {
        userRepository = repository;
    }

    /** The most recently loaded user. */
    public LiveData<User> getUser() {
        return userState;
    }

    /** True between the start of a load and its success or error callback. */
    public LiveData<Boolean> isLoading() {
        return loadingState;
    }

    /** The message from the most recent failed load. */
    public LiveData<String> getError() {
        return errorState;
    }

    /** Requests the user from the repository and publishes the outcome. */
    public void loadUser(String userId) {
        loadingState.setValue(true);

        Callback<User> onLoaded = new Callback<User>() {
            @Override
            public void onSuccess(User result) {
                userState.setValue(result);
                loadingState.setValue(false);
            }

            @Override
            public void onError(String message) {
                errorState.setValue(message);
                loadingState.setValue(false);
            }
        };
        userRepository.getUser(userId, onLoaded);
    }
}
```

### Repository Pattern
```java
// Repository interface (domain layer)
public interface UserRepository {
    void getUser(String userId, Callback<User> callback);
    void saveUser(User user, Callback<Void> callback);
}

// Repository implementation (data layer)
public class UserRepositoryImpl implements UserRepository {
    private static final String LOAD_FAILED = "Failed to load user";

    private final UserApi api;
    private final UserDao dao;
    // All DAO access runs here. NOTE(review): assumes UserDao is a Room DAO,
    // which rejects main-thread queries by default - confirm.
    private final Executor diskExecutor;
    // Callbacks are delivered here because callers update LiveData with setValue().
    private final Executor mainExecutor;

    public UserRepositoryImpl(UserApi api, UserDao dao) {
        this(api, dao, Executors.newSingleThreadExecutor(),
                new Handler(Looper.getMainLooper())::post);
    }

    // Executors are injectable so tests can run everything synchronously.
    public UserRepositoryImpl(UserApi api, UserDao dao,
                              Executor diskExecutor, Executor mainExecutor) {
        this.api = api;
        this.dao = dao;
        this.diskExecutor = diskExecutor;
        this.mainExecutor = mainExecutor;
    }

    @Override
    public void getUser(String userId, Callback<User> callback) {
        diskExecutor.execute(() -> {
            // Try cache first, then network
            User cached = dao.getUserById(userId);
            if (cached != null) {
                mainExecutor.execute(() -> callback.onSuccess(cached));
                return;
            }
            fetchFromNetwork(userId, callback);
        });
    }

    @Override
    public void saveUser(User user, Callback<Void> callback) {
        diskExecutor.execute(() -> {
            try {
                dao.insert(user);
            } catch (RuntimeException e) {
                mainExecutor.execute(() -> callback.onError(messageOf(e, "Failed to save user")));
                return;
            }
            mainExecutor.execute(() -> callback.onSuccess(null));
        });
    }

    // Loads the user from the API, caches it, then reports the result.
    private void fetchFromNetwork(String userId, Callback<User> callback) {
        api.getUser(userId).enqueue(new retrofit2.Callback<User>() {
            @Override
            public void onResponse(Call<User> call, Response<User> response) {
                User body = response.body();
                if (response.isSuccessful() && body != null) {
                    diskExecutor.execute(() -> {
                        dao.insert(body);
                        mainExecutor.execute(() -> callback.onSuccess(body));
                    });
                } else {
                    mainExecutor.execute(() -> callback.onError(LOAD_FAILED));
                }
            }

            @Override
            public void onFailure(Call<User> call, Throwable t) {
                mainExecutor.execute(() -> callback.onError(messageOf(t, LOAD_FAILED)));
            }
        });
    }

    // Throwable.getMessage() may be null; never hand a null message to the UI.
    private static String messageOf(Throwable t, String fallback) {
        String message = t.getMessage();
        return message != null ? message : fallback;
    }
}
```

---

## Activity & Fragment Patterns

### Activity with ViewBinding
```java
public class MainActivity extends AppCompatActivity {
    private ActivityMainBinding binding;
    private MainViewModel viewModel;

    /** Inflates the layout via ViewBinding, obtains the ViewModel, then wires observers and listeners. */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);

        binding = ActivityMainBinding.inflate(getLayoutInflater());
        setContentView(binding.getRoot());

        ViewModelProvider provider = new ViewModelProvider(this);
        viewModel = provider.get(MainViewModel.class);

        setupObservers();
        setupListeners();
    }

    /** Mirrors ViewModel state into the views. */
    private void setupObservers() {
        viewModel.getUser().observe(this,
                loadedUser -> binding.userName.setText(loadedUser.getName()));

        viewModel.isLoading().observe(this, busy -> {
            int visibility = busy ? View.VISIBLE : View.GONE;
            binding.progressBar.setVisibility(visibility);
        });
    }

    /** Forwards the refresh button click to the ViewModel. */
    private void setupListeners() {
        binding.refreshButton.setOnClickListener(
                clicked -> viewModel.loadUser(getCurrentUserId()));
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        // Drop the binding reference once the activity is destroyed.
        binding = null;
    }
}
```

### Fragment with ViewBinding
```java
public class UserFragment extends Fragment {
    private FragmentUserBinding binding;
    private UserViewModel viewModel;

    /** Inflates the fragment layout through ViewBinding and returns its root view. */
    @Override
    public View onCreateView(LayoutInflater inflater, ViewGroup container,
                             Bundle savedInstanceState) {
        FragmentUserBinding inflated = FragmentUserBinding.inflate(inflater, container, false);
        binding = inflated;
        return inflated.getRoot();
    }

    /** Obtains the activity-scoped ViewModel and starts observing it. */
    @Override
    public void onViewCreated(View view, Bundle savedInstanceState) {
        super.onViewCreated(view, savedInstanceState);

        ViewModelProvider provider = new ViewModelProvider(requireActivity());
        viewModel = provider.get(UserViewModel.class);

        setupObservers();
    }

    private void setupObservers() {
        // Observed with the view lifecycle owner, not the fragment itself.
        viewModel.getUser().observe(getViewLifecycleOwner(),
                loadedUser -> binding.userName.setText(loadedUser.getName()));
    }

    @Override
    public void onDestroyView() {
        super.onDestroyView();
        binding = null;  // Prevent memory leaks
    }
}
```

---

## Testing

### Unit Tests with JUnit & Mockito
```java
@RunWith(MockitoJUnitRunner.class)
public class UserViewModelTest {
    @Mock
    private UserRepository repository;

    @Rule
    public InstantTaskExecutorRule instantTaskExecutorRule = new InstantTaskExecutorRule();

    private UserViewModel viewModel;

    @Before
    public void setup() {
        viewModel = new UserViewModel(repository);
    }

    @Test
    public void loadUser_success_updatesUserLiveData() {
        // Arrange
        User expectedUser = new User("1", "John Doe");
        doAnswer(invocation -> {
            Callback<User> callback = invocation.getArgument(1);
            callback.onSuccess(expectedUser);
            return null;
        }).when(repository).getUser(eq("1"), any());

        // Act
        viewModel.loadUser("1");

        // Assert
        assertEquals(expectedUser, viewModel.getUser().getValue());
        assertFalse(viewModel.isLoading().getValue());
    }

    @Test
    public void loadUser_error_updatesErrorLiveData() {
        // Arrange
        doAnswer(invocation -> {
            Callback<User> callback = invocation.getArgument(1);
            callback.onError("Network error");
            return null;
        }).when(repository).getUser(eq("1"), any());

        // Act
        viewModel.loadUser("1");

        // Assert
        assertEquals("Network error", viewModel.getError().getValue());
        assertFalse(viewModel.isLoading().getValue());
    }
}
```

### Instrumentation Tests with Espresso
```java
@RunWith(AndroidJUnit4.class)
public class MainActivityTest {
    @Rule
    public ActivityScenarioRule<MainActivity> activityRule =
            new ActivityScenarioRule<>(MainActivity.class);

    @Test
    public void userName_isDisplayed() {
        onView(withId(R.id.userName))
                .check(matches(isDisplayed()));
    }

    @Test
    public void refreshButton_click_triggersRefresh() {
        onView(withId(R.id.refreshButton))
                .perform(click());

        onView(withId(R.id.progressBar))
                .check(matches(isDisplayed()));
    }

    @Test
    public void userList_scrollToItem_displaysCorrectly() {
        onView(withId(R.id.userList))
                .perform(RecyclerViewActions.scrollToPosition(10));

        onView(withText("User 10"))
                .check(matches(isDisplayed()));
    }
}
```

---

## GitHub Actions

```yaml
name: Android CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v3

      - name: Grant execute permission for gradlew
        run: chmod +x gradlew

      - name: Run Lint
        run: ./gradlew lint

      - name: Run Unit Tests
        run: ./gradlew testDebugUnitTest

      - name: Build Debug APK
        run: ./gradlew assembleDebug

      - name: Upload APK
        uses: actions/upload-artifact@v4
        with:
          name: debug-apk
          path: app/build/outputs/apk/debug/app-debug.apk

  instrumentation-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Enable KVM
        run: |
          echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      - name: Run Instrumentation Tests
        uses: reactivecircus/android-emulator-runner@v2
        with:
          api-level: 29
          script: ./gradlew connectedDebugAndroidTest
```

---

## Lint Configuration

### lint.xml
```xml
<?xml version="1.0" encoding="UTF-8"?>
<lint>
    <!-- Treat these as errors -->
    <issue id="HardcodedText" severity="error" />
    <issue id="MissingTranslation" severity="error" />
    <issue id="UnusedResources" severity="warning" />

    <!-- Memory leak detection -->
    <issue id="StaticFieldLeak" severity="error" />

    <!-- Security -->
    <issue id="HardcodedDebugMode" severity="error" />
    <issue id="AllowBackup" severity="warning" />

    <!-- Performance -->
    <issue id="ViewHolder" severity="error" />
    <issue id="Overdraw" severity="warning" />

    <!-- Ignore for tests -->
    <issue id="InvalidPackage">
        <ignore path="**/test/**" />
        <ignore path="**/androidTest/**" />
    </issue>
</lint>
```

### build.gradle lint options
```groovy
android {
    lint {
        abortOnError true
        warningsAsErrors false
        checkReleaseBuilds true
        xmlReport true
        htmlReport true
    }
}
```

---

## Common Patterns

### Null-Safe Callbacks
```java
// Define callback interface
public interface Callback<T> {
    void onSuccess(T result);
    void onError(String message);
}

// Use with null checks
public void fetchData(Callback<Data> callback) {
    if (callback == null) return;

    try {
        Data result = performFetch();
        callback.onSuccess(result);
    } catch (Exception e) {
        callback.onError(e.getMessage());
    }
}
```

### Safe Context Usage
```java
// Use application context for long-lived objects
public class DataManager {
    private final Context appContext;

    public DataManager(Context context) {
        // Always use application context to prevent Activity leaks
        this.appContext = context.getApplicationContext();
    }
}

// Check for null context in callbacks
private void updateUI() {
    Context context = getContext();
    if (context == null || !isAdded()) return;
    // Safe to use context
}
```

### Thread-Safe Singleton
```java
/**
 * Lazily created, process-wide holder for the shared Retrofit instance.
 *
 * The single instance is published through a volatile field and created
 * under a class-level lock the first time {@link #getInstance()} is called.
 */
public class ApiClient {
    private static volatile ApiClient instance;
    private final Retrofit retrofit;

    /** Builds the Retrofit client once; only reachable through getInstance(). */
    private ApiClient() {
        retrofit = new Retrofit.Builder()
                .baseUrl(BASE_URL)
                .addConverterFactory(GsonConverterFactory.create())
                .build();
    }

    /**
     * Returns the shared ApiClient, creating it on first use.
     *
     * @return the one ApiClient instance for this process
     */
    public static ApiClient getInstance() {
        // Fast path: a single volatile read once the instance exists.
        ApiClient current = instance;
        if (current != null) {
            return current;
        }

        synchronized (ApiClient.class) {
            // Re-check under the lock: another thread may have won the race.
            current = instance;
            if (current == null) {
                current = new ApiClient();
                instance = current;
            }
            return current;
        }
    }
}
```

---

## Android Anti-Patterns

- ❌ **Context leaks** - Never hold Activity/Fragment references in static fields or singletons
- ❌ **Memory leaks in callbacks** - Always use WeakReference or clear callbacks in onDestroy
- ❌ **UI updates on background thread** - Always post to main thread for UI changes
- ❌ **Hardcoded strings** - Use string resources for all user-visible text
- ❌ **God Activities** - Keep Activities under 200 lines, extract logic to ViewModels
- ❌ **NetworkOnMainThreadException** - Never perform network calls on main thread
- ❌ **Ignoring lifecycle** - Always respect Activity/Fragment lifecycle states
- ❌ **Blocking the main thread** - Keep main thread operations under 16ms
- ❌ **Not handling configuration changes** - Use ViewModel to survive rotation
- ❌ **Hardcoded dimensions** - Use dp/sp units and dimension resources
- ❌ **Deep view hierarchies** - Keep layout depth under 10 levels, use ConstraintLayout
- ❌ **Not closing resources** - Always close Cursor, InputStream, database connections


================================================
FILE: skills/android-kotlin/SKILL.md
================================================
---
name: android-kotlin
description: Android Kotlin development with Coroutines, Jetpack Compose, Hilt, and MockK testing
when-to-use: When working on Android Kotlin source files
user-invocable: false
paths: ["**/*.kt", "**/*.kts", "android/**", "**/build.gradle.kts"]
effort: medium
---

# Android Kotlin Skill


---

## Project Structure

```
project/
├── app/
│   ├── src/
│   │   ├── main/
│   │   │   ├── kotlin/com/example/app/
│   │   │   │   ├── data/               # Data layer
│   │   │   │   │   ├── local/          # Room database
│   │   │   │   │   ├── remote/         # Retrofit/Ktor services
│   │   │   │   │   └── repository/     # Repository implementations
│   │   │   │   ├── di/                 # Hilt modules
│   │   │   │   ├── domain/             # Business logic
│   │   │   │   │   ├── model/          # Domain models
│   │   │   │   │   ├── repository/     # Repository interfaces
│   │   │   │   │   └── usecase/        # Use cases
│   │   │   │   ├── ui/                 # Presentation layer
│   │   │   │   │   ├── feature/        # Feature screens
│   │   │   │   │   │   ├── FeatureScreen.kt      # Compose UI
│   │   │   │   │   │   └── FeatureViewModel.kt
│   │   │   │   │   ├── components/     # Reusable Compose components
│   │   │   │   │   └── theme/          # Material theme
│   │   │   │   └── App.kt              # Application class
│   │   │   ├── res/
│   │   │   └── AndroidManifest.xml
│   │   ├── test/                       # Unit tests
│   │   └── androidTest/                # Instrumentation tests
│   └── build.gradle.kts
├── build.gradle.kts                    # Project-level build file
├── gradle.properties
├── settings.gradle.kts
└── CLAUDE.md
```

---

## Gradle Configuration (Kotlin DSL)

### App-level build.gradle.kts
```kotlin
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
    id("com.google.dagger.hilt.android")
    id("com.google.devtools.ksp")
}

android {
    namespace = "com.example.app"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.example.app"
        minSdk = 24
        targetSdk = 34
        versionCode = 1
        versionName = "1.0"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            isMinifyEnabled = true
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }

    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_17
        targetCompatibility = JavaVersion.VERSION_17
    }

    kotlinOptions {
        jvmTarget = "17"
    }

    buildFeatures {
        compose = true
    }

    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.8"
    }
}

dependencies {
    // Compose BOM: pins a consistent version for every androidx.compose artifact below
    val composeBom = platform("androidx.compose:compose-bom:2024.01.00")
    implementation(composeBom)
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")
    implementation("androidx.activity:activity-compose:1.8.2")
    implementation("androidx.lifecycle:lifecycle-viewmodel-compose:2.7.0")
    // Provides collectAsStateWithLifecycle(), which the Compose screens in this skill use
    implementation("androidx.lifecycle:lifecycle-runtime-compose:2.7.0")

    // Coroutines
    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3")

    // Hilt
    implementation("com.google.dagger:hilt-android:2.50")
    ksp("com.google.dagger:hilt-compiler:2.50")
    implementation("androidx.hilt:hilt-navigation-compose:1.1.0")

    // Room
    implementation("androidx.room:room-runtime:2.6.1")
    implementation("androidx.room:room-ktx:2.6.1")
    ksp("androidx.room:room-compiler:2.6.1")

    // Testing
    testImplementation("junit:junit:4.13.2")
    testImplementation("io.mockk:mockk:1.13.9")
    testImplementation("org.jetbrains.kotlinx:kotlinx-coroutines-test:1.7.3")
    testImplementation("app.cash.turbine:turbine:1.0.0")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}
```

---

## Kotlin Coroutines & Flow

### ViewModel with StateFlow
```kotlin
@HiltViewModel
class UserViewModel @Inject constructor(
    private val getUserUseCase: GetUserUseCase,
    private val savedStateHandle: SavedStateHandle
) : ViewModel() {

    private val _uiState = MutableStateFlow(UserUiState())
    val uiState: StateFlow<UserUiState> = _uiState.asStateFlow()

    private val userId: String = checkNotNull(savedStateHandle["userId"])

    init {
        loadUser()
    }

    fun loadUser() {
        viewModelScope.launch {
            _uiState.update { it.copy(isLoading = true) }

            getUserUseCase(userId)
                .catch { e ->
                    _uiState.update {
                        it.copy(isLoading = false, error = e.message)
                    }
                }
                .collect { user ->
                    _uiState.update {
                        it.copy(isLoading = false, user = user, error = null)
                    }
                }
        }
    }

    fun clearError() {
        _uiState.update { it.copy(error = null) }
    }
}

data class UserUiState(
    val user: User? = null,
    val isLoading: Boolean = false,
    val error: String? = null
)
```

### Repository with Flow
```kotlin
interface UserRepository {
    fun getUser(userId: String): Flow<User>
    fun observeUsers(): Flow<List<User>>
    suspend fun saveUser(user: User)
}

class UserRepositoryImpl @Inject constructor(
    private val api: UserApi,
    private val dao: UserDao,
    private val dispatcher: CoroutineDispatcher = Dispatchers.IO
) : UserRepository {

    override fun getUser(userId: String): Flow<User> = flow {
        // Emit cached data first
        dao.getUserById(userId)?.let { emit(it) }

        // Fetch from network and update cache
        val remoteUser = api.getUser(userId)
        dao.insert(remoteUser)
        emit(remoteUser)
    }.flowOn(dispatcher)

    override fun observeUsers(): Flow<List<User>> =
        dao.observeAllUsers().flowOn(dispatcher)

    override suspend fun saveUser(user: User) = withContext(dispatcher) {
        api.saveUser(user)
        dao.insert(user)
    }
}
```

---

## Jetpack Compose

### Screen with ViewModel
```kotlin
@Composable
fun UserScreen(
    viewModel: UserViewModel = hiltViewModel(),
    onNavigateBack: () -> Unit
) {
    val uiState by viewModel.uiState.collectAsStateWithLifecycle()

    UserScreenContent(
        uiState = uiState,
        onRefresh = viewModel::loadUser,
        onErrorDismiss = viewModel::clearError,
        onNavigateBack = onNavigateBack
    )
}

/**
 * Stateless content of the user profile screen.
 *
 * Shows a centered progress indicator while [uiState] is loading, the user
 * content once a user is available, and a dismissible snackbar whenever
 * [uiState] carries an error message.
 *
 * @param uiState snapshot of the screen state to render
 * @param onRefresh callback for reloading the user (not wired to any control in this layout)
 * @param onErrorDismiss invoked when the snackbar's "Dismiss" action is tapped
 * @param onNavigateBack invoked when the top bar's back icon is tapped
 */
// TopAppBar is annotated @ExperimentalMaterial3Api in the Material 3 version
// resolved by the Compose BOM used in this skill, so the opt-in is required
// for this function to compile.
@OptIn(ExperimentalMaterial3Api::class)
@Composable
private fun UserScreenContent(
    uiState: UserUiState,
    onRefresh: () -> Unit,
    onErrorDismiss: () -> Unit,
    onNavigateBack: () -> Unit
) {
    Scaffold(
        topBar = {
            TopAppBar(
                title = { Text("User Profile") },
                navigationIcon = {
                    IconButton(onClick = onNavigateBack) {
                        Icon(Icons.AutoMirrored.Filled.ArrowBack, "Back")
                    }
                }
            )
        }
    ) { padding ->
        Box(
            modifier = Modifier
                .fillMaxSize()
                .padding(padding)
        ) {
            // Loading takes precedence over already-loaded content.
            when {
                uiState.isLoading -> {
                    CircularProgressIndicator(
                        modifier = Modifier.align(Alignment.Center)
                    )
                }
                uiState.user != null -> {
                    UserContent(user = uiState.user)
                }
            }

            // The error is drawn on top of whatever branch above was rendered.
            uiState.error?.let { error ->
                Snackbar(
                    modifier = Modifier.align(Alignment.BottomCenter),
                    action = {
                        TextButton(onClick = onErrorDismiss) {
                            Text("Dismiss")
                        }
                    }
                ) {
                    Text(error)
                }
            }
        }
    }
}
```

---

## Sealed Classes for State

### Result Wrapper
```kotlin
/**
 * Outcome of an operation: a value, a failure, or still in progress.
 */
sealed interface Result<out T> {
    /** The operation finished and produced [data]. */
    data class Success<T>(val data: T) : Result<T>

    /** The operation failed with [exception]. */
    data class Error(val exception: Throwable) : Result<Nothing>

    /** The operation has not finished yet. */
    data object Loading : Result<Nothing>
}

/** Returns the wrapped value for [Result.Success], or `null` for any other state. */
fun <T> Result<T>.getOrNull(): T? = when (this) {
    is Result.Success -> data
    else -> null
}

/**
 * Applies [transform] to the value of a [Result.Success]; [Result.Error] and
 * [Result.Loading] are passed through unchanged.
 */
inline fun <T, R> Result<T>.map(transform: (T) -> R): Result<R> = when (this) {
    is Result.Loading -> this
    is Result.Error -> this
    is Result.Success -> Result.Success(transform(data))
}
```

---

## Testing with MockK & Turbine

### ViewModel Tests
```kotlin
/**
 * Unit tests for [UserViewModel].
 *
 * The ViewModel calls `loadUser()` from its `init` block, so the use case
 * must be stubbed BEFORE the ViewModel is constructed; `mockk()` creates a
 * strict mock that throws on any call without a stub.
 */
@OptIn(ExperimentalCoroutinesApi::class)
class UserViewModelTest {

    @get:Rule
    val mainDispatcherRule = MainDispatcherRule()

    private val getUserUseCase: GetUserUseCase = mockk()
    private val savedStateHandle = SavedStateHandle(mapOf("userId" to "123"))
    private val user = User("123", "John Doe", "john@example.com")

    private lateinit var viewModel: UserViewModel

    @Before
    fun setup() {
        // Stub first: constructing the ViewModel immediately triggers loadUser().
        coEvery { getUserUseCase("123") } returns flowOf(user)
        viewModel = UserViewModel(getUserUseCase, savedStateHandle)
    }

    @Test
    fun `loadUser success updates state with user`() = runTest {
        // With the unconfined test dispatcher and a synchronous flowOf(), the
        // load started in init has already finished, so the first item
        // Turbine sees is the loaded state.
        viewModel.uiState.test {
            val loaded = awaitItem()
            assertFalse(loaded.isLoading)
            assertEquals(user, loaded.user)
            cancelAndIgnoreRemainingEvents()
        }

        // An explicit refresh ends in the same loaded state. The transient
        // isLoading = true value may be conflated by StateFlow, so assert on
        // the final value rather than on the intermediate emission.
        viewModel.loadUser()

        val refreshed = viewModel.uiState.value
        assertFalse(refreshed.isLoading)
        assertEquals(user, refreshed.user)
        coVerify(exactly = 2) { getUserUseCase("123") }
    }
}

/**
 * JUnit rule that swaps `Dispatchers.Main` for a test dispatcher for the
 * duration of each test.
 *
 * @param dispatcher dispatcher installed as Main; defaults to an
 * [UnconfinedTestDispatcher]
 */
class MainDispatcherRule(
    private val dispatcher: TestDispatcher = UnconfinedTestDispatcher()
) : TestWatcher() {
    // Install the test dispatcher before the test body runs.
    override fun starting(description: Description) {
        Dispatchers.setMain(dispatcher)
    }
    // Restore the original Main dispatcher once the test has finished.
    override fun finished(description: Description) {
        Dispatchers.resetMain()
    }
}
```

---

## GitHub Actions

```yaml
name: Android Kotlin CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v3

      - name: Run Detekt
        run: ./gradlew detekt

      - name: Run Ktlint
        run: ./gradlew ktlintCheck

      - name: Run Unit Tests
        run: ./gradlew testDebugUnitTest

      - name: Build Debug APK
        run: ./gradlew assembleDebug
```

---

## Lint Configuration

### detekt.yml
```yaml
build:
  maxIssues: 0

complexity:
  LongMethod:
    threshold: 20
  LongParameterList:
    functionThreshold: 4
  TooManyFunctions:
    thresholdInFiles: 10

style:
  MaxLineLength:
    maxLineLength: 120
  WildcardImport:
    active: true

coroutines:
  GlobalCoroutineUsage:
    active: true
```

---

## Kotlin Anti-Patterns

- ❌ **Blocking coroutines on Main** - Never use `runBlocking` on main thread
- ❌ **GlobalScope usage** - Use structured concurrency with viewModelScope/lifecycleScope
- ❌ **Collecting flows in init** - Use `repeatOnLifecycle` or `collectAsStateWithLifecycle`
- ❌ **Mutable state exposure** - Expose `StateFlow` not `MutableStateFlow`
- ❌ **Not handling exceptions in flows** - Always use `catch` operator
- ❌ **Lateinit for nullable** - Use `lazy` or nullable with `?`
- ❌ **Hardcoded dispatchers** - Inject dispatchers for testability
- ❌ **Not using sealed classes** - Prefer sealed for finite state sets
- ❌ **Side effects in Composables** - Use `LaunchedEffect`/`SideEffect`
- ❌ **Unstable Compose parameters** - Use stable/immutable types or `@Stable`


================================================
FILE: skills/aws-aurora/SKILL.md
================================================
---
name: aws-aurora
description: AWS Aurora Serverless v2, RDS Proxy, Data API, connection pooling
when-to-use: When working with AWS Aurora/RDS databases
user-invocable: false
paths: ["**/rds*", "**/aurora*", "serverless.*", "template.yaml"]
effort: medium
---

# AWS Aurora Skill


Amazon Aurora is a MySQL/PostgreSQL-compatible relational database with serverless scaling, high availability, and enterprise features.

**Sources:** [Aurora Docs](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/) | [Serverless v2](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2.html) | [RDS Proxy](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/rds-proxy.html)

---

## Core Principle

**Use RDS Proxy for serverless, Data API for simplicity, connection pooling always.**

Aurora excels at ACID-compliant workloads. For serverless architectures (Lambda), always use RDS Proxy or Data API to handle connection management. Never open raw connections from Lambda functions.

---

## Aurora Options

| Option | Best For |
|--------|----------|
| **Aurora Serverless v2** | Variable workloads, auto-scaling (0.5-128 ACUs) |
| **Aurora Provisioned** | Predictable workloads, maximum performance |
| **Aurora Global** | Multi-region, disaster recovery |
| **Data API** | Serverless without VPC, simple HTTP access |
| **RDS Proxy** | Connection pooling for Lambda, high concurrency |

---

## Connection Strategies

### Strategy 1: RDS Proxy (Recommended for Lambda)
```
Lambda → RDS Proxy → Aurora
         (pool)
```
- Connection pooling and reuse
- Automatic failover handling
- IAM authentication support
- Works with existing SQL clients

### Strategy 2: Data API (Simplest for Serverless)
```
Lambda → Data API (HTTP) → Aurora
```
- No VPC required
- No connection management
- Higher latency per query
- Requires an Aurora cluster with the Data API (HTTP endpoint) enabled; supported engine versions and Regions vary

### Strategy 3: Direct Connection (Not for Lambda)
```
App Server → Aurora
(persistent connection)
```
- Only for long-running servers (ECS, EC2)
- Manage connection pool yourself
- Not suitable for serverless

---

## RDS Proxy Setup

### Create Proxy (AWS Console/CDK)
```typescript
// CDK example
// `cluster`, `vpc` and `proxySecurityGroup` are defined elsewhere in the stack.
import * as cdk from 'aws-cdk-lib'; // needed for cdk.Duration below
import * as rds from 'aws-cdk-lib/aws-rds';

const proxy = new rds.DatabaseProxy(this, 'Proxy', {
  proxyTarget: rds.ProxyTarget.fromCluster(cluster),
  secrets: [cluster.secret!],
  vpc,
  securityGroups: [proxySecurityGroup],
  requireTLS: true,
  idleClientTimeout: cdk.Duration.minutes(30),
  maxConnectionsPercent: 90,
  maxIdleConnectionsPercent: 10,
  borrowTimeout: cdk.Duration.seconds(30)
});
```

### Connect via Proxy (TypeScript/Node.js)
```typescript
// lib/db.ts
import { Pool } from 'pg';
import { Signer } from '@aws-sdk/rds-signer';

const signer = new Signer({
  hostname: process.env.RDS_PROXY_ENDPOINT!,
  port: 5432,
  username: process.env.DB_USER!,
  region: process.env.AWS_REGION!
});

// IAM authentication
/**
 * Creates the pg Pool used to reach Aurora through RDS Proxy.
 *
 * The password is supplied as a function rather than a fixed string: IAM
 * auth tokens are short-lived, while the pool is cached across warm Lambda
 * invocations. pg invokes the function whenever it opens a new connection,
 * so every connection authenticates with a freshly signed token.
 *
 * @returns a Pool limited to a single connection, sized for one Lambda instance
 */
async function getPool(): Promise<Pool> {
  return new Pool({
    host: process.env.RDS_PROXY_ENDPOINT,
    port: 5432,
    database: process.env.DB_NAME,
    user: process.env.DB_USER,
    password: () => signer.getAuthToken(),
    ssl: { rejectUnauthorized: true },
    max: 1,  // Single connection for Lambda
    idleTimeoutMillis: 120000,
    connectionTimeoutMillis: 10000
  });
}

// Usage in Lambda
let pool: Pool | null = null;

export async function handler(event: any) {
  if (!pool) {
    pool = await getPool();
  }

  const result = await pool.query('SELECT * FROM users WHERE id = $1', [event.userId]);
  return result.rows[0];
}
```

### Proxy Configuration Best Practices
```bash
# Key settings for Lambda workloads
MaxConnectionsPercent: 90        # Use most of DB connections
MaxIdleConnectionsPercent: 10    # Keep some idle for bursts
ConnectionBorrowTimeout: 30s     # Wait for available connection
IdleClientTimeout: 30min         # Close idle proxy connections

# Monitor these CloudWatch metrics:
# - DatabaseConnectionsCurrentlyBorrowed
# - DatabaseConnectionsCurrentlySessionPinned
# - QueryDatabaseResponseLatency
```

---

## Data API (HTTP-based)

### Enable Data API
```bash
# Aurora Serverless v2 and provisioned clusters: use the dedicated operation
aws rds enable-http-endpoint \
  --resource-arn "$CLUSTER_ARN"

# Aurora Serverless v1 clusters only: the flag on modify-db-cluster
aws rds modify-db-cluster \
  --db-cluster-identifier my-cluster \
  --enable-http-endpoint
```

### TypeScript with Data API Client v2
```bash
npm install data-api-client
```

```typescript
// lib/db.ts
import DataAPIClient from 'data-api-client';

const db = DataAPIClient({
  secretArn: process.env.DB_SECRET_ARN!,
  resourceArn: process.env.DB_CLUSTER_ARN!,
  database: process.env.DB_NAME!,
  region: process.env.AWS_REGION!
});

// Simple query
const users = await db.query('SELECT * FROM users WHERE active = :active', {
  active: true
});

// Insert with returning
const result = await db.query(
  'INSERT INTO users (email, name) VALUES (:email, :name) RETURNING *',
  { email: 'user@test.com', name: 'Test User' }
);

// Transaction
const transaction = await db.transaction();
try {
  await transaction.query('UPDATE accounts SET balance = balance - :amount WHERE id = :from', {
    amount: 100, from: 1
  });
  await transaction.query('UPDATE accounts SET balance = balance + :amount WHERE id = :to', {
    amount: 100, to: 2
  });
  await transaction.commit();
} catch (error) {
  await transaction.rollback();
  throw error;
}
```

### Python with boto3
```python
# requirements.txt
boto3>=1.34.0

# db.py
import boto3
import os

rds_data = boto3.client('rds-data')

CLUSTER_ARN = os.environ['DB_CLUSTER_ARN']
SECRET_ARN = os.environ['DB_SECRET_ARN']
DATABASE = os.environ['DB_NAME']


def execute_sql(sql: str, parameters: list = None, transaction_id: str = None):
    """Execute a single SQL statement via the Data API.

    Args:
        sql: Statement text; named placeholders use the ``:name`` form.
        parameters: Optional list of Data API parameter dicts
            (``{'name': ..., 'value': {...}}``). Left out of the request
            when empty or ``None``.
        transaction_id: Optional ID returned by ``begin_transaction``. When
            given, the statement runs inside that transaction; when omitted
            the request carries no ``transactionId``.

    Returns:
        The raw ``execute_statement`` response dict.
    """
    params = {
        'resourceArn': CLUSTER_ARN,
        'secretArn': SECRET_ARN,
        'database': DATABASE,
        'sql': sql
    }

    if parameters:
        params['parameters'] = parameters

    # Only send transactionId for statements that belong to a transaction.
    if transaction_id:
        params['transactionId'] = transaction_id

    return rds_data.execute_statement(**params)


def get_user(user_id: int):
    """Return the Data API ``records`` for the user with ``user_id`` (``[]`` if none)."""
    result = execute_sql(
        'SELECT * FROM users WHERE id = :id',
        [{'name': 'id', 'value': {'longValue': user_id}}]
    )
    return result.get('records', [])


def create_user(email: str, name: str):
    """Insert a user and return the inserted row(s).

    The statement uses ``RETURNING *``, so the new row comes back in the
    response's ``records`` list. ``generatedFields`` is not populated for
    Aurora PostgreSQL, which is the engine this ``RETURNING`` syntax targets.
    """
    result = execute_sql(
        'INSERT INTO users (email, name) VALUES (:email, :name) RETURNING *',
        [
            {'name': 'email', 'value': {'stringValue': email}},
            {'name': 'name', 'value': {'stringValue': name}}
        ]
    )
    return result.get('records', [])


# Transaction
def transfer_funds(from_id: int, to_id: int, amount: float):
    """Move ``amount`` from one account to another atomically.

    Both UPDATE statements are executed with the transaction ID returned by
    ``begin_transaction``; without it each statement would run and commit on
    its own and the rollback below could not undo them.

    Raises:
        Exception: whatever the Data API call raised, re-raised after the
            transaction has been rolled back.
    """
    transaction = rds_data.begin_transaction(
        resourceArn=CLUSTER_ARN,
        secretArn=SECRET_ARN,
        database=DATABASE
    )
    transaction_id = transaction['transactionId']

    try:
        execute_sql(
            'UPDATE accounts SET balance = balance - :amount WHERE id = :id',
            [
                {'name': 'amount', 'value': {'doubleValue': amount}},
                {'name': 'id', 'value': {'longValue': from_id}}
            ],
            transaction_id=transaction_id
        )

        execute_sql(
            'UPDATE accounts SET balance = balance + :amount WHERE id = :id',
            [
                {'name': 'amount', 'value': {'doubleValue': amount}},
                {'name': 'id', 'value': {'longValue': to_id}}
            ],
            transaction_id=transaction_id
        )

        rds_data.commit_transaction(
            resourceArn=CLUSTER_ARN,
            secretArn=SECRET_ARN,
            transactionId=transaction_id
        )
    except Exception:
        rds_data.rollback_transaction(
            resourceArn=CLUSTER_ARN,
            secretArn=SECRET_ARN,
            transactionId=transaction_id
        )
        # Bare raise keeps the original traceback intact.
        raise
```

---

## Prisma with Aurora

### Setup (VPC Connection via RDS Proxy)
```bash
npm install prisma @prisma/client
npx prisma init
```

```prisma
// prisma/schema.prisma
generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

model User {
  id        Int      @id @default(autoincrement())
  email     String   @unique
  name      String
  posts     Post[]
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt
}

model Post {
  id        Int      @id @default(autoincrement())
  title     String
  content   String?
  published Boolean  @default(false)
  author    User     @relation(fields: [authorId], references: [id])
  authorId  Int
  createdAt DateTime @default(now())
}
```

### Environment
```bash
# Use RDS Proxy endpoint
DATABASE_URL="postgresql://user:password@proxy-endpoint.proxy-xxx.region.rds.amazonaws.com:5432/mydb?schema=public&connection_limit=1"
```

### Lambda Handler with Prisma
```typescript
// handlers/users.ts
import { PrismaClient } from '@prisma/client';

// Reuse client across invocations
let prisma: PrismaClient | null = null;

function getPrisma(): PrismaClient {
  if (!prisma) {
    prisma = new PrismaClient({
      datasources: {
        db: { url: process.env.DATABASE_URL }
      }
    });
  }
  return prisma;
}

export async function handler(event: any) {
  const db = getPrisma();

  const users = await db.user.findMany({
    include: { posts: true },
    take: 10
  });

  return {
    statusCode: 200,
    body: JSON.stringify(users)
  };
}
```

---

## Aurora Serverless v2

### Capacity Configuration
```typescript
// CDK
const cluster = new rds.DatabaseCluster(this, 'Cluster', {
  engine: rds.DatabaseClusterEngine.auroraPostgres({
    version: rds.AuroraPostgresEngineVersion.VER_15_4
  }),
  serverlessV2MinCapacity: 0.5,  // Minimum ACUs
  serverlessV2MaxCapacity: 16,   // Maximum ACUs
  writer: rds.ClusterInstance.serverlessV2('writer'),
  readers: [
    rds.ClusterInstance.serverlessV2('reader', { scaleWithWriter: true })
  ],
  vpc,
  vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }
});
```

### Capacity Guidelines

| Workload | Min ACUs | Max ACUs |
|----------|----------|----------|
| Dev/Test | 0.5 | 2 |
| Small Production | 2 | 8 |
| Medium Production | 4 | 32 |
| Large Production | 8 | 128 |

### Handle Scale-to-Zero Wake-up
```typescript
// Data API Client v2 handles this automatically
// For direct connections, implement retry logic:

import { Pool } from 'pg';

/**
 * Runs a query, retrying with exponential backoff while the database is
 * still waking up.
 *
 * Only errors with code `ETIMEDOUT` or a message containing
 * "Communications link failure" are retried; any other error is rethrown
 * immediately. The wait before retry N is 2^N seconds (2s, 4s, ...).
 *
 * @param pool pg pool to run the query on
 * @param sql statement text
 * @param params positional parameters for the statement
 * @param maxRetries total number of attempts; must be a positive integer
 * @returns the pg query result
 * @throws RangeError when maxRetries is not a positive integer
 * @throws the last query error once all attempts have been used
 */
async function queryWithRetry(
  pool: Pool,
  sql: string,
  params: any[],
  maxRetries = 3
): Promise<any> {
  // Without this guard the loop would either never run or never hit the
  // `attempt === maxRetries` check, and the function would silently
  // resolve to undefined.
  if (!Number.isInteger(maxRetries) || maxRetries < 1) {
    throw new RangeError(`maxRetries must be a positive integer, got ${maxRetries}`);
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await pool.query(sql, params);
    } catch (error: any) {
      // Aurora Serverless waking up
      const isWakeUpError =
        error.code === 'ETIMEDOUT' ||
        error.message?.includes('Communications link failure');
      if (!isWakeUpError || attempt === maxRetries) throw error;

      // Exponential backoff
      await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 1000));
    }
  }

  // Unreachable: every iteration either returns or throws on the last attempt.
  throw new Error('queryWithRetry: retries exhausted');
}
```

---

## Migrations

### Using Prisma Migrate
```bash
# Development (creates migration)
npx prisma migrate dev --name add_users_table

# Production (apply migrations)
npx prisma migrate deploy

# Generate client
npx prisma generate
```

### CI/CD Migration Script
```yaml
# .github/workflows/deploy.yml
- name: Run migrations
  run: |
    # Connect via bastion or use a migration Lambda
    npx prisma migrate deploy
  env:
    DATABASE_URL: ${{ secrets.DATABASE_URL }}
```

### Migration Lambda
```typescript
// lambdas/migrate.ts
import { execSync } from 'child_process';

export async function handler() {
  try {
    execSync('npx prisma migrate deploy', {
      env: {
        ...process.env,
        DATABASE_URL: process.env.DATABASE_URL
      },
      stdio: 'inherit'
    });
    return { statusCode: 200, body: 'Migrations applied' };
  } catch (error) {
    console.error('Migration failed:', error);
    throw error;
  }
}
```

---

## Connection Pooling (Non-Lambda)

### PgBouncer Sidecar (ECS/EKS)
```yaml
# docker-compose.yml
services:
  app:
    build: .
    environment:
      DATABASE_URL: postgresql://user:pass@pgbouncer:6432/mydb

  pgbouncer:
    image: edoburu/pgbouncer
    environment:
      DATABASE_URL: postgresql://user:pass@aurora-endpoint:5432/mydb
      POOL_MODE: transaction
      MAX_CLIENT_CONN: 1000
      DEFAULT_POOL_SIZE: 20
```

### Application-Level Pooling
```typescript
// For long-running servers (not Lambda)
import { Pool } from 'pg';

const pool = new Pool({
  host: process.env.DB_HOST,
  port: 5432,
  database: process.env.DB_NAME,
  user: process.env.DB_USER,
  password: process.env.DB_PASSWORD,
  max: 20,                  // Max connections
  idleTimeoutMillis: 30000, // Close idle after 30s
  connectionTimeoutMillis: 10000
});

// Use pool for all queries
export async function query(sql: string, params?: any[]) {
  const client = await pool.connect();
  try {
    return await client.query(sql, params);
  } finally {
    client.release();
  }
}
```

---

## Monitoring

### Key CloudWatch Metrics
```
# Aurora
- CPUUtilization
- DatabaseConnections
- FreeableMemory
- ServerlessDatabaseCapacity (ACUs)
- AuroraReplicaLag

# RDS Proxy
- DatabaseConnectionsCurrentlyBorrowed
- DatabaseConnectionsCurrentlySessionPinned
- QueryDatabaseResponseLatency
- ClientConnectionsReceived
```

### Performance Insights
```bash
# Enable via console or CLI
aws rds modify-db-cluster \
  --db-cluster-identifier my-cluster \
  --enable-performance-insights \
  --performance-insights-retention-period 7
```

---

## Security

### IAM Database Authentication
```typescript
import { Pool } from 'pg';
import { Signer } from '@aws-sdk/rds-signer';

const signer = new Signer({
  hostname: process.env.DB_HOST!,
  port: 5432,
  username: 'iam_user',
  region: 'us-east-1'
});

// An IAM auth token is used as the password and is only valid for 15 minutes.
// Passing a function lets pg request a freshly signed token each time it
// opens a connection, instead of reusing one that may have expired.
const pool = new Pool({
  host: process.env.DB_HOST,
  user: 'iam_user',
  password: () => signer.getAuthToken(),
  ssl: true
});
```

### Secrets Manager Rotation
```typescript
import { SecretsManagerClient, GetSecretValueCommand } from '@aws-sdk/client-secrets-manager';

const client = new SecretsManagerClient({ region: 'us-east-1' });

async function getDbCredentials() {
  const response = await client.send(
    new GetSecretValueCommand({ SecretId: process.env.DB_SECRET_ARN })
  );
  return JSON.parse(response.SecretString!);
}
```

---

## CLI Quick Reference

```bash
# Cluster operations
aws rds describe-db-clusters
aws rds create-db-cluster --engine aurora-postgresql --db-cluster-identifier my-cluster
aws rds delete-db-cluster --db-cluster-identifier my-cluster --skip-final-snapshot

# Serverless v2
aws rds modify-db-cluster \
  --db-cluster-identifier my-cluster \
  --serverless-v2-scaling-configuration MinCapacity=0.5,MaxCapacity=16

# Data API
aws rds-data execute-statement \
  --resource-arn $CLUSTER_ARN \
  --secret-arn $SECRET_ARN \
  --database mydb \
  --sql "SELECT * FROM users"

# Proxy
aws rds describe-db-proxies
aws rds create-db-proxy --db-proxy-name my-proxy --engine-family POSTGRESQL ...

# Snapshots
aws rds create-db-cluster-snapshot --db-cluster-identifier my-cluster --db-cluster-snapshot-identifier backup-1
# --engine is required on restore and must match the engine of the snapshot
aws rds restore-db-cluster-from-snapshot --db-cluster-identifier restored --snapshot-identifier backup-1 --engine aurora-postgresql
```

---

## Anti-Patterns

- **Direct Lambda→Aurora connections** - Always use RDS Proxy or Data API
- **No connection limits** - Set `max: 1` for Lambda, use pooling for servers
- **Ignoring cold starts** - Serverless v2 needs time to scale; keep minimum ACUs for production
- **No read replicas** - Offload reads to replicas for heavy workloads
- **Missing IAM auth** - Use IAM over static passwords when possible
- **No retry logic** - Handle transient errors from scaling/failover
- **Over-provisioned capacity** - Use Serverless v2 for variable workloads
- **Skipping Secrets Manager** - Never hardcode credentials


================================================
FILE: skills/aws-dynamodb/SKILL.md
================================================
---
name: aws-dynamodb
description: AWS DynamoDB single-table design, GSI patterns, SDK v3 TypeScript/Python
when-to-use: When working with DynamoDB tables or AWS SDK data operations
user-invocable: false
paths: ["**/dynamodb*", "**/dynamo*", "serverless.*", "template.yaml"]
effort: medium
---

# AWS DynamoDB Skill


DynamoDB is a fully managed NoSQL database designed for single-digit millisecond performance at any scale. Master single-table design and access pattern modeling.

**Sources:** [DynamoDB Docs](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/) | [SDK v3](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/client/dynamodb/) | [Best Practices](https://aws.amazon.com/blogs/database/single-table-vs-multi-table-design-in-amazon-dynamodb/)

---

## Core Principle

**Design for access patterns, not entities. Think access-pattern-first.**

DynamoDB requires you to know your queries before designing your schema. Model around how you'll access data, not how data relates. Single-table design stores multiple entity types in one table using generic key attributes.

---

## Key Concepts

| Concept | Description |
|---------|-------------|
| **Partition Key (PK)** | Primary key attribute - determines data distribution |
| **Sort Key (SK)** | Optional secondary key for range queries within partition |
| **GSI** | Global Secondary Index - alternate partition/sort keys |
| **LSI** | Local Secondary Index - same partition key, alternate sort key (must be defined at table creation) |
| **Item** | Single record (max 400 KB) |
| **Attribute** | Field within an item |

---

## Single-Table Design

### Why Single Table?
- Fetch related data in single query
- Reduce round trips and costs
- Enable transactions across entity types
- Simplify operations (backup, restore, IAM)

### Generic Key Pattern
```typescript
// Instead of entity-specific keys:
// userId, orderId, productId

// Use generic keys that work for all entities:
interface BaseItem {
  PK: string;   // Partition Key
  SK: string;   // Sort Key
  GSI1PK?: string;  // First GSI partition key
  GSI1SK?: string;  // First GSI sort key
  EntityType: string;
  // ... entity-specific attributes
}
```

### Example: E-commerce Schema
```typescript
// Users
{ PK: 'USER#123', SK: 'PROFILE', EntityType: 'User', name: 'John', email: 'john@test.com' }
{ PK: 'USER#123', SK: 'ADDRESS#1', EntityType: 'Address', street: '123 Main', city: 'NYC' }

// Orders for user (1:N relationship)
{ PK: 'USER#123', SK: 'ORDER#2024-001', EntityType: 'Order', total: 99.99, status: 'shipped' }
{ PK: 'USER#123', SK: 'ORDER#2024-002', EntityType: 'Order', total: 49.99, status: 'pending' }

// Order details (query by order ID using GSI)
{ PK: 'USER#123', SK: 'ORDER#2024-001', GSI1PK: 'ORDER#2024-001', GSI1SK: 'ORDER', ... }
{ PK: 'ORDER#2024-001', SK: 'ITEM#1', GSI1PK: 'ORDER#2024-001', GSI1SK: 'ITEM#1', productId: 'PROD#456', qty: 2 }

// Products
{ PK: 'PROD#456', SK: 'PRODUCT', EntityType: 'Product', name: 'Widget', price: 29.99 }
```

### Access Patterns Covered
```
1. Get user profile          → GetItem PK='USER#123', SK='PROFILE'
2. Get user addresses        → Query PK='USER#123', SK begins_with 'ADDRESS#'
3. Get all user orders       → Query PK='USER#123', SK begins_with 'ORDER#'
4. Get order by ID           → Query GSI1, GSI1PK='ORDER#2024-001', GSI1SK='ORDER'
5. Get order with items      → Query GSI1, GSI1PK='ORDER#2024-001'
6. Get product details       → GetItem PK='PROD#456', SK='PRODUCT'
```

---

## SDK v3 Setup (TypeScript)

### Install Dependencies
```bash
npm install @aws-sdk/client-dynamodb @aws-sdk/lib-dynamodb
```

### Client Configuration
```typescript
// lib/dynamodb.ts
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
import { DynamoDBDocumentClient } from '@aws-sdk/lib-dynamodb';

const client = new DynamoDBClient({
  region: process.env.AWS_REGION || 'us-east-1',
  // For local development with DynamoDB Local
  ...(process.env.DYNAMODB_LOCAL && {
    endpoint: 'http://localhost:8000',
    credentials: { accessKeyId: 'local', secretAccessKey: 'local' }
  })
});

// Document client for simplified operations
export const docClient = DynamoDBDocumentClient.from(client, {
  marshallOptions: {
    removeUndefinedValues: true,  // Important: match v2 behavior
    convertClassInstanceToMap: true
  },
  unmarshallOptions: {
    wrapNumbers: false
  }
});

export const TABLE_NAME = process.env.DYNAMODB_TABLE || 'MyTable';
```

### Type Definitions
```typescript
// types/dynamodb.ts
export interface BaseItem {
  PK: string;
  SK: string;
  GSI1PK?: string;
  GSI1SK?: string;
  EntityType: string;
  createdAt: string;
  updatedAt: string;
}

export interface User extends BaseItem {
  EntityType: 'User';
  userId: string;
  email: string;
  name: string;
}

export interface Order extends BaseItem {
  EntityType: 'Order';
  orderId: string;
  userId: string;
  total: number;
  status: 'pending' | 'paid' | 'shipped' | 'delivered';
}

// Key builders
export const keys = {
  user: (userId: string) => ({
    PK: `USER#${userId}`,
    SK: 'PROFILE'
  }),
  userOrders: (userId: string) => ({
    PK: `USER#${userId}`,
    SKPrefix: 'ORDER#'
  }),
  order: (userId: string, orderId: string) => ({
    PK: `USER#${userId}`,
    SK: `ORDER#${orderId}`,
    GSI1PK: `ORDER#${orderId}`,
    GSI1SK: 'ORDER'
  })
};
```

---

## CRUD Operations

### Put Item (Create/Update)
```typescript
import { PutCommand } from '@aws-sdk/lib-dynamodb';
import { docClient, TABLE_NAME } from './dynamodb';
import { User, keys } from './types';

/**
 * Create a user profile item.
 *
 * The write is conditional on the key not existing yet, so an existing
 * profile is never overwritten; the request is rejected instead.
 *
 * @param userId Identifier used to build the item's PK.
 * @param data   Email and display name stored on the profile.
 * @returns The item exactly as it was written.
 */
async function createUser(userId: string, data: { email: string; name: string }): Promise<User> {
  const timestamp = new Date().toISOString();
  const { email, name } = data;

  const user: User = {
    ...keys.user(userId),
    EntityType: 'User',
    userId,
    email,
    name,
    createdAt: timestamp,
    updatedAt: timestamp
  };

  const command = new PutCommand({
    TableName: TABLE_NAME,
    Item: user,
    ConditionExpression: 'attribute_not_exists(PK)'  // Prevent overwrite
  });
  await docClient.send(command);

  return user;
}
```

### Get Item (Read)
```typescript
import { GetCommand } from '@aws-sdk/lib-dynamodb';

/**
 * Fetch a user profile by its full primary key.
 *
 * @param userId Identifier used to build the item's key.
 * @returns The profile, or `null` when no item is returned.
 */
async function getUser(userId: string): Promise<User | null> {
  const command = new GetCommand({
    TableName: TABLE_NAME,
    Key: keys.user(userId)
  });
  const { Item } = await docClient.send(command);

  return Item ? (Item as User) : null;
}
```

### Query (List/Search)
```typescript
import { QueryCommand } from '@aws-sdk/lib-dynamodb';

// Get all orders for a user
/**
 * Return every order stored under the user's partition, newest first.
 *
 * A single Query response is capped at 1 MB, so the function keeps following
 * `LastEvaluatedKey` until DynamoDB reports that there are no more pages.
 * Use `getUserOrdersPaginated` instead when the caller wants one page at a time.
 *
 * @param userId Identifier used to build the partition key.
 * @returns All matching orders (empty array when there are none).
 */
async function getUserOrders(userId: string): Promise<Order[]> {
  const orders: Order[] = [];
  let lastKey: Record<string, any> | undefined;

  do {
    const result = await docClient.send(new QueryCommand({
      TableName: TABLE_NAME,
      KeyConditionExpression: 'PK = :pk AND begins_with(SK, :sk)',
      ExpressionAttributeValues: {
        ':pk': `USER#${userId}`,
        ':sk': 'ORDER#'
      },
      ScanIndexForward: false,  // Newest first
      ExclusiveStartKey: lastKey
    }));

    orders.push(...((result.Items as Order[]) ?? []));
    lastKey = result.LastEvaluatedKey;
  } while (lastKey);

  return orders;
}

// Query GSI by order ID
/**
 * Look up a single order through GSI1.
 *
 * Order items share the order's GSI1PK (`ORDER#<id>`) and their GSI1SK
 * (`ITEM#n`) sorts before `ORDER`, so querying on the partition key alone
 * would return an order item as the first result. The sort key condition
 * pins the query to the order record itself.
 *
 * @param orderId Order identifier (without the `ORDER#` prefix).
 * @returns The order, or `null` when it does not exist.
 */
async function getOrderById(orderId: string): Promise<Order | null> {
  const result = await docClient.send(new QueryCommand({
    TableName: TABLE_NAME,
    IndexName: 'GSI1',
    KeyConditionExpression: 'GSI1PK = :pk AND GSI1SK = :sk',
    ExpressionAttributeValues: {
      ':pk': `ORDER#${orderId}`,
      ':sk': 'ORDER'
    },
    Limit: 1
  }));

  return (result.Items?.[0] as Order) || null;
}

// Paginated query
/**
 * Fetch one page of a user's orders.
 *
 * @param userId   Identifier used to build the partition key.
 * @param pageSize Maximum number of items evaluated for this page.
 * @param lastKey  `lastKey` returned by the previous call; omit for the first page.
 * @returns The page of orders plus the key to pass in for the next page
 *          (undefined once the last page has been read).
 */
async function getUserOrdersPaginated(
  userId: string,
  pageSize: number = 20,
  lastKey?: Record<string, any>
): Promise<{ items: Order[]; lastKey?: Record<string, any> }> {
  const command = new QueryCommand({
    TableName: TABLE_NAME,
    KeyConditionExpression: 'PK = :pk AND begins_with(SK, :sk)',
    ExpressionAttributeValues: {
      ':pk': `USER#${userId}`,
      ':sk': 'ORDER#'
    },
    Limit: pageSize,
    ExclusiveStartKey: lastKey
  });

  const { Items, LastEvaluatedKey } = await docClient.send(command);

  return {
    items: (Items as Order[]) || [],
    lastKey: LastEvaluatedKey
  };
}
```

### Update Item
```typescript
import { UpdateCommand } from '@aws-sdk/lib-dynamodb';

/**
 * Update the mutable fields of an existing user profile.
 *
 * Only fields present in `updates` are written; `updatedAt` is always
 * refreshed. The update is conditional on the item existing, so it never
 * creates a profile.
 *
 * @param userId  Identifier used to build the item's key.
 * @param updates New values for `name` and/or `email`.
 * @returns The full item as stored after the update.
 */
async function updateUser(userId: string, updates: Partial<Pick<User, 'name' | 'email'>>): Promise<User> {
  // Every attribute goes through a #placeholder so reserved words
  // cannot break the expression.
  const setClauses: string[] = ['#updatedAt = :updatedAt'];
  const attrNames: Record<string, string> = { '#updatedAt': 'updatedAt' };
  const attrValues: Record<string, any> = { ':updatedAt': new Date().toISOString() };

  for (const field of ['name', 'email'] as const) {
    const value = updates[field];
    if (value === undefined) continue;

    setClauses.push(`#${field} = :${field}`);
    attrNames[`#${field}`] = field;
    attrValues[`:${field}`] = value;
  }

  const command = new UpdateCommand({
    TableName: TABLE_NAME,
    Key: keys.user(userId),
    UpdateExpression: `SET ${setClauses.join(', ')}`,
    ExpressionAttributeNames: attrNames,
    ExpressionAttributeValues: attrValues,
    ReturnValues: 'ALL_NEW',
    ConditionExpression: 'attribute_exists(PK)'  // Must exist
  });
  const { Attributes } = await docClient.send(command);

  return Attributes as User;
}

// Atomic counter increment
/**
 * Atomically add one to the user's `orderCount`.
 *
 * `if_not_exists` seeds the counter with zero the first time it is used.
 * The condition makes the call fail for unknown users; without it an
 * UpdateItem on a missing key would create a stub profile that holds
 * nothing but the counter.
 *
 * @param userId Identifier used to build the item's key.
 */
async function incrementOrderCount(userId: string): Promise<void> {
  await docClient.send(new UpdateCommand({
    TableName: TABLE_NAME,
    Key: keys.user(userId),
    UpdateExpression: 'SET orderCount = if_not_exists(orderCount, :zero) + :inc',
    ExpressionAttributeValues: {
      ':zero': 0,
      ':inc': 1
    },
    ConditionExpression: 'attribute_exists(PK)'  // Must exist
  }));
}
```

### Delete Item
```typescript
import { DeleteCommand } from '@aws-sdk/lib-dynamodb';

/**
 * Delete a user profile.
 *
 * The delete is conditional on the item existing, so deleting an unknown
 * user is rejected rather than silently succeeding.
 *
 * @param userId Identifier used to build the item's key.
 */
async function deleteUser(userId: string): Promise<void> {
  const command = new DeleteCommand({
    TableName: TABLE_NAME,
    Key: keys.user(userId),
    ConditionExpression: 'attribute_exists(PK)'
  });

  await docClient.send(command);
}
```

---

## Batch Operations

### Batch Write (Up to 25 items)
```typescript
import { BatchWriteCommand } from '@aws-sdk/lib-dynamodb';

/**
 * Write any number of items using BatchWrite.
 *
 * Items are sent in chunks of 25 (the per-request maximum). A BatchWrite
 * call can succeed while still returning `UnprocessedItems` (for example
 * when throttled), so each chunk is re-sent with exponential backoff until
 * everything has been written or the attempt budget is exhausted.
 *
 * @param items Items to put; an empty array is a no-op.
 * @throws Error when items remain unprocessed after the final attempt.
 */
async function batchCreateItems(items: BaseItem[]): Promise<void> {
  const MAX_BATCH_SIZE = 25;  // DynamoDB allows max 25 items per batch
  const MAX_ATTEMPTS = 5;

  for (let i = 0; i < items.length; i += MAX_BATCH_SIZE) {
    let pending: any[] | undefined = items
      .slice(i, i + MAX_BATCH_SIZE)
      .map(item => ({ PutRequest: { Item: item } }));

    for (let attempt = 0; pending && pending.length > 0; attempt++) {
      if (attempt >= MAX_ATTEMPTS) {
        throw new Error(
          `batchCreateItems: ${pending.length} item(s) still unprocessed after ${MAX_ATTEMPTS} attempts`
        );
      }
      if (attempt > 0) {
        // Back off before retrying: 100ms, 200ms, 400ms, ...
        await new Promise(resolve => setTimeout(resolve, 50 * 2 ** attempt));
      }

      const result = await docClient.send(new BatchWriteCommand({
        RequestItems: { [TABLE_NAME]: pending }
      }));

      pending = result.UnprocessedItems?.[TABLE_NAME];
    }
  }
}
```

### Batch Get (Up to 100 items)
```typescript
import { BatchGetCommand } from '@aws-sdk/lib-dynamodb';

/**
 * Fetch many user profiles with BatchGet.
 *
 * - IDs are de-duplicated first, because BatchGet rejects duplicate keys.
 * - Keys are sent in chunks of 100 (the per-request maximum).
 * - `UnprocessedKeys` returned by a partially successful call are retried
 *   with exponential backoff.
 *
 * Users that do not exist are simply absent from the result, and the result
 * order is not guaranteed to match the input order.
 *
 * @param userIds User identifiers; an empty array returns an empty result.
 * @throws Error when keys remain unprocessed after the final attempt.
 */
async function batchGetUsers(userIds: string[]): Promise<User[]> {
  const MAX_BATCH_SIZE = 100;
  const MAX_ATTEMPTS = 5;

  const uniqueIds = [...new Set(userIds)];
  const users: User[] = [];

  for (let i = 0; i < uniqueIds.length; i += MAX_BATCH_SIZE) {
    let pendingKeys: Record<string, any>[] | undefined = uniqueIds
      .slice(i, i + MAX_BATCH_SIZE)
      .map(id => keys.user(id));

    for (let attempt = 0; pendingKeys && pendingKeys.length > 0; attempt++) {
      if (attempt >= MAX_ATTEMPTS) {
        throw new Error(
          `batchGetUsers: ${pendingKeys.length} key(s) still unprocessed after ${MAX_ATTEMPTS} attempts`
        );
      }
      if (attempt > 0) {
        // Back off before retrying: 100ms, 200ms, 400ms, ...
        await new Promise(resolve => setTimeout(resolve, 50 * 2 ** attempt));
      }

      const result = await docClient.send(new BatchGetCommand({
        RequestItems: {
          [TABLE_NAME]: { Keys: pendingKeys }
        }
      }));

      users.push(...((result.Responses?.[TABLE_NAME] as User[]) ?? []));
      pendingKeys = result.UnprocessedKeys?.[TABLE_NAME]?.Keys;
    }
  }

  return users;
}
```

---

## Transactions

### TransactWrite (Atomic Multi-Item)
```typescript
import { TransactWriteCommand } from '@aws-sdk/lib-dynamodb';

/**
 * Create an order, its line items, and bump the user's order counter in one
 * all-or-nothing transaction.
 *
 * A transaction accepts at most 100 actions. Two are used for the order and
 * the counter, so the function rejects oversized orders up front with a
 * clear error instead of letting the service fail the request.
 *
 * @param userId    Owner of the order.
 * @param orderId   Order identifier (without the `ORDER#` prefix).
 * @param orderData Order-level attributes.
 * @param items     Line items; stored as `ITEM#<index>` under the order.
 * @throws Error when the order has too many line items for one transaction.
 */
async function createOrderWithItems(
  userId: string,
  orderId: string,
  orderData: { total: number },
  items: { productId: string; quantity: number }[]
): Promise<void> {
  const MAX_TRANSACT_ACTIONS = 100;
  const FIXED_ACTIONS = 2;  // Order put + user counter update

  if (items.length + FIXED_ACTIONS > MAX_TRANSACT_ACTIONS) {
    throw new Error(
      `createOrderWithItems: ${items.length} items exceed the limit of ` +
      `${MAX_TRANSACT_ACTIONS - FIXED_ACTIONS} per transaction`
    );
  }

  const now = new Date().toISOString();

  const transactItems = [
    // Create order
    {
      Put: {
        TableName: TABLE_NAME,
        Item: {
          ...keys.order(userId, orderId),
          EntityType: 'Order',
          orderId,
          userId,
          total: orderData.total,
          status: 'pending',
          createdAt: now,
          updatedAt: now
        },
        ConditionExpression: 'attribute_not_exists(PK)'  // Fail if the order already exists
      }
    },
    // Update user's order count
    {
      Update: {
        TableName: TABLE_NAME,
        Key: keys.user(userId),
        UpdateExpression: 'SET orderCount = if_not_exists(orderCount, :zero) + :inc',
        ExpressionAttributeValues: { ':zero': 0, ':inc': 1 }
      }
    },
    // Add order items
    ...items.map((item, index) => ({
      Put: {
        TableName: TABLE_NAME,
        Item: {
          PK: `ORDER#${orderId}`,
          SK: `ITEM#${index}`,
          GSI1PK: `ORDER#${orderId}`,
          GSI1SK: `ITEM#${index}`,
          EntityType: 'OrderItem',
          productId: item.productId,
          quantity: item.quantity,
          createdAt: now
        }
      }
    }))
  ];

  await docClient.send(new TransactWriteCommand({
    TransactItems: transactItems
  }));
}
```

---

## GSI Patterns

### Sparse Index
```typescript
// Only items with GSI1PK attribute appear in the index
// Useful for "featured" or "flagged" items

// Featured products (only some products have GSI1PK)
{ PK: 'PROD#1', SK: 'PRODUCT', GSI1PK: 'FEATURED', GSI1SK: 'PROD#1', ... }  // In index
{ PK: 'PROD#2', SK: 'PRODUCT', ... }  // Not in index (no GSI1PK)

// Query featured products
const featured = await docClient.send(new QueryCommand({
  TableName: TABLE_NAME,
  IndexName: 'GSI1',
  KeyConditionExpression: 'GSI1PK = :pk',
  ExpressionAttributeValues: { ':pk': 'FEATURED' }
}));
```

### Inverted Index (GSI)
```typescript
// Main table: User -> Orders (PK=USER#, SK=ORDER#)
// GSI: Orders by status (GSI1PK=STATUS#, GSI1SK=ORDER#)

{ PK: 'USER#123', SK: 'ORDER#001', GSI1PK: 'STATUS#pending', GSI1SK: 'ORDER#001', ... }
{ PK: 'USER#456', SK: 'ORDER#002', GSI1PK: 'STATUS#shipped', GSI1SK: 'ORDER#002', ... }

// Get all pending orders across all users
const pending = await docClient.send(new QueryCommand({
  TableName: TABLE_NAME,
  IndexName: 'GSI1',
  KeyConditionExpression: 'GSI1PK = :pk',
  ExpressionAttributeValues: { ':pk': 'STATUS#pending' }
}));
```

### Multi-Attribute Composite Keys (Nov 2025+)
```typescript
// New feature: Up to 4 attributes per partition/sort key
// No more synthetic keys like "TOURNAMENT#WINTER2024#REGION#NA-EAST"

// Table definition (IaC)
const table = {
  AttributeDefinitions: [
    { AttributeName: 'tournament', AttributeType: 'S' },
    { AttributeName: 'region', AttributeType: 'S' },
    { AttributeName: 'score', AttributeType: 'N' }
  ],
  GlobalSecondaryIndexes: [{
    IndexName: 'TournamentRegionIndex',
    KeySchema: [
      { AttributeName: 'tournament', KeyType: 'HASH' },  // Composite PK part 1
      { AttributeName: 'region', KeyType: 'HASH' },      // Composite PK part 2
      { AttributeName: 'score', KeyType: 'RANGE' }
    ]
  }]
};
```

---

## Python (boto3)

### Setup
```python
# requirements.txt
boto3>=1.34.0

# db.py
import boto3
from boto3.dynamodb.conditions import Key, Attr
import os

dynamodb = boto3.resource(
    'dynamodb',
    region_name=os.getenv('AWS_REGION', 'us-east-1'),
    endpoint_url=os.getenv('DYNAMODB_LOCAL_ENDPOINT')  # For local dev
)

table = dynamodb.Table(os.getenv('DYNAMODB_TABLE', 'MyTable'))
```

### Operations
```python
from datetime import datetime
from typing import Optional, List
from decimal import Decimal

def create_user(user_id: str, email: str, name: str) -> dict:
    """Create a user profile item and return it.

    The put is conditional on the key not existing yet, so an existing
    profile is not overwritten.
    """
    timestamp = datetime.utcnow().isoformat()
    item = dict(
        PK=f'USER#{user_id}',
        SK='PROFILE',
        EntityType='User',
        userId=user_id,
        email=email,
        name=name,
        createdAt=timestamp,
        updatedAt=timestamp,
    )

    table.put_item(Item=item, ConditionExpression='attribute_not_exists(PK)')
    return item


def get_user(user_id: str) -> Optional[dict]:
    """Return the user's profile item, or None when the response has no item."""
    profile_key = {'PK': f'USER#{user_id}', 'SK': 'PROFILE'}
    return table.get_item(Key=profile_key).get('Item')


def get_user_orders(user_id: str) -> List[dict]:
    """Return every order stored under the user's partition, newest first.

    A single query response is capped at 1 MB, so the function keeps
    following ``LastEvaluatedKey`` until no further page is reported.
    """
    query_kwargs = {
        'KeyConditionExpression': (
            Key('PK').eq(f'USER#{user_id}') & Key('SK').begins_with('ORDER#')
        ),
        'ScanIndexForward': False,  # Newest first
    }

    orders: List[dict] = []
    while True:
        response = table.query(**query_kwargs)
        orders.extend(response.get('Items', []))

        last_key = response.get('LastEvaluatedKey')
        if not last_key:
            return orders
        # Resume the next page right after the last evaluated item.
        query_kwargs['ExclusiveStartKey'] = last_key


def update_user(user_id: str, **updates) -> dict:
    """Update attributes on an existing user profile and return the new item.

    Each keyword argument becomes one ``SET`` clause; ``updatedAt`` is always
    refreshed. The update is conditional on the item existing: without the
    condition, updating an unknown user would create a partial profile.

    NOTE(review): keyword names are interpolated into expression placeholders,
    so they are presumably expected to be plain attribute names - confirm that
    callers never pass untrusted keys.
    """
    update_parts = ['#updatedAt = :updatedAt']
    names = {'#updatedAt': 'updatedAt'}
    values = {':updatedAt': datetime.utcnow().isoformat()}

    for key, value in updates.items():
        update_parts.append(f'#{key} = :{key}')
        names[f'#{key}'] = key
        values[f':{key}'] = value

    response = table.update_item(
        Key={'PK': f'USER#{user_id}', 'SK': 'PROFILE'},
        UpdateExpression=f'SET {", ".join(update_parts)}',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues=values,
        ConditionExpression='attribute_exists(PK)',  # Must exist
        ReturnValues='ALL_NEW'
    )
    return response['Attributes']


def delete_user(user_id: str) -> None:
    """Delete the user's profile item."""
    profile_key = {'PK': f'USER#{user_id}', 'SK': 'PROFILE'}
    table.delete_item(Key=profile_key)
```

---

## Local Development

### DynamoDB Local
```bash
# Docker
docker run -d -p 8000:8000 amazon/dynamodb-local

# Create table locally
aws dynamodb create-table \
  --endpoint-url http://localhost:8000 \
  --table-name MyTable \
  --attribute-definitions \
    AttributeName=PK,AttributeType=S \
    AttributeName=SK,AttributeType=S \
    AttributeName=GSI1PK,AttributeType=S \
    AttributeName=GSI1SK,AttributeType=S \
  --key-schema \
    AttributeName=PK,KeyType=HASH \
    AttributeName=SK,KeyType=RANGE \
  --global-secondary-indexes \
    'IndexName=GSI1,KeySchema=[{AttributeName=GSI1PK,KeyType=HASH},{AttributeName=GSI1SK,KeyType=RANGE}],Projection={ProjectionType=ALL}' \
  --billing-mode PAY_PER_REQUEST
```

### NoSQL Workbench
AWS provides [NoSQL Workbench](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/workbench.html) for visual data modeling and querying.

---

## CLI Quick Reference

```bash
# Table operations
aws dynamodb create-table --cli-input-json file://table.json
aws dynamodb describe-table --table-name MyTable
aws dynamodb delete-table --table-name MyTable

# Item operations
aws dynamodb put-item --table-name MyTable --item '{"PK":{"S":"USER#1"},"SK":{"S":"PROFILE"}}'
aws dynamodb get-item --table-name MyTable --key '{"PK":{"S":"USER#1"},"SK":{"S":"PROFILE"}}'
aws dynamodb delete-item --table-name MyTable --key '{"PK":{"S":"USER#1"},"SK":{"S":"PROFILE"}}'

# Query
aws dynamodb query --table-name MyTable \
  --key-condition-expression "PK = :pk" \
  --expression-attribute-values '{":pk":{"S":"USER#1"}}'

# Scan (avoid in production)
aws dynamodb scan --table-name MyTable --limit 10
```

---

## Anti-Patterns

- **Scan operations** - Always use Query with proper key conditions
- **Hot partitions** - Distribute writes with high-cardinality partition keys
- **Large items** - Keep items under 400KB; use S3 for large data
- **Too many GSIs** - Each GSI duplicates data; design carefully
- **Ignoring capacity** - Monitor consumed capacity, use on-demand for variable loads
- **No condition expressions** - Always validate with ConditionExpression
- **Fetching all attributes** - Use ProjectionExpression to limit data
- **Multi-table design without reason** - Single-table is preferred unless access patterns don't overlap


================================================
FILE: skills/azure-cosmosdb/SKILL.md
================================================
---
name: azure-cosmosdb
description: Azure Cosmos DB partition keys, consistency levels, change feed, SDK patterns
when-to-use: When working with Azure Cosmos DB
user-invocable: false
paths: ["**/cosmos*", "**/azure*"]
effort: medium
---

## Core Principle

**Choose partition key wisely, design for your access patterns, understand consistency tradeoffs.**

Cosmos DB distributes data across partitions. Your partition key choice determines scalability, performance, and cost. Design for even distribution and query efficiency.

---

## Cosmos DB APIs

| API | Use Case |
|-----|----------|
| **NoSQL (Core)** | Document database, most flexible |
| **MongoDB** | MongoDB wire protocol compatible |
| **PostgreSQL** | Distributed PostgreSQL (Citus) |
| **Apache Cassandra** | Wide-column store |
| **Apache Gremlin** | Graph database |
| **Table** | Key-value (Azure Table Storage compatible) |

This skill focuses on **NoSQL (Core) API** - the most common choice.

---

## Key Concepts

| Concept | Description |
|---------|-------------|
| **Container** | Collection of items (like a table) |
| **Item** | Single document/record (JSON) |
| **Partition Key** | Determines data distribution |
| **Logical Partition** | Items with same partition key |
| **Physical Partition** | Storage unit (max 50GB, 10K RU/s) |
| **RU (Request Unit)** | Throughput currency |

---

## Partition Key Design

### Good Partition Keys
```typescript
// High cardinality, even distribution, used in queries

// E-commerce: userId for user data
{ "id": "order-123", "userId": "user-456", ... }  // PK: /userId

// Multi-tenant: tenantId
{ "id": "doc-1", "tenantId": "tenant-abc", ... }  // PK: /tenantId

// IoT: deviceId for telemetry
{ "id": "reading-1", "deviceId": "device-789", ... }  // PK: /deviceId

// Logs: synthetic key (date + category)
{ "id": "log-1", "partitionKey": "2024-01-15_errors", ... }  // PK: /partitionKey
```

### Hierarchical Partition Keys
```typescript
// For multi-level distribution (e.g., tenant → user)
// Container created with: /tenantId, /userId

{
  "id": "order-123",
  "tenantId": "acme-corp",
  "userId": "user-456",
  "items": [...]
}

// Query within tenant and user efficiently
```

### Bad Partition Keys
```typescript
// Avoid:
// - Low cardinality (status, type, boolean)
// - Monotonically increasing (timestamp, auto-increment)
// - Frequently updated fields
// - Fields not used in queries

// Bad: Only 3 values → hot partitions
{ "status": "pending" | "completed" | "cancelled" }

// Bad: All writes go to latest partition
{ "timestamp": "2024-01-15T10:30:00Z" }
```

---

## SDK Setup (TypeScript)

### Install
```bash
npm install @azure/cosmos
```

### Initialize Client
```typescript
// lib/cosmosdb.ts
import { CosmosClient, Database, Container } from '@azure/cosmos';

const endpoint = process.env.COSMOS_ENDPOINT!;
const key = process.env.COSMOS_KEY!;
const databaseId = process.env.COSMOS_DATABASE!;

const client = new CosmosClient({ endpoint, key });

// Or with connection string
// const client = new CosmosClient(process.env.COSMOS_CONNECTION_STRING!);

export const database: Database = client.database(databaseId);

export function getContainer(containerId: string): Container {
  return database.container(containerId);
}
```

### Type Definitions
```typescript
// types/cosmos.ts
export interface BaseItem {
  id: string;
  _ts?: number;      // Auto-generated timestamp
  _etag?: string;    // For optimistic concurrency
}

export interface User extends BaseItem {
  userId: string;    // Partition key
  email: string;
  name: string;
  createdAt: string;
  updatedAt: string;
}

export interface Order extends BaseItem {
  userId: string;    // Partition key
  orderId: string;
  items: OrderItem[];
  total: number;
  status: 'pending' | 'paid' | 'shipped' | 'delivered';
  createdAt: string;
}

export interface OrderItem {
  productId: string;
  name: string;
  quantity: number;
  price: number;
}
```

---

## CRUD Operations

### Create Item
```typescript
import { getContainer } from './cosmosdb';
import { User } from './types';

const usersContainer = getContainer('users');

/**
 * Create a user document with a generated id and creation timestamps.
 *
 * @param data User fields other than `id`, `createdAt` and `updatedAt`.
 * @returns The document as returned by the service.
 */
async function createUser(data: Omit<User, 'id' | 'createdAt' | 'updatedAt'>): Promise<User> {
  const timestamp = new Date().toISOString();
  const newUser: User = {
    id: crypto.randomUUID(),
    ...data,
    createdAt: timestamp,
    updatedAt: timestamp
  };

  const response = await usersContainer.items.create(newUser);
  return response.resource as User;
}
```

### Read Item (Point Read)
```typescript
// Most efficient read - requires id AND partition key
/**
 * Point-read a user document.
 *
 * @param userId Partition key value.
 * @param id     Document id.
 * @returns The user, or `null` when the document is missing (either an empty
 *          response or an error whose code is 404).
 */
async function getUser(userId: string, id: string): Promise<User | null> {
  let found: User | undefined;

  try {
    ({ resource: found } = await usersContainer.item(id, userId).read<User>());
  } catch (error: any) {
    // Anything other than "not found" is a real failure.
    if (error.code !== 404) throw error;
    return null;
  }

  return found || null;
}

// If id equals partition key value
/**
 * Point-read a user whose document id is the same as its partition key.
 *
 * @param userId Value used as both document id and partition key.
 * @returns The user, or `null` when the document is missing.
 */
async function getUserById(userId: string): Promise<User | null> {
  // Same point read as getUser, with the id doubling as the partition key.
  return getUser(userId, userId);
}
```

### Query Items
```typescript
// Query within partition (efficient)
/**
 * Return all orders of one user, newest first.
 *
 * Passing `partitionKey` in the query options scopes the request to the
 * user's logical partition explicitly, rather than leaving the routing to be
 * inferred from the `WHERE` clause.
 *
 * @param userId Partition key value of the orders to fetch.
 */
async function getUserOrders(userId: string): Promise<Order[]> {
  const ordersContainer = getContainer('orders');

  const { resources } = await ordersContainer.items
    .query<Order>(
      {
        query: 'SELECT * FROM c WHERE c.userId = @userId ORDER BY c.createdAt DESC',
        parameters: [{ name: '@userId', value: userId }]
      },
      { partitionKey: userId }
    )
    .fetchAll();

  return resources;
}

// Cross-partition query (use sparingly)
/**
 * Return every order with the given status, regardless of owner.
 *
 * The filter is not on the partition key, so the query is not limited to a
 * single partition.
 *
 * @param status Status value to match.
 */
async function getOrdersByStatus(status: string): Promise<Order[]> {
  const querySpec = {
    query: 'SELECT * FROM c WHERE c.status = @status',
    parameters: [{ name: '@status', value: status }]
  };

  const iterator = getContainer('orders').items.query<Order>(querySpec);
  const page = await iterator.fetchAll();

  return page.resources;
}

// Paginated query
/**
 * Fetch one page of a user's orders, newest first.
 *
 * `partitionKey` scopes the query to the user's logical partition explicitly,
 * so the continuation token refers to a single partition.
 *
 * @param userId            Partition key value of the orders to fetch.
 * @param pageSize          Maximum number of items per page.
 * @param continuationToken Token returned by the previous call; omit for the first page.
 * @returns The page of orders and the token for the next page
 *          (undefined once the last page has been read).
 */
async function getOrdersPaginated(
  userId: string,
  pageSize: number = 10,
  continuationToken?: string
): Promise<{ items: Order[]; continuationToken?: string }> {
  const ordersContainer = getContainer('orders');

  const queryIterator = ordersContainer.items.query<Order>(
    {
      query: 'SELECT * FROM c WHERE c.userId = @userId ORDER BY c.createdAt DESC',
      parameters: [{ name: '@userId', value: userId }]
    },
    {
      partitionKey: userId,
      maxItemCount: pageSize,
      continuationToken
    }
  );

  const { resources, continuationToken: nextToken } = await queryIterator.fetchNext();

  return {
    items: resources,
    continuationToken: nextToken
  };
}
```

### Update Item
```typescript
// Replace entire item
/**
 * Read a user, merge in the given changes and replace the stored document.
 *
 * `id` and `userId` (the partition key) identify the document and cannot be
 * changed by a replace, so any values for them in `updates` are ignored
 * instead of producing a document that no longer matches its own address.
 *
 * This is an unguarded read-modify-write; use `updateUserWithETag` when
 * concurrent writers are possible.
 *
 * @param userId  Partition key value.
 * @param id      Document id.
 * @param updates Fields to overwrite.
 * @returns The document as returned by the service after the replace.
 * @throws Error when the user does not exist.
 */
async function updateUser(userId: string, id: string, updates: Partial<User>): Promise<User> {
  const existing = await getUser(userId, id);
  if (!existing) throw new Error('User not found');

  const updated: User = {
    ...existing,
    ...updates,
    id: existing.id,          // Identity fields are immutable
    userId: existing.userId,
    updatedAt: new Date().toISOString()
  };

  const { resource } = await usersContainer.item(id, userId).replace(updated);
  return resource as User;
}

// Partial update (patch operations)
/**
 * Apply patch operations to a user document without sending the whole item.
 *
 * @param userId     Partition key value.
 * @param id         Document id.
 * @param operations Patch operations (`op`, `path`, `value`) to apply.
 * @returns The document as returned by the service after the patch.
 */
async function patchUser(userId: string, id: string, operations: any[]): Promise<User> {
  const target = usersContainer.item(id, userId);
  const response = await target.patch(operations);
  return response.resource as User;
}

// Usage:
await patchUser('user-123', 'user-123', [
  { op: 'set', path: '/name', value: 'New Name' },
  { op: 'set', path: '/updatedAt', value: new Date().toISOString() },
  { op: 'incr', path: '/loginCount', value: 1 }
]);
```

### Delete Item
```typescript
/**
 * Delete a user document.
 *
 * @param userId Partition key value.
 * @param id     Document id.
 */
async function deleteUser(userId: string, id: string): Promise<void> {
  const target = usersContainer.item(id, userId);
  await target.delete();
}
```

### Optimistic Concurrency (ETags)
```typescript
/**
 * Replace a user document only if it still carries the expected ETag.
 *
 * @param userId  Partition key value.
 * @param id      Document id.
 * @param updates Fields to overwrite.
 * @param etag    ETag the caller last saw; sent as an If-Match condition.
 * @returns The document as returned by the service after the replace.
 * @throws Error when the user does not exist, or when the replace fails with
 *         code 412 (the stored document no longer matches `etag`).
 */
async function updateUserWithETag(
  userId: string,
  id: string,
  updates: Partial<User>,
  etag: string
): Promise<User> {
  const current = await getUser(userId, id);
  if (!current) throw new Error('User not found');

  const merged: User = {
    ...current,
    ...updates,
    updatedAt: new Date().toISOString()
  };
  const requestOptions = {
    accessCondition: { type: 'IfMatch', condition: etag }
  };

  try {
    const response = await usersContainer.item(id, userId).replace(merged, requestOptions);
    return response.resource as User;
  } catch (error: any) {
    // 412 = precondition failed: someone else wrote the document first.
    if (error.code !== 412) throw error;
    throw new Error('Document was modified by another process');
  }
}
```

---

## Consistency Levels

| Level | Guarantees | Latency | Use Case |
|-------|-----------|---------|----------|
| **Strong** | Linearizable reads | Highest | Financial, inventory |
| **Bounded Staleness** | Consistent within bounds | High | Leaderboards, counters |
| **Session** | Read your writes | Medium | User sessions (default) |
| **Consistent Prefix** | Ordered reads | Low | Social feeds |
| **Eventual** | No ordering guarantee | Lowest | Analytics, logs |

### Set Consistency Per Request
```typescript
// Relax consistency for a single request.
// A per-request override can only be WEAKER than the account's default level
// (e.g. Session -> Eventual); asking for a stronger level than the default
// does not strengthen the read. To get Strong reads, set Strong as the
// account's default consistency.
const { resource } = await usersContainer.item(id, userId).read<User>({
  consistencyLevel: 'Eventual'
});

// For queries
const { resources } = await container.items.query(
  { query: 'SELECT * FROM c' },
  { consistencyLevel: 'ConsistentPrefix' }
).fetchAll();
```

---

## Batch Operations

### Transactional Batch (Same Partition)
```typescript
/**
 * Create an order and its items in one transactional batch.
 *
 * Every operation is issued against the same partition key (`userId`).
 *
 * @param userId Partition key value shared by all documents in the batch.
 * @param order  Order document to create.
 * @param items  Item documents; `userId` and `orderId` are stamped onto each.
 * @throws Error when any operation in the batch reports a status of 400 or above.
 */
async function createOrderWithItems(userId: string, order: Order, items: any[]): Promise<void> {
  const ordersContainer = getContainer('orders');

  const createOrder = { operationType: 'Create' as const, resourceBody: order };
  const createItems = items.map(item => ({
    operationType: 'Create' as const,
    resourceBody: { ...item, userId, orderId: order.orderId }
  }));
  const operations = [createOrder, ...createItems];

  const { result } = await ordersContainer.items.batch(operations, userId);

  // Check if any operation failed
  const anyFailed = result.some(r => r.statusCode >= 400);
  if (anyFailed) {
    throw new Error('Batch operation failed');
  }
}
```

### Bulk Operations
```typescript
// For large-scale imports (not transactional)
/**
 * Import many users with bulk operations (not transactional).
 *
 * Bulk calls report success or failure per operation rather than throwing,
 * so the per-operation status codes are inspected and the import fails
 * loudly if any document was not created. All chunks are still attempted
 * before the error is raised.
 *
 * @param users Users to create; an empty array is a no-op.
 * @throws Error summarising how many operations failed and with which status codes.
 */
async function bulkImportUsers(users: User[]): Promise<void> {
  const operations = users.map(user => ({
    operationType: 'Create' as const,
    resourceBody: user,
    partitionKey: user.userId
  }));

  const failedStatusCodes: number[] = [];

  // Process in chunks
  const chunkSize = 100;
  for (let i = 0; i < operations.length; i += chunkSize) {
    const chunk = operations.slice(i, i + chunkSize);
    const responses = await usersContainer.items.bulk(chunk);

    for (const response of responses) {
      if (response.statusCode >= 400) {
        failedStatusCodes.push(response.statusCode);
      }
    }
  }

  if (failedStatusCodes.length > 0) {
    const distinctCodes = [...new Set(failedStatusCodes)].join(', ');
    throw new Error(
      `Bulk import failed for ${failedStatusCodes.length} of ${operations.length} users (status codes: ${distinctCodes})`
    );
  }
}
```

---

## Change Feed

### Process Changes
```typescript
import { ChangeFeedStartFrom } from '@azure/cosmos';

// Small helper used to pause between polls when the feed is idle.
const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));

/**
 * Read the 'orders' change feed from the beginning using the pull model.
 *
 * `ChangeFeedStartFrom` belongs to the pull-model iterator returned by
 * `getChangeFeedIterator()`, which is advanced with `readNext()`.
 * The loop keeps polling: a 304 response means there is nothing new yet.
 */
async function processChangeFeed(): Promise<void> {
  const container = getContainer('orders');

  const changeFeedIterator = container.items.getChangeFeedIterator({
    changeFeedStartFrom: ChangeFeedStartFrom.Beginning()
  });

  while (changeFeedIterator.hasMoreResults) {
    const response = await changeFeedIterator.readNext();

    if (response.statusCode === 304) {
      // No new changes
      await sleep(1000);
      continue;
    }

    for (const item of response.result) {
      console.log('Changed item:', item);
      // Process the change...
    }
  }
}

// For production, persist response.continuationToken after each page so
// processing can resume where it left off after a restart
```

### Change Feed Processor Pattern
```typescript
/**
 * Sketch of a change feed processor: changes from the `orders` container are
 * handed to a callback while progress is tracked in a `leases` container.
 *
 * NOTE(review): the fluent `.for(...).withLeaseContainer(...).build()` builder
 * used below could not be matched to a documented @azure/cosmos (JavaScript)
 * API; treat this as pseudocode and confirm against the SDK version in use.
 */
async function startChangeFeedProcessor(): Promise<void> {
  const sourceContainer = getContainer('orders');
  const leaseContainer = getContainer('leases');

  const changeFeedProcessor = sourceContainer.items.changeFeed
    .for(item => {
      // Process each change
      console.log('Processing:', item);
    })
    .withLeaseContainer(leaseContainer)
    .build();

  await changeFeedProcessor.start();
}
```

---

## Python SDK

### Install
```bash
pip install azure-cosmos
```

### Setup and Operations
```python
# cosmos_db.py
import os
from azure.cosmos import CosmosClient, PartitionKey
from azure.cosmos.exceptions import CosmosResourceNotFoundError
from typing import Optional, List
from datetime import datetime
import uuid

# Initialize client
endpoint = os.environ['COSMOS_ENDPOINT']
key = os.environ['COSMOS_KEY']
database_name = os.environ['COSMOS_DATABASE']

client = CosmosClient(endpoint, key)
database = client.get_database_client(database_name)


def get_container(container_name: str):
    """Return a client for the named container in the configured database."""
    container_client = database.get_container_client(container_name)
    return container_client


# CRUD Operations
users_container = get_container('users')


def create_user(email: str, name: str, user_id: Optional[str] = None) -> dict:
    """Create a user document and return the stored item.

    Args:
        email: The user's email address.
        name: The user's display name.
        user_id: Identifier to use for the document. When omitted (or empty),
            a random UUID4 string is generated.

    Returns:
        The item as returned by ``users_container.create_item``.
    """
    user_id = user_id or str(uuid.uuid4())
    # Naive UTC timestamp in ISO-8601 form (no offset suffix)
    now = datetime.utcnow().isoformat()

    user = {
        'id': user_id,
        'userId': user_id,  # Partition key - same value as the document id
        'email': email,
        'name': name,
        'createdAt': now,
        'updatedAt': now
    }

    return users_container.create_item(user)


def get_user(user_id: str) -> Optional[dict]:
    """Point-read a user by id; return ``None`` when the document is missing.

    The id doubles as the partition key, so both are passed as ``user_id``.
    """
    try:
        found = users_container.read_item(item=user_id, partition_key=user_id)
    except CosmosResourceNotFoundError:
        return None
    return found


def query_users(email_domain: str) -> List[dict]:
    """Return every user whose email contains ``email_domain``.

    Issues a parameterized query across all partitions and materializes the
    results into a list.
    """
    matches = users_container.query_items(
        query="SELECT * FROM c WHERE CONTAINS(c.email, @domain)",
        parameters=[{'name': '@domain', 'value': email_domain}],
        enable_cross_partition_query=True
    )
    return [user for user in matches]


def update_user(user_id: str, **updates) -> dict:
    """Apply ``updates`` to an existing user and replace the stored document.

    Raises:
        ValueError: If no user with ``user_id`` exists.

    Returns:
        The item as returned by ``users_container.replace_item``.
    """
    existing = get_user(user_id)
    if not existing:
        raise ValueError('User not found')

    # Later keys win: caller updates first, then the refreshed timestamp
    merged = {
        **existing,
        **updates,
        'updatedAt': datetime.utcnow().isoformat(),
    }

    return users_container.replace_item(item=user_id, body=merged)


def delete_user(user_id: str) -> None:
    """Delete the user document whose id and partition key are both ``user_id``."""
    users_container.delete_item(partition_key=user_id, item=user_id)


# Paginated query
def get_users_paginated(page_size: int = 10, continuation_token: Optional[str] = None):
    """Fetch one page of users, newest first.

    Args:
        page_size: Maximum number of items per page.
        continuation_token: Token returned by a previous call, or ``None`` to
            start from the first page.

    Returns:
        A dict with ``items`` (the page as a list, empty when there are no
        results) and ``continuation_token`` (token for the next page).

    NOTE(review): the Python SDK may not hand back continuation tokens for
    cross-partition ORDER BY queries - confirm for the SDK version in use, or
    scope the query to a single partition.
    """
    query = "SELECT * FROM c ORDER BY c.createdAt DESC"

    items = users_container.query_items(
        query=query,
        enable_cross_partition_query=True,
        max_item_count=page_size
    )

    # The continuation token is consumed by the pager, not by query_items()
    pager = items.by_page(continuation_token)
    try:
        results = list(next(pager))
    except StopIteration:
        # No pages at all: report an empty page instead of raising
        results = []

    return {
        'items': results,
        'continuation_token': pager.continuation_token
    }
```

---

## Indexing

### Custom Indexing Policy
```json
{
  "indexingMode": "consistent",
  "automatic": true,
  "includedPaths": [
    { "path": "/userId/?" },
    { "path": "/status/?" },
    { "path": "/createdAt/?" }
  ],
  "excludedPaths": [
    { "path": "/*" },
    { "path": "/content/*" },
    { "path": "/_etag/?" }
  ],
  "compositeIndexes": [
    [
      { "path": "/userId", "order": "ascending" },
      { "path": "/createdAt", "order": "descending" }
    ]
  ]
}
```

### Create Container with Index
```typescript
await database.containers.createIfNotExists({
  id: 'orders',
  partitionKey: { paths: ['/userId'] },
  indexingPolicy: {
    indexingMode: 'consistent',
    includedPaths: [
      { path: '/userId/?' },
      { path: '/status/?' },
      { path: '/createdAt/?' }
    ],
    excludedPaths: [
      { path: '/*' }  // Exclude all by default
    ]
  }
});
```

---

## Throughput Management

### Provisioned Throughput
```typescript
// Container level: dedicated (manual) throughput set at creation time
await database.containers.createIfNotExists({
  id: 'orders',
  partitionKey: { paths: ['/userId'] },
  throughput: 1000  // RU/s
});

// Scale throughput: RU/s are changed by replacing the container's offer.
// `client` is the CosmosClient instance that `database` was obtained from.
const container = database.container('orders');
const { resource: offer } = await container.readOffer();
if (!offer) {
  // Containers using shared database throughput (or serverless) have no offer
  throw new Error('Container "orders" has no dedicated throughput offer');
}
await client.offer(offer.id).replace({
  ...offer,
  content: { ...offer.content, offerThroughput: 2000 }
});
```

### Autoscale
```typescript
await database.containers.createIfNotExists({
  id: 'orders',
  partitionKey: { paths: ['/userId'] },
  maxThroughput: 10000  // Auto-scales 10% to 100%
});
```

### Serverless
```typescript
// Serverless is a capacity mode chosen when the Cosmos DB ACCOUNT is created;
// it is not selected per container. On a serverless account, containers are
// created without any throughput setting.
// Pay per request (good for dev/test, intermittent workloads)
await database.containers.createIfNotExists({
  id: 'orders',
  partitionKey: { paths: ['/userId'] }
  // No throughput here: required on serverless accounts. On a provisioned
  // account, omitting it does NOT make the container serverless.
});
```

---

## CLI Quick Reference

```bash
# Azure CLI
az cosmosdb create --name myaccount --resource-group mygroup
az cosmosdb sql database create --account-name myaccount --name mydb --resource-group mygroup
# --resource-group is required unless a default group is configured
az cosmosdb sql container create \
  --account-name myaccount \
  --resource-group mygroup \
  --database-name mydb \
  --name orders \
  --partition-key-path /userId \
  --throughput 400

# Query
# NOTE(review): current Azure CLI releases do not appear to ship an
# `az cosmosdb sql query` command - confirm with `az cosmosdb sql --help`;
# use the portal Data Explorer or an SDK if it is unavailable.
az cosmosdb sql query --account-name myaccount --database-name mydb \
  --container-name orders --query "SELECT * FROM c"

# Keys
az cosmosdb keys list --name myaccount --resource-group mygroup
az cosmosdb keys list --name myaccount --resource-group mygroup --type connection-strings
```

---

## Cost Optimization

| Strategy | Impact |
|----------|--------|
| **Right partition key** | Avoid hot partitions (wasted RUs) |
| **Index only what you query** | Reduce write RU cost |
| **Use point reads** | 1 RU vs 3+ RU for queries |
| **Serverless for dev/test** | Pay per request |
| **Autoscale for production** | Scale down during low traffic |
| **TTL for temporary data** | Auto-delete old items |

### Time-to-Live (TTL)
```typescript
// Enable TTL on container
await database.containers.createIfNotExists({
  id: 'sessions',
  partitionKey: { paths: ['/userId'] },
  defaultTtl: 3600  // 1 hour
});

// Per-item TTL
const session = {
  id: 'session-123',
  userId: 'user-456',
  ttl: 1800  // Override: 30 minutes
};
```

---

## Anti-Patterns

- **Bad partition key** - Low cardinality causes hot partitions
- **Cross-partition queries** - Expensive; design for single-partition queries
- **Over-indexing** - Increases write cost; index only queried paths
- **Large items** - Max 2MB; store blobs in Azure Blob Storage
- **Ignoring RU cost** - Monitor and optimize expensive queries
- **Strong consistency everywhere** - Use Session (default) unless required
- **No retry logic** - Handle 429 (throttling) with exponential backoff
- **Missing TTL** - Set TTL for temporary/session data


================================================
FILE: skills/base/SKILL.md
================================================
---
name: base
description: Universal coding patterns, constraints, TDD workflow, atomic todos
when-to-use: Always loaded as foundation for all projects - TDD workflow, simplicity rules, atomic todos
user-invocable: false
effort: medium
---

# Base Skill - Universal Patterns

## Core Principle

Complexity is the enemy. Every line of code is a liability. The goal is software simple enough that any engineer (or AI) can understand the entire system in one session.

---

## Simplicity Rules

These limits apply to every file created or modified.

### Function Level
- **Maximum 20 lines per function** - if longer, decompose IMMEDIATELY
- **Maximum 3 parameters per function** - if more, use an options object or decompose
- **Maximum 2 levels of nesting** - flatten with early returns or extract functions
- **Single responsibility** - each function does exactly one thing
- **Descriptive names over comments** - if you need a comment to explain what, rename it

### File Level
- **Maximum 200 lines per file** - if longer, split by responsibility BEFORE continuing
- **Maximum 10 functions per file** - keeps cognitive load manageable
- **One export focus per file** - a file should have one primary purpose

### Module Level
- **Maximum 3 levels of directory nesting** - flat is better than nested
- **Clear boundaries** - each module has a single public interface
- **No circular dependencies** - ever

### Enforcement Protocol

**Before completing ANY file:**
1. Count total lines - if > 200, STOP and split
2. Count functions - if > 10, STOP and split
3. Check each function length - if any > 20 lines, STOP and decompose
4. Check parameter counts - if any > 3, STOP and refactor

**If limits are exceeded during development:**
```
⚠️ FILE SIZE VIOLATION DETECTED

[filename] has [X] lines (limit: 200)

Splitting into:
- [filename-a].ts - [responsibility A]
- [filename-b].ts - [responsibility B]
```

**Never defer refactoring.** Fix violations immediately, not "later".

---

## Architectural Patterns

### Functional Core, Imperative Shell
- Pure functions for business logic - no side effects, deterministic
- Side effects only at boundaries - API calls, database, file system at edges
- Data in, data out - functions transform data, they don't mutate state

### Composition Over Inheritance
- No inheritance deeper than 1 level - prefer interfaces/composition
- Small, composable utilities - build complex from simple
- Dependency injection - pass dependencies, don't import them directly

### Error Handling
- Fail fast, fail loud - errors surface immediately
- No silent failures - every error is logged or thrown
- Design APIs where misuse is impossible

---

## Testing Philosophy

- **100% coverage on business logic** - the functional core
- **Integration tests for boundaries** - API endpoints, database operations
- **No untested code merges** - CI blocks without passing tests
- **Test behavior, not implementation** - tests survive refactoring
- **Each test runs in isolation** - no interdependence

---

## Anti-Patterns (Never Do This)

- ❌ Global state
- ❌ Magic numbers/strings - use named constants
- ❌ Deep nesting - flatten or extract
- ❌ Long parameter lists - use objects
- ❌ Comments explaining "what" - code should be self-documenting
- ❌ Dead code - delete it, git remembers
- ❌ Copy-paste duplication - extract to shared function
- ❌ God objects/files - split by responsibility
- ❌ Circular dependencies
- ❌ Premature optimization
- ❌ Large PRs - small, focused changes only
- ❌ Mixing refactoring with features - separate commits

---

## Documentation Structure

Every project must have a clear separation between code documentation and project specifications:

```
project/
├── docs/                      # Code documentation
│   ├── architecture.md        # System design decisions
│   ├── api.md                 # API reference (if applicable)
│   └── setup.md               # Development setup guide
├── _project_specs/            # Project specifications
│   ├── overview.md            # Project vision and goals
│   ├── features/              # Feature specifications
│   │   ├── feature-a.md
│   │   └── feature-b.md
│   ├── todos/                 # Atomic todos tracking
│   │   ├── active.md          # Current sprint/focus
│   │   ├── backlog.md         # Future work
│   │   └── completed.md       # Done items (for reference)
│   ├── session/               # Session state (see session-management.md)
│   │   ├── current-state.md   # Live session state
│   │   ├── decisions.md       # Key decisions log
│   │   ├── code-landmarks.md  # Important code locations
│   │   └── archive/           # Past session summaries
│   └── prompts/               # LLM prompt specifications (if AI-first)
└── CLAUDE.md                  # Claude instructions (references skills)
```

### What Goes Where

| Location | Content |
|----------|---------|
| `docs/` | Technical documentation, API refs, setup guides |
| `_project_specs/` | Business logic, features, requirements, todos |
| `_project_specs/session/` | Session state, decisions, context for resumability |
| `CLAUDE.md` | Claude-specific instructions and skill references |

---

## Atomic Todos

All work is tracked as atomic todos with validation and test criteria.

### Todo Format (Required)
```markdown
## [TODO-001] Short descriptive title

**Status:** pending | in-progress | blocked | done
**Priority:** high | medium | low
**Estimate:** XS | S | M | L | XL

### Description
One paragraph describing what needs to be done.

### Acceptance Criteria
- [ ] Criterion 1 - specific, measurable
- [ ] Criterion 2 - specific, measurable

### Validation
How to verify this is complete:
- Manual: [steps to manually test]
- Automated: [test file/command that validates this]

### Test Cases
| Input | Expected Output | Notes |
|-------|-----------------|-------|
| ... | ... | ... |

### Dependencies
- Depends on: [TODO-xxx] (if any)
- Blocks: [TODO-yyy] (if any)

### TDD Execution Log
| Phase | Command | Result | Timestamp |
|-------|---------|--------|-----------|
| RED | `[test command]` | - | - |
| GREEN | `[test command]` | - | - |
| VALIDATE | `[lint && typecheck && test --coverage]` | - | - |
| COMPLETE | Moved to completed.md | - | - |
```

### Todo Rules
1. **Atomic** - Each todo is a single, completable unit of work
2. **Testable** - Every todo has validation criteria and test cases
3. **Sized** - If larger than "M", break it down further
4. **Independent** - Minimize dependencies between todos
5. **Tracked** - Move each todo from active.md to completed.md when done

### Todo Execution Workflow (TDD - Mandatory)

**Every todo MUST follow this exact workflow. No exceptions.**

```
┌─────────────────────────────────────────────────────────────┐
│  1. RED: Write Tests First                                  │
│     └─ Create test file(s) based on Test Cases table        │
│     └─ Tests should cover all acceptance criteria           │
│     └─ Run tests → ALL MUST FAIL (proves tests are valid)   │
├─────────────────────────────────────────────────────────────┤
│  2. GREEN: Implement the Feature                            │
│     └─ Write minimum code to make tests pass                │
│     └─ Follow simplicity rules (20 lines/function, etc.)    │
│     └─ Run tests → ALL MUST PASS                            │
├─────────────────────────────────────────────────────────────┤
│  3. VALIDATE: Quality Gates                                 │
│     └─ Run linter (auto-fix if possible)                    │
│     └─ Run type checker (tsc/mypy/pyright)                  │
│     └─ Run full test suite with coverage                    │
│     └─ Verify coverage threshold (≥80%)                     │
├─────────────────────────────────────────────────────────────┤
│  4. COMPLETE: Mark Done                                     │
│     └─ Only after ALL validations pass                      │
│     └─ Move todo to completed.md                            │
│     └─ Checkpoint session state                             │
└─────────────────────────────────────────────────────────────┘
```

#### Execution Commands by Stack

**Node.js/TypeScript:**
```bash
# 1. RED - Run tests (expect failures)
npm test -- --grep "todo-description"

# 2. GREEN - Run tests (expect pass)
npm test -- --grep "todo-description"

# 3. VALIDATE - Full quality check
npm run lint && npm run typecheck && npm test -- --coverage
```

**Python:**
```bash
# 1. RED - Run tests (expect failures)
pytest -k "todo_description" -v

# 2. GREEN - Run tests (expect pass)
pytest -k "todo_description" -v

# 3. VALIDATE - Full quality check
ruff check . && mypy . && pytest --cov --cov-fail-under=80
```

**React/Next.js:**
```bash
# 1. RED - Run tests (expect failures)
npm test -- --testPathPattern="ComponentName"

# 2. GREEN - Run tests (expect pass)
npm test -- --testPathPattern="ComponentName"

# 3. VALIDATE - Full quality check
npm run lint && npm run typecheck && npm test -- --coverage --watchAll=false
```

#### Blocking Conditions

**NEVER mark a todo as complete if:**
- ❌ Tests were not written first (skipped RED phase)
- ❌ Tests did not fail initially (invalid tests)
- ❌ Any test is failing
- ❌ Linter has errors (warnings may be acceptable)
- ❌ Type checker has errors
- ❌ Coverage dropped below threshold

**If blocked by failures:**
```markdown
## [TODO-042] - BLOCKED

**Blocking Reason:** [Lint error in X / Test failure in Y / Coverage at 75%]
**Action Required:** [Specific fix needed]
```

### Bug Fix Workflow (TDD - Mandatory)

**When a user reports a bug, NEVER jump to fixing it directly.**

```
┌─────────────────────────────────────────────────────────────┐
│  1. DIAGNOSE: Identify the Test Gap                         │
│     └─ Run existing tests - do any fail?                    │
│     └─ If tests pass but bug exists → tests are incomplete  │
│     └─ Document: "Test gap: [what was missed]"              │
├─────────────────────────────────────────────────────────────┤
│  2. RED: Write a Failing Test for the Bug                   │
│     └─ Create test that reproduces the exact bug            │
│     └─ Test should FAIL with current code                   │
│     └─ This proves the test catches the bug                 │
├─────────────────────────────────────────────────────────────┤
│  3. GREEN: Fix the Bug                                      │
│     └─ Write minimum code to make the test pass             │
│     └─ Run test → must PASS now                             │
├─────────────────────────────────────────────────────────────┤
│  4. VALIDATE: Full Quality Check                            │
│     └─ Run ALL tests (not just the new one)                 │
│     └─ Run linter and type checker                          │
│     └─ Verify no regression in coverage                     │
└─────────────────────────────────────────────────────────────┘
```

#### Bug Report Todo Format

```markdown
## [BUG-001] Short description of the bug

**Status:** pending
**Priority:** high
**Reported:** [how user reported it / reproduction steps]

### Bug Description
What is happening vs. what should happen.

### Reproduction Steps
1. Step one
2. Step two
3. Observe: [incorrect behavior]
4. Expected: [correct behavior]

### Test Gap Analysis
- Existing test coverage: [list relevant test files]
- Gap identified: [what the tests missed]
- New test needed: [describe the test to add]

### Test Cases for Bug
| Input | Current (Bug) | Expected (Fixed) |
|-------|---------------|------------------|
| ... | ... | ... |

### TDD Execution Log
| Phase | Command | Result | Timestamp |
|-------|---------|--------|-----------|
| DIAGNOSE | `npm test` | All pass (gap!) | - |
| RED | `npm test -- --grep "bug description"` | 1 test failed ✓ | - |
| GREEN | `npm test -- --grep "bug description"` | 1 test passed ✓ | - |
| VALIDATE | `npm run lint && npm run typecheck && npm test -- --coverage` | Pass ✓ | - |
```

#### Bug Fix Anti-Patterns

- ❌ **Fixing without a test** - Bug will likely return
- ❌ **Writing test after fix** - Can't prove test catches the bug
- ❌ **Skipping test gap analysis** - Misses why tests didn't catch it
- ❌ **Only testing the fix** - Must run full test suite for regressions

### Example Atomic Todo
```markdown
## [TODO-042] Add email validation to signup form

**Status:** pending
**Priority:** high
**Estimate:** S

### Description
Validate email format on the signup form before submission. Show inline error if invalid.

### Acceptance Criteria
- [ ] Email field shows error for invalid format
- [ ] Error clears when user fixes the email
- [ ] Form cannot submit with invalid email
- [ ] Valid emails pass through without error

### Validation
- Manual: Enter "notanemail" in signup form, verify error appears
- Automated: `npm test -- --grep "email validation"`

### Test Cases
| Input | Expected Output | Notes |
|-------|-----------------|-------|
| user@example.com | Valid, no error | Standard email |
| user@sub.example.com | Valid, no error | Subdomain |
| notanemail | Invalid, show error | No @ symbol |
| user@ | Invalid, show error | No domain |
| @example.com | Invalid, show error | No local part |

### Dependencies
- Depends on: [TODO-041] Signup form component
- Blocks: [TODO-045] Signup flow integration test

### TDD Execution Log
| Phase | Command | Result | Timestamp |
|-------|---------|--------|-----------|
| RED | `npm test -- --grep "email validation"` | 5 tests failed ✓ | - |
| GREEN | `npm test -- --grep "email validation"` | 5 tests passed ✓ | - |
| VALIDATE | `npm run lint && npm run typecheck && npm test -- --coverage` | Pass, 84% coverage ✓ | - |
| COMPLETE | Moved to completed.md | ✓ | - |
```

---

## Credentials Management 
When a project needs API keys, always ask the user for their centralized access file first.

### Workflow
```
1. Ask: "Do you have an access keys file? (e.g., ~/Documents/Access.txt)"
2. Read and parse the file for known key patterns
3. Validate keys are working
4. Create project .env with found keys
5. Report missing keys and where to get them
```

### Key Patterns to Detect
| Service | Pattern | Env Variable |
|---------|---------|--------------|
| OpenAI | `sk-proj-*` | `OPENAI_API_KEY` |
| Claude | `sk-ant-*` | `ANTHROPIC_API_KEY` |
| Render | `rnd_*` | `RENDER_API_KEY` |
| Replicate | `r8_*` | `REPLICATE_API_TOKEN` |
| Reddit | client_id + secret | `REDDIT_CLIENT_ID`, `REDDIT_CLIENT_SECRET` |

See `credentials.md` for full parsing logic and validation commands.

---

## Security 
Every project must meet these security requirements. See the `security` skill for detailed patterns.

### Essential Security Checks
1. **No secrets in code** - Use environment variables, never commit secrets
2. **`.env` in `.gitignore`** - Always, no exceptions
3. **No secrets in client-exposed env vars** - Never use `VITE_*`, `NEXT_PUBLIC_*` for secrets
4. **Validate all input** - Use Zod/Pydantic at API boundaries
5. **Parameterized queries only** - No string concatenation for SQL
6. **Hash passwords properly** - bcrypt with 12+ rounds
7. **Dependency scanning** - npm audit / safety check must pass

### Required Files
- `.gitignore` with secrets patterns
- `.env.example` with all required vars (no values)
- `scripts/security-check.sh` for pre-commit validation

### Security in CI
Every PR must pass:
- Secret scanning (detect-secrets / trufflehog)
- Dependency audit (npm audit / safety)
- Static analysis (CodeQL)

---

## Quality Gates 
### Coverage Threshold
- **Minimum 80% code coverage** - CI must fail below this
- Business logic (core/) should aim for 100%
- Integration tests cover boundaries

### Pre-Commit Hooks
All projects must have pre-commit hooks that run:
1. Linting (auto-fix where possible)
2. Type checking
3. Tests (at minimum, affected tests)

This catches issues before they hit CI, saving time and keeping the main branch clean.

---

## Session Management 
Maintain context for resumability. See `session-management.md` for full details.

### Core Rule: Checkpoint at Natural Breakpoints

After completing any task, ask:
1. **Decision made?** → Log to `_project_specs/session/decisions.md`
2. **>10 tool calls?** → Full checkpoint to `current-state.md`
3. **Major feature done?** → Archive to `session/archive/`
4. **Otherwise** → Quick update to `current-state.md`

### Session Start
1. Read `_project_specs/session/current-state.md`
2. Check `_project_specs/todos/active.md`
3. Continue from documented "Next Steps"

### Session End
1. Archive current session
2. Update `current-state.md` with handoff notes
3. Ensure next steps are specific and actionable

---

## Response Format

When implementing features (following TDD):
1. **Clarify requirements** if ambiguous
2. **Propose structure** - outline before code
3. **Write tests FIRST** - based on test cases table (RED phase)
4. **Run tests to verify they fail** - proves tests are valid
5. **Implement minimum code** to make tests pass (GREEN phase)
6. **Run full validation** - lint, typecheck, coverage (VALIDATE phase)
7. **Flag complexity** - warn if approaching limits
8. **Checkpoint after completing** - update session state, log TDD execution

**TDD is non-negotiable.** Tests must exist and fail before any implementation begins.

When you notice code violating these rules, **stop and refactor** before continuing.

---

## Automatic TDD Loops (via Stop Hook)

The Stop hook in `.claude/settings.json` runs tests after each response. If tests fail, the failure output is fed back to Claude automatically. No manual intervention needed.

See the `iterative-development` skill for setup details.

### How It Works

1. You ask Claude to implement something
2. Claude writes tests + implementation
3. Stop hook runs tests automatically
4. If failures: output fed back to Claude, it fixes and tries again
5. If all pass: Claude stops, work is done

### When It Activates

| Task Type | TDD Loop? |
|-----------|-----------|
| New feature | Yes - tests run after each response |
| Bug fix | Yes - write failing test first |
| Refactoring | Yes - existing tests catch regressions |
| Simple question/explanation | No - no code changes |
| One-line fix | No - trivial change |


================================================
FILE: skills/cloudflare-d1/SKILL.md
================================================
---
name: cloudflare-d1
description: Cloudflare D1 SQLite database with Workers, Drizzle ORM, migrations
when-to-use: When working with Cloudflare D1 or Workers
user-invocable: false
paths: ["wrangler.toml", "src/worker*", "**/d1/**"]
effort: medium
---

# Cloudflare D1 Skill


Cloudflare D1 is a serverless SQLite database designed for Cloudflare Workers with global distribution and zero cold starts.

**Sources:** [D1 Docs](https://developers.cloudflare.com/d1/) | [Drizzle + D1](https://orm.drizzle.team/docs/connect-cloudflare-d1) | [Wrangler CLI](https://developers.cloudflare.com/workers/wrangler/)

---

## Core Principle

**SQLite at the edge, migrations in version control, Drizzle for type safety.**

D1 brings SQLite's simplicity to serverless. Design for horizontal scale (multiple small databases) rather than vertical (one large database). Use Drizzle ORM for type-safe queries and migrations.

---

## D1 Stack

| Component | Purpose |
|-----------|---------|
| **D1** | Serverless SQLite database |
| **Workers** | Edge runtime for your application |
| **Wrangler** | CLI for development and deployment |
| **Drizzle ORM** | Type-safe ORM with migrations |
| **Drizzle Kit** | Migration tooling |
| **Hono** | Lightweight web framework (optional) |

---

## Project Setup

### Create Worker Project
```bash
# Create new project
npm create cloudflare@latest my-app -- --template "worker-typescript"
cd my-app

# Install dependencies
npm install drizzle-orm
npm install -D drizzle-kit
```

### Create D1 Database
```bash
# Create database (creates both local and remote)
npx wrangler d1 create my-database

# Output:
# [[d1_databases]]
# binding = "DB"
# database_name = "my-database"
# database_id = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
```

### Configure wrangler.toml
```toml
name = "my-app"
main = "src/index.ts"
compatibility_date = "2024-01-01"

[[d1_databases]]
binding = "DB"
database_name = "my-database"
database_id = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
migrations_dir = "drizzle"
migrations_table = "drizzle_migrations"
```

### Generate TypeScript Types
```bash
# Generate env types from wrangler.toml
npx wrangler types

# Creates worker-configuration.d.ts:
# interface Env {
#   DB: D1Database;
# }
```

---

## Drizzle ORM Setup

### Schema Definition
```typescript
// src/db/schema.ts
// Drizzle table definitions for the D1 (SQLite) database, plus the row types
// inferred from them.
// NOTE: `real` and `blob` are imported but not used by the tables below.
import { sqliteTable, text, integer, real, blob } from 'drizzle-orm/sqlite-core';
import { sql } from 'drizzle-orm';

// Application users. `email` is unique; `role` is limited to 'user' | 'admin'.
// Timestamps are stored as text and default to CURRENT_TIMESTAMP; a column
// default only applies on insert, so updates must set `updatedAt` themselves.
export const users = sqliteTable('users', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  email: text('email').notNull().unique(),
  name: text('name').notNull(),
  role: text('role', { enum: ['user', 'admin'] }).default('user'),
  createdAt: text('created_at').default(sql`CURRENT_TIMESTAMP`),
  updatedAt: text('updated_at').default(sql`CURRENT_TIMESTAMP`)
});

// Posts written by users. `authorId` references users.id and is nullable;
// `published` is an integer column exposed as a boolean.
export const posts = sqliteTable('posts', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  title: text('title').notNull(),
  content: text('content'),
  authorId: integer('author_id').references(() => users.id),
  published: integer('published', { mode: 'boolean' }).default(false),
  viewCount: integer('view_count').default(0),
  createdAt: text('created_at').default(sql`CURRENT_TIMESTAMP`)
});

// Tag names, each unique.
export const tags = sqliteTable('tags', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  name: text('name').notNull().unique()
});

// Join table linking posts and tags. No primary key, NOT NULL, or uniqueness
// constraint is declared here, so duplicate (postId, tagId) rows and null
// references are possible.
export const postTags = sqliteTable('post_tags', {
  postId: integer('post_id').references(() => posts.id),
  tagId: integer('tag_id').references(() => tags.id)
});

// Type exports: $inferSelect is the row shape returned by queries,
// $inferInsert is the shape accepted by inserts.
export type User = typeof users.$inferSelect;
export type NewUser = typeof users.$inferInsert;
export type Post = typeof posts.$inferSelect;
export type NewPost = typeof posts.$inferInsert;
```

### Drizzle Config
```typescript
// drizzle.config.ts
import { defineConfig } from 'drizzle-kit';

export default defineConfig({
  schema: './src/db/schema.ts',
  out: './drizzle',
  dialect: 'sqlite',
  driver: 'd1-http',
  dbCredentials: {
    accountId: process.env.CLOUDFLARE_ACCOUNT_ID!,
    databaseId: process.env.CLOUDFLARE_DATABASE_ID!,
    token: process.env.CLOUDFLARE_D1_TOKEN!
  }
});
```

### Database Client
```typescript
// src/db/index.ts
import { drizzle } from 'drizzle-orm/d1';
import * as schema from './schema';

/** Wraps a D1 binding in a Drizzle client that knows the project schema. */
export function createDb(d1: D1Database) {
  const options = { schema };
  return drizzle(d1, options);
}

export type Database = ReturnType<typeof createDb>;
export * from './schema';
```

---

## Migration Workflow

### Generate Migration
```bash
# Generate migration from schema changes
npx drizzle-kit generate

# Output: drizzle/0000_initial.sql
```

### Apply Migrations Locally
```bash
# Apply to local D1
npx wrangler d1 migrations apply my-database --local

# Or via Drizzle
npx drizzle-kit migrate
```

### Apply Migrations to Production
```bash
# Preview first: list the migrations that have not been applied to remote D1
npx wrangler d1 migrations list my-database --remote

# Apply to remote D1
npx wrangler d1 migrations apply my-database --remote
```

### Migration File Example
```sql
-- drizzle/0000_initial.sql
CREATE TABLE `users` (
  `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
  `email` text NOT NULL,
  `name` text NOT NULL,
  `role` text DEFAULT 'user',
  `created_at` text DEFAULT CURRENT_TIMESTAMP,
  `updated_at` text DEFAULT CURRENT_TIMESTAMP
);

CREATE UNIQUE INDEX `users_email_unique` ON `users` (`email`);

CREATE TABLE `posts` (
  `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
  `title` text NOT NULL,
  `content` text,
  `author_id` integer REFERENCES `users`(`id`),
  `published` integer DEFAULT false,
  `view_count` integer DEFAULT 0,
  `created_at` text DEFAULT CURRENT_TIMESTAMP
);
```

---

## Worker Implementation

### Basic Worker with Hono
```typescript
// src/index.ts
// CRUD API for users on Cloudflare Workers: Hono routing + Drizzle over D1.
import { Hono } from 'hono';
import { createDb, users, posts } from './db';
import type { Database } from './db';
import { eq, desc } from 'drizzle-orm';

type Bindings = {
  DB: D1Database;
};

// Values stored on the request context with c.set() and read with c.get().
// Declaring them here is what makes c.get('db') type-check.
type Variables = {
  db: Database;
};

const app = new Hono<{ Bindings: Bindings; Variables: Variables }>();

/** Parses a route id; returns null unless it is a positive decimal integer. */
function parseId(raw: string): number | null {
  return /^[1-9]\d*$/.test(raw) ? Number(raw) : null;
}

// Middleware to inject db
app.use('*', async (c, next) => {
  c.set('db', createDb(c.env.DB));
  await next();
});

// List users
app.get('/users', async (c) => {
  const db = c.get('db');
  const allUsers = await db.select().from(users);
  return c.json(allUsers);
});

// Get user by ID
app.get('/users/:id', async (c) => {
  const db = c.get('db');
  const id = parseId(c.req.param('id'));
  if (id === null) {
    return c.json({ error: 'Invalid user id' }, 400);
  }

  const user = await db.select().from(users).where(eq(users.id, id)).get();

  if (!user) {
    return c.json({ error: 'User not found' }, 404);
  }
  return c.json(user);
});

// Create user
app.post('/users', async (c) => {
  const db = c.get('db');
  const body = await c.req.json<{ email: string; name: string }>();

  // The type parameter is compile-time only; check the payload at runtime
  if (typeof body?.email !== 'string' || typeof body?.name !== 'string') {
    return c.json({ error: 'email and name are required' }, 400);
  }

  const result = await db.insert(users).values({
    email: body.email,
    name: body.name
  }).returning();

  return c.json(result[0], 201);
});

// Update user
app.put('/users/:id', async (c) => {
  const db = c.get('db');
  const id = parseId(c.req.param('id'));
  if (id === null) {
    return c.json({ error: 'Invalid user id' }, 400);
  }
  const body = await c.req.json<Partial<{ email: string; name: string }>>();

  // Copy only the editable fields so a client cannot overwrite id, role, etc.
  const changes: Partial<{ email: string; name: string }> = {};
  if (typeof body?.email === 'string') changes.email = body.email;
  if (typeof body?.name === 'string') changes.name = body.name;

  const result = await db.update(users)
    .set({ ...changes, updatedAt: new Date().toISOString() })
    .where(eq(users.id, id))
    .returning();

  if (result.length === 0) {
    return c.json({ error: 'User not found' }, 404);
  }
  return c.json(result[0]);
});

// Delete user
app.delete('/users/:id', async (c) => {
  const db = c.get('db');
  const id = parseId(c.req.param('id'));
  if (id === null) {
    return c.json({ error: 'Invalid user id' }, 400);
  }

  const result = await db.delete(users).where(eq(users.id, id)).returning();

  if (result.length === 0) {
    return c.json({ error: 'User not found' }, 404);
  }
  return c.json({ deleted: true });
});

export default app;
```

### Raw D1 API (Without ORM)
```typescript
// src/index.ts
export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    const url = new URL(request.url);

    if (url.pathname === '/users' && request.method === 'GET') {
      const { results } = await env.DB.prepare(
        'SELECT * FROM users ORDER BY created_at DESC'
      ).all();
      return Response.json(results);
    }

    if (url.pathname === '/users' && request.method === 'POST') {
      const body = await request.json() as { email: string; name: string };

      const result = await env.DB.prepare(
        'INSERT INTO users (email, name) VALUES (?, ?) RETURNING *'
      ).bind(body.email, body.name).first();

      return Response.json(result, { status: 201 });
    }

    return new Response('Not Found', { status: 404 });
  }
};
```

---

## Query Patterns

### Select Queries
```typescript
import { eq, and, or, like, gt, desc, asc, count, sql } from 'drizzle-orm';

// Basic select
const allPosts = await db.select().from(posts);

// Select specific columns
const titles = await db.select({ id: posts.id, title: posts.title }).from(posts);

// Where clause
const published = await db.select().from(posts).where(eq(posts.published, true));

// Multiple conditions
const recentPublished = await db.select().from(posts).where(
  and(
    eq(posts.published, true),
    gt(posts.createdAt, '2024-01-01')
  )
);

// OR conditions
const featured = await db.select().from(posts).where(
  or(
    eq(posts.viewCount, 1000),
    like(posts.title, '%featured%')
  )
);

// Order and limit
const topPosts = await db.select()
  .from(posts)
  .orderBy(desc(posts.viewCount))
  .limit(10);

// Pagination
const page2 = await db.select()
  .from(posts)
  .orderBy(desc(posts.createdAt))
  .limit(10)
  .offset(10);

// Count
const postCount = await db.select({ count: count() }).from(posts);
```

### Joins
```typescript
// Inner join
const postsWithAuthors = await db.select({
  post: posts,
  author: users
})
.from(posts)
.innerJoin(users, eq(posts.authorId, users.id));

// Left join
const allPostsWithAuthors = await db.select()
  .from(posts)
  .leftJoin(users, eq(posts.authorId, users.id));

// Many-to-many via junction table
const postsWithTags = await db.select({
  post: posts,
  tag: tags
})
.from(posts)
.leftJoin(postTags, eq(posts.id, postTags.postId))
.leftJoin(tags, eq(postTags.tagId, tags.id));
```

### Insert, Update, Delete
```typescript
// Insert single
const newUser = await db.insert(users).values({
  email: 'user@example.com',
  name: 'John Doe'
}).returning();

// Insert multiple
await db.insert(users).values([
  { email: 'a@test.com', name: 'Alice' },
  { email: 'b@test.com', name: 'Bob' }
]);

// Upsert (insert or update on conflict)
await db.insert(users)
  .values({ email: 'user@test.com', name: 'New Name' })
  .onConflictDoUpdate({
    target: users.email,
    set: { name: 'New Name' }
  });

// Update
await db.update(posts)
  .set({ published: true })
  .where(eq(posts.id, 1));

// Update with increment
await db.update(posts)
  .set({ viewCount: sql`${posts.viewCount} + 1` })
  .where(eq(posts.id, 1));

// Delete
await db.delete(posts).where(eq(posts.id, 1));
```

### Transactions
```typescript
// D1 supports transactions via batch
const results = await db.batch([
  db.insert(users).values({ email: 'a@test.com', name: 'A' }),
  db.insert(users).values({ email: 'b@test.com', name: 'B' }),
  db.update(posts).set({ published: true }).where(eq(posts.id, 1))
]);

// Raw D1 batch
const batchResults = await env.DB.batch([
  env.DB.prepare('INSERT INTO users (email, name) VALUES (?, ?)').bind('a@test.com', 'A'),
  env.DB.prepare('INSERT INTO users (email, name) VALUES (?, ?)').bind('b@test.com', 'B')
]);
```

---

## Local Development

### Start Dev Server
```bash
# Local development with D1
npx wrangler dev

# With specific port
npx wrangler dev --port 8787
```

### Database Management
```bash
# Execute SQL locally
npx wrangler d1 execute my-database --local --command "SELECT * FROM users"

# Execute SQL file
npx wrangler d1 execute my-database --local --file ./seed.sql

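# List tables (no interactive shell; sqlite3 dot-commands like ".tables" are not SQL)
npx wrangler d1 execute my-database --local --command "SELECT name FROM sqlite_master WHERE type='table'"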
```

### Drizzle Studio
```bash
# Run Drizzle Studio for visual DB management
npx drizzle-kit studio
```

### Seed Data
```sql
-- seed.sql
INSERT INTO users (email, name, role) VALUES
  ('admin@example.com', 'Admin User', 'admin'),
  ('user@example.com', 'Test User', 'user');

INSERT INTO posts (title, content, author_id, published) VALUES
  ('First Post', 'Hello World!', 1, true),
  ('Draft Post', 'Work in progress...', 1, false);
```

```bash
# Seed local database
npx wrangler d1 execute my-database --local --file ./seed.sql
```

---

## Multi-Environment Setup

### wrangler.toml
```toml
name = "my-app"
main = "src/index.ts"
compatibility_date = "2024-01-01"

# Development
[env.dev]
[[env.dev.d1_databases]]
binding = "DB"
database_name = "my-database-dev"
database_id = "dev-database-id"

# Staging
[env.staging]
[[env.staging.d1_databases]]
binding = "DB"
database_name = "my-database-staging"
database_id = "staging-database-id"

# Production
[env.production]
[[env.production.d1_databases]]
binding = "DB"
database_name = "my-database-prod"
database_id = "prod-database-id"
```

### Deploy to Environments
```bash
# Deploy to staging
npx wrangler deploy --env staging

# Deploy to production
npx wrangler deploy --env production

# Apply migrations to staging
npx wrangler d1 migrations apply my-database-staging --remote --env staging
```

---

## Testing

### Integration Tests
```typescript
// tests/api.test.ts
import { unstable_dev } from 'wrangler';
import type { UnstableDevWorker } from 'wrangler';
import { describe, beforeAll, afterAll, it, expect } from 'vitest';

describe('API', () => {
  let worker: UnstableDevWorker;

  beforeAll(async () => {
    worker = await unstable_dev('src/index.ts', {
      experimental: { disableExperimentalWarning: true }
    });
  });

  afterAll(async () => {
    await worker.stop();
  });

  it('should list users', async () => {
    const res = await worker.fetch('/users');
    expect(res.status).toBe(200);
    const data = await res.json();
    expect(Array.isArray(data)).toBe(true);
  });

  it('should create user', async () => {
    const res = await worker.fetch('/users', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ email: 'test@test.com', name: 'Test' })
    });
    expect(res.status).toBe(201);
  });
});
```

---

## CLI Quick Reference

```bash
# Database
wrangler d1 create <name>                    # Create database
wrangler d1 list                             # List databases
wrangler d1 info <name>                      # Database info
wrangler d1 delete <name>                    # Delete database

# Migrations
wrangler d1 migrations list <name>           # List migrations
wrangler d1 migrations apply <name> --local  # Apply locally
wrangler d1 migrations apply <name> --remote # Apply to production

# SQL execution
wrangler d1 execute <name> --command "SQL"   # Run SQL
wrangler d1 execute <name> --file ./file.sql # Run SQL file
wrangler d1 execute <name> --local           # Run on local
wrangler d1 execute <name> --remote          # Run on production

# Development
wrangler dev                                 # Start local server
wrangler types                               # Generate TypeScript types
wrangler deploy                              # Deploy to production

# Drizzle
drizzle-kit generate                         # Generate migrations
drizzle-kit migrate                          # Apply migrations
drizzle-kit studio                           # Open Drizzle Studio
drizzle-kit push                             # Push schema (dev only)
```

---

## D1 Limits & Considerations

| Limit | Value |
|-------|-------|
| **Database size** | 10 GB max |
| **Row size** | 1 MB max |
| **SQL statement** | 100 KB max |
| **Batch size** | 1000 statements |
| **Reads per day (free)** | 5 million |
| **Writes per day (free)** | 100,000 |

---

## Anti-Patterns

- **Single large database** - Design for multiple smaller databases (per-tenant)
- **No migrations** - Always version control schema changes
- **Raw SQL everywhere** - Use Drizzle for type safety
- **Testing only against local D1** - Always test against the real (remote) D1 database before deploying
- **Large blobs in D1** - Use R2 for file storage
- **Complex joins** - D1 is SQLite; keep queries simple
- **No batching** - Use batch for multiple operations
- **Ignoring limits** - Monitor usage on free tier


================================================
FILE: skills/code-deduplication/SKILL.md
================================================
---
name: code-deduplication
description: Prevent semantic code duplication with capability index and check-before-write
when-to-use: Before creating new utility functions or shared code
user-invocable: false
effort: medium
---

# Code Deduplication Skill


**Purpose:** Prevent semantic duplication and code bloat. Maintain a capability index so Claude always knows what exists before writing something new.

---

## Core Philosophy

```
┌─────────────────────────────────────────────────────────────────┐
│  CHECK BEFORE YOU WRITE                                         │
│  ─────────────────────────────────────────────────────────────  │
│  AI doesn't copy/paste - it reimplements.                       │
│  The problem isn't duplicate code, it's duplicate PURPOSE.      │
│                                                                 │
│  Before writing ANY new function:                               │
│  1. Check CODE_INDEX.md for existing capabilities               │
│  2. Search codebase for similar functionality                   │
│  3. Extend existing code if possible                            │
│  4. Only create new if nothing suitable exists                  │
├─────────────────────────────────────────────────────────────────┤
│  AFTER WRITING: Update the index immediately.                   │
│  PERIODICALLY: Run /audit-duplicates to catch overlap.          │
└─────────────────────────────────────────────────────────────────┘
```

---

## Code Index Structure

Maintain `CODE_INDEX.md` in project root, organized by **capability** not file location:

```markdown
# Code Index

*Last updated: [timestamp]*
*Run `/update-code-index` to regenerate*

## Quick Reference

| Category | Count | Location |
|----------|-------|----------|
| Date/Time | 5 functions | src/utils/dates.ts |
| Validation | 8 functions | src/utils/validate.ts |
| API Clients | 12 functions | src/api/*.ts |
| Auth | 6 functions | src/auth/*.ts |

---

## Date/Time Operations

| Function | Location | Does What | Params |
|----------|----------|-----------|--------|
| `formatDate()` | utils/dates.ts:15 | Formats Date → "Jan 15, 2024" | `(date: Date, format?: string)` |
| `formatRelative()` | utils/dates.ts:32 | Formats Date → "2 days ago" | `(date: Date)` |
| `parseDate()` | utils/dates.ts:48 | Parses string → Date | `(str: string, format?: string)` |
| `isExpired()` | auth/tokens.ts:22 | Checks if timestamp past now | `(timestamp: number)` |
| `addDays()` | utils/dates.ts:61 | Adds days to date | `(date: Date, days: number)` |

---

## Validation

| Function | Location | Does What | Params |
|----------|----------|-----------|--------|
| `isEmail()` | utils/validate.ts:10 | Validates email format | `(email: string)` |
| `isPhone()` | utils/validate.ts:25 | Validates phone with country | `(phone: string, country?: string)` |
| `isURL()` | utils/validate.ts:42 | Validates URL format | `(url: string)` |
| `isUUID()` | utils/validate.ts:55 | Validates UUID v4 | `(id: string)` |
| `sanitizeHTML()` | utils/sanitize.ts:12 | Strips XSS from input | `(html: string)` |
| `sanitizeSQL()` | utils/sanitize.ts:28 | Escapes SQL special chars | `(input: string)` |

---

## String Operations

| Function | Location | Does What | Params |
|----------|----------|-----------|--------|
| `slugify()` | utils/strings.ts:8 | Converts to URL slug | `(str: string)` |
| `truncate()` | utils/strings.ts:20 | Truncates with ellipsis | `(str: string, len: number)` |
| `capitalize()` | utils/strings.ts:32 | Capitalizes first letter | `(str: string)` |
| `pluralize()` | utils/strings.ts:40 | Adds s/es correctly | `(word: string, count: number)` |

---

## API Clients

| Function | Location | Does What | Returns |
|----------|----------|-----------|---------|
| `fetchUser()` | api/users.ts:15 | GET /users/:id | `Promise<User>` |
| `fetchUsers()` | api/users.ts:28 | GET /users with pagination | `Promise<User[]>` |
| `createUser()` | api/users.ts:45 | POST /users | `Promise<User>` |
| `updateUser()` | api/users.ts:62 | PATCH /users/:id | `Promise<User>` |
| `deleteUser()` | api/users.ts:78 | DELETE /users/:id | `Promise<void>` |

---

## Error Handling

| Function/Class | Location | Does What |
|----------------|----------|-----------|
| `AppError` | utils/errors.ts:5 | Base error class with code |
| `ValidationError` | utils/errors.ts:20 | Input validation failures |
| `NotFoundError` | utils/errors.ts:32 | Resource not found |
| `handleAsync()` | utils/errors.ts:45 | Wraps async route handlers |
| `errorMiddleware()` | middleware/error.ts:10 | Express error handler |

---

## Hooks (React)

| Hook | Location | Does What |
|------|----------|-----------|
| `useAuth()` | hooks/useAuth.ts | Auth state + login/logout |
| `useUser()` | hooks/useUser.ts | Current user data |
| `useDebounce()` | hooks/useDebounce.ts | Debounces value changes |
| `useLocalStorage()` | hooks/useLocalStorage.ts | Persisted state |
| `useFetch()` | hooks/useFetch.ts | Data fetching with loading/error |

---

## Components (React)

| Component | Location | Does What |
|-----------|----------|-----------|
| `Button` | components/Button.tsx | Styled button with variants |
| `Input` | components/Input.tsx | Form input with validation |
| `Modal` | components/Modal.tsx | Dialog overlay |
| `Toast` | components/Toast.tsx | Notification popup |
| `Spinner` | components/Spinner.tsx | Loading indicator |
```

---

## File Header Format

Every file should have a summary header:

### TypeScript/JavaScript

```typescript
/**
 * @file User authentication utilities
 * @description Handles login, logout, session management, and token refresh.
 *
 * Key exports:
 * - login(email, password) - Authenticates user, returns tokens
 * - logout() - Clears session and tokens
 * - refreshToken() - Gets new access token
 * - validateSession() - Checks if session is valid
 *
 * @see src/api/auth.ts for API endpoints
 * @see src/hooks/useAuth.ts for React hook
 */

import { ... } from '...';
```

### Python

```python
"""
User authentication utilities.

Handles login, logout, session management, and token refresh.

Key exports:
    - login(email, password) - Authenticates user, returns tokens
    - logout() - Clears session and tokens
    - refresh_token() - Gets new access token
    - validate_session() - Checks if session is valid

See Also:
    - src/api/auth.py for API endpoints
    - src/services/user.py for user operations
"""

from typing import ...
```

---

## Function Documentation

Every function needs a one-line summary:

### TypeScript

```typescript
/**
 * Formats a date into a human-readable relative string.
 * Examples: "2 minutes ago", "yesterday", "3 months ago"
 */
export function formatRelative(date: Date): string {
  // ...
}

/**
 * Validates email format and checks for disposable domains.
 * Returns true for valid non-disposable emails.
 */
export function isValidEmail(email: string): boolean {
  // ...
}
```

### Python

```python
def format_relative(date: datetime) -> str:
    """Formats a date into a human-readable relative string.

    Examples: "2 minutes ago", "yesterday", "3 months ago"
    """
    ...

def is_valid_email(email: str) -> bool:
    """Validates email format and checks for disposable domains.

    Returns True for valid non-disposable emails.
    """
    ...
```

---

## Check Before Write Process

### Before Creating ANY New Function

```
┌─────────────────────────────────────────────────────────────────┐
│  BEFORE WRITING NEW CODE                                        │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  1. DESCRIBE what you need in plain English                     │
│     "I need to format a date as relative time"                  │
│                                                                 │
│  2. CHECK CODE_INDEX.md                                         │
│     Search for: date, time, format, relative                    │
│     → Found: formatRelative() in utils/dates.ts                 │
│                                                                 │
│  3. EVALUATE if existing code works                             │
│     - Does it do what I need? → Use it                          │
│     - Close but not quite? → Extend it                          │
│     - Nothing suitable? → Create new, update index              │
│                                                                 │
│  4. If extending, check for breaking changes                    │
│     - Add optional params, don't change existing behavior       │
│     - Update tests for new functionality                        │
└─────────────────────────────────────────────────────────────────┘
```

### Decision Tree

```
Need new functionality
        │
        ▼
Check CODE_INDEX.md for similar
        │
        ├─► Found exact match ──────► USE IT
        │
        ├─► Found similar ──────────► Can it be extended?
        │                                   │
        │                    ┌──────────────┴──────────────┐
        │                    ▼                             ▼
        │               Yes: Extend                   No: Create new
        │               (add params)                  (update index)
        │
        └─► Nothing found ──────────► Create new (update index)
```

---

## Common Duplication Patterns

### Pattern 1: Utility Function Reimplementation

❌ **Bad:** Creating `validateEmail()` when `isEmail()` exists
```typescript
// DON'T: This already exists as isEmail()
function validateEmail(email: string): boolean {
  return /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email);
}
```

✅ **Good:** Check index first, use existing
```typescript
import { isEmail } from '@/utils/validate';

if (isEmail(userInput)) { ... }
```

### Pattern 2: Slightly Different Versions

❌ **Bad:** Multiple date formatters with slight variations
```typescript
// In file A
function formatDate(d: Date) { return d.toLocaleDateString(); }

// In file B
function displayDate(d: Date) { return d.toLocaleDateString('en-US'); }

// In file C
function showDate(d: Date) { return d.toLocaleDateString('en-US', { month: 'short' }); }
```

✅ **Good:** One function with options
```typescript
// utils/dates.ts
function formatDate(d: Date, options?: { locale?: string; format?: 'short' | 'long' }) {
  const locale = options?.locale ?? 'en-US';
  const formatOpts = options?.format === 'short'
    ? { month: 'short', day: 'numeric' }
    : { month: 'long', day: 'numeric', year: 'numeric' };
  return d.toLocaleDateString(locale, formatOpts);
}
```

### Pattern 3: Inline Logic That Should Be Extracted

❌ **Bad:** Same validation logic scattered across files
```typescript
// In signup.ts
if (!email || !email.includes('@') || email.length < 5) { ... }

// In profile.ts
if (!email || !email.includes('@') || email.length < 5) { ... }

// In invite.ts
if (!email || !email.includes('@') || email.length < 5) { ... }
```

✅ **Good:** Extract once, import everywhere
```typescript
// utils/validate.ts
export const isEmail = (email: string) =>
  email && email.includes('@') && email.length >= 5;

// Everywhere else
import { isEmail } from '@/utils/validate';
if (!isEmail(email)) { ... }
```

---

## Periodic Audit

Run `/audit-duplicates` periodically to catch semantic overlap:

### Audit Checklist

- [ ] **Utility functions**: Any functions doing similar things?
- [ ] **API calls**: Multiple ways to fetch same data?
- [ ] **Validation**: Scattered inline validation logic?
- [ ] **Error handling**: Inconsistent error patterns?
- [ ] **Components**: Similar UI components that could merge?
- [ ] **Hooks**: Custom hooks with overlapping logic?

### Audit Output Format

```markdown
## Duplicate Audit - [DATE]

### 🔴 High Priority (Merge These)

1. **Date formatting** - 3 similar functions found
   - `formatDate()` in utils/dates.ts
   - `displayDate()` in components/Header.tsx
   - `showDate()` in pages/Profile.tsx
   - **Action:** Consolidate into utils/dates.ts

2. **Email validation** - Inline logic in 5 files
   - signup.ts:42
   - profile.ts:28
   - invite.ts:15
   - settings.ts:67
   - admin.ts:33
   - **Action:** Extract to utils/validate.ts

### 🟡 Medium Priority (Consider Merging)

1. **User fetching** - 2 different patterns
   - `fetchUser()` in api/users.ts
   - `getUser()` in services/user.ts
   - **Action:** Decide on one pattern

### 🟢 Low Priority (Monitor)

1. **Button components** - 3 variants exist
   - May be intentional for different use cases
   - **Action:** Document the differences
```

---

## Vector DB Integration (Optional)

For large codebases (200+ files; see the sizing table below), add vector search:

### Setup with ChromaDB

```python
# scripts/index_codebase.py
import chromadb
from chromadb.utils import embedding_functions

# Initialize
client = chromadb.PersistentClient(path="./.chroma")
ef = embedding_functions.DefaultEmbeddingFunction()
collection = client.get_or_create_collection("code_index", embedding_function=ef)

# Index a function
collection.add(
    documents=["Formats a date into human-readable relative string like '2 days ago'"],
    metadatas=[{"function": "formatRelative", "file": "utils/dates.ts", "line": 32}],
    ids=["formatRelative"]
)

# Search before writing
results = collection.query(
    query_texts=["format date as relative time"],
    n_results=5
)
# Returns the nearest matches with distances (lower = closer), e.g. formatRelative in utils/dates.ts
```

### Setup with LanceDB (Lighter)

```python
# scripts/index_codebase.py
import lancedb

db = lancedb.connect("./.lancedb")

# Create table
data = [
    {"function": "formatRelative", "file": "utils/dates.ts", "description": "Formats date as relative time"},
    {"function": "isEmail", "file": "utils/validate.ts", "description": "Validates email format"},
]
table = db.create_table("code_index", data)

# Search
results = table.search("validate email address").limit(5).to_list()
```

### When to Use Vector DB

| Codebase Size | Recommendation |
|---------------|----------------|
| < 50 files | Markdown index only |
| 50-200 files | Markdown + periodic audit |
| 200+ files | Add vector DB |
| 500+ files | Vector DB essential |

---

## Claude Instructions

### At Session Start

1. Read `CODE_INDEX.md` if it exists
2. Note the categories and key functions available
3. Keep this context for the session

### Before Writing New Code

1. **Pause and check**: "Does something like this exist?"
2. Search CODE_INDEX.md for similar capabilities
3. If unsure, search the codebase: `grep -r "functionName\|similar_term" src/`
4. Only create new if confirmed nothing suitable exists

### After Writing New Code

1. **Immediately update CODE_INDEX.md**
2. Add file header if new file
3. Add function docstring
4. Commit index update with code

### When User Says "Add X functionality"

```
Before implementing, let me check if we already have something similar...

[Checks CODE_INDEX.md]

Found: `existingFunction()` in utils/file.ts does something similar.
Options:
1. Use existing function as-is
2. Extend it with new capability
3. Create new (if truly different use case)

Which approach would you prefer?
```

---

## Quick Reference

### Update Index Command
```bash
/update-code-index
```

### Audit Command
```bash
/audit-duplicates
```

### File Header Template
```typescript
/**
 * @file [Short description]
 * @description [What this file does]
 *
 * Key exports:
 * - function1() - [what it does]
 * - function2() - [what it does]
 */
```

### Function Template
```typescript
/**
 * [One line description of what it does]
 */
export function name(params): ReturnType {
```

### Index Entry Template
```markdown
| `functionName()` | path/file.ts:line | Does what in plain English | `(params)` |
```


================================================
FILE: skills/code-graph/SKILL.md
================================================
---
name: code-graph
description: AST-based code graph for fast symbol lookup, dependency analysis, and blast radius via codebase-memory-mcp MCP server
when-to-use: "Before reading files — query the graph first for symbol lookup, call tracing, and blast radius"
user-invocable: false
effort: medium
---

# Code Graph Skill


**Purpose:** Use the code graph (codebase-memory-mcp) for sub-millisecond
symbol lookup, function search, dependency analysis, and blast radius
detection. This replaces brute-force grep and file reading for code
navigation.

---

## Core Principle

**Graph first, file second.** Before reading files or grepping, query the
code graph. Only read full files when you need to modify them or need
context beyond what the graph provides.

**Consider graph when planning.** When planning any change — feature,
refactor, bug fix — start by querying the graph to understand scope,
dependencies, and blast radius. This applies to thinking and planning
phases, not just implementation. Grep is still the right tool for
searching string literals, log messages, config values, and content
that lives outside code structure.

```
┌────────────────────────────────────────────────────────────────┐
│  GRAPH FIRST, FILE SECOND                                      │
│  ─────────────────────────────────────────────────────────────│
│  The code graph indexes your entire codebase as a persistent   │
│  knowledge graph. Claude queries it via MCP for instant         │
│  symbol lookup, dependency chains, and blast radius — instead   │
│  of reading hundreds of files.                                 │
│                                                                │
│  14 MCP tools │ 64 languages │ sub-ms queries │ zero deps      │
│  ~99% fewer tokens for navigation vs brute-force file reads    │
├────────────────────────────────────────────────────────────────┤
│  AUTO-UPDATED                                                  │
│  ─────────────────────────────────────────────────────────────│
│  File watcher keeps graph in sync. Post-commit hook ensures    │
│  freshness. No manual rebuild needed.                          │
└────────────────────────────────────────────────────────────────┘
```

---

## When to Use Graph vs Direct Read

| Task | Use Graph Tool | Use Direct Read? |
|------|---------------|------------------|
| Find function/class definition | `search_graph` | No |
| Get function signature + docs | `get_code_snippet` | No |
| Find all callers of a function | `trace_call_path` | No |
| Trace dependency chain | `query_graph` | No |
| Determine blast radius of change | `detect_changes` | No |
| Understand project architecture | `get_architecture` | No |
| Search for code patterns | `search_code` | No |
| Read full implementation to modify | `search_graph` to locate, then Read file | Yes |
| Understand business logic context | `get_code_snippet` for overview, then Read | Yes |

**Rule:** If a graph tool can answer the question, use it. Only open files
when you need the full source — to make edits, or for context the graph
cannot provide.

---

## Available MCP Tools

### Indexing & Status

| Tool | Purpose | When to Use |
|------|---------|-------------|
| `index_repository` | Build/rebuild graph for a project | First setup, or after major restructure |
| `index_status` | Check if graph is current | Before querying, if unsure of freshness |
| `list_projects` | List all indexed projects | Multi-project navigation |

### Querying & Navigation

| Tool | Purpose | When to Use |
|------|---------|-------------|
| `search_graph` | Find symbols by name (fuzzy) | "Find auth-related functions" |
| `search_code` | Text search across indexed codebase | "Find TODO comments", pattern matching |
| `get_code_snippet` | Get source code for a specific symbol | Need signature, docstring, implementation |
| `get_graph_schema` | Understand graph structure and relationships | Exploring what data is available |
| `query_graph` | Run structured graph queries | Complex dependency/relationship queries |

### Analysis

| Tool | Purpose | When to Use |
|------|---------|-------------|
| `trace_call_path` | Trace caller/callee chains | "Who calls sendEmail?", "What does init() trigger?" |
| `detect_changes` | Identify changed files and blast radius | Before/after code changes, PR review |
| `get_architecture` | High-level module/package structure | Onboarding, understanding project layout |

### Management

| Tool | Purpose | When to Use |
|------|---------|-------------|
| `delete_project` | Remove a project from the graph | Cleanup, project restructure |
| `manage_adr` | Architecture decision records | Document architectural decisions |
| `ingest_traces` | Import runtime traces | Performance analysis, dead code detection |

---

## Workflow: Before Any Code Change

```
0. PLAN       → get_architecture + search_graph to understand scope before planning
1. LOCATE     → search_graph to find the symbol
2. UNDERSTAND → get_code_snippet for context
3. BLAST      → detect_changes to assess impact
4. TRACE      → trace_call_path to find all affected callers
5. CHANGE     → Read file, make edit
6. VERIFY     → detect_changes again to confirm scope
```

**Step 0 applies to planning, not just coding.** When the user asks you to
plan a feature, refactor, or fix — query the graph first to understand
what exists, what depends on what, and what the scope looks like. This
prevents plans based on wrong assumptions about the codebase.

**Never skip step 3.** Blast radius analysis prevents unexpected breakage
from changes to shared code.

---

## Graph Data & Freshness

The graph stays fresh automatically through 3 layers — no manual rebuild needed:

| Layer | Trigger | What Happens |
|-------|---------|-------------|
| **File watcher** | Every file save | codebase-memory-mcp detects changes and re-indexes affected files in real-time |
| **Auto-index** | Session start | `auto_index: true` ensures graph is current when Claude Code starts |
| **Post-commit hook** | Every `git commit` | Touches `.code-graph/.needs-update` marker — file watcher picks it up (~10ms, non-blocking) |

**You do NOT need to manually re-index** unless you do a major restructure
(rename entire directories, switch branches with massive diffs). In that
case: `index_repository` once, then the 3 layers keep it fresh.

- **Storage**: `.code-graph/` directory (auto-created, gitignored)
- **MCP config**: `.mcp.json` at project root (committed, shared with team)

---

## MCP Configuration

The code graph MCP server is configured in `.mcp.json` at project root:

```json
{
  "mcpServers": {
    "codebase-memory": {
      "command": "codebase-memory-mcp",
      "args": []
    }
  }
}
```

**Installation:** `~/.claude/install-graph-tools.sh`

---

## Decision Framework

```
Need to find a symbol/function?
  → search_graph (sub-ms, structured result)
  → NOT: grep -r "functionName" (slow, unstructured)

Need to understand dependencies?
  → query_graph or trace_call_path (complete, traversable)
  → NOT: manually reading import statements

Need to assess change impact?
  → detect_changes (comprehensive, instant)
  → NOT: searching for usages manually across files

Need to understand architecture?
  → get_architecture (high-level overview)
  → NOT: reading every directory listing

Need to read/modify code?
  → search_graph to locate, then Read the specific file
  → NOT: reading entire directories hoping to find it
```

---

## Anti-Patterns

| Anti-Pattern | Do This Instead |
|-------------|-----------------|
| Grepping for function names | `search_graph` with the function name |
| Reading entire files to find a signature | `get_code_snippet` for the specific symbol |
| Manually tracing import chains | `trace_call_path` or `query_graph` |
| Making changes without checking impact | `detect_changes` before every edit to shared code |
| Reading all files in a directory | `get_architecture` for structure, `search_graph` for specifics |
| Ignoring graph staleness warnings | Check `index_status`, re-index if needed |
| Re-indexing on every query | Trust the file watcher; only manual re-index after major restructure |


================================================
FILE: skills/code-review/SKILL.md
================================================
---
name: code-review
description: Mandatory code reviews via /code-review before commits and deploys
when-to-use: When user asks to review code, before commits, or when /code-review is invoked
user-invocable: true
allowed-tools: [Read, Glob, Grep, Bash]
effort: high
---

# Code Review Skill


**Purpose:** Enforce automated code reviews as a mandatory guardrail before every commit and deployment. Choose between Claude, OpenAI Codex, Google Gemini, or multiple engines for comprehensive analysis.

---

## Review Engine Choice

When running `/code-review`, users can choose their preferred review engine:

```
┌─────────────────────────────────────────────────────────────────┐
│  CODE REVIEW - Choose Your Engine                               │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  ○ Claude (default)                                             │
│    Built-in, no extra setup, full conversation context          │
│                                                                 │
│  ○ OpenAI Codex CLI                                             │
│    GPT-5.2-Codex specialized for code review, 88% detection     │
│    Requires: npm install -g @openai/codex                       │
│                                                                 │
│  ○ Google Gemini CLI                                            │
│    Gemini 2.5 Pro with 1M token context, free tier available    │
│    Requires: npm install -g @google/gemini-cli                  │
│                                                                 │
│  ○ Dual Engine (any two)                                        │
│    Run two engines, compare findings, catch more issues         │
│                                                                 │
│  ○ All Three (maximum coverage)                                 │
│    Run Claude + Codex + Gemini for critical/security code       │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘
```

### Engine Comparison

| Aspect | Claude | Codex | Gemini | Multi-Engine |
|--------|--------|-------|--------|--------------|
| **Setup** | None | npm + OpenAI API | npm + Google Account | All setups |
| **Speed** | Fast | Fast | Fast | 2-3x time |
| **Context** | Conversation | Fresh per review | 1M tokens | N/A |
| **Detection** | Good | 88% LiveCodeBench (best) | 63.8% SWE-Bench | Combined |
| **Free Tier** | N/A | Limited | 1,000/day | Varies |
| **Best for** | Quick reviews | High accuracy | Large codebases | Critical code |

### Set Default Engine

```toml
# ~/.claude/settings.toml or project CLAUDE.md
[code-review]
default_engine = "claude"  # Options: claude, codex, gemini, dual, all
```

### Usage Examples

```bash
# Use default engine
/code-review

# Explicitly choose engine
/code-review --engine claude
/code-review --engine codex
/code-review --engine gemini

# Dual engine (pick any two)
/code-review --engine claude,codex
/code-review --engine claude,gemini
/code-review --engine codex,gemini

# All three engines
/code-review --engine all

# Quick shortcuts
/code-review              # Uses default
/code-review --codex      # Use Codex
/code-review --gemini     # Use Gemini
/code-review --all        # All three engines
```

---

## Multi-Engine Output

When using multiple engines, findings are compared and deduplicated:

### Dual Engine Example

```
┌─────────────────────────────────────────────────────────────────┐
│  CODE REVIEW RESULTS - DUAL ENGINE (Claude + Codex)             │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  ✅ AGREED (Found by both):                                     │
│  🔴 SQL injection in auth.ts:45                                 │
│  🟡 Missing error handling in api.ts:112                        │
│                                                                 │
│  🔷 CLAUDE ONLY:                                                │
│  🟠 Potential race condition in worker.ts:89                    │
│  🟢 Consider extracting helper function                         │
│                                                                 │
│  🔶 CODEX ONLY:                                                 │
│  🟠 Memory leak - unclosed stream in upload.ts:34               │
│  🟡 N+1 query pattern in orders.ts:156                          │
│                                                                 │
├─────────────────────────────────────────────────────────────────┤
│  SUMMARY                                                        │
│  Agreed: 2 | Claude only: 2 | Codex only: 2                     │
│  Critical: 1 | High: 2 | Medium: 2 | Low: 1                     │
│  Status: ❌ BLOCKED - Fix critical/high issues                  │
└─────────────────────────────────────────────────────────────────┘
```

### Triple Engine Example (All Three)

```
┌─────────────────────────────────────────────────────────────────┐
│  CODE REVIEW RESULTS - TRIPLE ENGINE                            │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  ✅ UNANIMOUS (All 3 found):                                    │
│  🔴 SQL injection in auth.ts:45                                 │
│                                                                 │
│  ✅ MAJORITY (2 of 3 found):                                    │
│  🟠 Memory leak - unclosed stream in upload.ts:34 (Codex+Gemini)│
│  🟡 Missing error handling in api.ts:112 (Claude+Codex)         │
│                                                                 │
│  🔷 CLAUDE ONLY:                                                │
│  🟠 Potential race condition in worker.ts:89                    │
│                                                                 │
│  🔶 CODEX ONLY:                                                 │
│  🟡 N+1 query pattern in orders.ts:156                          │
│                                                                 │
│  🟢 GEMINI ONLY:                                                │
│  🟡 Consider using batch API for better performance             │
│  🟢 Type could be more specific in types.ts:23                  │
│                                                                 │
├─────────────────────────────────────────────────────────────────┤
│  SUMMARY                                                        │
│  Unanimous: 1 | Majority: 2 | Single: 4                         │
│  Critical: 1 | High: 2 | Medium: 3 | Low: 1                     │
│  Status: ❌ BLOCKED - Fix critical/high issues                  │
└─────────────────────────────────────────────────────────────────┘
```

### When to Use Each Mode

| Mode | Use When |
|------|----------|
| **Single (Claude)** | Quick in-flow reviews, exploration |
| **Single (Codex)** | CI/CD automation, high accuracy needed |
| **Single (Gemini)** | Large codebases (100+ files), free tier |
| **Dual** | Important PRs, pre-merge reviews |
| **Triple (All)** | Security-critical code, payment systems, auth |

---

## Core Philosophy

```
┌─────────────────────────────────────────────────────────────────┐
│  CODE REVIEW IS NON-NEGOTIABLE                                  │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  Every commit must pass code review.                            │
│  Every PR must be reviewed before merge.                        │
│  Every deployment must include review sign-off.                 │
│                                                                 │
│  AI catches what humans miss. Humans catch what AI misses.      │
│  Together: fewer bugs, cleaner code, better security.           │
├─────────────────────────────────────────────────────────────────┤
│  INVOKE: /code-review                                           │
│  PLUGIN: code-review@claude-plugins-official                    │
└─────────────────────────────────────────────────────────────────┘
```

---

## When to Run Code Review

### Mandatory Review Points

| Trigger | Action | Command |
|---------|--------|---------|
| **Before commit** | Review staged changes | `/code-review` |
| **Before PR** | Review all changes vs base | `/code-review` |
| **Before merge** | Final review of PR | `/code-review` |
| **Before deploy** | Review deployment diff | `/code-review` |

### Automatic Integration

**Run code review automatically before every commit:**

```
┌─────────────────────────────────────────────────────────────────┐
│  COMMIT WORKFLOW                                                │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  1. Write code                                                  │
│  2. Run tests (TDD - must pass)                                 │
│  3. Run /code-review  ← MANDATORY                               │
│  4. Address critical/high issues                                │
│  5. Commit                                                      │
│  6. Push                                                        │
│                                                                 │
│  Skip step 3? ❌ NO COMMIT ALLOWED                              │
└─────────────────────────────────────────────────────────────────┘
```

---

## Using the Code Review Plugin

### Basic Usage

```bash
# Review current changes
/code-review

# Review specific files
/code-review src/auth/*.ts

# Review a PR
/code-review --pr 123

# Review with specific focus
/code-review --focus security
/code-review --focus performance
/code-review --focus architecture
```

### Review Categories

The code review plugin analyzes:

| Category | What It Checks |
|----------|----------------|
| **Security** | Vulnerabilities, injection risks, auth issues, secrets |
| **Performance** | N+1 queries, memory leaks, inefficient algorithms |
| **Architecture** | Design patterns, SOLID principles, coupling |
| **Code Quality** | Readability, complexity, duplication |
| **Best Practices** | Language idioms, framework conventions |
| **Testing** | Coverage gaps, test quality, edge cases |
| **Documentation** | Missing docs, outdated comments |

### Severity Levels

| Level | Action Required | Can Commit? |
|-------|-----------------|-------------|
| 🔴 **Critical** | Must fix immediately | ❌ NO |
| 🟠 **High** | Must fix before commit | ❌ NO |
| 🟡 **Medium** | Fix soon, can commit | ✅ YES |
| 🟢 **Low** | Nice to have | ✅ YES |
| ℹ️ **Info** | Suggestions only | ✅ YES |

---

## Pre-Commit Hook Integration

### Install Pre-Commit Hook

```bash
#!/bin/bash
# .git/hooks/pre-commit
#
# Runs a Claude code review on the staged source files and blocks the commit
# when the review reports critical/high findings, or when the review could
# not be run at all. Requires the `claude` CLI on PATH.

echo "🔍 Running code review..."

# Staged source files (added/copied/modified), one path per line.
# grep exits non-zero when nothing matches; that simply leaves the list empty.
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep -E '\.(ts|tsx|js|jsx|py|go|rs)$')

if [ -n "$STAGED_FILES" ]; then
    # Fail closed: without the CLI the review never runs, and its
    # "command not found" output would otherwise be treated as a clean review.
    if ! command -v claude >/dev/null 2>&1; then
        echo "❌ claude CLI not found - cannot run the mandatory code review." >&2
        exit 1
    fi

    # Private temp file instead of a fixed, predictable /tmp path; removed on
    # every exit path so concurrent commits do not share or leak results.
    REVIEW_RESULT=$(mktemp) || {
        echo "❌ Could not create a temporary file for the review output." >&2
        exit 1
    }
    trap 'rm -f -- "$REVIEW_RESULT"' EXIT

    # Invoke code review; a non-zero exit means the review did not complete.
    if ! claude --print "/code-review $STAGED_FILES" > "$REVIEW_RESULT" 2>&1; then
        echo "❌ Code review did not complete:" >&2
        cat "$REVIEW_RESULT" >&2
        exit 1
    fi

    # Check for critical/high issues.
    # NOTE: this is a plain text match, so a summary line such as
    # "Critical: 0 | High: 0" also blocks the commit.
    if grep -q "🔴\|Critical\|🟠\|High" "$REVIEW_RESULT"; then
        echo "❌ Code review found critical/high issues:"
        cat "$REVIEW_RESULT"
        echo ""
        echo "Fix these issues before committing."
        exit 1
    fi

    echo "✅ Code review passed"
fi

exit 0
```

### Make Hook Executable

```bash
chmod +x .git/hooks/pre-commit
```

---

## Codex CLI Setup (For Codex/Multi-Engine Modes)

If you want to use Codex or multi-engine modes, install the Codex CLI:

```bash
# Prerequisites: Node.js 22+
node --version  # Must be 22+

# Install Codex CLI
npm install -g @openai/codex

# Authenticate (choose one):
# Option 1: ChatGPT subscription (Plus, Pro, Team, Enterprise)
codex  # Follow prompts to sign in

# Option 2: API key
export OPENAI_API_KEY=sk-proj-...
```

### Verify Installation

```bash
# Check Codex is installed
codex --version

# Test review
codex
> /review
```

See the `codex-review` skill (`skills/codex-review/SKILL.md`) for full Codex documentation.

---

## Gemini CLI Setup (For Gemini/Multi-Engine Modes)

If you want to use Gemini or multi-engine modes, install the Gemini CLI:

```bash
# Prerequisites: Node.js 20+
node --version  # Must be 20+

# Install Gemini CLI
npm install -g @google/gemini-cli

# Or via Homebrew (macOS)
brew install gemini-cli

# Install Code Review extension
gemini extensions install https://github.com/gemini-cli-extensions/code-review
```

### Authenticate

```bash
# Option 1: Google Account (recommended, 1000 req/day free)
gemini  # Follow browser login prompts

# Option 2: API key (100 req/day free)
export GEMINI_API_KEY="your-key-from-aistudio.google.com"
```

### Verify Installation

```bash
# Check Gemini is installed
gemini --version

# List extensions
gemini extensions list

# Test review
gemini
> /code-review
```

See the `gemini-review` skill for full Gemini documentation.

---

## CI/CD Integration

### GitHub Actions - Claude Only

```yaml
# .github/workflows/code-review.yml
# Reviews the files changed by a pull request with Claude, posts the result
# as a PR comment, and fails the job when the review mentions critical issues.
name: Code Review

on:
  pull_request:
    types: [opened, synchronize, reopened]

jobs:
  code-review:
    runs-on: ubuntu-latest
    # The default GITHUB_TOKEN may be read-only; posting the comment needs write access.
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Get changed files
        id: changed-files
        env:
          # Passed through env instead of ${{ }} inside the script so the
          # value is never parsed as shell code.
          BASE_REF: ${{ github.base_ref }}
        run: |
          echo "files=$(git diff --name-only "origin/${BASE_REF}...HEAD" | tr '\n' ' ')" >> "$GITHUB_OUTPUT"

      - name: Run Claude Code Review
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # File names come from the PR author; keep them out of the script text.
          CHANGED_FILES: ${{ steps.changed-files.outputs.files }}
        run: |
          npx @anthropic-ai/claude-code --print "/code-review ${CHANGED_FILES}" > review.md

      - name: Post Review Comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const review = fs.readFileSync('review.md', 'utf8');

            // Await the request so an API failure fails this step.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `## 🔍 Claude Code Review\n\n${review}`
            });

      - name: Check for Critical Issues
        run: |
          if grep -q "Critical\|🔴" review.md; then
            echo "❌ Critical issues found"
            exit 1
          fi
```

### GitHub Actions - Codex Only

```yaml
# .github/workflows/codex-review.yml
# Runs the OpenAI Codex GitHub Action against every pull request.
name: Codex Code Review

on:
  pull_request:

jobs:
  review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Codex Review
        # Pinned to a release tag rather than the moving `main` branch.
        # NOTE(review): input names below use the hyphenated form from the
        # openai/codex-action README - confirm against the pinned version.
        uses: openai/codex-action@v1
        with:
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
          model: gpt-5.2-codex
          safety-strategy: drop-sudo
          prompt: |
            Review the changes in this pull request for bugs, security issues,
            and quality problems. Be specific about file names and line numbers.
```

### GitHub Actions - Dual Engine (Claude + Codex)

```yaml
# .github/workflows/dual-review.yml
# Runs Claude and Codex reviews as parallel jobs, uploads each result as an
# artifact, then merges both into a single pull request comment.
name: Dual Code Review

on:
  pull_request:

jobs:
  claude-review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Claude Review
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          npx @anthropic-ai/claude-code --print "/code-review" > claude-review.md

      - uses: actions/upload-artifact@v4
        with:
          name: claude-review
          path: claude-review.md

  codex-review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Install Codex
        run: npm install -g @openai/codex

      - name: Codex Review
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          codex exec --full-auto --sandbox read-only \
            --output-last-message codex-review.md \
            "Review this code for bugs, security issues, and quality problems"

      - uses: actions/upload-artifact@v4
        with:
          name: codex-review
          path: codex-review.md

  combine-reviews:
    needs: [claude-review, codex-review]
    runs-on: ubuntu-latest
    # The default GITHUB_TOKEN may be read-only; posting the comment needs write access.
    permissions:
      contents: read
      pull-requests: write
    steps:
      # Without a `name`, every artifact is downloaded into a directory
      # named after the artifact (claude-review/, codex-review/).
      - uses: actions/download-artifact@v4

      - name: Combine Reviews
        run: |
          echo "## 🔍 Dual Code Review Results" > combined-review.md
          echo "" >> combined-review.md
          echo "### Claude Findings" >> combined-review.md
          cat claude-review/claude-review.md >> combined-review.md
          echo "" >> combined-review.md
          echo "### Codex Findings" >> combined-review.md
          cat codex-review/codex-review.md >> combined-review.md

      - name: Post Combined Review
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const review = fs.readFileSync('combined-review.md', 'utf8');
            // Await the request so an API failure fails this step.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: review
            });
```

### GitHub Actions - Gemini Only

```yaml
# .github/workflows/gemini-review.yml
# Sends the pull request diff to the Gemini CLI, posts the review as a PR
# comment, and fails the job when the review text mentions critical issues.
name: Gemini Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  review:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Gemini CLI
        run: npm install -g @google/gemini-cli

      - name: Run Review
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          # Passed through env instead of ${{ }} inside the script so the
          # branch name is never parsed as shell code.
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Get diff
          git diff "origin/${BASE_REF}...HEAD" > diff.txt

          # Run Gemini review. The diff is fed on stdin rather than embedded
          # in the prompt argument: a large diff passed as a single argument
          # fails with "Argument list too long".
          gemini -p "Review the pull request diff provided on standard input for bugs, security issues, and code quality problems. Be specific about file names and line numbers." < diff.txt > review.md

      - name: Post Review Comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const review = fs.readFileSync('review.md', 'utf8');
            // Await the request so an API failure fails this step.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `## 🤖 Gemini Code Review\n\n${review}`
            });

      - name: Check for Critical Issues
        run: |
          if grep -qi "critical\|security vulnerability\|injection" review.md; then
            echo "❌ Critical issues found"
            exit 1
          fi
```

### GitHub Actions - All Three Engines

```yaml
# .github/workflows/triple-review.yml
# Runs Claude, Codex, and Gemini reviews as parallel jobs, uploads each result
# as an artifact, then merges them into one pull request comment and fails
# when any engine's output mentions critical issues.
name: Triple Engine Code Review

on:
  pull_request:

jobs:
  claude-review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Claude Review
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          npx @anthropic-ai/claude-code --print "/code-review" > claude-review.md

      - uses: actions/upload-artifact@v4
        with:
          name: claude-review
          path: claude-review.md

  codex-review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Install Codex
        run: npm install -g @openai/codex

      - name: Codex Review
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          codex exec --full-auto --sandbox read-only \
            --output-last-message codex-review.md \
            "Review this code for bugs, security issues, and quality problems"

      - uses: actions/upload-artifact@v4
        with:
          name: codex-review
          path: codex-review.md

  gemini-review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Gemini CLI
        run: npm install -g @google/gemini-cli

      - name: Gemini Review
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          # Passed through env instead of ${{ }} inside the script so the
          # branch name is never parsed as shell code.
          BASE_REF: ${{ github.base_ref }}
        run: |
          git diff "origin/${BASE_REF}...HEAD" > diff.txt
          # Feed the diff on stdin; embedding it in the prompt argument fails
          # with "Argument list too long" on large pull requests.
          gemini -p "Review the code diff provided on standard input for bugs, security, and quality issues." < diff.txt > gemini-review.md

      - uses: actions/upload-artifact@v4
        with:
          name: gemini-review
          path: gemini-review.md

  combine-reviews:
    needs: [claude-review, codex-review, gemini-review]
    runs-on: ubuntu-latest
    # The default GITHUB_TOKEN may be read-only; posting the comment needs write access.
    permissions:
      contents: read
      pull-requests: write
    steps:
      # Without a `name`, every artifact is downloaded into a directory
      # named after the artifact (claude-review/, codex-review/, gemini-review/).
      - uses: actions/download-artifact@v4

      - name: Combine Reviews
        run: |
          echo "## 🔍 Triple Engine Code Review Results" > combined-review.md
          echo "" >> combined-review.md
          echo "### 🟣 Claude Findings" >> combined-review.md
          cat claude-review/claude-review.md >> combined-review.md
          echo "" >> combined-review.md
          echo "---" >> combined-review.md
          echo "### 🟢 Codex Findings" >> combined-review.md
          cat codex-review/codex-review.md >> combined-review.md
          echo "" >> combined-review.md
          echo "---" >> combined-review.md
          echo "### 🔵 Gemini Findings" >> combined-review.md
          cat gemini-review/gemini-review.md >> combined-review.md

      - name: Post Combined Review
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const review = fs.readFileSync('combined-review.md', 'utf8');
            // Await the request so an API failure fails this step.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: review
            });

      - name: Check Critical Issues
        run: |
          # Fail if any engine found critical issues
          if grep -qi "critical\|🔴" combined-review.md; then
            echo "❌ Critical issues found by at least one engine"
            exit 1
          fi
```

---

## Review Checklist

### Before Every Commit

- [ ] Run `/code-review` on staged changes
- [ ] No critical (🔴) issues
- [ ] No high (🟠) issues
- [ ] Security concerns addressed
- [ ] Performance issues considered

### Before Every PR

- [ ] Full code review of all changes
- [ ] All critical/high issues resolved
- [ ] Tests added for new functionality
- [ ] Documentation updated if needed

### Before Every Deployment

- [ ] Final review of deployment diff
- [ ] Security scan passed
- [ ] No new vulnerabilities introduced
- [ ] Rollback plan documented

---

## Common Review Findings

### Security Issues (Always Fix)

| Issue | Example | Fix |
|-------|---------|-----|
| SQL Injection | `query = f"SELECT * FROM users WHERE id = {id}"` | Use parameterized queries |
| XSS | `innerHTML = userInput` | Sanitize or use textContent |
| Secrets in code | `apiKey = "sk-xxx"` | Use environment variables |
| Missing auth | Unprotected endpoints | Add authentication middleware |
| Insecure crypto | MD5/SHA1 for passwords | Use bcrypt/argon2 |

### Performance Issues (Should Fix)

| Issue | Example | Fix |
|-------|---------|-----|
| N+1 queries | Loop with individual queries | Use batch/eager loading |
| Memory leak | Unclosed connections | Close connections when done; use connection pooling |
| Missing index | Slow queries | Add database indexes |
| Large payload | Fetching unused fields | Select only needed fields |
| No pagination | Loading all records | Implement pagination |

### Code Quality (Nice to Fix)

| Issue | Example | Fix |
|-------|---------|-----|
| Long function | 100+ lines | Extract into smaller functions |
| Deep nesting | 5+ levels | Early returns, extract methods |
| Magic numbers | `if (status === 3)` | Use named constants |
| Duplicate code | Copy-pasted blocks | Extract shared function |
| Missing types | `any` everywhere | Add proper TypeScript types |

---

## Integration with TDD Workflow

```
┌─────────────────────────────────────────────────────────────────┐
│  TDD + CODE REVIEW WORKFLOW                                     │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  1. RED: Write failing tests                                    │
│  2. GREEN: Write code to pass tests                             │
│  3. REFACTOR: Clean up code                                     │
│  4. REVIEW: Run /code-review  ← NEW STEP                        │
│  5. FIX: Address critical/high issues                           │
│  6. VALIDATE: Lint + TypeCheck + Coverage                       │
│  7. COMMIT: Only after review passes                            │
│                                                                 │
│  Review catches what tests miss:                                │
│  - Security vulnerabilities                                     │
│  - Performance issues                                           │
│  - Architecture problems                                        │
│  - Code maintainability                                         │
└─────────────────────────────────────────────────────────────────┘
```

---

## Review Response Template

When code review finds issues, respond with:

```markdown
## Code Review Results

### 🔴 Critical Issues (Must Fix)
1. **SQL Injection in userController.ts:45**
   - Issue: User input directly interpolated into query
   - Fix: Use parameterized query
   - Code: `db.query('SELECT * FROM users WHERE id = $1', [userId])`

### 🟠 High Issues (Should Fix)
1. **Missing authentication on /api/admin endpoints**
   - Issue: Admin routes accessible without auth
   - Fix: Add auth middleware

### 🟡 Medium Issues (Fix Soon)
1. **N+1 query in getOrders function**
   - Consider eager loading or batch query

### 🟢 Low Issues (Nice to Have)
1. **Consider extracting validation logic to separate file**

### ✅ Strengths
- Good test coverage
- Clear function names
- Proper error handling

### 📊 Summary
- Critical: 1 | High: 1 | Medium: 1 | Low: 1
- **Status: ❌ BLOCKED** - Fix critical/high issues before commit
```

---

## Claude Instructions

### When to Invoke Code Review

Claude should automatically suggest or run code review:

1. **After completing a feature** → "Let me run a code review before we commit"
2. **Before creating a PR** → "Running code review on all changes"
3. **When user says "commit"** → "First, let me review the changes"
4. **After fixing bugs** → "Reviewing the fix for any issues"

### Review Focus Areas

Prioritize review based on change type:

| Change Type | Focus Areas |
|-------------|-------------|
| Auth/Security code | Security, input validation, crypto |
| Database code | SQL injection, N+1, transactions |
| API endpoints | Auth, rate limiting, validation |
| Frontend code | XSS, state management, performance |
| Infrastructure | Secrets, permissions, logging |

---

## Quick Reference

### Commands

```bash
# Basic review
/code-review

# Review specific files
/code-review src/auth.ts src/users.ts

# Review with focus
/code-review --focus security

# Review PR
/code-review --pr 123
```

### Severity Actions

```
🔴 Critical → STOP. Fix now. No commit.
🟠 High     → STOP. Fix now. No commit.
🟡 Medium   → Note it. Fix soon. Can commit.
🟢 Low      → Optional. Nice to have.
ℹ️ Info     → FYI only.
```

### Workflow

```
Code → Test → Review → Fix → Commit → Push → PR → Review → Merge → Deploy
              ↑                              ↑                    ↑
           /code-review                /code-review          /code-review
```


================================================
FILE: skills/codex-review/SKILL.md
================================================
---
name: codex-review
description: OpenAI Codex CLI code review with GPT-5.2-Codex, CI/CD integration
when-to-use: When user requests Codex-powered code review or multi-engine review
user-invocable: true
effort: medium
---

# OpenAI Codex Code Review Skill


Use OpenAI's Codex CLI for specialized code review with GPT-5.2-Codex - trained specifically for detecting bugs, security flaws, and code quality issues.

**Sources:** [Codex CLI](https://developers.openai.com/codex/cli/) | [GitHub](https://github.com/openai/codex) | [Code Review Cookbook](https://cookbook.openai.com/examples/codex/build_code_review_with_codex_sdk)

---

## Why Codex for Code Review?

| Feature | Benefit |
|---------|---------|
| **GPT-5.2-Codex** | Specialized training for code review |
| **88% detection rate** | Bugs, security flaws, style issues (LiveCodeBench) |
| **Structured output** | JSON schema for consistent findings |
| **GitHub native** | `@codex review` in PR comments |
| **Headless mode** | CI/CD automation without TUI |

---

## Installation

### Prerequisites

```bash
# Check Node.js version (requires 22+)
node --version

# Install Node.js 22 if needed
# macOS
brew install node@22

# Or via nvm
nvm install 22
nvm use 22
```

### Install Codex CLI

```bash
# Via npm (recommended)
npm install -g @openai/codex

# Via Homebrew (macOS)
brew install --cask codex

# Verify installation
codex --version
```

### Authentication

**Option 1: ChatGPT Subscription** (Plus, Pro, Team, Edu, Enterprise)
```bash
codex
# Follow prompts to sign in with ChatGPT account
```

**Option 2: OpenAI API Key**
```bash
# Set environment variable
export OPENAI_API_KEY=sk-proj-...

# Or add to shell profile
echo 'export OPENAI_API_KEY=sk-proj-...' >> ~/.zshrc

# Run Codex
codex
```

### Shell Completions (Optional)

```bash
# Bash
codex completion bash >> ~/.bashrc

# Zsh
codex completion zsh >> ~/.zshrc

# Fish
codex completion fish > ~/.config/fish/completions/codex.fish
```

---

## Interactive Code Review

### Launch Review Mode

```bash
# Start Codex
codex

# In the TUI, type:
/review
```

### Review Presets

| Preset | Use Case |
|--------|----------|
| **Review against base branch** | Before opening PR - diffs against upstream |
| **Review uncommitted changes** | Before committing - staged + unstaged + untracked |
| **Review a commit** | Analyze specific SHA from history |
| **Custom instructions** | e.g., "Focus on security vulnerabilities" |

### Example Session

```
$ codex
> /review

Select review type:
❯ Review against a base branch
  Review uncommitted changes
  Review a commit
  Custom review instructions

Select base branch: main

Reviewing changes...

┌─────────────────────────────────────────────────────────────┐
│ CODE REVIEW FINDINGS                                        │
├─────────────────────────────────────────────────────────────┤
│ 🔴 CRITICAL: SQL Injection vulnerability                    │
│    File: src/api/users.ts:45                                │
│    Issue: User input directly interpolated in query         │
│    Fix: Use parameterized queries                           │
├─────────────────────────────────────────────────────────────┤
│ 🟠 HIGH: Missing authentication check                       │
│    File: src/api/admin.ts:23                                │
│    Issue: Admin endpoint accessible without auth            │
│    Fix: Add requireAuth middleware                          │
├─────────────────────────────────────────────────────────────┤
│ 🟡 MEDIUM: Inefficient database query                       │
│    File: src/services/orders.ts:89                          │
│    Issue: N+1 query pattern in loop                         │
│    Fix: Use batch query or JOIN                             │
└─────────────────────────────────────────────────────────────┘
```

---

## Headless Mode (Automation)

### Basic Usage

```bash
# Simple review
codex exec "review the code for bugs and security issues"

# Review with JSON output
codex exec --json "review uncommitted changes" > review.json

# Save final message to file
codex exec --output-last-message review.txt "review the diff against main"
```

### Full Automation (CI/CD)

```bash
# Full auto mode (use only in isolated runners!)
codex exec \
  --full-auto \
  --json \
  --output-last-message findings.txt \
  --sandbox read-only \
  -m gpt-5.2-codex \
  "Review this code for bugs, security issues, and performance problems"
```

### Structured Output with Schema

```bash
# Define output schema
cat > review-schema.json << 'EOF'
{
  "type": "object",
  "properties": {
    "findings": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "severity": { "enum": ["critical", "high", "medium", "low"] },
          "title": { "type": "string" },
          "file": { "type": "string" },
          "line": { "type": "integer" },
          "description": { "type": "string" },
          "suggestion": { "type": "string" }
        },
        "required": ["severity", "title", "file", "description"]
      }
    },
    "summary": { "type": "string" },
    "approved": { "type": "boolean" }
  },
  "required": ["findings", "summary", "approved"]
}
EOF

# Run with schema validation
codex exec \
  --output-schema review-schema.json \
  --output-last-message review.json \
  "Review the staged changes and output findings"
```

---

## GitHub Integration

### Option 1: PR Comment Trigger

In any pull request, add a comment:
```
@codex review
```

Codex will respond with a standard GitHub code review.

### Option 2: GitHub Action

```yaml
# .github/workflows/codex-review.yml
# Runs Codex through the official action on every PR update and posts the
# model's final message back to the pull request as a comment.
name: Codex Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  review:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write  # needed by the comment step below

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # full history so the base branch is available to diff against

      - name: Codex Review
        id: codex
        # Pin to a release tag instead of the moving `main` branch
        uses: openai/codex-action@v1
        with:
          # The action's inputs are hyphenated (openai-api-key, safety-strategy)
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
          model: gpt-5.2-codex
          sandbox: read-only
          safety-strategy: drop-sudo
          # The action runs whatever prompt it is given; it has no built-in review mode
          prompt: |
            Review the changes in this pull request against its base branch.
            Report bugs, security issues, and code quality problems,
            citing file and line for each finding.

      - name: Post Review Comment
        if: steps.codex.outputs.final-message != ''
        uses: actions/github-script@v7
        env:
          # Hand the text over via the environment so it is never parsed as script source
          CODEX_FINAL_MESSAGE: ${{ steps.codex.outputs.final-message }}
        with:
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `## 🤖 Codex Code Review\n\n${process.env.CODEX_FINAL_MESSAGE}`
            });

### Option 3: Manual Headless in CI

```yaml
# .github/workflows/codex-review.yml
# Installs the Codex CLI, reviews the PR diff in headless mode, and posts the
# result as a PR comment.
name: Codex Code Review

on:
  pull_request:

jobs:
  review:
    runs-on: ubuntu-latest
    # The default GITHUB_TOKEN can be read-only; commenting on the PR needs write access
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Install Codex CLI
        run: npm install -g @openai/codex

      - name: Run Review
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Pass the branch name through the environment: expanding
          # ${{ github.base_ref }} inside the script body would allow script injection
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Feed prompt + diff on stdin (the trailing "-"): a large diff passed
          # as a single command-line argument fails with "Argument list too long".
          # --full-auto is omitted on purpose: it is an alias that selects the
          # workspace-write sandbox, which conflicts with --sandbox read-only.
          {
            echo "Review this git diff for bugs, security issues, and code quality:"
            git diff "origin/${BASE_REF}...HEAD"
          } | codex exec \
            --sandbox read-only \
            --output-last-message review.md \
            -

      - name: Post Review Comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const review = fs.readFileSync('review.md', 'utf8');
            // Await the request so an API failure fails the step instead of being dropped
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `## 🤖 Codex Code Review\n\n${review}`
            });

---

## GitLab CI/CD

```yaml
# .gitlab-ci.yml
codex-review:
  image: node:22
  stage: review
  script:
    - npm install -g @openai/codex
    - |
      codex exec \
        --full-auto \
        --sandbox read-only \
        --output-last-message review.md \
        "Review the merge request changes for bugs and security issues"
    - cat review.md
  artifacts:
    paths:
      - review.md
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
```

---

## Jenkins Pipeline

```groovy
pipeline {
    agent any

    environment {
        OPENAI_API_KEY = credentials('openai-api-key')
    }

    stages {
        stage('Install Codex') {
            steps {
                sh 'npm install -g @openai/codex'
            }
        }

        stage('Code Review') {
            steps {
                sh '''
                    codex exec \
                      --full-auto \
                      --sandbox read-only \
                      --output-last-message review.md \
                      "Review the code changes for bugs and security issues"
                '''
            }
        }

        stage('Publish Results') {
            steps {
                archiveArtifacts artifacts: 'review.md'
                script {
                    def review = readFile('review.md')
                    echo "Code Review Results:\n${review}"
                }
            }
        }
    }
}
```

---

## Configuration

### Config File

```toml
# ~/.codex/config.toml

# `model` and `sandbox_mode` are top-level keys, not `[model]` / `[sandbox]` tables
model = "gpt-5.2-codex"      # Best for code review
sandbox_mode = "read-only"   # Safe for reviews

# NOTE(review): confirm that your Codex version reads a `[review]` table;
# standing instructions are normally supplied through an AGENTS.md file.
[review]
# Custom review instructions applied to all reviews
instructions = """
Focus on:
1. Security vulnerabilities (OWASP Top 10)
2. Performance issues (N+1 queries, memory leaks)
3. Error handling gaps
4. Type safety issues
"""
```

### Per-Project Config

```toml
# .codex/config.toml (in project root)

[review]
instructions = """
This is a Python FastAPI project. Focus on:
- Async/await correctness
- Pydantic model validation
- SQL injection via SQLAlchemy
- Authentication/authorization gaps
"""
```

---

## CLI Quick Reference

```bash
# Interactive
codex                          # Start TUI
/review                        # Open review presets

# Headless
codex exec "prompt"            # Non-interactive execution
codex exec --json "prompt"     # JSON output
codex exec --full-auto "prompt"  # No approval prompts

# Key Flags
--output-last-message FILE     # Save response to file
--output-schema FILE           # Validate against JSON schema
--sandbox read-only            # Restrict file access
-m gpt-5.2-codex              # Use best review model
--json                         # Machine-readable output

# Resume
codex exec resume SESSION_ID   # Continue previous session
```

---

## Comparison: Claude vs Codex Review

| Aspect | Claude (Built-in) | Codex CLI |
|--------|-------------------|-----------|
| **Setup** | None (already in Claude Code) | Install CLI + auth |
| **Model** | Claude | GPT-5.2-Codex (specialized) |
| **Context** | Full conversation context | Fresh context per review |
| **Integration** | Native | GitHub, GitLab, Jenkins |
| **Output** | Markdown | JSON schema support |
| **Best for** | Quick reviews, in-flow | CI/CD, critical PRs |

---

## Security Considerations

### CI/CD Safety

```yaml
# Always apply both of these in CI/CD:
--sandbox read-only           # codex exec flag: prevent file modifications
safety-strategy: drop-sudo    # openai/codex-action input (not a CLI flag): revoke elevated permissions
```

### API Key Protection

```yaml
# GitHub Actions - use secrets
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

# Never hardcode keys
# Never echo keys in logs
```

### Public Repositories

For public repos, use `drop-sudo` safety strategy to prevent Codex from reading its own API key during execution.

---

## Troubleshooting

| Issue | Solution |
|-------|----------|
| `codex: command not found` | Run `npm install -g @openai/codex` |
| `Node.js version error` | Upgrade to Node.js 22+ |
| `Authentication failed` | Re-run `codex` and sign in again |
| `API key invalid` | Check `OPENAI_API_KEY` env var |
| `Timeout in CI` | Add `--timeout 300` flag |
| `Rate limited` | Reduce frequency or upgrade plan |

---

## Anti-Patterns

- **Using `--dangerously-bypass-approvals-and-sandbox` casually** - Only in isolated CI runners
- **Exposing API keys in logs** - Use secrets management
- **Skipping sandbox in CI** - Always use `--sandbox read-only`
- **Ignoring findings** - Review and address or document exceptions
- **Running on every commit** - Use on PRs only to save costs


================================================
FILE: skills/commit-hygiene/SKILL.md
================================================
---
name: commit-hygiene
description: Atomic commits, PR size limits, commit thresholds, stacked PRs
when-to-use: When committing code, creating PRs, or when change set is growing large
user-invocable: false
effort: low
---

# Commit Hygiene Skill


**Purpose:** Keep commits atomic, PRs reviewable, and git history clean. Advise when it's time to commit before changes become too large.

---

## Core Philosophy

```
┌─────────────────────────────────────────────────────────────────┐
│  ATOMIC COMMITS                                                  │
│  ─────────────────────────────────────────────────────────────  │
│  One logical change per commit.                                  │
│  Each commit should be self-contained and deployable.            │
│  If you need "and" to describe it, split it.                     │
├─────────────────────────────────────────────────────────────────┤
│  SMALL PRS WIN                                                   │
│  ─────────────────────────────────────────────────────────────  │
│  < 400 lines changed = reviewed in < 1 hour                      │
│  > 1000 lines = likely rubber-stamped or abandoned               │
│  Smaller PRs = faster reviews, fewer bugs, easier reverts        │
├─────────────────────────────────────────────────────────────────┤
│  COMMIT EARLY, COMMIT OFTEN                                      │
│  ─────────────────────────────────────────────────────────────  │
│  Working code? Commit it.                                        │
│  Test passing? Commit it.                                        │
│  Don't wait for "done" - commit at every stable point.           │
└─────────────────────────────────────────────────────────────────┘
```

---

## Commit Size Thresholds

### Warning Thresholds (Time to Commit!)

| Metric | Yellow Zone | Red Zone | Action |
|--------|-------------|----------|--------|
| **Files changed** | 6-10 files | > 10 files | Commit NOW |
| **Lines added** | 150-300 lines | > 300 lines | Commit NOW |
| **Lines deleted** | 100-200 lines | > 200 lines | Commit NOW |
| **Total changes** | 201-400 lines | > 400 lines | Commit NOW |
| **Time since last commit** | 30-60 min | > 60 min | Consider committing |

### Ideal Commit Size

```
┌─────────────────────────────────────────────────────────────────┐
│  IDEAL COMMIT                                                    │
│  ─────────────────────────────────────────────────────────────  │
│  Files: 1-5                                                      │
│  Lines: 50-200 total changes                                     │
│  Scope: Single logical unit of work                              │
│  Message: Describes ONE thing                                    │
└─────────────────────────────────────────────────────────────────┘
```

---

## Check Current State (Run Frequently)

### Quick Status Check

```bash
# See what's changed (staged + unstaged)
git status --short

# Count files and lines changed
git diff --stat
git diff --cached --stat  # Staged only

# Get totals
git diff --shortstat
# Example output: 8 files changed, 245 insertions(+), 32 deletions(-)
```

### Detailed Change Analysis

```bash
# Full diff summary with file names
git diff --stat HEAD

# Just the numbers
git diff --numstat HEAD | awk '{add+=$1; del+=$2} END {print "+"add" -"del" total:"add+del}'

# Files changed count
git status --porcelain | wc -l
```

### Pre-Commit Check Script

```bash
#!/bin/bash
# scripts/check-commit-size.sh
#
# Measures the uncommitted change set (entries from `git status --porcelain`,
# line counts from `git diff --shortstat HEAD`) and classifies it as
# green / yellow / red. Exit status is 1 in the red zone and 0 otherwise.

# Red-zone (hard) limits and yellow-zone (soft) limits
file_limit=10
line_limit=400
file_warn=5
line_warn=200

# count_of LABEL SUMMARY
# Prints the number that precedes LABEL in a --shortstat summary line,
# or 0 when the summary has no such field.
count_of() {
    echo "$2" | grep -oE "[0-9]+ $1" | grep -oE '[0-9]+' || echo 0
}

# Collect the current numbers
changed_files=$(git status --porcelain | wc -l | tr -d ' ')
summary=$(git diff --shortstat HEAD 2>/dev/null)
added=$(count_of insertion "$summary")
removed=$(count_of deletion "$summary")
total_lines=$((added + removed))

echo "📊 Current changes: $changed_files files, +$added -$removed ($total_lines total lines)"

# Within the hard limits: decide between green and yellow, then succeed
if [ "$changed_files" -le "$file_limit" ] && [ "$total_lines" -le "$line_limit" ]; then
    if [ "$changed_files" -le "$file_warn" ] && [ "$total_lines" -le "$line_warn" ]; then
        echo "🟢 OK: Changes are within healthy limits."
    else
        echo "🟡 WARNING: Changes getting large. Commit soon."
    fi
    exit 0
fi

# Past a hard limit
echo "🔴 RED ZONE: Commit immediately! Changes are too large."
echo "   Consider splitting into multiple commits."
exit 1
```

---

## When to Commit

### Commit Triggers (Any One = Commit)

| Trigger | Example |
|---------|---------|
| **Test passes** | Just got a test green → commit |
| **Feature complete** | Finished a function → commit |
| **Refactor done** | Renamed variable across files → commit |
| **Bug fixed** | Fixed the issue → commit |
| **Before switching context** | About to work on something else → commit |
| **Clean compile** | Code compiles/lints clean → commit |
| **Threshold hit** | > 5 files or > 200 lines → commit |

### Commit Immediately If

- ✅ Tests are passing after being red
- ✅ You're about to make a "big change"
- ✅ You've been coding for 30+ minutes
- ✅ You're about to try something risky
- ✅ The current state is "working"

### Don't Wait For

- ❌ "Perfect" code
- ❌ All features done
- ❌ Full test coverage
- ❌ Code review from yourself
- ❌ Documentation complete

---

## Atomic Commit Patterns

### Good Atomic Commits

```
✅ "Add email validation to signup form"
   - 3 files: validator.ts, signup.tsx, signup.test.ts
   - 120 lines changed
   - Single purpose: email validation

✅ "Fix null pointer in user lookup"
   - 2 files: userService.ts, userService.test.ts
   - 25 lines changed
   - Single purpose: fix one bug

✅ "Refactor: Extract PaymentProcessor class"
   - 4 files: payment.ts → paymentProcessor.ts + types
   - 180 lines changed
   - Single purpose: refactoring
```

### Bad Commits (Too Large)

```
❌ "Add authentication, fix bugs, update styles"
   - 25 files changed
   - 800 lines changed
   - Multiple purposes mixed

❌ "WIP"
   - Unknown scope
   - No clear purpose
   - Hard to review/revert

❌ "Updates"
   - 15 files changed
   - Mix of features, fixes, refactors
   - Impossible to review properly
```

---

## Splitting Large Changes

### Strategy 1: By Layer

```
Instead of one commit with:
  - API endpoint + database migration + frontend + tests

Split into:
  1. "Add users table migration"
  2. "Add User model and repository"
  3. "Add GET /users endpoint"
  4. "Add UserList component"
  5. "Add integration tests for user flow"
```

### Strategy 2: By Feature Slice

```
Instead of one commit with:
  - All CRUD operations for users

Split into:
  1. "Add create user functionality"
  2. "Add read user functionality"
  3. "Add update user functionality"
  4. "Add delete user functionality"
```

### Strategy 3: Refactor First

```
Instead of:
  - Feature + refactoring mixed

Split into:
  1. "Refactor: Extract validation helpers" (no behavior change)
  2. "Add email validation using new helpers" (new feature)
```

### Strategy 4: By Risk Level

```
Instead of:
  - Safe changes + risky changes together

Split into:
  1. "Update dependencies" (safe, isolated)
  2. "Migrate to new API version" (risky, separate)
```

---

## PR Size Guidelines

### Optimal PR Size

| Metric | Optimal | Acceptable | Too Large |
|--------|---------|------------|-----------|
| **Files** | 1-10 | 10-20 | > 20 |
| **Lines changed** | 50-200 | 200-400 | > 400 |
| **Commits** | 1-5 | 5-10 | > 10 |
| **Review time** | < 30 min | 30-60 min | > 60 min |

### PR Size vs Defect Rate

```
┌─────────────────────────────────────────────────────────────────┐
│  RESEARCH FINDINGS (Google, Microsoft studies)                  │
│  ─────────────────────────────────────────────────────────────  │
│  PRs < 200 lines: 15% defect rate                               │
│  PRs 200-400 lines: 23% defect rate                             │
│  PRs > 400 lines: 40%+ defect rate                              │
│                                                                 │
│  Review quality drops sharply after 200-400 lines.              │
│  Large PRs get "LGTM" rubber stamps, not real reviews.          │
└─────────────────────────────────────────────────────────────────┘
```

### When PR is Too Large

```bash
# Check PR size before creating
git diff main --stat
git diff main --shortstat

# If too large, consider:
# 1. Split into multiple PRs (stacked PRs)
# 2. Create feature flag and merge incrementally
# 3. Use draft PR for early feedback
```

---

## Commit Message Format

### Structure

```
<type>: <description> (50 chars max)

[optional body - wrap at 72 chars]

[optional footer]
```

### Types

| Type | Use For |
|------|---------|
| `feat` | New feature |
| `fix` | Bug fix |
| `refactor` | Code change that neither fixes nor adds |
| `test` | Adding/updating tests |
| `docs` | Documentation only |
| `style` | Formatting, no code change |
| `chore` | Build, config, dependencies |

### Examples

```
feat: Add email validation to signup form

fix: Prevent null pointer in user lookup

refactor: Extract PaymentProcessor class

test: Add integration tests for checkout flow

chore: Update dependencies to latest versions
```

---

## Git Workflow Integration

### Pre-Commit Hook for Size Check

```bash
#!/bin/bash
# .git/hooks/pre-commit
#
# Blocks a commit whose staged change set is past the red-zone limits:
# more than MAX_LINES inserted + deleted lines, or more than MAX_FILES files.
# Exit status: 0 lets the commit proceed, 1 aborts it.

MAX_LINES=400
MAX_FILES=10  # same red-zone file limit as scripts/check-commit-size.sh

# Measure the index only (--cached): that is exactly what the commit will contain.
FILES=$(git diff --cached --name-only | wc -l | tr -d ' ')
STATS=$(git diff --cached --shortstat)
# --shortstat leaves out a field whose count is zero, so fall back to 0 when grep finds nothing.
INSERTIONS=$(echo "$STATS" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)
DELETIONS=$(echo "$STATS" | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo 0)
TOTAL=$((INSERTIONS + DELETIONS))

# Rejections are diagnostics, so they go to stderr.
if [ "$TOTAL" -gt "$MAX_LINES" ]; then
    echo "❌ Commit too large: $TOTAL lines (max: $MAX_LINES)" >&2
    echo "   Consider splitting into smaller commits." >&2
    echo "   Use 'git add -p' for partial staging." >&2
    exit 1
fi

if [ "$FILES" -gt "$MAX_FILES" ]; then
    echo "❌ Too many files: $FILES (max: $MAX_FILES)" >&2
    echo "   Consider splitting into smaller commits." >&2
    exit 1
fi

echo "✅ Commit size OK: $FILES files, $TOTAL lines"
```

### Partial Staging (Split Large Changes)

```bash
# Stage specific hunks interactively
git add -p

# Stage specific files
git add path/to/specific/file.ts

# Stage with preview
git add -N file.ts  # Intent to add
git diff            # See what would be added
git add file.ts     # Actually add
```

### Unstage If Too Large

```bash
# Unstage everything
git reset HEAD

# Unstage specific files
git reset HEAD path/to/file.ts

# Stage just what you need for THIS commit
git add -p
```

---

## Claude Integration

### Periodic Check During Development

**Claude should run this check after every significant change:**

```bash
# Quick status
git diff --shortstat HEAD
```

**Thresholds for Claude to advise committing:**

| Condition | Claude Action |
|-----------|---------------|
| > 5 files changed | Suggest: "Consider committing current changes" |
| > 200 lines changed | Suggest: "Changes are getting large, commit recommended" |
| > 10 files OR > 400 lines | Warn: "⚠️ Commit now before changes become unmanageable" |
| Test just passed | Suggest: "Good checkpoint - commit these passing tests" |
| Refactoring complete | Suggest: "Refactoring done - commit before adding features" |

### Claude Commit Reminder Messages

```
📊 Status: 7 files changed, +180 -45 (225 total)
💡 Approaching commit threshold. Consider committing current work.

---

📊 Status: 12 files changed, +320 -80 (400 total)
⚠️ Changes are large! Commit now to keep PRs reviewable.
   Suggested commit: "feat: Add user authentication flow"

---

📊 Status: 3 files changed, +85 -10 (95 total)
✅ Tests passing. Good time to commit!
   Suggested commit: "fix: Validate email format on signup"
```

---

## Stacked PRs (For Large Features)

When a feature is genuinely large, use stacked PRs:

```
┌─────────────────────────────────────────────────────────────────┐
│  STACKED PR PATTERN                                             │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  main ─────────────────────────────────────────────────────────│
│    └── PR #1: Database schema (200 lines) ← Review first       │
│         └── PR #2: API endpoints (250 lines) ← Review second   │
│              └── PR #3: Frontend (300 lines) ← Review third    │
│                                                                 │
│  Each PR is reviewable independently.                           │
│  Merge in order: #1 → #2 → #3                                   │
└─────────────────────────────────────────────────────────────────┘
```

### Creating Stacked PRs

```bash
# Create base branch
git checkout -b feature/auth-schema
# ... make changes ...
git commit -m "feat: Add users table schema"
git push -u origin feature/auth-schema
gh pr create --base main --title "feat: Add users table schema"

# Create next branch FROM the first
git checkout -b feature/auth-api
# ... make changes ...
git commit -m "feat: Add authentication API endpoints"
git push -u origin feature/auth-api
gh pr create --base feature/auth-schema --title "feat: Add auth API endpoints"

# And so on...
```

---

## Checklist

### Before Every Commit

- [ ] Changes are for ONE logical purpose
- [ ] Tests pass (if applicable)
- [ ] Lint/typecheck pass
- [ ] < 10 files changed
- [ ] < 400 lines total
- [ ] Commit message describes ONE thing

### Before Creating PR

- [ ] Total lines < 400 (ideal < 200)
- [ ] All commits are atomic
- [ ] No "WIP" or "fixup" commits
- [ ] PR title describes the change
- [ ] Description explains why, not just what

### Red Flags (Stop and Split)

- ❌ Commit message needs "and"
- ❌ > 10 files in one commit
- ❌ > 400 lines in one commit
- ❌ Mix of features, fixes, and refactors
- ❌ "I'll clean this up later"

---

## Quick Reference

### Thresholds

```
Files:  ≤ 5 = 🟢  |  6-10 = 🟡  |  > 10 = 🔴
Lines:  ≤ 200 = 🟢  |  201-400 = 🟡  |  > 400 = 🔴
Time:   ≤ 30min = 🟢  |  30-60min = 🟡  |  > 60min = 🔴
```

### Commands

```bash
# Quick status
git diff --shortstat HEAD

# Detailed file list
git diff --stat HEAD

# Partial staging
git add -p

# Check before PR
git diff main --shortstat
```

### Commit Now If

- ✅ Tests just passed
- ✅ > 200 lines changed
- ✅ > 5 files changed
- ✅ About to switch tasks
- ✅ Current state is "working"


================================================
FILE: skills/cpg-analysis/SKILL.md
================================================
---
name: cpg-analysis
description: Deep code property graph analysis with Joern CPG (AST+CFG+PDG) and CodeQL for control flow, data flow, taint analysis, and security auditing
when-to-use: "When deep code analysis is needed — control flow, data flow, taint tracking, or security auditing"
user-invocable: true
effort: high
---

# CPG Analysis Skill


**Purpose:** Deep code analysis beyond AST. Use Joern for full Code
Property Graph (control flow, data flow, program dependencies) and CodeQL
for interprocedural taint analysis and vulnerability detection.

**These are opt-in tools.** They require Docker/JVM (Joern) or CodeQL CLI.
Use codebase-memory-mcp (Tier 1, always-on) for everyday navigation.
Use these for deep analysis when Tier 1 is not enough.

```
┌────────────────────────────────────────────────────────────────┐
│  CODE PROPERTY GRAPH = AST + CFG + CDG + DDG + PDG             │
│  ─────────────────────────────────────────────────────────────│
│  AST  = Abstract Syntax Tree (structure)                       │
│  CFG  = Control Flow Graph (execution paths)                   │
│  CDG  = Control Dependency Graph (conditional dependencies)    │
│  DDG  = Data Dependency Graph (data flow between statements)   │
│  PDG  = Program Dependency Graph (CDG + DDG combined)          │
│                                                                │
│  Tier 2 (Joern): Full CPG with 40+ query tools                │
│  Tier 3 (CodeQL): Interprocedural taint + security queries     │
└────────────────────────────────────────────────────────────────┘
```

---

## Tier Selection Guide

```
Simple symbol lookup, dependency trace, blast radius?
  → Tier 1: codebase-memory-mcp (always on, sub-ms)

Control flow paths, data flow, dead code, complex refactoring?
  → Tier 2: Joern CPG (on-demand, seconds)

Security audit, taint analysis, vulnerability detection?
  → Tier 3: CodeQL (on-demand, seconds to minutes)

Full security review before release?
  → All three tiers in sequence
```

---

## Tier 2: Joern CPG (CodeBadger MCP)

### When to Use Joern

| Scenario | Why Joern | Tier 1 Can't Do This |
|----------|-----------|---------------------|
| Trace data flow through functions | Full DDG traversal | Tier 1 has no data flow |
| Understanding control flow paths | CFG analysis with branch conditions | Tier 1 has no CFG |
| Finding dead/unreachable code | PDG reachability analysis | Tier 1 only detects unused exports |
| Complex refactoring impact | Cross-function dependency chains | Tier 1 limited to call graph |
| Auditing third-party library usage | Deep call chain traversal | Tier 1 stops at import boundary |
| Understanding exception flow | CFG includes throw/catch paths | Tier 1 ignores exceptions |

### Key MCP Tools (Joern/CodeBadger)

| Tool | Purpose | Example Query |
|------|---------|---------------|
| `generate_cpg` | Build CPG for project | First-time setup or after major changes |
| `get_cpg_status` | Check CPG build status | Verify CPG is ready before querying |
| `run_cpgql_query` | Run arbitrary CPGQL queries | `cpg.method("login").callOut.code.l` |
| `get_cpgql_syntax_help` | Query language reference | When unsure about query syntax |
| `get_cfg` | Control flow graph for a method | Understand execution paths in a function |
| `list_methods` | List all methods in project | Overview of available functions |
| `get_method_source` | Get source code of a method | Read specific function source |
| `list_calls` | List calls from/to a method | Caller/callee analysis |
| `get_call_graph` | Full call graph visualization | Understand call chains |
| `get_type_definition` | Type/class definitions | Understand type hierarchy |

### Supported Languages (Joern)

Java, Scala, C/C++, Python, JavaScript, TypeScript, PHP, Ruby, Go,
Kotlin, Swift, Lua

**Not supported:** Rust (use CodeQL for Rust)

### MCP Configuration (Joern)

```json
{
  "mcpServers": {
    "codebadger": {
      "url": "http://localhost:4242/mcp",
      "type": "http"
    }
  }
}
```

### Prerequisites

- Docker (for Joern backend)
- Python 3.10+ (for MCP server)
- Install: `~/.claude/install-graph-tools.sh --joern`

### Common CPGQL Queries

```scala
// Find all methods that handle user input
cpg.method.where(_.parameter.name(".*input.*|.*request.*")).name.l

// Trace data flow from parameter to return
// (reachableBy is called on the sink and takes the source as its argument)
def source = cpg.method("processPayment").parameter
def sink = cpg.method("processPayment").methodReturn
sink.reachableBy(source).l

// Find methods with high cyclomatic complexity
// (filter takes a Boolean predicate; where expects a traversal)
cpg.method.filter(_.controlStructure.size > 10).name.l

// Dead code: methods with no callers
cpg.method.whereNot(_.callIn).filter(_.name != "main").name.l

// Exception flow: methods that can throw but callers don't catch
cpg.method.where(_.ast.isControlStructure.isThrow).callIn.method.whereNot(_.ast.isControlStructure.isTry).name.l
```

---

## Tier 3: CodeQL

### When to Use CodeQL

| Scenario | Why CodeQL | Other Tiers Can't Do This |
|----------|-----------|--------------------------|
| Security audit before release | Interprocedural taint analysis | Joern has basic taint, CodeQL is deeper |
| Reviewing auth/payment code | Data flow from source to sink | Cross-function, cross-file taint |
| PR security review | Targeted vulnerability scan | Pre-built OWASP query packs |
| Compliance checking | CWE/OWASP pattern matching | Curated security query suites |
| Rust security analysis | Full Rust support | Joern doesn't support Rust |

### Key MCP Tools (CodeQL)

| Tool | Purpose |
|------|---------|
| `run_query` | Execute a CodeQL query against the database |
| `find_definitions` | Locate symbol definitions |
| `find_references` | Find all references to a symbol |
| `get_results` | Parse BQRS (Binary Query Result Sets) |

### Supported Languages (CodeQL)

C/C++, C#, Go, Java, Kotlin, JavaScript, TypeScript, Python, Ruby,
Swift, **Rust**

### MCP Configuration (CodeQL)

```json
{
  "mcpServers": {
    "codeql": {
      "command": "codeql-mcp",
      "args": ["--database", ".code-graph/codeql-db"]
    }
  }
}
```

### Prerequisites

- CodeQL CLI (`brew install codeql` on macOS)
- Install: `~/.claude/install-graph-tools.sh --codeql`

### Common CodeQL Patterns

```ql
// SQL injection: user input flows to SQL query
import python
from DataFlow::PathNode source, DataFlow::PathNode sink
where TaintTracking::hasFlowPath(source, sink)
  and source instanceof RemoteFlowSource
  and sink instanceof SqlExecution
select sink, source, sink, "SQL injection from $@.", source, "user input"

// Unvalidated redirect: user input flows to a redirect target.
// The flow predicate ties `source` to `sink`; without it the query reports
// every redirect sink as soon as any remote source exists in the codebase.
from DataFlow::PathNode source, DataFlow::PathNode sink
where TaintTracking::hasFlowPath(source, sink)
  and source instanceof RemoteFlowSource
  and sink instanceof RedirectSink
select sink, "Unvalidated redirect from user input"
```

---

## Combined Workflow: Deep Analysis

When performing security review or complex refactoring, use all tiers:

```
1. SCOPE       → Tier 1: detect_changes / get_architecture
                 Identify files and modules in scope

2. STRUCTURE   → Tier 1: search_graph / trace_call_path
                 Map the call graph and dependencies

3. FLOW        → Tier 2: get_cfg / run_cpgql_query
                 Analyze control flow and data flow paths

4. SECURITY    → Tier 3: run_query with taint analysis
                 Check for vulnerabilities in data paths

5. REPORT      → Combine findings from all tiers
                 Prioritize: Critical > High > Medium > Low
```

---

## Anti-Patterns

| Anti-Pattern | Do This Instead |
|-------------|-----------------|
| Using Joern/CodeQL for simple symbol lookup | Use Tier 1 `search_graph` (sub-ms vs seconds) |
| Running full CPG build on every commit | Build CPG on-demand; use Tier 1 for continuous monitoring |
| Querying Joern without checking `get_cpg_status` | Always verify CPG is built and current before querying |
| Running CodeQL without a specific security question | Have a hypothesis first; CodeQL queries are expensive |
| Ignoring Tier 1 blast radius before deep analysis | Always scope with Tier 1 first, then go deep on flagged areas |
| Using CodeQL for non-security structural queries | Use Joern CPGQL for structural/flow queries; CodeQL for security |


================================================
FILE: skills/credentials/SKILL.md
================================================
---
name: credentials
description: Centralized API key management from Access.txt
when-to-use: When setting up a new project that needs API keys or environment variables
user-invocable: false
effort: low
---

# Credentials Management Skill


For securely loading API keys from a centralized access file and configuring project environments.

---

## Credentials File Discovery

**REQUIRED**: When a project needs API keys, ask the user:

```
I need API credentials for [service]. Do you have a centralized access keys file?

Please provide the path (e.g., ~/Documents/Access.txt) or type 'manual' to enter keys directly.
```

### Default Locations to Check

```bash
~/Documents/Access.txt
~/Access.txt
~/.secrets/keys.txt
~/.credentials.txt
```

---

## Supported File Formats

The credentials file can use any of these formats:

### Format 1: Colon-separated
```
Render API: rnd_xxxxx
OpenAI API: sk-proj-xxxxx
Claude API: sk-ant-xxxxx
Reddit client id: xxxxx
Reddit secret: xxxxx
```

### Format 2: Key=Value
```
RENDER_API_KEY=rnd_xxxxx
OPENAI_API_KEY=sk-proj-xxxxx
ANTHROPIC_API_KEY=sk-ant-xxxxx
```

### Format 3: Mixed/Informal
```
Reddit api access:
client id Y1FgKALKmb6f6UxFtyMXfA
and secret is -QLoYdxMqOJkYrgk5KeGPa6Ps6vIiQ
```

---

## Key Identification Patterns

Use these patterns to identify keys in the file:

| Service | Pattern | Env Variable |
|---------|---------|--------------|
| OpenAI | `sk-proj-*` or `sk-*` | `OPENAI_API_KEY` |
| Claude/Anthropic | `sk-ant-*` | `ANTHROPIC_API_KEY` |
| Render | `rnd_*` | `RENDER_API_KEY` |
| Eleven Labs | `sk_*` followed by 40+ hex characters (not Stripe's `sk_test_*`/`sk_live_*`) | `ELEVEN_LABS_API_KEY` |
| Replicate | `r8_*` | `REPLICATE_API_TOKEN` |
| Supabase | URL + `eyJ*` (JWT) | `SUPABASE_URL`, `SUPABASE_ANON_KEY`, `SUPABASE_SERVICE_ROLE_KEY` |
| Reddit | client_id + secret pair | `REDDIT_CLIENT_ID`, `REDDIT_CLIENT_SECRET` |
| GitHub | `ghp_*` or `github_pat_*` | `GITHUB_TOKEN` |
| Vercel | `*_*` (from vercel.com) | `VERCEL_TOKEN` |
| Stripe (Test) | `sk_test_*`, `pk_test_*` | `STRIPE_SECRET_KEY`, `STRIPE_PUBLISHABLE_KEY` |
| Stripe (Live) | `sk_live_*`, `pk_live_*` | `STRIPE_SECRET_KEY`, `STRIPE_PUBLISHABLE_KEY` |
| Stripe Webhook | `whsec_*` | `STRIPE_WEBHOOK_SECRET` |
| Twilio | `SK*` + Account SID | `TWILIO_API_KEY`, `TWILIO_ACCOUNT_SID` |
| SendGrid | `SG.*` | `SENDGRID_API_KEY` |
| AWS | `AKIA*` + secret | `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` |
| PostHog | `phc_*` | `POSTHOG_API_KEY`, `NEXT_PUBLIC_POSTHOG_KEY` |

---

## Parsing Credentials File

When reading the user's access file, extract keys using these rules:

```python
# Python parsing logic
import re
from pathlib import Path

def parse_credentials_file(file_path: str) -> dict[str, str]:
    """Parse a credentials file in any of the supported formats.

    The file is scanned as one blob of text, so colon-separated, KEY=VALUE and
    informal prose layouts are all handled by the same regexes. For every
    credential the first match in the file wins.

    Args:
        file_path: Path to the credentials file; a leading ``~`` is expanded.

    Returns:
        Mapping of environment-variable name to the extracted secret. Only
        credentials that were actually found are present.

    Raises:
        OSError: If the file cannot be read (propagated from ``read_text``).
    """
    content = Path(file_path).expanduser().read_text()
    credentials = {}

    # Pattern matching for known key formats
    patterns = {
        'OPENAI_API_KEY': r'sk-proj-[A-Za-z0-9_-]+',
        'ANTHROPIC_API_KEY': r'sk-ant-[A-Za-z0-9_-]+',
        'RENDER_API_KEY': r'rnd_[A-Za-z0-9]+',
        'REPLICATE_API_TOKEN': r'r8_[A-Za-z0-9]+',
        'ELEVEN_LABS_API_KEY': r'sk_[a-f0-9]{40,}',
        'GITHUB_TOKEN': r'ghp_[A-Za-z0-9]+|github_pat_[A-Za-z0-9_]+',
        'STRIPE_SECRET_KEY': r'sk_(live|test)_[A-Za-z0-9]+',
        'STRIPE_PUBLISHABLE_KEY': r'pk_(live|test)_[A-Za-z0-9]+',
        'STRIPE_WEBHOOK_SECRET': r'whsec_[A-Za-z0-9]+',
        'POSTHOG_API_KEY': r'phc_[A-Za-z0-9]+',
    }

    # Supabase requires special handling (URL + JWT tokens). The two JWTs share
    # a prefix, so they are told apart by the label preceding the colon.
    supabase_url = re.search(r'https://[a-z0-9]+\.supabase\.co', content)
    anon_key = re.search(r'anon[^:]*:\s*(eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+)', content, re.I)
    service_role = re.search(r'service.?role[^:]*:\s*(eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+)', content, re.I)

    if supabase_url:
        credentials['SUPABASE_URL'] = supabase_url.group(0)
    if anon_key:
        credentials['SUPABASE_ANON_KEY'] = anon_key.group(1)
    if service_role:
        credentials['SUPABASE_SERVICE_ROLE_KEY'] = service_role.group(1)

    for env_var, pattern in patterns.items():
        match = re.search(pattern, content)
        if match:
            # group(0) is the whole key, including the (live|test) segment.
            credentials[env_var] = match.group(0)

    # Reddit requires special handling (client_id + secret pair).
    # The optional "(?:\s+is)?" skips the filler word in informal notes such as
    # "and secret is -QLo..."; without it the word "is" itself is captured.
    reddit_id = re.search(r'client.?id(?:\s+is)?[:\s]+([A-Za-z0-9_-]+)', content, re.I)
    reddit_secret = re.search(r'secret(?:\s+is)?[:\s]+([A-Za-z0-9_-]+)', content, re.I)
    if reddit_id:
        credentials['REDDIT_CLIENT_ID'] = reddit_id.group(1)
    if reddit_secret:
        credentials['REDDIT_CLIENT_SECRET'] = reddit_secret.group(1)

    return credentials
```

```typescript
// TypeScript parsing logic
/**
 * Extract known API credentials from the raw text of an access-keys file.
 *
 * The text is scanned as a whole, so colon-separated, KEY=VALUE and informal
 * prose layouts are all handled by the same regexes. For every credential the
 * first match in the text wins.
 *
 * @param content Full text of the credentials file.
 * @returns Map of environment-variable name to extracted secret; credentials
 *          that were not found are simply absent.
 */
function parseCredentialsFile(content: string): Record<string, string> {
  const credentials: Record<string, string> = {};

  // Supabase: project URL plus two JWTs that are told apart by their label.
  const supabaseUrl = content.match(/https:\/\/[a-z0-9]+\.supabase\.co/);
  const anonKey = content.match(
    /anon[^:]*:\s*(eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+)/i
  );
  const serviceRole = content.match(
    /service.?role[^:]*:\s*(eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+)/i
  );
  if (supabaseUrl) credentials.SUPABASE_URL = supabaseUrl[0];
  if (anonKey) credentials.SUPABASE_ANON_KEY = anonKey[1];
  if (serviceRole) credentials.SUPABASE_SERVICE_ROLE_KEY = serviceRole[1];

  const patterns: Record<string, RegExp> = {
    OPENAI_API_KEY: /sk-proj-[A-Za-z0-9_-]+/,
    ANTHROPIC_API_KEY: /sk-ant-[A-Za-z0-9_-]+/,
    RENDER_API_KEY: /rnd_[A-Za-z0-9]+/,
    REPLICATE_API_TOKEN: /r8_[A-Za-z0-9]+/,
    ELEVEN_LABS_API_KEY: /sk_[a-f0-9]{40,}/,
    GITHUB_TOKEN: /ghp_[A-Za-z0-9]+|github_pat_[A-Za-z0-9_]+/,
    STRIPE_SECRET_KEY: /sk_(live|test)_[A-Za-z0-9]+/,
    STRIPE_PUBLISHABLE_KEY: /pk_(live|test)_[A-Za-z0-9]+/,
    STRIPE_WEBHOOK_SECRET: /whsec_[A-Za-z0-9]+/,
    POSTHOG_API_KEY: /phc_[A-Za-z0-9]+/,
  };

  for (const [envVar, pattern] of Object.entries(patterns)) {
    const match = content.match(pattern);
    // match[0] is the whole key, including the (live|test) segment.
    if (match) credentials[envVar] = match[0];
  }

  // Reddit pair. The optional "(?:\s+is)?" skips the filler word in informal
  // notes such as "and secret is -QLo..."; without it "is" itself is captured.
  const redditId = content.match(/client.?id(?:\s+is)?[:\s]+([A-Za-z0-9_-]+)/i);
  const redditSecret = content.match(/secret(?:\s+is)?[:\s]+([A-Za-z0-9_-]+)/i);
  if (redditId) credentials.REDDIT_CLIENT_ID = redditId[1];
  if (redditSecret) credentials.REDDIT_CLIENT_SECRET = redditSecret[1];

  return credentials;
}
```

---

## Validation Commands

After extracting keys, validate them:

### OpenAI
```bash
curl -s -o /dev/null -w "%{http_code}" \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  https://api.openai.com/v1/models
# 200 = valid
```

### Anthropic/Claude
```bash
curl -s -o /dev/null -w "%{http_code}" \
  -H "x-api-key: $ANTHROPIC_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  https://api.anthropic.com/v1/models
# 200 = valid
```

### Render
```bash
curl -s -o /dev/null -w "%{http_code}" \
  -H "Authorization: Bearer $RENDER_API_KEY" \
  https://api.render.com/v1/services
# 200 = valid
```

### Reddit
```bash
# Get OAuth token first
TOKEN=$(curl -s -X POST \
  -u "$REDDIT_CLIENT_ID:$REDDIT_CLIENT_SECRET" \
  -d "grant_type=client_credentials" \
  -A "CredentialTest/1.0" \
  https://www.reddit.com/api/v1/access_token | jq -r '.access_token')
# Non-null token = valid
```

### Replicate
```bash
curl -s -o /dev/null -w "%{http_code}" \
  -H "Authorization: Token $REPLICATE_API_TOKEN" \
  https://api.replicate.com/v1/models
# 200 = valid
```

---

## Project Setup Workflow

When initializing a project that needs API keys:

### Step 1: Ask for Credentials File
```
This project needs the following API keys:
- ANTHROPIC_API_KEY (for Claude)
- SUPABASE_URL and SUPABASE_ANON_KEY

Do you have an access keys file? Please provide the path:
```

### Step 2: Read and Parse
```python
# Read the file
credentials = parse_credentials_file("~/Documents/Access.txt")

# Show what was found, never printing a complete secret
print("Found credentials:")
for key, value in credentials.items():
    # The prefix/suffix slices cover 12 characters in total, so a value of 12
    # characters or fewer would be printed in full. Only reveal the edges when
    # a meaningful part of the secret stays hidden; otherwise hide everything.
    if len(value) > 16:
        masked = value[:8] + "..." + value[-4:]
    else:
        masked = "***"
    print(f"  {key}: {masked}")
```

### Step 3: Validate Keys
```
Validating credentials...
✓ ANTHROPIC_API_KEY: Valid
✓ REDDIT_CLIENT_ID: Valid
✗ SUPABASE_URL: Not found in file
```

### Step 4: Create .env File
```bash
# Write to project .env
# The quoted 'EOF' delimiter keeps every value literal: real keys may contain
# `$` or backticks, which an unquoted heredoc would expand or execute.
cat > .env << 'EOF'
# Auto-generated from ~/Documents/Access.txt
ANTHROPIC_API_KEY=sk-ant-xxx...
REDDIT_CLIENT_ID=xxx...
REDDIT_CLIENT_SECRET=xxx...
EOF
# Secrets file: readable and writable by the owner only
chmod 600 .env

# Add to .gitignore if not present (exact-line match, so repeated runs do not
# append duplicate entries; a missing .gitignore is created by the append)
grep -qxF ".env" .gitignore 2>/dev/null || echo ".env" >> .gitignore
```

### Step 5: Report Missing Keys
```
Missing credentials that need manual setup:
- SUPABASE_URL: Get from supabase.com/dashboard/project/[ref]/settings/api
- SUPABASE_ANON_KEY: Same location as above

Would you like me to open these URLs?
```

---

## Service-Specific Setup Guides

### Reddit (from Access.txt)
```
Found in your access file:
- REDDIT_CLIENT_ID: Y1FgKA...
- REDDIT_CLIENT_SECRET: -QLoYd...

Also needed (add to Access.txt or enter manually):
- REDDIT_USER_AGENT: YourApp/1.0 by YourUsername
```

### Supabase (typically not in file)
```
Supabase credentials are project-specific. Get them from:
https://supabase.com/dashboard/project/[your-ref]/settings/api

Required:
- SUPABASE_URL
- SUPABASE_ANON_KEY
- SUPABASE_SERVICE_ROLE_KEY (for admin operations)
```

---

## Security Rules

- **NEVER** commit Access.txt or its path to git
- **NEVER** log full API keys - always mask middle characters
- **ALWAYS** add `.env` to `.gitignore`
- **ALWAYS** use environment variables, never hardcode keys
- **VALIDATE** keys before using them in production setup

---

## Quick Reference

```bash
# Check if credentials file exists
ls -la ~/Documents/Access.txt

# Common env var names
OPENAI_API_KEY
ANTHROPIC_API_KEY
RENDER_API_KEY
REDDIT_CLIENT_ID
REDDIT_CLIENT_SECRET
REPLICATE_API_TOKEN
ELEVEN_LABS_API_KEY
SUPABASE_URL
SUPABASE_ANON_KEY
GITHUB_TOKEN
```

### Prompt Template
```
I need API credentials for this project.

Do you have a centralized access keys file (like ~/Documents/Access.txt)?

If yes, provide the path and I'll:
1. Read and parse your keys
2. Validate they're working
3. Set up your project's .env file
4. Tell you which keys are missing
```


================================================
FILE: skills/cross-agent-delegation/SKILL.md
================================================
---
name: cross-agent-delegation
description: Cross-agent task routing — Codex auto-review, Kimi delegation by complexity score (iCPG + Claude reasoning), iCPG + Mnemos mandatory for all agents
when-to-use: Always loaded when multiple AI CLI tools are available (Claude, Kimi, Codex)
user-invocable: false
effort: medium
---

# Cross-Agent Delegation

Claude Code orchestrates task routing to Kimi and Codex. The user interacts with Claude only — delegation happens behind the scenes.

---

## Tool Detection

At session start, detect available tools:

```bash
# Probe PATH for each optional CLI and record the outcome as a true/false flag.
if command -v kimi >/dev/null 2>&1; then
  HAS_KIMI=true
else
  HAS_KIMI=false
fi

if command -v codex >/dev/null 2>&1; then
  HAS_CODEX=true
else
  HAS_CODEX=false
fi
```

---

## Codex Auto-Review (Stop Hook — Automatic)

When Codex is installed, a Stop hook reviews code after tests pass:

1. TDD loop check runs tests
2. `codex-auto-review.sh` runs Codex on the diff
3. Critical/High findings feed back to Claude (exit 2)
4. Clean reviews pass through (exit 0)

**Fully automatic.** No user or Claude action needed.

---

## Kimi Delegation (Claude Orchestrates)

When Kimi is installed and the task complexity is bounded, Claude delegates directly — the user does not need to run anything.

### Step 1: Score complexity, not file count

File count is a poor proxy for delegation risk. A 1-file change to an authz path is harder than a 12-file rename. Score the task on five dimensions, each 0-2, sourced from iCPG signals plus Claude's semantic reasoning:

| Dimension | 0 (low) | 1 (medium) | 2 (high) | Source |
|---|---|---|---|---|
| **Cyclomatic / surface depth** | <10 LOC, no branches | 10-50 LOC, ≤3 branches | 50+ LOC or nested control flow | iCPG `query_graph` over function bodies |
| **Fan-out (consumer blast radius)** | 0-2 callers | 3-10 callers | 11+ callers | iCPG `trace_path(<symbol>, mode=callers)` |
| **Crosses a security boundary** (SEC-006, auth, PII, RLS, org-scope, billing, payments) | None | Tangential | Direct read or write | iCPG SEC-* / R-063 tags + grep for `org_id`, `user_id`, `auth`, `pii` |
| **Concurrency / transactional** | Pure / sync | Async only | Locks, transactions, atomic claims, `FOR UPDATE`, `asyncio.Lock`, `session.begin` | iCPG concurrency flags + grep |
| **Domain invariants required** | None / well-documented inline | Some implicit (need to read 1-2 files) | Heavy (cross-doc, ADR-bound, RFC-bound) | Claude reasoning + iCPG ADR linkage |

```bash
# Auto-collect signals
icpg query blast <scope> --format json    # fan-out, async flags, sec tags
grep -rE "org_id|user_id|auth|pii"  <file>  # cheap sec heuristic if iCPG flags absent
grep -rE "asyncio.Lock|FOR UPDATE|session.begin" <file>  # concurrency heuristic
```

### Step 2: Sum → routing

| Total score | Route | Rationale |
|---|---|---|
| **0-3** | Kimi solo | Bounded surface, no security/concurrency/cross-doc concerns |
| **4-6** | Kimi → Codex auto-review (no user prompt) | Real risk, but not so high that we need full Claude context — Codex catches what Kimi might miss |
| **7-10** | Claude handles directly | Cross-cutting / security-critical / concurrency-heavy — needs full context |

### Step 3: Floor — trivial-case shortcut

To skip iCPG-query cost on truly trivial work:

```bash
# If <2 files changed AND no SEC/auth/PII/concurrency keyword in diff,
# → auto-Kimi without scoring.
# Start from an explicit "false" so AUTO_KIMI is always defined (safe under
# `set -u`) and a value left over from a previous task cannot leak through.
AUTO_KIMI=false
FILES=$(git diff --name-only | wc -l)
# grep -c prints 0 but exits 1 when nothing matches; `|| true` keeps that
# "no risk keywords" case from aborting scripts that run with `set -e`.
HAS_RISK_KEYWORDS=$(git diff | grep -ciE "org_id|auth|pii|asyncio|FOR UPDATE|transaction|session\.begin" || true)
if [ "$FILES" -lt 2 ] && [ "$HAS_RISK_KEYWORDS" -eq 0 ]; then
  AUTO_KIMI=true
fi
```

This handles the trivial-rename / typo-fix case without paying the iCPG round-trip.

### When NOT to Delegate (overrides scoring)

- User explicitly asked Claude to do it
- Cross-service changes (API + frontend + database) — needs full context regardless of score
- Production hotfix on a release branch — cross-tool review latency is too high
- Score of 2 (the maximum) in any single dimension (one critical axis is enough to keep Claude in the loop)

### Step 4: Delegate via Bash

Claude writes a mnemos checkpoint, then runs Kimi headless:

```bash
# 1. Save current context to disk
mnemos checkpoint --force

# 2. Get context summary for Kimi
CONTEXT=$(mnemos resume 2>/dev/null)

# 3. Get constraints for target files
CONSTRAINTS=$(icpg query constraints <target-file> 2>/dev/null)

# 4. Run Kimi headless with full context
kimi --print -y -w . -p "
## Context (from mnemos checkpoint)
$CONTEXT

## Constraints (from iCPG)
$CONSTRAINTS

## Task
<specific task description>

## Rules
- Run tests after changes
- Record changes: icpg record --base main
- Write checkpoint when done: mnemos checkpoint --force
"
```

### Step 5: Read Results

After Kimi finishes, Claude:

```bash
# Read what Kimi did
mnemos resume          # Kimi's checkpoint
icpg status            # Kimi's recorded symbols
git diff               # Kimi's file changes
```

### When NOT to Delegate

- Security-sensitive code (auth, crypto, payments)
- Cross-service changes (API + frontend + database)
- Refactors that touch shared interfaces
- User explicitly asked Claude to do it

---

## iCPG — Mandatory for All Agents

Before ANY code change, Claude runs these (and includes results when delegating):

### Pre-Task Queries

```bash
# 1. Duplicate check — already done?
icpg query prior "<goal>"

# 2. Constraints — what invariants apply?
icpg query constraints <file-path>

# 3. Risk — is this symbol fragile?
icpg query risk <symbol-name>
```

### After Code Changes

```bash
icpg record --reason <id> --base main
icpg drift check
```

---

## Mnemos — Mandatory for All Agents

### At Task Start

```bash
mnemos add goal "<task description>"
```

### At Sub-Goal Boundaries

```bash
mnemos checkpoint
```

### At Task End (auto-handled by Stop hook)

```bash
mnemos checkpoint --force
```

### Context Transfer Between Tools

The checkpoint is the bridge. Claude writes it, Kimi reads it:

```bash
# Claude saves state
mnemos checkpoint --force

# Kimi (or Codex) reads state
mnemos resume
```

The checkpoint contains: goal, constraints, recent files, git state, fatigue level.

---

## Full Orchestration Flow

```
TASK ARRIVES (user tells Claude)
    |
    v
[1] Claude: icpg query prior "<goal>"     ← Already done?
[2] Claude: trivial-case shortcut         ← <2 files & no risk keywords?
    |
    +-- YES + Kimi installed -----> AUTO-KIMI (no scoring)
    |
    +-- NO ↓
    v
[3] Claude: score complexity (5 dims × 0-2, iCPG + reasoning)
    |
    +-- score 0-3   ----> KIMI SOLO PATH
    |   [a] mnemos checkpoint --force
    |   [b] kimi --print -y -p "..."
    |   [c] mnemos resume + git diff
    |   [d] Continue in Claude
    |
    +-- score 4-6   ----> KIMI + CODEX REVIEW PATH
    |   [a] mnemos checkpoint --force
    |   [b] kimi --print -y -p "..."
    |   [c] codex review --uncommitted    ← Auto-review the diff
    |   [d] If P0/P1 findings: re-prompt Kimi with findings
    |   [e] Once clean: continue in Claude
    |
    +-- score 7-10  ----> CLAUDE DIRECT PATH (full context)
    |
    v
[4] icpg query constraints <files>         ← Invariants
[5] icpg query risk <symbols>              ← Fragility
[6] mnemos add goal "<task>"               ← Track in memory
    |
    v
[7] IMPLEMENT (TDD: RED -> GREEN)
    |
    v
[8]  Stop: tdd-loop-check.sh               ← Tests pass?
[9]  Stop: codex-auto-review.sh            ← Codex reviews diff
[10] Stop: icpg-stop-record.sh             ← Record symbols
[11] Stop: mnemos-checkpoint.sh            ← Save memory
```


================================================
FILE: skills/database-schema/SKILL.md
================================================
---
name: database-schema
description: Schema awareness - read before coding, type generation, prevent column errors
when-to-use: Before writing any database queries or modifying data models
user-invocable: false
paths: ["**/schema.*", "**/migrations/**", "**/models/**", "**/*.prisma", "**/drizzle/**"]
effort: medium
---

# Database Schema Awareness Skill


**Problem:** Claude forgets schema details mid-session - wrong column names, missing fields, incorrect types. TDD catches this at runtime, but we can prevent it earlier.

---

## Core Rule: Read Schema Before Writing Database Code

**MANDATORY: Before writing ANY code that touches the database:**

```
┌─────────────────────────────────────────────────────────────┐
│  1. READ the schema file (see locations below)              │
│  2. VERIFY columns/types you're about to use exist          │
│  3. REFERENCE schema in your response when writing queries  │
│  4. TYPE-CHECK using generated types (Drizzle/Prisma/etc)   │
└─────────────────────────────────────────────────────────────┘
```

**If schema file doesn't exist → CREATE IT before proceeding.**

---

## Schema File Locations (By Stack)

| Stack | Schema Location | Type Generation |
|-------|-----------------|-----------------|
| **Drizzle** | `src/db/schema.ts` or `drizzle/schema.ts` | Built-in TypeScript |
| **Prisma** | `prisma/schema.prisma` | `npx prisma generate` |
| **Supabase** | `supabase/migrations/*.sql` + types | `supabase gen types typescript` |
| **SQLAlchemy** | `app/models/*.py` or `src/models.py` | Pydantic models |
| **TypeORM** | `src/entities/*.ts` | Decorators = types |
| **Raw SQL** | `schema.sql` or `migrations/` | Manual types required |

### Schema Reference File (Recommended)

Create `_project_specs/schema-reference.md` for quick lookup:

```markdown
# Database Schema Reference

*Auto-generated or manually maintained. Claude: READ THIS before database work.*

## Tables

### users
| Column | Type | Nullable | Default | Notes |
|--------|------|----------|---------|-------|
| id | uuid | NO | gen_random_uuid() | PK |
| email | text | NO | - | Unique |
| name | text | YES | - | Display name |
| created_at | timestamptz | NO | now() | - |
| updated_at | timestamptz | NO | now() | - |

### orders
| Column | Type | Nullable | Default | Notes |
|--------|------|----------|---------|-------|
| id | uuid | NO | gen_random_uuid() | PK |
| user_id | uuid | NO | - | FK → users.id |
| status | text | NO | 'pending' | enum: pending/paid/shipped/delivered |
| total_cents | integer | NO | - | Amount in cents |
| created_at | timestamptz | NO | now() | - |

## Relationships
- users 1:N orders (user_id)

## Enums
- order_status: pending, paid, shipped, delivered
```

---

## Pre-Code Checklist (Database Work)

Before writing any database code, Claude MUST:

```markdown
### Schema Verification Checklist
- [ ] Read schema file: `[path to schema]`
- [ ] Columns I'm using exist: [list columns]
- [ ] Types match my code: [list type mappings]
- [ ] Relationships are correct: [list FKs]
- [ ] Nullable fields handled: [list nullable columns]
```

**Example in practice:**

```markdown
### Schema Verification for TODO-042 (Add order history endpoint)

- [x] Read schema: `src/db/schema.ts`
- [x] Columns exist: orders.id, orders.user_id, orders.status, orders.total_cents, orders.created_at
- [x] Types: id=uuid→string, total_cents=integer→number, status=text→OrderStatus enum
- [x] Relationships: orders.user_id → users.id (many-to-one)
- [x] Nullable: none of these columns are nullable
```

---

## Type Generation Commands

### Drizzle (TypeScript)

```typescript
// Schema defines types automatically
// src/db/schema.ts
import { pgTable, uuid, text, integer, timestamp } from 'drizzle-orm/pg-core';

export const users = pgTable('users', {
  id: uuid('id').primaryKey().defaultRandom(),
  email: text('email').notNull().unique(),
  name: text('name'),
  createdAt: timestamp('created_at').notNull().defaultNow(),
});

export const orders = pgTable('orders', {
  id: uuid('id').primaryKey().defaultRandom(),
  userId: uuid('user_id').notNull().references(() => users.id),
  status: text('status').notNull().default('pending'),
  totalCents: integer('total_cents').notNull(),
  createdAt: timestamp('created_at').notNull().defaultNow(),
});

// Inferred types - USE THESE
export type User = typeof users.$inferSelect;
export type NewUser = typeof users.$inferInsert;
export type Order = typeof orders.$inferSelect;
export type NewOrder = typeof orders.$inferInsert;
```

### Prisma

```prisma
// prisma/schema.prisma
model User {
  id        String   @id @default(uuid())
  email     String   @unique
  name      String?
  orders    Order[]
  createdAt DateTime @default(now()) @map("created_at")

  @@map("users")
}

model Order {
  id         String   @id @default(uuid())
  userId     String   @map("user_id")
  user       User     @relation(fields: [userId], references: [id])
  status     String   @default("pending")
  totalCents Int      @map("total_cents")
  createdAt  DateTime @default(now()) @map("created_at")

  @@map("orders")
}
```

```bash
# Generate types after schema changes
npx prisma generate
```

### Supabase

```bash
# Generate TypeScript types from live database
supabase gen types typescript --local > src/types/database.ts

# Or from remote
supabase gen types typescript --project-id your-project-id > src/types/database.ts
```

```typescript
// Use generated types
import { Database } from '@/types/database';

type User = Database['public']['Tables']['users']['Row'];
type NewUser = Database['public']['Tables']['users']['Insert'];
type Order = Database['public']['Tables']['orders']['Row'];
```

### SQLAlchemy (Python)

```python
# app/models/user.py
from sqlalchemy import Column, String, DateTime
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship  # required by `orders` below
from sqlalchemy.sql import func
from app.db import Base
import uuid

class User(Base):
    """ORM model for the `users` table."""

    __tablename__ = "users"

    # Primary key generated client-side as a Python UUID object
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    email = Column(String, nullable=False, unique=True)
    name = Column(String, nullable=True)
    # NOT NULL with a server-side default, matching the schema reference
    created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())

    # Relationships
    # NOTE(review): back_populates expects a matching `user` relationship on
    # the Order model, which is not shown here — confirm it exists.
    orders = relationship("Order", back_populates="user")
```

```python
# app/schemas/user.py - Pydantic for API validation
from pydantic import BaseModel, EmailStr
from uuid import UUID
from datetime import datetime

class UserBase(BaseModel):
    email: EmailStr
    name: str | None = None

class UserCreate(UserBase):
    pass

class User(UserBase):
    id: UUID
    created_at: datetime

    class Config:
        from_attributes = True
```

---

## Schema-Aware TDD Workflow

Extend the standard TDD workflow for database work:

```
┌─────────────────────────────────────────────────────────────┐
│  0. SCHEMA: Read and verify schema before anything else     │
│     └─ Read schema file                                     │
│     └─ Complete Schema Verification Checklist               │
│     └─ Note any missing columns/tables needed               │
├─────────────────────────────────────────────────────────────┤
│  1. RED: Write tests that use correct column names          │
│     └─ Import generated types                               │
│     └─ Use type-safe queries in tests                       │
│     └─ Tests should fail on logic, NOT schema errors        │
├─────────────────────────────────────────────────────────────┤
│  2. GREEN: Implement with type-safe queries                 │
│     └─ Use ORM types, not raw strings                       │
│     └─ TypeScript/mypy catches column mismatches            │
├─────────────────────────────────────────────────────────────┤
│  3. VALIDATE: Type check catches schema drift               │
│     └─ tsc --noEmit / mypy catches wrong columns            │
│     └─ Tests validate runtime behavior                      │
└─────────────────────────────────────────────────────────────┘
```

---

## Common Schema Mistakes (And How to Prevent)

| Mistake | Example | Prevention |
|---------|---------|------------|
| Wrong column name | `user.userName` vs `user.name` | Read schema, use generated types |
| Wrong type | `totalCents` as string | Type generation catches this |
| Missing nullable check | `user.name!` when nullable | Schema shows nullable fields |
| Wrong FK relationship | `order.userId` vs `order.user_id` | Check schema column names |
| Missing column | Using `user.avatar` that doesn't exist | Read schema before coding |
| Wrong enum value | `status: 'complete'` vs `'completed'` | Document enums in schema reference |

### Type-Safe Query Examples

**Drizzle (catches errors at compile time):**
```typescript
// ✅ Correct - uses schema-defined columns
const user = await db.select().from(users).where(eq(users.email, email));

// ❌ Wrong - TypeScript error: 'userName' doesn't exist
const user = await db.select().from(users).where(eq(users.userName, email));
```

**Prisma (catches errors at compile time):**
```typescript
// ✅ Correct
const user = await prisma.user.findUnique({ where: { email } });

// ❌ Wrong - TypeScript error
const user = await prisma.user.findUnique({ where: { userName: email } });
```

**Raw SQL (NO protection - avoid):**
```typescript
// ❌ Dangerous - no type checking, easy to get wrong
const result = await db.query('SELECT * FROM users WHERE user_name = $1', [email]);
// Should be 'email' not 'user_name' - won't catch until runtime
```

---

## Migration Workflow

When schema changes are needed:

```
┌─────────────────────────────────────────────────────────────┐
│  1. Update schema file (Drizzle/Prisma/SQLAlchemy)          │
├─────────────────────────────────────────────────────────────┤
│  2. Generate migration                                       │
│     └─ Drizzle: npx drizzle-kit generate                    │
│     └─ Prisma: npx prisma migrate dev --name add_column     │
│     └─ Supabase: supabase migration new add_column          │
├─────────────────────────────────────────────────────────────┤
│  3. Regenerate types                                         │
│     └─ Prisma: npx prisma generate                          │
│     └─ Supabase: supabase gen types typescript              │
├─────────────────────────────────────────────────────────────┤
│  4. Update schema-reference.md                               │
├─────────────────────────────────────────────────────────────┤
│  5. Run type check - find all broken code                    │
│     └─ npm run typecheck                                    │
├─────────────────────────────────────────────────────────────┤
│  6. Fix type errors, update tests, run full validation       │
└─────────────────────────────────────────────────────────────┘
```

---

## Session Start Protocol

**When starting a session that involves database work:**

1. Read schema file immediately
2. Read `_project_specs/schema-reference.md` if exists
3. Note in session state what tables/columns are relevant
4. Reference schema explicitly when writing code

**Session state example:**
```markdown
## Current Session - Database Context

**Schema read:** ✓ src/db/schema.ts
**Tables in scope:** users, orders, order_items
**Key columns:**
- users: id, email, name, created_at
- orders: id, user_id, status, total_cents
- order_items: id, order_id, product_id, quantity, price_cents
```

---

## Anti-Patterns

- ❌ **Guessing column names** - Always read schema first
- ❌ **Using raw SQL strings** - Use ORM with type generation
- ❌ **Hardcoding without verification** - Check schema before using any column
- ❌ **Ignoring type errors** - Schema drift shows up as type errors
- ❌ **Not regenerating types** - After migration, always regenerate
- ❌ **Assuming nullability** - Check the schema to see which columns are actually nullable

---

## Checklist

### Setup
- [ ] Schema file exists in standard location
- [ ] Type generation configured
- [ ] `_project_specs/schema-reference.md` created
- [ ] Types regenerate on schema change

### Per-Task
- [ ] Schema read before writing database code
- [ ] Schema Verification Checklist completed
- [ ] Using generated types (not raw strings)
- [ ] Type check passes (catches column errors)
- [ ] Tests use correct schema


================================================
FILE: skills/existing-repo/SKILL.md
================================================
---
name: existing-repo
description: Analyze existing repositories, maintain structure, setup guardrails and best practices
when-to-use: When working with an existing codebase for the first time or adding guardrails
user-invocable: true
allowed-tools: [Read, Glob, Grep, Bash]
effort: high
---

# Existing Repository Skill


For working with existing codebases - analyze structure, respect conventions, and set up proper guardrails without breaking anything.

**Sources:** [Husky](https://typicode.github.io/husky/) | [lint-staged](https://github.com/lint-staged/lint-staged) | [pre-commit](https://pre-commit.com/) | [commitlint](https://commitlint.js.org/)

---

## Core Principle

**Understand before modifying.** Existing repos have conventions, patterns, and history. Your job is to work within them, not reorganize them.

---

## Phase 1: Repository Analysis

**ALWAYS run this analysis first when joining an existing repo.**

### 1.1 Basic Detection

```bash
# Check git status
git remote -v 2>/dev/null
git branch -a 2>/dev/null
git log --oneline -5 2>/dev/null

# Check for existing configs
ls -la .* 2>/dev/null | head -20
ls *.json *.toml *.yaml *.yml 2>/dev/null
```

### 1.2 Tech Stack Detection

```bash
# JavaScript/TypeScript
ls package.json tsconfig.json 2>/dev/null

# Python
ls pyproject.toml setup.py requirements*.txt 2>/dev/null

# Mobile
ls pubspec.yaml 2>/dev/null          # Flutter
ls android/build.gradle 2>/dev/null   # Android
ls ios/*.xcodeproj 2>/dev/null        # iOS

# Other
ls Cargo.toml 2>/dev/null             # Rust
ls go.mod 2>/dev/null                 # Go
ls Gemfile 2>/dev/null                # Ruby
```

### 1.3 Repo Structure Type

| Pattern | Detection | Meaning |
|---------|-----------|---------|
| **Monorepo** | `packages/`, `apps/`, `workspaces` in package.json | Multiple projects, shared tooling |
| **Full-Stack Monolith** | `frontend/` + `backend/` in same repo | Single team, tightly coupled |
| **Separate Concerns** | Only frontend OR backend code | Split repos, separate deploys |
| **Microservices** | Multiple `service-*` or domain dirs | Distributed architecture |

```bash
# Detect repo structure type
# Monorepo markers: packages/ or apps/ directories, or a "workspaces" key in package.json
if [ -d "packages" ] || [ -d "apps" ] || grep -q '"workspaces"' package.json 2>/dev/null; then
    echo "MONOREPO detected"
elif [ -d "frontend" ] && [ -d "backend" ]; then
    echo "FULL-STACK MONOLITH detected"
elif [ -d "src" ] || [ -d "app" ]; then
    # Check if it's frontend or backend (both lines may print for a mixed codebase)
    grep -q "react\|vue\|angular" package.json 2>/dev/null && echo "FRONTEND detected"
    # Python backends often declare dependencies in requirements*.txt instead of pyproject.toml
    grep -q "fastapi\|express\|django" package.json pyproject.toml requirements*.txt 2>/dev/null && echo "BACKEND detected"
fi
```

### 1.4 Directory Mapping

```bash
# Get directory structure (max 3 levels)
# -maxdepth goes first (GNU find warns when it follows a test such as -type).
# -prune skips dependency/build directories entirely instead of walking them
# and filtering their children afterwards.
find . -maxdepth 3 \
    \( -name node_modules -o -name .git -o -name venv \
       -o -name __pycache__ -o -name dist -o -name build \) -prune \
    -o -type d -print \
    2>/dev/null | head -50

# Identify key directories
for dir in src app lib core services api routes components pages hooks utils models; do
    [ -d "$dir" ] && echo "Found: $dir/"
done
```

### 1.5 Entry Points

```bash
# Find main entry points
ls index.ts index.js main.ts main.py app.py server.ts server.js 2>/dev/null
cat package.json 2>/dev/null | grep -A1 '"main"'
cat pyproject.toml 2>/dev/null | grep -A1 'scripts'
```

---

## Phase 2: Convention Detection

**Identify and document existing patterns before making changes.**

### 2.1 Code Style

```bash
# Check for formatters
ls .prettierrc* .editorconfig .eslintrc* biome.json 2>/dev/null  # JS/TS
# grep the file directly: no `ls | xargs`, and a missing pyproject.toml stays silent
grep -l "ruff\|black\|isort" pyproject.toml 2>/dev/null  # Python

# Check indent style from existing files
# find recurses without needing bash's globstar; [[:space:]] is portable (unlike \s)
find src -type f -name '*.ts' -exec head -20 {} + 2>/dev/null | grep "^[[:space:]]" | head -1  # tabs vs spaces
```

### 2.2 Testing Setup

```bash
# JS/TS testing
grep -l "jest\|vitest\|mocha\|playwright" package.json 2>/dev/null
ls jest.config.* vitest.config.* playwright.config.* 2>/dev/null

# Python testing
grep -l "pytest\|unittest" pyproject.toml 2>/dev/null
ls pytest.ini conftest.py 2>/dev/null

# Test directories
ls -d tests/ test/ __tests__/ spec/ 2>/dev/null
```

### 2.3 CI/CD Setup

```bash
# Check existing workflows
ls -la .github/workflows/ 2>/dev/null
ls .gitlab-ci.yml Jenkinsfile .circleci/ 2>/dev/null

# Check deploy configs
ls vercel.json render.yaml fly.toml railway.json Dockerfile 2>/dev/null
```

### 2.4 Documentation Style

```bash
# Find README pattern
head -30 README.md 2>/dev/null

# Find existing docs
ls -la docs/ documentation/ wiki/ 2>/dev/null
ls CONTRIBUTING.md CHANGELOG.md 2>/dev/null
```

---

## Phase 3: Guardrails Audit

**Check what guardrails exist and what's missing.**

### 3.1 Pre-commit Hooks Status

```bash
# Check for hook managers
ls .husky/ 2>/dev/null && echo "Husky installed"
ls .pre-commit-config.yaml 2>/dev/null && echo "pre-commit framework installed"
ls .git/hooks/pre-commit 2>/dev/null && echo "Manual pre-commit hook exists"

# Check what hooks run
cat .husky/pre-commit 2>/dev/null
cat .pre-commit-config.yaml 2>/dev/null
```

### 3.2 Linting Status

```bash
# JS/TS linting
# 2>/dev/null keeps the audit quiet when a manifest does not exist in this repo
grep -q "eslint" package.json 2>/dev/null && echo "ESLint configured"
grep -q "biome" package.json 2>/dev/null && echo "Biome configured"
# eslint.config.* is the ESLint 9+ flat config; .eslintrc* is the legacy format
ls .eslintrc* eslint.config.* biome.json 2>/dev/null

# Python linting
grep -q "ruff" pyproject.toml 2>/dev/null && echo "Ruff configured"
grep -q "flake8" pyproject.toml setup.cfg 2>/dev/null && echo "Flake8 configured"
```

### 3.3 Type Checking Status

```bash
# TypeScript
ls tsconfig.json 2>/dev/null && echo "TypeScript configured"
grep "strict" tsconfig.json 2>/dev/null

# Python type checking
# 2>/dev/null keeps the audit quiet when pyproject.toml does not exist
grep -q "mypy" pyproject.toml 2>/dev/null && echo "mypy configured"
grep -q "pyright" pyproject.toml 2>/dev/null && echo "pyright configured"
ls py.typed 2>/dev/null
```

### 3.4 Commit Message Enforcement

```bash
# commitlint
ls commitlint.config.* 2>/dev/null && echo "commitlint configured"
cat .husky/commit-msg 2>/dev/null
grep "conventional" package.json 2>/dev/null
```

### 3.5 Security Scanning

```bash
# Check for security tools
# -l prints each file that mentions a tool (grep -q would print nothing at all)
grep -l "detect-secrets\|trufflehog" .pre-commit-config.yaml package.json 2>/dev/null
# Let grep take the glob directly; covers both .yml and .yaml workflow files
grep -l "security\|audit" .github/workflows/*.yml .github/workflows/*.yaml 2>/dev/null
```

---

## Phase 4: Guardrails Setup

**Only add missing guardrails. Never overwrite existing configurations.**

### 4.1 JavaScript/TypeScript Projects

#### Husky + lint-staged (if not present)

```bash
# Check if already installed
if [ ! -d ".husky" ]; then
    # Install Husky
    npm install -D husky lint-staged
    npx husky init

    # Create pre-commit hook
    echo 'npx lint-staged' > .husky/pre-commit
    chmod +x .husky/pre-commit
fi
```

**lint-staged config** (add to package.json if missing):

```json
{
  "lint-staged": {
    "*.{ts,tsx,js,jsx}": [
      "eslint --fix",
      "prettier --write"
    ],
    "*.{json,md,yml,yaml}": [
      "prettier --write"
    ]
  }
}
```

#### ESLint (if not present)

```bash
# Check if eslint exists
if ! grep -q "eslint" package.json; then
    # Install exactly what the flat config below imports:
    # '@eslint/js' and 'typescript-eslint' (the combined parser + plugin package)
    npm install -D eslint @eslint/js typescript-eslint
fi
```

**eslint.config.js** (ESLint 9+ flat config):

```javascript
import eslint from '@eslint/js'
import tseslint from 'typescript-eslint'

export default tseslint.config(
  eslint.configs.recommended,
  ...tseslint.configs.recommended,
  {
    rules: {
      '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
      '@typescript-eslint/explicit-function-return-type': 'off',
      'no-console': ['warn', { allow: ['warn', 'error'] }]
    }
  },
  {
    ignores: ['dist/', 'node_modules/', 'coverage/']
  }
)
```

#### Prettier (if not present)

```bash
if ! grep -q "prettier" package.json; then
    npm install -D prettier
fi
```

**.prettierrc** (respect existing style or use sensible defaults):

```json
{
  "semi": false,
  "singleQuote": true,
  "trailingComma": "es5",
  "tabWidth": 2,
  "printWidth": 100
}
```

#### commitlint (if not present)

```bash
# Skip if any commitlint config already exists (.js, .cjs, .mjs, .ts, ...)
if ! ls commitlint.config.* >/dev/null 2>&1; then
    npm install -D @commitlint/cli @commitlint/config-conventional
    # Single quotes keep $1 literal in the hook file; --no stops npx from
    # downloading commitlint when the local install is missing
    echo 'npx --no -- commitlint --edit "$1"' > .husky/commit-msg
    chmod +x .husky/commit-msg
fi
```

**commitlint.config.js**:

```javascript
export default {
  extends: ['@commitlint/config-conventional'],
  rules: {
    'type-enum': [
      2,
      'always',
      ['feat', 'fix', 'docs', 'style', 'refactor', 'test', 'chore', 'ci', 'perf', 'revert']
    ],
    'subject-case': [2, 'always', 'lower-case'],
    'subject-max-length': [2, 'always', 72]
  }
}
```

### 4.2 Python Projects

#### pre-commit framework (if not present)

```bash
# Install pre-commit
if [ ! -f ".pre-commit-config.yaml" ]; then
    pip install pre-commit
    # Default hook type (pre-commit stage)
    pre-commit install
    # Hooks declared with `stages: [commit-msg]` only run if this hook type is installed too
    pre-commit install --hook-type commit-msg

    # The detect-secrets hook in the config below is pointed at .secrets.baseline;
    # generate it once so the hook has a baseline to compare against
    pip install detect-secrets
    detect-secrets scan > .secrets.baseline
fi
```

**.pre-commit-config.yaml**:

```yaml
repos:
  # Ruff - linting and formatting (replaces black, isort, flake8)
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.13
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
      - id: ruff-format

  # Type checking
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.16.0
    hooks:
      - id: mypy
        additional_dependencies: [types-requests]
        args: [--ignore-missing-imports]

  # Security
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.5.0
    hooks:
      - id: detect-secrets
        args: ['--baseline', '.secrets.baseline']

  # General
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
      - id: check-merge-conflict

  # Commit messages
  - repo: https://github.com/compilerla/conventional-pre-commit
    rev: v4.0.0
    hooks:
      - id: conventional-pre-commit
        stages: [commit-msg]
```

#### pyproject.toml additions (if not present)

```toml
[tool.ruff]
target-version = "py312"
line-length = 100

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "UP",  # pyupgrade
    "S",   # flake8-bandit (security)
]
ignore = ["E501"]  # line length handled by formatter

[tool.mypy]
python_version = "3.12"
strict = true
ignore_missing_imports = true

[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-v --cov=src --cov-report=term-missing --cov-fail-under=80"
```

### 4.3 Branch Protection (Document for User)

Recommend these GitHub branch protection rules:

```markdown
## Recommended Branch Protection (main branch)

1. **Require pull request before merging**
   - Require 1 approval
   - Dismiss stale reviews on new commits

2. **Require status checks**
   - Lint
   - Type check
   - Tests
   - Security scan

3. **Require signed commits** (optional but recommended)

4. **Do not allow bypassing above settings**
```

---

## Phase 5: Structure Preservation Rules

### NEVER Do These

- **Don't reorganize directory structure** - Work within existing patterns
- **Don't rename files for "consistency"** - Match existing naming conventions
- **Don't add new patterns** - Use patterns already in the codebase
- **Don't change import styles** - Match existing (relative vs absolute, etc.)
- **Don't change formatting** - Match existing style or use existing formatter config
- **Don't add new dependencies lightly** - Check if equivalent exists

### ALWAYS Do These

- **Read existing code first** - Understand patterns before writing new code
- **Match existing conventions** - Naming, structure, error handling
- **Use existing utilities** - Don't reinvent what exists
- **Follow existing test patterns** - Match test file naming and structure
- **Preserve existing configs** - Only add, don't modify unless fixing bugs

### Convention Detection Checklist

Before writing any code, identify:

| Convention | Example | Where to Check |
|------------|---------|----------------|
| Naming | camelCase vs snake_case | Existing file names |
| File structure | feature/ vs type/ | Directory layout |
| Export style | default vs named | Existing modules |
| Error handling | throw vs return Error | Existing functions |
| Logging | console vs logger | Existing code |
| Testing | describe/it vs test() | Existing tests |
| Comments | JSDoc vs inline | Existing code |

---

## Phase 6: Analysis Report Template

After running analysis, generate this report:

```markdown
# Repository Analysis Report

## Overview
- **Repo Type**: [Monorepo | Full-Stack | Frontend | Backend | Microservices]
- **Primary Language**: [TypeScript | Python | ...]
- **Framework**: [React | FastAPI | ...]
- **Age**: [X commits, Y contributors]

## Directory Structure
~~~
[tree output]
~~~

## Tech Stack
| Category | Technology | Config File |
|----------|------------|-------------|
| Language | TypeScript | tsconfig.json |
| Framework | React | - |
| Testing | Vitest | vitest.config.ts |
| Linting | ESLint | eslint.config.js |
| Formatting | Prettier | .prettierrc |

## Guardrails Status

### Present
- [x] ESLint configured
- [x] Prettier configured
- [x] TypeScript strict mode

### Missing (Recommended)
- [ ] Pre-commit hooks (Husky + lint-staged)
- [ ] Commit message validation (commitlint)
- [ ] Security scanning in CI

## Conventions Detected
| Pattern | Observed | Example |
|---------|----------|---------|
| Naming | camelCase | `getUserById.ts` |
| Imports | Absolute | `@/components/Button` |
| Testing | Colocated | `Button.test.tsx` |
| Exports | Named | `export { Button }` |

## Recommendations
1. Add Husky + lint-staged for pre-commit hooks
2. Add commitlint for conventional commits
3. Add security workflow to GitHub Actions

## Files to Review First
- `src/index.ts` - Main entry point
- `src/utils/` - Shared utilities
- `tests/setup.ts` - Test configuration
```

---

## Gradual Implementation Strategy

Don't add all guardrails at once. Follow this timeline:

| Week | Focus | Why |
|------|-------|-----|
| 1 | Formatting (Prettier/Ruff) | Non-breaking, easy wins |
| 2 | Linting (ESLint/Ruff) | Catches obvious issues |
| 3 | Pre-commit hooks | Automates week 1-2 |
| 4 | Commit message validation | Team consistency |
| 5 | Type checking strictness | Catches runtime errors |
| 6 | Security scanning | Catches vulnerabilities |

---

## Working with Separate Repos

When frontend and backend are in separate repos:

### Frontend Repo Setup

```bash
# Clone and analyze
git clone [frontend-repo]
cd frontend

# Run analysis
# Expect: React/Vue/Angular, no backend code

# Add frontend-specific guardrails
# - Husky + lint-staged
# - ESLint + Prettier
# - Component testing (Vitest/Jest)
```

### Backend Repo Setup

```bash
# Clone and analyze
git clone [backend-repo]
cd backend

# Run analysis
# Expect: FastAPI/Express/Django, no frontend code

# Add backend-specific guardrails
# - pre-commit framework
# - Ruff + mypy
# - API testing (pytest/Jest)
```

### Cross-Repo Coordination

| Concern | Solution |
|---------|----------|
| Shared types | Generate from OpenAPI spec |
| API contracts | Contract testing (Pact) |
| Deployments | Coordinate via CI/CD triggers |
| Versioning | Semantic versioning on both |

---

## Anti-Patterns

- **Adding unused guardrails** - Only add what the team will use
- **Strict rules on day 1** - Introduce gradually
- **Blocking on warnings** - Start permissive, tighten over time
- **Ignoring existing patterns** - Work with what exists
- **Over-engineering** - Simple rules > complex systems
- **Skipping the analysis phase** - Always understand before changing

---

## Quick Reference: Detection Commands

```bash
# Quick repo analysis
# Every section runs independently, so an unmatched glob in one section can
# never skip or mislabel another. Fallback text is printed only when a
# section finds nothing at all (ls exits non-zero if ANY name is missing,
# so its exit status cannot be used for that).
repo_quick_scan() {
    local found

    echo "=== Repo Type ==="
    found=$(ls -d packages apps frontend backend 2>/dev/null)
    echo "${found:-Standard repo}"

    echo "=== Tech Stack ==="
    ls *.json *.toml *.yaml 2>/dev/null

    echo "=== Existing Guardrails ==="
    # -d lists .husky itself rather than the files inside it
    found=$(ls -d .husky .pre-commit-config.yaml .eslintrc* 2>/dev/null)
    echo "${found:-None detected}"

    echo "=== Entry Points ==="
    ls index.* main.* app.* server.* 2>/dev/null

    # An empty section is not a failure
    return 0
}
repo_quick_scan
```


================================================
FILE: skills/firebase/SKILL.md
================================================
---
name: firebase
description: Firebase Firestore, Auth, Storage, real-time listeners, security rules
when-to-use: When working with Firebase services
user-invocable: false
paths: ["**/firebase*", "firestore.rules", "storage.rules", "firebase.json"]
effort: medium
---

# Firebase Skill


Firebase/Firestore patterns for web and mobile applications with real-time data, offline support, and security rules.

**Sources:** [Firebase Docs](https://firebase.google.com/docs) | [Firestore Best Practices](https://firebase.google.com/docs/firestore/best-practices) | [Security Rules](https://firebase.google.com/docs/rules)

---

## Core Principle

**Denormalize with purpose, secure with rules, scale horizontally.**

Firestore is a document database - embrace denormalization for read efficiency. Security rules are your server-side validation. Design for your access patterns.

---

## Firebase Stack

| Service | Purpose |
|---------|---------|
| **Firestore** | NoSQL document database with real-time sync |
| **Authentication** | User auth, OAuth, anonymous sessions |
| **Storage** | File uploads with security rules |
| **Functions** | Serverless backend (Node.js) |
| **Hosting** | Static site + CDN |
| **Extensions** | Pre-built solutions (Stripe, Algolia, etc.) |

---

## Project Setup

### Install Firebase CLI
```bash
# Install globally
npm install -g firebase-tools

# Login
firebase login

# Initialize in project
firebase init
```

### Initialize with Emulators
```bash
firebase init emulators

# Start local development
firebase emulators:start
```

### Project Structure
```
project/
├── firebase.json           # Firebase config
├── firestore.rules         # Security rules
├── firestore.indexes.json  # Composite indexes
├── storage.rules           # Storage security rules
└── functions/              # Cloud Functions
    ├── src/
    ├── package.json
    └── tsconfig.json
```

---

## Firestore Data Modeling

### Document Structure
```typescript
// Good: Flat documents with all needed data
interface Post {
  id: string;
  title: string;
  content: string;
  authorId: string;
  authorName: string;      // Denormalized for display
  authorAvatar: string;    // Denormalized
  tags: string[];
  likeCount: number;       // Aggregated counter
  createdAt: Timestamp;
  updatedAt: Timestamp;
}

// Collection: posts/{postId}
```

### When to Use Subcollections
```typescript
// Use subcollections for:
// 1. Unbounded lists (comments, messages)
// 2. Data with different access patterns
// 3. Data that grows independently

// posts/{postId}/comments/{commentId}
interface Comment {
  id: string;
  text: string;
  authorId: string;
  authorName: string;
  createdAt: Timestamp;
}
```

### Data Model Patterns

```typescript
// Pattern 1: Embedded data (bounded, always needed)
interface User {
  id: string;
  email: string;
  profile: {
    displayName: string;
    bio: string;
    avatar: string;
  };
  settings: {
    notifications: boolean;
    theme: 'light' | 'dark';
  };
}

// Pattern 2: Reference with denormalization
interface Order {
  id: string;
  userId: string;
  userEmail: string;  // Denormalized for display
  items: OrderItem[]; // Embedded (bounded)
  total: number;
  status: 'pending' | 'paid' | 'shipped';
}

// Pattern 3: Aggregation documents
// Keep counters in parent document
interface Channel {
  id: string;
  name: string;
  memberCount: number;  // Updated via Cloud Function
  messageCount: number;
}
```

---

## TypeScript SDK (Modular v9+)

### Initialize Firebase
```typescript
// lib/firebase.ts
import { initializeApp, getApps } from 'firebase/app';
import { getFirestore, connectFirestoreEmulator } from 'firebase/firestore';
import { getAuth, connectAuthEmulator } from 'firebase/auth';
import { getStorage, connectStorageEmulator } from 'firebase/storage';

const firebaseConfig = {
  apiKey: process.env.NEXT_PUBLIC_FIREBASE_API_KEY,
  authDomain: process.env.NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN,
  projectId: process.env.NEXT_PUBLIC_FIREBASE_PROJECT_ID,
  storageBucket: process.env.NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET,
  messagingSenderId: process.env.NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID,
  appId: process.env.NEXT_PUBLIC_FIREBASE_APP_ID
};

// Initialize only once
const app = getApps().length === 0 ? initializeApp(firebaseConfig) : getApps()[0];

export const db = getFirestore(app);
export const auth = getAuth(app);
export const storage = getStorage(app);

// Connect to emulators in development
if (process.env.NODE_ENV === 'development') {
  connectFirestoreEmulator(db, 'localhost', 8080);
  connectAuthEmulator(auth, 'http://localhost:9099');
  connectStorageEmulator(storage, 'localhost', 9199);
}
```

### CRUD Operations
```typescript
import {
  collection,
  doc,
  getDoc,
  getDocs,
  addDoc,
  setDoc,
  updateDoc,
  deleteDoc,
  query,
  where,
  orderBy,
  limit,
  startAfter,
  serverTimestamp,
  Timestamp
} from 'firebase/firestore';
import { db } from './firebase';

// Create
async function createPost(data: Omit<Post, 'id' | 'createdAt' | 'updatedAt'>) {
  const docRef = await addDoc(collection(db, 'posts'), {
    ...data,
    createdAt: serverTimestamp(),
    updatedAt: serverTimestamp()
  });
  return docRef.id;
}

// Fetch one post by id; resolves to null when the document does not exist
async function getPost(postId: string): Promise<Post | null> {
  const postRef = doc(db, 'posts', postId);
  const snap = await getDoc(postRef);
  return snap.exists() ? ({ id: snap.id, ...snap.data() } as Post) : null;
}

// Newest-first posts for one author, capped at pageSize results
async function getPostsByAuthor(authorId: string, pageSize = 10) {
  const postsRef = collection(db, 'posts');
  const newestByAuthor = query(
    postsRef,
    where('authorId', '==', authorId),
    orderBy('createdAt', 'desc'),
    limit(pageSize)
  );
  const { docs } = await getDocs(newestByAuthor);
  // Merge the document id into the payload so callers get a complete Post
  return docs.map((postDoc) => ({ id: postDoc.id, ...postDoc.data() } as Post));
}

// Pagination
async function getNextPage(lastDoc: Post, pageSize = 10) {
  const q = query(
    collection(db, 'posts'),
    orderBy('createdAt', 'desc'),
    startAfter(lastDoc.createdAt),
    limit(pageSize)
  );
  const snapshot = await getDocs(q);
  return snapshot.docs.map(doc => ({ id: doc.id, ...doc.data() } as Post));
}

// Update
async function updatePost(postId: string, data: Partial<Post>) {
  await updateDoc(doc(db, 'posts', postId), {
    ...data,
    updatedAt: serverTimestamp()
  });
}

// Delete
async function deletePost(postId: string) {
  await deleteDoc(doc(db, 'posts', postId));
}
```

### Real-time Listeners
```typescript
import { onSnapshot, QuerySnapshot, DocumentSnapshot } from 'firebase/firestore';

// Listen to single document
function subscribeToPost(
  postId: string,
  onData: (post: Post | null) => void,
  onError: (error: Error) => void
) {
  return onSnapshot(
    doc(db, 'posts', postId),
    (snapshot: DocumentSnapshot) => {
      if (!snapshot.exists()) {
        onData(null);
        return;
      }
      onData({ id: snapshot.id, ...snapshot.data() } as Post);
    },
    onError
  );
}

// Listen to collection with query
function subscribeToPosts(
  authorId: string,
  onData: (posts: Post[]) => void,
  onError: (error: Error) => void
) {
  const q = query(
    collection(db, 'posts'),
    where('authorId', '==', authorId),
    orderBy('createdAt', 'desc')
  );

  return onSnapshot(
    q,
    (snapshot: QuerySnapshot) => {
      const posts = snapshot.docs.map(doc => ({
        id: doc.id,
        ...doc.data()
      } as Post));
      onData(posts);
    },
    onError
  );
}

// React hook example
// Subscribes to posts/{postId} and exposes { post, loading, error }.
// The listener is torn down on unmount and whenever postId changes.
function usePost(postId: string) {
  const [post, setPost] = useState<Post | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  useEffect(() => {
    // Reset state for the new id so the previous post (or error) is not
    // shown as if it belonged to the post that is still loading
    setPost(null);
    setError(null);
    setLoading(true);

    const unsubscribe = subscribeToPost(
      postId,
      (data) => {
        setPost(data);
        setError(null); // a successful snapshot supersedes an earlier error
        setLoading(false);
      },
      (err) => {
        setError(err);
        setLoading(false);
      }
    );
    // onSnapshot's unsubscribe function doubles as the effect cleanup
    return unsubscribe;
  }, [postId]);

  return { post, loading, error };
}
```

### Offline Persistence (Web)
```typescript
import { enableIndexedDbPersistence, enableMultiTabIndexedDbPersistence } from 'firebase/firestore';

// Enable offline persistence (call once at startup, before other Firestore use).
// Exactly ONE persistence mode is enabled per call - the two modes are
// alternatives and must not both be invoked on the same Firestore instance.
//   multiTab = true  (default, recommended): cache shared across browser tabs
//   multiTab = false: single-tab persistence
async function enableOffline(multiTab = true) {
  try {
    if (multiTab) {
      await enableMultiTabIndexedDbPersistence(db);
    } else {
      await enableIndexedDbPersistence(db);
    }
  } catch (err: any) {
    if (err.code === 'failed-precondition') {
      // Multiple tabs open, only works in one
      console.warn('Persistence only available in one tab');
    } else if (err.code === 'unimplemented') {
      // Browser doesn't support
      console.warn('Persistence not supported');
    } else {
      // Stay best-effort (the app still works online) but don't hide the cause
      console.error('Failed to enable offline persistence', err);
    }
  }
}

// Check if data is from cache
onSnapshot(docRef, (snapshot) => {
  const source = snapshot.metadata.fromCache ? 'cache' : 'server';
  console.log(`Data from ${source}`);

  if (snapshot.metadata.hasPendingWrites) {
    console.log('Local changes pending sync');
  }
});
```

---

## Security Rules

### Basic Rules Structure
```javascript
// firestore.rules
rules_version = '2';
service cloud.firestore {
  match /databases/{database}/documents {

    // Helper functions
    function isAuthenticated() {
      return request.auth != null;
    }

    function isOwner(userId) {
      return request.auth.uid == userId;
    }

    function isAdmin() {
      return request.auth.token.admin == true;
    }

    // Posts collection
    match /posts/{postId} {
      // Anyone can read published posts
      allow read: if resource.data.status == 'published';

      // Only authenticated users can create
      allow create: if isAuthenticated()
        && request.resource.data.authorId == request.auth.uid
        && request.resource.data.keys().hasAll(['title', 'content', 'authorId']);

      // Only author can update
      allow update: if isOwner(resource.data.authorId)
        && request.resource.data.authorId == resource.data.authorId; // Can't change author

      // Only author or admin can delete
      allow delete: if isOwner(resource.data.authorId) || isAdmin();

      // Comments subcollection
      match /comments/{commentId} {
        allow read: if true;

        // authorId must be the caller's own uid. update/delete below trust
        // resource.data.authorId, so it has to be validated when it is written.
        allow create: if isAuthenticated()
          && request.resource.data.authorId == request.auth.uid;

        // Only the author can edit, and authorship can't be reassigned
        allow update: if isOwner(resource.data.authorId)
          && request.resource.data.authorId == resource.data.authorId;

        allow delete: if isOwner(resource.data.authorId);
      }
    }

    // User profiles
    match /users/{userId} {
      allow read: if true;
      allow create: if isAuthenticated() && isOwner(userId);
      allow update: if isOwner(userId);
      allow delete: if false; // Never allow delete
    }

    // Private user data
    match /users/{userId}/private/{document=**} {
      allow read, write: if isOwner(userId);
    }
  }
}
```

### Data Validation in Rules
```javascript
match /posts/{postId} {
  function isValidPost() {
    let data = request.resource.data;
    return data.title is string
      && data.title.size() >= 3
      && data.title.size() <= 100
      && data.content is string
      && data.content.size() <= 50000
      && data.tags is list
      && data.tags.size() <= 5;
  }

  allow create: if isAuthenticated() && isValidPost();
  allow update: if isOwner(resource.data.authorId) && isValidPost();
}
```

### Test Rules Locally
```bash
# Start the emulators (leave running in a separate terminal)
firebase emulators:start

# Run rules tests
npm test
```

```typescript
// tests/firestore.rules.test.ts
// Runs against the Firestore emulator (start it before running the tests).
import { readFileSync } from 'node:fs';
import {
  assertFails,
  assertSucceeds,
  initializeTestEnvironment,
  type RulesTestEnvironment
} from '@firebase/rules-unit-testing';
import { doc, setDoc, updateDoc } from 'firebase/firestore';

describe('Firestore Rules', () => {
  let testEnv: RulesTestEnvironment;

  beforeAll(async () => {
    testEnv = await initializeTestEnvironment({
      projectId: 'test-project',
      firestore: { rules: readFileSync('firestore.rules', 'utf8') }
    });
  });

  // Start every test from an empty database so tests don't depend on order
  afterEach(async () => {
    await testEnv.clearFirestore();
  });

  // Release emulator connections so the test runner can exit
  afterAll(async () => {
    await testEnv.cleanup();
  });

  test('unauthenticated users cannot write', async () => {
    const unauthedDb = testEnv.unauthenticatedContext().firestore();
    await assertFails(
      setDoc(doc(unauthedDb, 'posts/test'), { title: 'Test' })
    );
  });

  test('users can only update own posts', async () => {
    const aliceDb = testEnv.authenticatedContext('alice').firestore();
    const bobDb = testEnv.authenticatedContext('bob').firestore();

    // Create as Alice - the create rule requires title, content AND authorId
    await assertSucceeds(
      setDoc(doc(aliceDb, 'posts/test'), {
        title: 'Test',
        content: 'Hello world',
        authorId: 'alice'
      })
    );

    // Bob cannot update
    await assertFails(
      updateDoc(doc(bobDb, 'posts/test'), { title: 'Hacked' })
    );
  });
});
```

---

## Authentication

### Email/Password Auth
```typescript
import {
  createUserWithEmailAndPassword,
  signInWithEmailAndPassword,
  signOut,
  onAuthStateChanged,
  User
} from 'firebase/auth';
import { auth } from './firebase';

// Sign up
async function signUp(email: string, password: string) {
  const credential = await createUserWithEmailAndPassword(auth, email, password);
  return credential.user;
}

// Sign in
async function signIn(email: string, password: string) {
  const credential = await signInWithEmailAndPassword(auth, email, password);
  return credential.user;
}

// Sign out
async function logout() {
  await signOut(auth);
}

// Auth state listener
function onAuthChange(callback: (user: User | null) => void) {
  return onAuthStateChanged(auth, callback);
}
```

### OAuth Providers
```typescript
import {
  GoogleAuthProvider,
  signInWithPopup,
  signInWithRedirect
} from 'firebase/auth';

const googleProvider = new GoogleAuthProvider();

async function signInWithGoogle() {
  try {
    const result = await signInWithPopup(auth, googleProvider);
    return result.user;
  } catch (error) {
    // Handle errors
    throw error;
  }
}
```

---

## Cloud Functions

### Basic HTTP Function
```typescript
// functions/src/index.ts
import { onRequest } from 'firebase-functions/v2/https';
import { onDocumentCreated } from 'firebase-functions/v2/firestore';
import { initializeApp } from 'firebase-admin/app';
import { FieldValue, getFirestore } from 'firebase-admin/firestore';

initializeApp();
const db = getFirestore();

// HTTP endpoint
export const helloWorld = onRequest((request, response) => {
  response.json({ message: 'Hello from Firebase!' });
});

// Firestore trigger
export const onPostCreated = onDocumentCreated('posts/{postId}', async (event) => {
  const snapshot = event.data;
  if (!snapshot) return;

  const post = snapshot.data();

  // Update author's post count
  await db.doc(`users/${post.authorId}`).update({
    postCount: FieldValue.increment(1)
  });
});
```

### Callable Functions
```typescript
// Backend
import { onCall, HttpsError } from 'firebase-functions/v2/https';

export const createPost = onCall(async (request) => {
  // Auth check
  if (!request.auth) {
    throw new HttpsError('unauthenticated', 'Must be logged in');
  }

  const { title, content } = request.data;

  // Validation
  if (!title || title.length < 3) {
    throw new HttpsError('invalid-argument', 'Title must be at least 3 characters');
  }

  // Create post
  const postRef = await db.collection('posts').add({
    title,
    content,
    authorId: request.auth.uid,
    createdAt: FieldValue.serverTimestamp()
  });

  return { postId: postRef.id };
});

// Frontend
import { getFunctions, httpsCallable } from 'firebase/functions';

const functions = getFunctions();
const createPostFn = httpsCallable(functions, 'createPost');

async function createPost(title: string, content: string) {
  const result = await createPostFn({ title, content });
  return result.data as { postId: string };
}
```

---

## Batch Operations & Transactions

### Batch Writes
```typescript
import { writeBatch, doc } from 'firebase/firestore';

async function batchUpdate(updates: { id: string; data: any }[]) {
  const batch = writeBatch(db);

  updates.forEach(({ id, data }) => {
    batch.update(doc(db, 'posts', id), data);
  });

  await batch.commit(); // Atomic
}
```

### Transactions
```typescript
import { runTransaction, doc, increment, serverTimestamp } from 'firebase/firestore';

async function likePost(postId: string, userId: string) {
  await runTransaction(db, async (transaction) => {
    const postRef = doc(db, 'posts', postId);
    const likeRef = doc(db, 'posts', postId, 'likes', userId);

    const postSnap = await transaction.get(postRef);
    if (!postSnap.exists()) throw new Error('Post not found');

    const likeSnap = await transaction.get(likeRef);
    if (likeSnap.exists()) throw new Error('Already liked');

    transaction.set(likeRef, { createdAt: serverTimestamp() });
    transaction.update(postRef, { likeCount: increment(1) });
  });
}
```

---

## Indexes

### Composite Indexes
`firestore.indexes.json`:

```json
{
  "indexes": [
    {
      "collectionGroup": "posts",
      "queryScope": "COLLECTION",
      "fields": [
        { "fieldPath": "authorId", "order": "ASCENDING" },
        { "fieldPath": "createdAt", "order": "DESCENDING" }
      ]
    },
    {
      "collectionGroup": "posts",
      "queryScope": "COLLECTION",
      "fields": [
        { "fieldPath": "tags", "arrayConfig": "CONTAINS" },
        { "fieldPath": "createdAt", "order": "DESCENDING" }
      ]
    }
  ]
}
```

```bash
# Deploy indexes
firebase deploy --only firestore:indexes
```

---

## CLI Quick Reference

```bash
# Project setup
firebase login                       # Authenticate
firebase init                        # Initialize project
firebase projects:list               # List projects

# Emulators
firebase emulators:start             # Start all emulators
firebase emulators:start --only firestore,auth  # Specific emulators

# Deploy
firebase deploy                      # Deploy everything
firebase deploy --only firestore     # Deploy rules + indexes
firebase deploy --only functions     # Deploy functions
firebase deploy --only hosting       # Deploy hosting

# Functions
cd functions && npm run build        # Build TypeScript
firebase functions:log               # View logs
```

---

## Anti-Patterns

- **No security rules** - Always write rules, never use test mode in production
- **Deep nesting** - Keep documents flat, max 2-3 levels
- **Large documents** - Max 1MB, split if larger
- **Unbounded arrays** - Use subcollections for lists that grow
- **No offline handling** - Enable persistence for mobile/PWA
- **Reading all fields** - Use field masks or Firestore Lite
- **Ignoring indexes** - Check console for missing index errors
- **No emulator testing** - Always test rules before deploy


================================================
FILE: skills/flutter/SKILL.md
================================================
---
name: flutter
description: Flutter development with Riverpod state management, Freezed, go_router, and mocktail testing
when-to-use: When working on Flutter/Dart code
user-invocable: false
paths: ["**/*.dart", "pubspec.yaml", "lib/**", "test/**"]
effort: medium
---

# Flutter Skill


---

## Project Structure

```
project/
├── lib/
│   ├── core/                           # Core utilities
│   │   ├── constants/                  # App constants
│   │   ├── extensions/                 # Dart extensions
│   │   ├── router/                     # go_router configuration
│   │   │   └── app_router.dart
│   │   └── theme/                      # App theme
│   │       └── app_theme.dart
│   ├── data/                           # Data layer
│   │   ├── models/                     # Freezed data models
│   │   ├── repositories/               # Repository implementations
│   │   └── services/                   # API services
│   ├── domain/                         # Domain layer
│   │   ├── entities/                   # Business entities
│   │   └── repositories/               # Repository interfaces
│   ├── presentation/                   # UI layer
│   │   ├── common/                     # Shared widgets
│   │   ├── features/                   # Feature modules
│   │   │   └── feature_name/
│   │   │       ├── providers/          # Riverpod providers
│   │   │       ├── widgets/            # Feature-specific widgets
│   │   │       └── feature_screen.dart
│   │   └── providers/                  # Global providers
│   ├── main.dart
│   └── app.dart
├── test/
│   ├── unit/                           # Unit tests
│   ├── widget/                         # Widget tests
│   └── integration/                    # Integration tests
├── pubspec.yaml
├── analysis_options.yaml
└── CLAUDE.md
```

---

## Riverpod State Management

### Provider Types
```dart
// Simple value provider
final appNameProvider = Provider<String>((ref) => 'My App');

// StateProvider for simple mutable state
final counterProvider = StateProvider<int>((ref) => 0);

// NotifierProvider for complex state logic
final userProvider = NotifierProvider<UserNotifier, User?>(() => UserNotifier());

// AsyncNotifierProvider for async operations
final usersProvider = AsyncNotifierProvider<UsersNotifier, List<User>>(
  () => UsersNotifier(),
);

// FutureProvider for simple async data
final configProvider = FutureProvider<Config>((ref) async {
  return await ref.watch(configServiceProvider).loadConfig();
});

// StreamProvider for real-time data
final messagesProvider = StreamProvider<List<Message>>((ref) {
  return ref.watch(messageServiceProvider).watchMessages();
});

// Family providers for parameterized data
final userByIdProvider = FutureProvider.family<User, String>((ref, userId) async {
  return await ref.watch(userRepositoryProvider).getUser(userId);
});
```

### Notifier Pattern
```dart
@riverpod
class Users extends _$Users {
  @override
  Future<List<User>> build() async {
    return await _fetchUsers();
  }

  Future<List<User>> _fetchUsers() async {
    final repository = ref.read(userRepositoryProvider);
    return await repository.getUsers();
  }

  Future<void> refresh() async {
    state = const AsyncLoading();
    state = await AsyncValue.guard(() => _fetchUsers());
  }

  Future<void> addUser(User user) async {
    final repository = ref.read(userRepositoryProvider);
    await repository.addUser(user);
    ref.invalidateSelf();
  }
}
```

### AsyncValue Handling
```dart
class UsersScreen extends ConsumerWidget {
  const UsersScreen({super.key});

  @override
  Widget build(BuildContext context, WidgetRef ref) {
    final usersAsync = ref.watch(usersProvider);

    return usersAsync.when(
      data: (users) => UsersList(users: users),
      loading: () => const Center(child: CircularProgressIndicator()),
      error: (error, stack) => ErrorDisplay(
        error: error,
        onRetry: () => ref.invalidate(usersProvider),
      ),
    );
  }
}

// Pattern matching alternative
Widget build(BuildContext context, WidgetRef ref) {
  final usersAsync = ref.watch(usersProvider);

  return switch (usersAsync) {
    AsyncData(:final value) => UsersList(users: value),
    AsyncLoading() => const LoadingIndicator(),
    AsyncError(:final error) => ErrorDisplay(error: error),
  };
}
```

### ref Methods
```dart
// watch - rebuilds when provider changes
final users = ref.watch(usersProvider);

// read - one-time read, no rebuild
void onButtonPressed() {
  ref.read(counterProvider.notifier).state++;
}

// listen - react to changes without rebuild
ref.listen(authProvider, (previous, next) {
  if (next == null) {
    context.go('/login');
  }
});

// invalidate - force refresh
ref.invalidate(usersProvider);

// keepAlive - prevent auto-dispose
final link = ref.keepAlive();
// Later: link.close() to allow disposal
```

---

## Freezed Data Models

### Model Definition
```dart
import 'package:freezed_annotation/freezed_annotation.dart';

part 'user.freezed.dart';
part 'user.g.dart';

@freezed
class User with _$User {
  const factory User({
    required String id,
    required String name,
    required String email,
    @Default(false) bool isActive,
    DateTime? createdAt,
  }) = _User;

  factory User.fromJson(Map<String, dynamic> json) => _$UserFromJson(json);
}

// Union types for states
@freezed
sealed class AuthState with _$AuthState {
  const factory AuthState.initial() = _Initial;
  const factory AuthState.loading() = _Loading;
  const factory AuthState.authenticated(User user) = _Authenticated;
  const factory AuthState.unauthenticated() = _Unauthenticated;
  const factory AuthState.error(String message) = _Error;
}
```

### Using Freezed Unions
```dart
Widget build(BuildContext context, WidgetRef ref) {
  final authState = ref.watch(authProvider);

  return authState.when(
    initial: () => const SplashScreen(),
    loading: () => const LoadingScreen(),
    authenticated: (user) => HomeScreen(user: user),
    unauthenticated: () => const LoginScreen(),
    error: (message) => ErrorScreen(message: message),
  );
}
```

---

## go_router Navigation

### Router Configuration
```dart
final routerProvider = Provider<GoRouter>((ref) {
  final authState = ref.watch(authProvider);

  return GoRouter(
    initialLocation: '/',
    refreshListenable: authState,
    redirect: (context, state) {
      final isLoggedIn = authState.valueOrNull != null;
      final isLoggingIn = state.matchedLocation == '/login';

      if (!isLoggedIn && !isLoggingIn) return '/login';
      if (isLoggedIn && isLoggingIn) return '/';
      return null;
    },
    routes: [
      GoRoute(
        path: '/',
        builder: (context, state) => const HomeScreen(),
        routes: [
          GoRoute(
            path: 'user/:id',
            builder: (context, state) => UserScreen(
              userId: state.pathParameters['id']!,
            ),
          ),
        ],
      ),
      GoRoute(
        path: '/login',
        builder: (context, state) => const LoginScreen(),
      ),
    ],
    errorBuilder: (context, state) => ErrorScreen(error: state.error),
  );
});
```

### Navigation
```dart
// Navigate to route
context.go('/user/123');

// Push onto stack
context.push('/user/123');

// Pop current route
context.pop();

// Replace current route
context.pushReplacement('/home');

// Named routes
context.goNamed('user', pathParameters: {'id': '123'});
```

---

## Widget Patterns

### ConsumerWidget vs ConsumerStatefulWidget
```dart
// Stateless with Riverpod
class UserCard extends ConsumerWidget {
  const UserCard({super.key, required this.userId});

  final String userId;

  @override
  Widget build(BuildContext context, WidgetRef ref) {
    final user = ref.watch(userByIdProvider(userId));
    return user.when(
      data: (user) => Card(child: Text(user.name)),
      loading: () => const CardSkeleton(),
      error: (e, _) => ErrorCard(error: e),
    );
  }
}

// Stateful with Riverpod
class SearchScreen extends ConsumerStatefulWidget {
  const SearchScreen({super.key});

  @override
  ConsumerState<SearchScreen> createState() => _SearchScreenState();
}

class _SearchScreenState extends ConsumerState<SearchScreen> {
  final _controller = TextEditingController();

  @override
  void dispose() {
    _controller.dispose();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    final results = ref.watch(searchProvider(_controller.text));
    return Column(
      children: [
        TextField(
          controller: _controller,
          onChanged: (_) => setState(() {}),
        ),
        Expanded(child: SearchResults(results: results)),
      ],
    );
  }
}
```

### HookConsumerWidget (with flutter_hooks)
```dart
class AnimatedCounter extends HookConsumerWidget {
  const AnimatedCounter({super.key});

  @override
  Widget build(BuildContext context, WidgetRef ref) {
    final controller = useAnimationController(duration: const Duration(milliseconds: 300));
    final count = ref.watch(counterProvider);

    useEffect(() {
      controller.forward(from: 0);
      return null;
    }, [count]);

    return ScaleTransition(
      scale: controller,
      child: Text('$count'),
    );
  }
}
```

---

## Testing with Mocktail

### Unit Tests
```dart
import 'package:flutter_test/flutter_test.dart';
import 'package:mocktail/mocktail.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';

class MockUserRepository extends Mock implements UserRepository {}

void main() {
  late MockUserRepository mockRepository;
  late ProviderContainer container;

  setUp(() {
    mockRepository = MockUserRepository();
    container = ProviderContainer(
      overrides: [
        userRepositoryProvider.overrideWithValue(mockRepository),
      ],
    );
  });

  tearDown(() {
    container.dispose();
  });

  test('usersProvider returns list of users', () async {
    final users = [User(id: '1', name: 'John', email: 'john@example.com')];
    when(() => mockRepository.getUsers()).thenAnswer((_) async => users);

    final result = await container.read(usersProvider.future);

    expect(result, equals(users));
    verify(() => mockRepository.getUsers()).called(1);
  });
}
```

### Widget Tests
```dart
void main() {
  testWidgets('UserCard displays user name', (tester) async {
    final user = User(id: '1', name: 'John', email: 'john@example.com');

    await tester.pumpWidget(
      ProviderScope(
        overrides: [
          userByIdProvider('1').overrideWithValue(AsyncData(user)),
        ],
        child: const MaterialApp(home: UserCard(userId: '1')),
      ),
    );

    expect(find.text('John'), findsOneWidget);
  });

  testWidgets('UserCard shows loading indicator', (tester) async {
    await tester.pumpWidget(
      ProviderScope(
        overrides: [
          userByIdProvider('1').overrideWithValue(const AsyncLoading()),
        ],
        child: const MaterialApp(home: UserCard(userId: '1')),
      ),
    );

    expect(find.byType(CardSkeleton), findsOneWidget);
  });
}
```

---

## pubspec.yaml

```yaml
name: my_app
description: A Flutter application
publish_to: 'none'
version: 1.0.0+1

environment:
  sdk: '>=3.2.0 <4.0.0'

dependencies:
  flutter:
    sdk: flutter

  # State management
  flutter_riverpod: ^2.4.9
  riverpod_annotation: ^2.3.3

  # Data models
  freezed_annotation: ^2.4.1
  json_annotation: ^4.8.1

  # Navigation
  go_router: ^13.0.0

  # Networking
  dio: ^5.4.0

  # Storage
  shared_preferences: ^2.2.2

  # Utils
  intl: ^0.19.0

dev_dependencies:
  flutter_test:
    sdk: flutter

  # Code generation
  build_runner: ^2.4.8
  freezed: ^2.4.6
  json_serializable: ^6.7.1
  riverpod_generator: ^2.3.9

  # Testing
  mocktail: ^1.0.2

  # Linting
  flutter_lints: ^3.0.1
```

---

## GitHub Actions

```yaml
name: Flutter CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - uses: subosito/flutter-action@v2
        with:
          flutter-version: '3.16.0'
          channel: 'stable'
          cache: true

      - name: Install dependencies
        run: flutter pub get

      - name: Generate code
        run: dart run build_runner build --delete-conflicting-outputs

      - name: Analyze
        run: flutter analyze --fatal-infos

      - name: Run tests
        run: flutter test --coverage

      - name: Build APK
        run: flutter build apk --release
```

---

## analysis_options.yaml

```yaml
include: package:flutter_lints/flutter.yaml

analyzer:
  exclude:
    - "**/*.g.dart"
    - "**/*.freezed.dart"
  errors:
    invalid_annotation_target: ignore
  language:
    strict-casts: true
    strict-inference: true
    strict-raw-types: true

linter:
  rules:
    - always_declare_return_types
    - avoid_dynamic_calls
    - avoid_print
    - avoid_type_to_string
    - cancel_subscriptions
    - close_sinks
    - prefer_const_constructors
    - prefer_const_declarations
    - prefer_final_locals
    - require_trailing_commas
    - unawaited_futures
    - use_super_parameters
```

---

## Flutter Anti-Patterns

- ❌ **Provider without autoDispose** - Use `.autoDispose` to prevent memory leaks
- ❌ **watch in callbacks** - Use `ref.read()` in onPressed/callbacks, not `ref.watch()`
- ❌ **Business logic in widgets** - Move to Notifiers/providers
- ❌ **Mutable state in providers** - Use Freezed for immutable models
- ❌ **Not using AsyncValue** - Handle loading/error states with `when()`
- ❌ **setState for shared state** - Use providers for shared state; reserve `setState` for local, ephemeral widget state (e.g. a text field)
- ❌ **Passing ref to functions** - Keep ref usage within widgets/providers
- ❌ **Deeply nested Consumer** - Use ConsumerWidget instead
- ❌ **Not using family for params** - Use `.family` for parameterized providers
- ❌ **Global GoRouter instance** - Use Provider for router with redirect logic
- ❌ **BuildContext across async** - Store values before await, not context
- ❌ **Ignoring dispose** - Clean up controllers in ConsumerStatefulWidget


================================================
FILE: skills/gemini-review/SKILL.md
================================================
---
name: gemini-review
description: Google Gemini CLI code review with Gemini 2.5 Pro, 1M token context, CI/CD integration
when-to-use: When user requests Gemini-powered code review or needs large-context review
user-invocable: true
effort: medium
---

# Google Gemini Code Review Skill


Use Google's Gemini CLI for code review with Gemini 2.5 Pro. Its 1M-token context window is large enough to hold an entire repository, so whole codebases can be reviewed in a single pass.

**Sources:** [Gemini CLI](https://github.com/google-gemini/gemini-cli) | [Code Review Extension](https://github.com/gemini-cli-extensions/code-review) | [Gemini Code Assist](https://codeassist.google/) | [GitHub Action](https://github.com/google-github-actions/run-gemini-cli)

---

## Why Gemini for Code Review?

| Feature | Benefit |
|---------|---------|
| **Gemini 2.5 Pro** | State-of-the-art reasoning for code |
| **1M token context** | Entire repositories fit - no chunking needed |
| **Free tier** | 1,000 requests/day with Google account |
| **Consistent output** | Clean formatting, predictable structure |
| **GitHub native** | Gemini Code Assist app for auto PR reviews |

### Benchmark Performance

| Benchmark | Score | Notes |
|-----------|-------|-------|
| SWE-Bench Verified | 63.8% | Agentic coding benchmark |
| Qodo PR Benchmark | 56.3% | PR review quality |
| LiveCodeBench v5 | 70.4% | Code generation |
| WebDev Arena | #1 | Web development |

---

## Installation

### Prerequisites

```bash
# Check Node.js version (requires 20+)
node --version

# Install Node.js 20 if needed
# macOS
brew install node@20

# Or via nvm
nvm install 20
nvm use 20
```

### Install Gemini CLI

```bash
# Via npm (recommended)
npm install -g @google/gemini-cli

# Via Homebrew (macOS)
brew install gemini-cli

# Or run without installing
npx @google/gemini-cli

# Verify installation
gemini --version
```

### Install Code Review Extension

```bash
# Requires Gemini CLI v0.4.0+
gemini extensions install https://github.com/gemini-cli-extensions/code-review

# Verify extension
gemini extensions list
```

---

## Authentication

### Option 1: Google Account (Recommended)

**Free tier: 1,000 requests/day, 60 requests/min**

```bash
# Run gemini and follow browser login
gemini

# Select: "Login with Google Account"
# Opens browser for OAuth
```

This gives you access to Gemini 2.5 Pro with the full 1M token context window.

### Option 2: Gemini API Key

**Free tier: 100 requests/day**

```bash
# Get API key from https://aistudio.google.com/apikey

# Set environment variable
export GEMINI_API_KEY="your-api-key"

# Or add to shell profile
echo 'export GEMINI_API_KEY="your-api-key"' >> ~/.zshrc

# Run Gemini
gemini
```

### Option 3: Vertex AI (Enterprise)

```bash
# For Google Cloud projects
export GOOGLE_API_KEY="your-api-key"
export GOOGLE_GENAI_USE_VERTEXAI=true
export GOOGLE_CLOUD_PROJECT="your-project-id"

gemini
```

---

## Interactive Code Review

### Using the Code Review Extension

```bash
# Start Gemini CLI
gemini

# Run code review on current branch
/code-review
```

The extension analyzes:
- Code changes on your current branch
- Identifies quality issues
- Suggests fixes

### Manual Review Prompts

```bash
# In interactive mode
gemini

# Then ask:
> Review the changes in this branch for bugs and security issues
> Analyze src/api/users.ts for potential vulnerabilities
> What are the code quality issues in the last 3 commits?
```

---

## Headless Mode (Automation)

### Basic Usage

```bash
# Simple prompt execution
gemini -p "Review the code changes for bugs and security issues"

# With JSON output (for parsing)
gemini -p "Review the changes" --output-format json

# Stream JSON events (real-time)
gemini -p "Review and fix issues" --output-format stream-json

# Specify model
gemini -m gemini-2.5-pro -p "Deep code review of this PR"
```

### Full CI/CD Example

```bash
# Get diff and review
git diff origin/main...HEAD > diff.txt

gemini -p "Review this code diff for:
1. Security vulnerabilities
2. Performance issues
3. Code quality problems
4. Missing error handling

Diff:
$(cat diff.txt)
" --output-format json > review.json
```

### Session Tracking

```bash
# Track token usage and costs
gemini -p "Review changes" --session-summary metrics.json

# View metrics
cat metrics.json
```

---

## GitHub Integration

### Option 1: Gemini Code Assist App (Easiest)

Install from [GitHub Marketplace](https://github.com/marketplace/gemini-code-assist):

1. Go to GitHub Marketplace → Gemini Code Assist
2. Click "Install" and select repositories
3. PRs automatically get reviewed when opened

**Commands in PR comments:**
```
/gemini review     # Request code review
/gemini summary    # Get PR summary
/gemini help       # Show available commands
```

**Quota:**
- Free: 33 PRs/day
- Enterprise: 100+ PRs/day

### Option 2: GitHub Action

```yaml
# .github/workflows/gemini-review.yml
name: Gemini Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  review:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Gemini CLI
        run: npm install -g @google/gemini-cli

      - name: Run Review
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          # Get diff
          git diff origin/${{ github.base_ref }}...HEAD > diff.txt

          # Run Gemini review
          gemini -p "Review this pull request diff for bugs, security issues, and code quality problems. Be specific about file names and line numbers.

          $(cat diff.txt)" > review.md

      - name: Post Review Comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const review = fs.readFileSync('review.md', 'utf8');
            github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `## 🤖 Gemini Code Review\n\n${review}`
            });
```

### Option 3: Official GitHub Action

```yaml
# .github/workflows/gemini-review.yml
name: Gemini Code Review

on:
  pull_request:
    types: [opened, synchronize]
  issue_comment:
    types: [created]

jobs:
  review:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
      issues: write

    steps:
      - uses: actions/checkout@v4

      - name: Run Gemini CLI
        uses: google-github-actions/run-gemini-cli@v1
        with:
          gemini_api_key: ${{ secrets.GEMINI_API_KEY }}
          prompt: "Review this pull request for code quality, security issues, and potential bugs."
```

**On-demand commands in comments:**
```
@gemini-cli /review
@gemini-cli explain this code change
@gemini-cli write unit tests for this component
```

---

## GitLab CI/CD

```yaml
# .gitlab-ci.yml
# Job that runs a Gemini CLI review for merge request pipelines and keeps the
# result as a job artifact.
gemini-review:
  image: node:20
  # NOTE: `review` is not one of GitLab's default stages; it must be declared
  # under a top-level `stages:` list in this file.
  stage: review
  script:
    - npm install -g @google/gemini-cli
    # NOTE(review): the prompt carries no diff; presumably the CLI is expected
    # to inspect the checked-out repository itself - confirm.
    - |
      gemini -p "Review the merge request changes for bugs, security issues, and code quality" > review.md
    # Echo the review into the job log as well.
    - cat review.md
  artifacts:
    paths:
      - review.md
  rules:
    # Only run in merge request pipelines.
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
  variables:
    # Expects GEMINI_API_KEY to be defined as a CI/CD variable for the project.
    GEMINI_API_KEY: $GEMINI_API_KEY
```

---

## Configuration

### Global Config

`~/.gemini/settings.json`:

```json
{
  "model": "gemini-2.5-pro",
  "theme": "dark",
  "sandbox": true
}
```

### Project Config (GEMINI.md)

Create a `GEMINI.md` file in your project root for project-specific context:

```markdown
# Project Context for Gemini

## Tech Stack
- TypeScript with strict mode
- React 18 with hooks
- FastAPI backend
- PostgreSQL database

## Code Review Focus Areas
1. Type safety - ensure proper TypeScript types
2. React hooks rules - check for dependency array issues
3. SQL injection - verify parameterized queries
4. Authentication - check all endpoints have proper auth

## Conventions
- Use camelCase for variables
- Use PascalCase for components
- All API errors should use AppError class
```

---

## CLI Quick Reference

```bash
# Interactive
gemini                          # Start interactive mode
/code-review                    # Run code review extension

# Headless
gemini -p "prompt"              # Single prompt, exit
gemini -p "prompt" --output-format json   # JSON output
gemini -m gemini-2.5-flash -p "prompt"    # Use faster model

# Extensions
gemini extensions list          # List installed
gemini extensions install URL   # Install extension
gemini extensions update        # Update all

# Key Flags
--output-format json            # Structured output
--output-format stream-json     # Real-time events
--session-summary FILE          # Track metrics
-m MODEL                        # Select model
```

---

## Comparison: Claude vs Codex vs Gemini

| Aspect | Claude | Codex CLI | Gemini CLI |
|--------|--------|-----------|------------|
| **Setup** | None (built-in) | npm + OpenAI API | npm + Google Account |
| **Model** | Claude | GPT-5.2-Codex | Gemini 2.5 Pro |
| **Context** | Conversation | Fresh per review | 1M tokens (huge!) |
| **Free Tier** | N/A | Limited | 1,000/day |
| **Best For** | Quick reviews | High accuracy | Large codebases |
| **GitHub Native** | No | @codex | Gemini Code Assist |

### When to Use Each

| Scenario | Recommended Engine |
|----------|-------------------|
| Quick in-flow review | Claude |
| Critical security review | Codex (88% detection) |
| Large codebase (100+ files) | Gemini (1M context) |
| Free automated reviews | Gemini |
| Multiple perspectives | All three (dual/triple engine) |

---

## Troubleshooting

| Issue | Solution |
|-------|----------|
| `gemini: command not found` | `npm install -g @google/gemini-cli` |
| `Node.js version error` | Upgrade to Node.js 20+ |
| `Authentication failed` | Re-run `gemini` and login again |
| `Extension not found` | `gemini extensions install https://github.com/gemini-cli-extensions/code-review` |
| `Rate limited` | Wait or upgrade to Vertex AI |
| `Hangs in CI` | Ensure `DEBUG` env var is not set |

---

## Anti-Patterns

- **Skipping authentication setup** - Always configure before CI/CD
- **Exposing the API key in logs** - Store it in your CI secrets manager and never echo it
- **Ignoring context limits** - Even 1M tokens has limits for huge monorepos
- **Running on every commit** - Use on PRs only to save quota
- **Not setting project context** - Add GEMINI.md for better reviews


================================================
FILE: skills/icpg/SKILL.md
================================================
---
name: icpg
description: Intent-Augmented Code Property Graph — tracks WHY code exists via ReasonNodes with formal contracts, 6-dimension drift detection, and 3 canonical pre-task queries for autonomous development
when-to-use: "Before any code change — query the reason graph for intent, constraints, and risk"
user-invocable: false
effort: high
---

# iCPG Skill (Intent-Augmented Code Property Graph)


**Purpose:** Add a Reason Graph layer on top of code structure so every
function, class, and module is traceable to the goal that created it,
the agent or human that owns it, and whether it's still doing what it
was supposed to do.

```
┌────────────────────────────────────────────────────────────────┐
│  iCPG = AST + CFG + PDG + RG (Reason Graph)                    │
│  ─────────────────────────────────────────────────────────────│
│  AST  = Abstract Syntax Tree (structure)      ← existing       │
│  CFG  = Control Flow Graph (execution paths)  ← existing       │
│  PDG  = Program Dependency Graph              ← existing       │
│  RG   = Reason Graph (WHY layer)              ← THIS SKILL     │
│                                                                │
│  The RG stores ReasonNodes (goals/tasks), links them to code   │
│  symbols via typed edges, enforces contracts (DbC), and        │
│  detects when code drifts from its original purpose.           │
│                                                                │
│  Storage: .icpg/reason.db (SQLite, per-project, gitignored)   │
│  CLI: icpg init | create | record | query | drift | bootstrap │
└────────────────────────────────────────────────────────────────┘
```

---

## Core Principle

**Intent first, code second.** Before writing or modifying code, query
the reason graph to understand WHY existing code was written, WHAT
constraints it must preserve, and WHETHER your change duplicates prior
work.

---

## The 3 Canonical Pre-Task Queries

**Every agent MUST run these before writing code:**

| # | Query | Command | What It Answers |
|---|-------|---------|-----------------|
| 1 | **search_prior_work** | `icpg query prior "<goal>"` | Has this been attempted before? Prevents duplication. |
| 2 | **get_constraints** | `icpg query constraints <file>` | What invariants apply to files I'll touch? Prevents breakage. |
| 3 | **get_risk_profile** | `icpg query risk <symbol>` | Is this symbol fragile? Drift history, ownership changes. |

---

## ReasonNode — The Core Primitive

Each ReasonNode captures a stated purpose with a formal contract:

```
id              UUID
goal            Natural language: what is this trying to achieve
decision_type   business_goal | arch_decision | task | workaround | constraint | patch
scope           Files/modules expected to be touched
owner           Human or agent accountable
status          proposed | executing | fulfilled | drifted | abandoned
source          manual | commit | inferred | agent-session

FORMAL CONTRACT (Design by Contract):
  preconditions    What must be true before this intent executes
  postconditions   What must be true when fulfilled
  invariants       What must remain true throughout and after
```

**Drift = predicate failure.** A symbol has drifted when its current
behavior no longer satisfies the postconditions of the ReasonNode that
created it, or when an invariant is violated.

---

## Six Edge Types

```
CREATES      Reason  → Symbol   (this intent created this function)
MODIFIES     Reason  → Symbol   (this intent changed this function)
REQUIRES     Reason  → Reason   (B depends on A being done first)
DUPLICATES   Reason  → Reason   (these two goals overlap)
VALIDATED_BY Reason  → Test     (this test proves the intent was satisfied)
DRIFTS_FROM  Symbol  → Reason   (this symbol no longer does what it was made for)
```

---

## 6-Dimension Drift Model

| Dimension | What It Means | Detection |
|-----------|--------------|-----------|
| **Spec drift** | Symbol checksum changed without a MODIFIES edge | Compare stored vs current checksum |
| **Decision drift** | Postconditions no longer hold | Evaluate predicates against codebase |
| **Ownership drift** | >3 different owners without coherent oversight | Count unique owners on edges |
| **Test drift** | VALIDATED_BY tests missing or failing | Check test file existence + run |
| **Usage drift** | Symbol used outside original scope | Grep for imports beyond scope |
| **Dependency drift** | Downstream REQUIRES reasons have drifted | Traverse REQUIRES edges |

Run `icpg drift check` to scan all dimensions. Each produces a 0-1 severity score.

---

## CLI Reference

### Setup
```bash
icpg init                          # Create .icpg/ and database
icpg bootstrap --days 90           # Infer ReasonNodes from git history
icpg bootstrap --days 90 --no-llm  # Without LLM (commit-message only)
```

### Create & Record
```bash
icpg create "Add JWT auth" --scope src/auth/ --owner feature-auth --type task
icpg record --reason <id> --base main         # Record symbols from git diff
icpg record --reason <id> --edge-type MODIFIES # Record as modifications
```

### Query (the 3 canonical queries)
```bash
icpg query prior "user authentication"     # 1. Duplicate detection
icpg query constraints src/auth/service.ts  # 2. Invariants for file
icpg query risk validateToken              # 3. Symbol risk profile
icpg query context src/auth/service.ts     # All intents for a file
icpg query blast <reason-id>               # Full blast radius
```

### Drift
```bash
icpg drift check          # Full scan across all dimensions
icpg drift resolve <id>   # Mark drift event resolved
```

### Status
```bash
icpg status               # Stats: reasons, symbols, edges, drift
```

---

## Storage

Per-project, gitignored, zero infrastructure:

```
.icpg/
  reason.db       SQLite database (4 tables: reasons, symbols, edges, drift_events)
  .gitignore      Contains: *
  chroma/         ChromaDB vectors (if chromadb installed)
  tfidf_cache.json  TF-IDF fallback cache
  .current-intent   Marker file for active intent (used by Stop hook)
```

Install options:
```bash
pip install ./scripts/icpg            # Core (zero deps)
pip install "./scripts/icpg[vectors]"  # + ChromaDB for duplicate detection
pip install "./scripts/icpg[all]"      # + ChromaDB + scikit-learn + openai
```

---

## Workflow: Before Any Code Change

```
0. INTENT       → icpg create (or identify existing intent)
1. DEDUP        → icpg query prior (check for duplicate work)
2. CONSTRAINTS  → icpg query constraints (understand invariants)
3. RISK         → icpg query risk (check fragile symbols)
4. LOCATE       → search_graph to find symbols (code-graph skill)
5. CHANGE       → Make the edit (PreToolUse hook shows context)
6. RECORD       → icpg record (link symbols to intent)
7. DRIFT CHECK  → icpg drift check (verify no unintended drift)
8. VERIFY       → Run tests, lint, typecheck
```

**Step 0 is non-negotiable for autonomous agents.** Every change must
be linked to a stated purpose. Without an intent, there's nothing to
measure drift against.

---

## Hook Integration

### PreToolUse Hook (automatic context injection)

Add to `.claude/settings.json`:
```json
{
  "hooks": {
    "PreToolUse": [{
      "matcher": "Edit|Write",
      "hooks": [{
        "type": "command",
        "command": "scripts/icpg-pre-edit.sh",
        "timeout": 3,
        "statusMessage": "Checking intent context..."
      }]
    }]
  }
}
```

Before every file edit, agents see:
```
═══ iCPG CONTEXT ═══
INTENTS for src/auth/service.ts:
  [>] a1b2c3d4 — User authentication with JWT tokens
      Owner: feature-auth | Status: executing
      Invariants: 2
CONSTRAINTS for src/auth/service.ts:
  From intent: User authentication with JWT tokens
    INV: file_exists("src/auth/middleware.ts")
    POST: test_exists("src/auth/__tests__/service.test.ts")
PRESERVE function signatures unless your task requires changing them.
═══════════════════
```

### Stop Hook (automatic symbol recording)

After implementation passes tests, auto-records symbols:
```json
{
  "hooks": {
    "Stop": [{
      "hooks": [
        {"type": "command", "command": "scripts/tdd-loop-check.sh", "timeout": 60},
        {"type": "command", "command": "scripts/icpg-stop-record.sh", "timeout": 5}
      ]
    }]
  }
}
```

---

## Agent Teams Integration

### Updated Pipeline (agent-teams + iCPG)

```
 0. INTENT       Team lead creates ReasonNode from feature spec
 0b. DEDUP       icpg query prior — check for duplicate intents
 1. SPEC         Feature agent writes spec
 2. SPEC-REVIEW  Quality agent reviews spec + intent alignment
 3. TESTS (RED)  Feature agent writes tests
 4. RED-VERIFY   Quality agent verifies tests fail
 5. IMPLEMENT    Feature agent codes (PreEdit hook shows context)
 5b. RECORD      Auto-record symbols → intent (Stop hook)
 5c. DRIFT-CHECK Quality agent verifies no scope drift
 6. GREEN-VERIFY Quality agent verifies tests pass + coverage
 7. VALIDATE     Lint + typecheck + full suite
 8. CODE-REVIEW  Review agent (sees intent context per file)
 9. SECURITY     Security agent
10. BRANCH-PR    Merger agent (PR includes intent traceability)
```

### Agent Responsibilities

| Agent | iCPG Action |
|-------|-------------|
| **Team Lead** | `icpg create` when creating task chains. `icpg query prior` to check duplicates. |
| **Feature Agent** | `icpg query constraints` before implementing. Writes `.icpg/.current-intent` for auto-recording. |
| **Quality Agent** | `icpg drift check` during GREEN verify. Verifies scope alignment. |
| **Review Agent** | Sees intent context via PreToolUse hook when reviewing files. |
| **Merger Agent** | Includes intent traceability in PR description. |

---

## Bootstrapping from Git History

For existing codebases, infer ReasonNodes from commit history:

```bash
icpg bootstrap --days 90 --verbose
```

This will:
1. Get commits from last 90 days
2. Cluster by temporal proximity (2-hour window)
3. Infer intent via LLM (Claude or OpenAI) or commit message parsing
4. Create ReasonNodes with `source: "inferred"`, `confidence: 0.6-0.8`
5. Extract symbols from changed files, create CREATES edges
6. Run duplicate detection against existing ReasonNodes

**Quality note:** Inferred intents are marked low-confidence. Review and
promote high-value ones manually.

---

## Contract Predicates

Predicates are structured assertions over codebase state:

```
file_exists("src/auth/middleware.ts")
test_exists("src/auth/__tests__/service.test.ts")
symbol_count("src/auth/") <= 15
function_signature("validateToken") == "(token: string) => Promise<User>"
```

Contracts can be:
- **Hand-authored** for high-risk ReasonNodes
- **LLM-inferred** via `icpg create --infer-contracts`
- **Heuristic** (scope → file_exists, test → test_exists)

---

## Anti-Patterns

| Anti-Pattern | Do This Instead |
|-------------|-----------------|
| Coding without stating intent | `icpg create` before every non-trivial change |
| Assuming your change is isolated | `icpg query constraints` + `icpg query risk` first |
| Rebuilding what already exists | `icpg query prior` to check for prior work |
| Leaving intent in 'executing' forever | Update status to 'fulfilled' when done |
| Ignoring drift events | `icpg drift check` weekly, resolve or create new intents |
| Storing full source in symbols | Store signature + checksum only — read source from files |
| Skipping bootstrap on existing repos | `icpg bootstrap --days 90` to build initial graph |


================================================
FILE: skills/iterative-development/SKILL.md
================================================
---
name: iterative-development
description: TDD iteration loops using Claude Code Stop hooks - runs tests after each response, feeds failures back automatically
when-to-use: When setting up or configuring TDD loops via Stop hooks
user-invocable: false
effort: medium
---

# Iterative Development Skill (Stop Hook TDD Loops)


**Concept:** Claude Code's Stop hook fires right before Claude finishes a response. Exit code 2 feeds stderr back to the model and continues the conversation. This creates a real TDD loop without any plugins.

---

## How It Actually Works

Claude Code has a **Stop hook** that runs when Claude is about to conclude its response. If the hook script exits with code 2, its stderr is shown to the model and the conversation continues automatically.

```
┌─────────────────────────────────────────────────────────────┐
│  1. User asks Claude to implement a feature                 │
├─────────────────────────────────────────────────────────────┤
│  2. Claude writes tests + implementation                    │
├─────────────────────────────────────────────────────────────┤
│  3. Claude finishes its response                            │
├─────────────────────────────────────────────────────────────┤
│  4. Stop hook runs: executes tests, lint, typecheck         │
├─────────────────────────────────────────────────────────────┤
│  5a. All pass (exit 0) → Claude stops, work is done         │
│  5b. Failures (exit 2) → stderr fed back to Claude          │
├─────────────────────────────────────────────────────────────┤
│  6. Claude sees failures, fixes code, response ends         │
├─────────────────────────────────────────────────────────────┤
│  7. Stop hook runs again → repeat until green or max tries  │
└─────────────────────────────────────────────────────────────┘
```

**Key insight:** No fake plugins, no `/ralph-loop` command. The hook is real Claude Code infrastructure that runs automatically.

---

## Setup: Stop Hook Configuration

Add this to your project's `.claude/settings.json`:

```json
{
  "hooks": {
    "Stop": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "scripts/tdd-loop-check.sh",
            "timeout": 60,
            "statusMessage": "Running tests..."
          }
        ]
      }
    ]
  }
}
```

### The TDD Loop Check Script

Create `scripts/tdd-loop-check.sh` in your project:

```bash
#!/bin/bash
# TDD Loop Check - Stop hook that runs after each Claude response.
# Exit 0 = all good (or loop abandoned), Claude stops
# Exit 2 = failures, stderr fed back to Claude to fix
#
# State: the number of consecutive failing runs is stored in $ITERATION_FILE
# and removed whenever the loop ends (green, nothing to test, or max reached).

MAX_ITERATIONS=25
ITERATION_FILE=".claude/.tdd-iteration-count"

# The counter lives under .claude/; create the directory if it is missing so
# the write below cannot fail and silently disable the MAX_ITERATIONS stop.
mkdir -p "$(dirname "$ITERATION_FILE")"

# Track iteration count. A missing, empty, or corrupted counter restarts at 0.
count=0
if [ -f "$ITERATION_FILE" ]; then
    count=$(cat "$ITERATION_FILE")
    case "$count" in
        '' | *[!0-9]*) count=0 ;;
    esac
fi
count=$((10#$count + 1))   # 10# forces base 10 so a value like "08" is not read as octal
echo "$count" > "$ITERATION_FILE"

# Safety: stop after max iterations
if [ "$count" -ge "$MAX_ITERATIONS" ]; then
    rm -f "$ITERATION_FILE"
    echo "Max iterations ($MAX_ITERATIONS) reached. Stopping loop." >&2
    exit 0
fi

# Skip if no test files exist yet. Dependency and VCS directories are pruned:
# packages under node_modules ship their own test files, which would
# otherwise make this check pass in almost every project.
if ! find . \( -name node_modules -o -name .git \) -prune -o \
        -type f \( -name "*.test.*" -o -name "*.spec.*" -o -name "test_*" \) -print 2>/dev/null | grep -q .; then
    rm -f "$ITERATION_FILE"
    exit 0
fi

# Environment problem, not a code problem: looping cannot install npm.
if ! command -v npm >/dev/null 2>&1; then
    rm -f "$ITERATION_FILE"
    echo "npm not found on PATH - skipping TDD loop check." >&2
    exit 0
fi

# Run tests
TEST_OUTPUT=$(npm test 2>&1) || {
    echo "ITERATION $count/$MAX_ITERATIONS - Tests failing:" >&2
    echo "$TEST_OUTPUT" | tail -30 >&2
    echo "" >&2
    echo "Fix the failing tests and try again." >&2
    exit 2
}

# Run lint (if configured)
if [ -f "package.json" ] && grep -q '"lint"' package.json; then
    LINT_OUTPUT=$(npm run lint 2>&1) || {
        echo "ITERATION $count/$MAX_ITERATIONS - Lint errors:" >&2
        echo "$LINT_OUTPUT" | tail -20 >&2
        echo "" >&2
        echo "Fix lint errors and try again." >&2
        exit 2
    }
fi

# Run typecheck (if configured)
if [ -f "tsconfig.json" ]; then
    TYPE_OUTPUT=$(npx tsc --noEmit 2>&1) || {
        echo "ITERATION $count/$MAX_ITERATIONS - Type errors:" >&2
        echo "$TYPE_OUTPUT" | tail -20 >&2
        echo "" >&2
        echo "Fix type errors and try again." >&2
        exit 2
    }
fi

# All green - reset counter and let Claude stop
rm -f "$ITERATION_FILE"
exit 0
```

### Python Variant

```bash
#!/bin/bash
# Python TDD Loop Check - Stop hook that runs after each Claude response.
# Exit 0 = all good (or loop abandoned), Claude stops
# Exit 2 = failures, stderr fed back to Claude to fix

MAX_ITERATIONS=25
ITERATION_FILE=".claude/.tdd-iteration-count"

# Create the state directory if needed; without it the counter is never
# written and the MAX_ITERATIONS safety stop can never trigger.
mkdir -p "$(dirname "$ITERATION_FILE")"

# Track iteration count. A missing, empty, or corrupted counter restarts at 0.
count=0
if [ -f "$ITERATION_FILE" ]; then
    count=$(cat "$ITERATION_FILE")
    case "$count" in
        '' | *[!0-9]*) count=0 ;;
    esac
fi
count=$((10#$count + 1))   # 10# forces base 10 so a value like "08" is not read as octal
echo "$count" > "$ITERATION_FILE"

# Safety: stop after max iterations
if [ "$count" -ge "$MAX_ITERATIONS" ]; then
    rm -f "$ITERATION_FILE"
    echo "Max iterations ($MAX_ITERATIONS) reached." >&2
    exit 0
fi

# Skip if no test files exist yet. Virtualenvs, node_modules and .git are
# pruned so that tests bundled with installed packages do not count.
if ! find . \( -name .git -o -name node_modules -o -name .venv -o -name venv \) -prune -o \
        -type f \( -name "test_*" -o -name "*_test.py" \) -print 2>/dev/null | grep -q .; then
    rm -f "$ITERATION_FILE"
    exit 0
fi

# Environment problem, not a code problem: looping cannot install pytest.
if ! command -v pytest >/dev/null 2>&1; then
    rm -f "$ITERATION_FILE"
    echo "pytest not found on PATH - skipping TDD loop check." >&2
    exit 0
fi

# Run tests
TEST_OUTPUT=$(pytest -v 2>&1) || {
    echo "ITERATION $count/$MAX_ITERATIONS - Tests failing:" >&2
    echo "$TEST_OUTPUT" | tail -30 >&2
    exit 2
}

# Run lint (only when ruff is installed)
if command -v ruff &>/dev/null; then
    LINT_OUTPUT=$(ruff check . 2>&1) || {
        echo "ITERATION $count/$MAX_ITERATIONS - Lint errors:" >&2
        echo "$LINT_OUTPUT" | tail -20 >&2
        exit 2
    }
fi

# Run typecheck (only when mypy is installed)
if command -v mypy &>/dev/null; then
    TYPE_OUTPUT=$(mypy . 2>&1) || {
        echo "ITERATION $count/$MAX_ITERATIONS - Type errors:" >&2
        echo "$TYPE_OUTPUT" | tail -20 >&2
        exit 2
    }
fi

# All green - reset counter and let Claude stop
rm -f "$ITERATION_FILE"
exit 0
```

---

## Additional Hooks for Quality Enforcement

### PreToolUse Hook: Lint Before File Writes

Runs a linter before any Write/Edit lands:

```json
{
  "hooks": {
    "PreToolUse": [
      {
        "matcher": "Write|Edit",
        "hooks": [
          {
            "type": "command",
            "command": "scripts/pre-write-lint.sh",
            "timeout": 10,
            "statusMessage": "Checking code quality..."
          }
        ]
      }
    ]
  }
}
```

### SessionStart Hook: Auto-Inject Context

Runs at session start to inject project info:

```json
{
  "hooks": {
    "SessionStart": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "echo 'TDD loop active. Tests run automatically after each response. Fix failures to continue.'",
            "statusMessage": "Loading project context..."
          }
        ]
      }
    ]
  }
}
```

---

## Core Philosophy

```
┌─────────────────────────────────────────────────────────────┐
│  ITERATION > PERFECTION                                     │
│  ─────────────────────────────────────────────────────────  │
│  Don't aim for perfect on first try.                        │
│  Let the loop refine the work. Each iteration builds on     │
│  previous attempts visible in files and git history.        │
├─────────────────────────────────────────────────────────────┤
│  FAILURES ARE DATA                                          │
│  ─────────────────────────────────────────────────────────  │
│  Failed tests, lint errors, type mismatches are signals.    │
│  The Stop hook feeds them directly to Claude as context.    │
├─────────────────────────────────────────────────────────────┤
│  CLEAR COMPLETION CRITERIA                                  │
│  ─────────────────────────────────────────────────────────  │
│  The hook defines "done": tests pass, lint clean, types ok. │
│  No ambiguity about when to stop.                           │
└─────────────────────────────────────────────────────────────┘
```

---

## Error Classification

Not all failures should loop. The hook script should distinguish:

| Type | Examples | Action |
|------|----------|--------|
| **Code Error** | Logic bug, wrong assertion, type mismatch | Exit 2 → loop continues |
| **Access Error** | Missing API key, DB connection refused | Exit 0 → stop, report to user |
| **Environment Error** | Missing package, wrong runtime version | Exit 0 → stop, report to user |

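The sample scripts above are only a starting point: they exit 2 on any non-zero status from the test, lint, or type-check command, so access and environment failures that surface through those commands will also loop. To get the behavior in the table, detect those cases explicitly (for example, check that required tools and credentials are present before running tests) and exit 0 with a message for the user.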

---

## When to Use TDD Loops

### Good For
| Use Case | Why |
|----------|-----|
| Feature development | Tests provide clear pass/fail signal |
| Bug fixes | Write failing test, fix, loop until green |
| Refactoring | Existing tests catch regressions |
| API development | Each endpoint independently testable |

### Not Good For
| Use Case | Why |
|----------|-----|
| UI/UX work | Requires human judgment |
| One-shot operations | No iteration needed |
| Unclear requirements | No clear "done" criteria |
| Subjective design | No objective success metric |

---

## Disabling the Loop

To temporarily disable the TDD loop for a session:

1. Remove or rename the Stop hook in `.claude/settings.json`
2. Or set `MAX_ITERATIONS=1` in the script
3. Or delete `scripts/tdd-loop-check.sh`

The hook only fires if the script exists and is configured.

---

## Gitignore Additions

```gitignore
# TDD loop state
.claude/.tdd-iteration-count
```


================================================
FILE: skills/klaviyo/SKILL.md
================================================
---
name: klaviyo
description: Klaviyo email/SMS marketing - profiles, events, flows, segmentation
when-to-use: When integrating Klaviyo for email/SMS marketing
user-invocable: false
effort: medium
---

# Klaviyo E-Commerce Marketing Skill


For integrating Klaviyo email/SMS marketing - customer profiles, event tracking, campaigns, flows, and segmentation.

**Sources:** [Klaviyo API Docs](https://developers.klaviyo.com/en/docs) | [API Reference](https://developers.klaviyo.com/en/reference/api-overview)

---

## Why Klaviyo

| Feature | Benefit |
|---------|---------|
| **E-commerce Native** | Built for online stores, deep integrations |
| **Event-Based** | Trigger flows from any customer action |
| **Segmentation** | Advanced filtering on behavior + properties |
| **Email + SMS** | Unified platform for both channels |
| **Analytics** | Revenue attribution per campaign |

---

## API Basics

### Base URLs

| Type | URL |
|------|-----|
| Server-side (Private) | `https://a.klaviyo.com/api` |
| Client-side (Public) | `https://a.klaviyo.com/client` |

### Authentication

```typescript
// Server-side: Private API Key
const headers = {
  "Authorization": "Klaviyo-API-Key pk_xxxxxxxxxxxxxxxxxxxxxxxx",
  "Content-Type": "application/json",
  "revision": "2024-10-15",  // API version
};

// Client-side: Public API Key (6 characters)
const publicKey = "XXXXXX";  // Company ID
// Use as query param: ?company_id=XXXXXX
```

### API Key Scopes

| Scope | Access |
|-------|--------|
| Read-only | View data only |
| Full | Read + write (default) |
| Custom | Specific permissions |

---

## Installation

### Node.js

```bash
npm install klaviyo-api
```

```typescript
// lib/klaviyo.ts
import { ApiKeySession, EventsApi, ProfilesApi, ListsApi } from "klaviyo-api";

// A single session carries the private API key and is shared by every API
// wrapper below. The key is read from the environment, never hard-coded.
const session = new ApiKeySession(process.env.KLAVIYO_PRIVATE_KEY!);

export const eventsApi = new EventsApi(session);
export const profilesApi = new ProfilesApi(session);
export const listsApi = new ListsApi(session);
```

### Python

```bash
pip install klaviyo-api
```

```python
# lib/klaviyo.py
import os

from klaviyo_api import KlaviyoAPI

# Shared SDK client. The private key is read from the environment; a missing
# KLAVIYO_PRIVATE_KEY raises KeyError at import time instead of failing later.
klaviyo = KlaviyoAPI(
    api_key=os.environ["KLAVIYO_PRIVATE_KEY"],
    max_delay=60,
    max_retries=3
)
```

### Direct HTTP (Any Language)

```typescript
// lib/klaviyo.ts
const KLAVIYO_BASE_URL = "https://a.klaviyo.com/api";

/**
 * Send an authenticated request to the Klaviyo server-side API.
 *
 * @param endpoint Path appended to the base URL, including any query string
 *                 (e.g. "/profiles").
 * @param method   HTTP method; defaults to "GET".
 * @param body     Optional payload, serialized as JSON.
 * @returns The parsed JSON response, or `null` when the response has no body
 *          (e.g. 204 No Content).
 * @throws Error when the response status is not 2xx; the message contains
 *         the error payload returned by the API.
 */
async function klaviyoRequest(
  endpoint: string,
  method: "GET" | "POST" | "PATCH" | "DELETE" = "GET",
  body?: object
) {
  const response = await fetch(`${KLAVIYO_BASE_URL}${endpoint}`, {
    method,
    headers: {
      Authorization: `Klaviyo-API-Key ${process.env.KLAVIYO_PRIVATE_KEY}`,
      "Content-Type": "application/json",
      revision: "2024-10-15",
    },
    body: body ? JSON.stringify(body) : undefined,
  });

  // Read the body once as text: empty responses and non-JSON error pages
  // would make response.json() throw an unrelated parse error.
  const text = await response.text();

  if (!response.ok) {
    let detail: unknown = text;
    try {
      detail = JSON.parse(text);
    } catch {
      // Not JSON - report the raw body instead.
    }
    throw new Error(`Klaviyo API error: ${JSON.stringify(detail)}`);
  }

  return text ? JSON.parse(text) : null;
}
```

---

## Profiles (Customers)

### Create/Update Profile

```typescript
/**
 * Upsert a profile (create it, or update it when it already exists).
 *
 * Uses the "Create or Update Profile" endpoint (`/profile-import`). A plain
 * POST to `/profiles` is create-only and is rejected when a profile with the
 * same identifier already exists.
 *
 * @param data Profile fields; `phone` must be in E.164 format (+1234567890).
 * @returns The API response for the created or updated profile.
 */
async function upsertProfile(data: ProfileInput) {
  return klaviyoRequest("/profile-import", "POST", {
    data: {
      type: "profile",
      attributes: {
        email: data.email,
        phone_number: data.phone, // E.164 format: +1234567890
        first_name: data.firstName,
        last_name: data.lastName,
        properties: {
          // Custom properties
          lifetime_value: data.ltv,
          plan: data.plan,
          signup_source: data.source,
        },
        location: {
          city: data.city,
          region: data.state,
          country: data.country,
          zip: data.zip,
        },
      },
    },
  });
}
```

```python
# Python
def upsert_profile(data):
    """Create or update a profile from a dict of customer fields.

    Requires the keys "email", "first_name" and "last_name"; "plan" is
    optional and is sent as None when absent. Returns whatever the SDK's
    create_or_update_profile call returns.
    """
    create_or_update = klaviyo.Profiles.create_or_update_profile
    attributes = {
        "email": data["email"],
        "first_name": data["first_name"],
        "last_name": data["last_name"],
        "properties": {"plan": data.get("plan")},
    }
    payload = {"data": {"type": "profile", "attributes": attributes}}
    return create_or_update(payload)
```

### Get Profile

```typescript
/**
 * Look up a profile by email address.
 *
 * The filter expression is URL-encoded before it is placed in the query
 * string; otherwise a "+" in the address (e.g. "user+tag@example.com") is
 * decoded as a space and the lookup silently misses.
 *
 * @param email Address to search for. It is interpolated into a quoted
 *              filter literal, so pass a validated address.
 * @returns The first matching profile, or `undefined` when there is none.
 */
async function getProfileByEmail(email: string) {
  const filter = encodeURIComponent(`equals(email,"${email}")`);
  const response = await klaviyoRequest(`/profiles?filter=${filter}`);
  return response.data[0];
}

/** Fetch a single profile by its Klaviyo profile ID. */
async function getProfileById(profileId: string) {
  const path = `/profiles/${profileId}`;
  return klaviyoRequest(path);
}
```

### Update Profile Properties

```typescript
/**
 * PATCH the custom properties of an existing profile.
 *
 * @param profileId  ID of the profile to update (sent in both URL and body).
 * @param properties Custom properties to send under `attributes.properties`.
 */
async function updateProfileProperties(
  profileId: string,
  properties: Record<string, any>
) {
  const payload = {
    data: {
      type: "profile",
      id: profileId,
      attributes: { properties },
    },
  };
  return klaviyoRequest(`/profiles/${profileId}`, "PATCH", payload);
}

// Usage
await updateProfileProperties("profile_id", {
  last_purchase_date: new Date().toISOString(),
  total_orders: 5,
  vip_status: true,
});
```

---

## Events (Tracking)

### Track Event (Server-Side)

```typescript
/**
 * Record a server-side event for the profile identified by `data.email`.
 *
 * `value` is used for revenue tracking and `uniqueId` for deduplication;
 * when no timestamp is supplied the current time is used.
 */
async function trackEvent(data: EventInput) {
  // Who the event belongs to (email here; phone_number or external_id also work).
  const profile = {
    data: {
      type: "profile",
      attributes: { email: data.email },
    },
  };

  // The metric is addressed by its display name.
  const metric = {
    data: {
      type: "metric",
      attributes: { name: data.eventName },
    },
  };

  const attributes = {
    profile,
    metric,
    properties: data.properties,
    value: data.value,
    unique_id: data.uniqueId,
    time: data.timestamp || new Date().toISOString(),
  };

  return klaviyoRequest("/events", "POST", {
    data: { type: "event", attributes },
  });
}
```

### Common E-Commerce Events

```typescript
// Viewed Product
await trackEvent({
  email: customer.email,
  eventName: "Viewed Product",
  properties: {
    ProductID: product.id,
    ProductName: product.name,
    ProductURL: product.url,
    ImageURL: product.image,
    Price: product.price,
    Categories: product.categories,
  },
});

// Added to Cart
await trackEvent({
  email: customer.email,
  eventName: "Added to Cart",
  properties: {
    ProductID: product.id,
    ProductName: product.name,
    Quantity: quantity,
    Price: product.price,
    CartTotal: cart.total,
    ItemNames: cart.items.map(i => i.name),
  },
  value: product.price * quantity,
});

// Started Checkout
await trackEvent({
  email: customer.email,
  eventName: "Started Checkout",
  properties: {
    CheckoutURL: checkout.url,
    ItemCount: cart.itemCount,
    Categories: cart.categories,
    ItemNames: cart.items.map(i => i.name),
  },
  value: cart.total,
});

// Placed Order
await trackEvent({
  email: customer.email,
  eventName: "Placed Order",
  properties: {
    OrderId: order.id,
    ItemCount: order.itemCount,
    Categories: order.categories,
    ItemNames: order.items.map(i => i.name),
    Items: order.items.map(i => ({
      ProductID: i.productId,
      ProductName: i.name,
      Quantity: i.quantity,
      Price: i.price,
      ImageURL: i.image,
      ProductURL: i.url,
    })),
    BillingAddress: order.billingAddress,
    ShippingAddress: order.shippingAddress,
  },
  value: order.total,
  uniqueId: order.id, // Prevent duplicate orders
});

// Fulfilled Order
await trackEvent({
  email: customer.email,
  eventName: "Fulfilled Order",
  properties: {
    OrderId: order.id,
    TrackingNumber: fulfillment.trackingNumber,
    TrackingURL: fulfillment.trackingUrl,
    Carrier: fulfillment.carrier,
  },
});

// Cancelled Order
await trackEvent({
  email: customer.email,
  eventName: "Cancelled Order",
  properties: {
    OrderId: order.id,
    Reason: cancellation.reason,
  },
  value: -order.total, // Negative value for refunds
});
```

### Client-Side Tracking (JavaScript)

```html
<!-- Add to your site -->
<script async src="https://static.klaviyo.com/onsite/js/klaviyo.js?company_id=XXXXXX"></script>

<script>
  // Identify user
  klaviyo.identify({
    email: "customer@example.com",
    first_name: "John",
    last_name: "Doe",
  });

  // Track event
  klaviyo.track("Viewed Product", {
    ProductID: "prod_123",
    ProductName: "Blue T-Shirt",
    Price: 29.99,
  });

  // Track with value
  klaviyo.track("Added to Cart", {
    ProductID: "prod_123",
    ProductName: "Blue T-Shirt",
    Price: 29.99,
    $value: 29.99,  // Revenue tracking
  });
</script>
```

---

## Lists & Segments

### Add Profile to List

```typescript
/**
 * Add existing profiles to a list, identified by email address.
 *
 * The list relationship endpoint accepts resource identifiers only
 * (`type` + `id`), so every email is first resolved to its profile ID.
 *
 * @param listId - Klaviyo list ID.
 * @param emails - Email addresses of the profiles to add.
 * @returns The relationship request result, or `undefined` (no API call)
 *          when `emails` is empty.
 */
async function addToList(listId: string, emails: string[]) {
  if (emails.length === 0) {
    return undefined; // Nothing to add; don't send an empty `data` array.
  }

  const profileIds: string[] = [];
  // Resolve sequentially to stay well inside the burst rate limit.
  for (const email of emails) {
    profileIds.push(await getProfileIdByEmail(email));
  }

  return klaviyoRequest(`/lists/${listId}/relationships/profiles`, "POST", {
    data: profileIds.map(id => ({ type: "profile", id })),
  });
}

// Add one profile to a list when its profile ID is already known
async function addProfileToList(listId: string, profileId: string) {
  const member = { type: "profile", id: profileId };
  const path = `/lists/${listId}/relationships/profiles`;

  return klaviyoRequest(path, "POST", { data: [member] });
}
```

### Remove from List

```typescript
/**
 * Remove a single profile from a list.
 *
 * @param listId - Klaviyo list ID.
 * @param profileId - ID of the profile to detach from the list.
 */
async function removeFromList(listId: string, profileId: string) {
  const payload = {
    data: [{ type: "profile", id: profileId }],
  };

  return klaviyoRequest(`/lists/${listId}/relationships/profiles`, "DELETE", payload);
}
```

### Get List Members

```typescript
/**
 * Fetch one page (up to 100 profiles) of a list's members.
 *
 * @param listId - Klaviyo list ID.
 * @param cursor - Optional pagination cursor; omitted for the first page.
 */
async function getListMembers(listId: string, cursor?: string) {
  const query = new URLSearchParams([["page[size]", "100"]]);

  if (cursor) query.set("page[cursor]", cursor);

  const path = `/lists/${listId}/profiles?${query}`;
  return klaviyoRequest(path);
}
```

### Create List

```typescript
/**
 * Create a new, empty list.
 *
 * @param name - Display name for the list.
 */
async function createList(name: string) {
  const list = { type: "list", attributes: { name } };

  return klaviyoRequest("/lists", "POST", { data: list });
}
```

---

## Campaigns

### Get Campaigns

```typescript
/**
 * List campaigns for one channel, optionally narrowed by status.
 *
 * The campaigns endpoint requires a `messages.channel` filter, so one is
 * always sent; the status clause is AND-ed onto it when provided.
 *
 * @param status - Optional campaign status to filter on.
 * @param channel - Messaging channel to list; defaults to `"email"`.
 */
async function getCampaigns(
  status?: "draft" | "scheduled" | "sent",
  channel: "email" | "sms" | "mobile_push" = "email"
) {
  const channelFilter = `equals(messages.channel,'${channel}')`;
  const filter = status
    ? `and(${channelFilter},equals(status,"${status}"))`
    : channelFilter;

  const params = new URLSearchParams({ filter });
  return klaviyoRequest(`/campaigns?${params}`);
}
```

### Get Campaign Performance

```typescript
/**
 * Fetch the recipient-estimation resource for a campaign.
 *
 * NOTE(review): the path targets `campaign-recipient-estimations`, which
 * looks like an audience-size estimate rather than send performance
 * (opens/clicks) — confirm this is the endpoint you want.
 *
 * @param campaignId - Klaviyo campaign ID.
 */
async function getCampaignMetrics(campaignId: string) {
  const path = `/campaign-recipient-estimations/${campaignId}`;

  return klaviyoRequest(path, "GET");
}
```

---

## Flows (Automations)

### Get Flows

```typescript
/** List flows via the `/flows` collection endpoint. */
async function getFlows() {
  const path = "/flows";
  return klaviyoRequest(path);
}

/**
 * Fetch a single flow.
 *
 * @param flowId - Klaviyo flow ID.
 */
async function getFlowById(flowId: string) {
  const path = `/flows/${flowId}`;
  return klaviyoRequest(path);
}
```

### Common Flow Triggers

| Flow Type | Trigger Event |
|-----------|---------------|
| Welcome Series | Added to List |
| Abandoned Cart | Added to Cart + No Purchase |
| Browse Abandon | Viewed Product + No Cart |
| Post-Purchase | Placed Order |
| Winback | No Order in X Days |
| Review Request | Fulfilled Order |

---

## Webhooks

### Create Webhook

```typescript
/**
 * Register a webhook subscription.
 *
 * @param data - Webhook name, destination URL, signing secret and topics.
 */
async function createWebhook(data: WebhookInput) {
  const attributes = {
    name: data.name,
    endpoint_url: data.url,
    secret_key: data.secret,
    topics: data.topics, // e.g., ["profile.created", "event.created"]
  };

  return klaviyoRequest("/webhooks", "POST", {
    data: { type: "webhook", attributes },
  });
}
```

### Webhook Topics

| Topic | Trigger |
|-------|---------|
| `profile.created` | New profile created |
| `profile.updated` | Profile properties changed |
| `profile.merged` | Profiles merged |
| `event.created` | New event tracked |
| `list.member.added` | Profile added to list |
| `list.member.removed` | Profile removed from list |

### Verify Webhook Signature

```typescript
import crypto from "crypto";

/**
 * Verify the HMAC-SHA256 signature of a webhook delivery.
 *
 * @param payload - Request body as a string; must be exactly what was signed.
 * @param signature - Base64 signature taken from the request header.
 * @param secret - Secret key configured for the webhook.
 * @returns `true` only when the signature matches. Malformed, missing or
 *          wrong-length signatures return `false` instead of throwing.
 */
function verifyKlaviyoWebhook(
  payload: string,
  signature: string,
  secret: string
): boolean {
  // A header can be absent at runtime even though the type says `string`.
  if (typeof signature !== "string" || signature.length === 0) {
    return false;
  }

  const expectedSignature = crypto
    .createHmac("sha256", secret)
    .update(payload)
    .digest("base64");

  const received = Buffer.from(signature);
  const expected = Buffer.from(expectedSignature);

  // timingSafeEqual throws a RangeError when the byte lengths differ, so
  // reject early. The expected length is public, so this leaks nothing.
  if (received.length !== expected.length) {
    return false;
  }

  return crypto.timingSafeEqual(received, expected);
}

// Express handler
//
// NOTE: the signature covers the exact bytes that were sent. Re-serializing a
// parsed body with JSON.stringify only reproduces those bytes when key order,
// whitespace and escaping happen to match, so if your body parser can expose
// the raw request body, pass that to the verifier instead.
app.post("/webhooks/klaviyo", (req, res) => {
  // Node exposes repeated headers as arrays and missing ones as undefined.
  const header = req.headers["klaviyo-webhook-signature"];
  const signature = Array.isArray(header) ? header[0] : header;

  if (
    !signature ||
    !verifyKlaviyoWebhook(JSON.stringify(req.body), signature, WEBHOOK_SECRET)
  ) {
    return res.status(401).json({ error: "Invalid signature" });
  }

  const { type, data } = req.body;

  switch (type) {
    case "profile.created":
      handleNewProfile(data);
      break;
    case "event.created":
      handleNewEvent(data);
      break;
  }

  res.status(200).json({ received: true });
});
```

---

## Rate Limits

| Window | Limit |
|--------|-------|
| Burst | 75 requests/second |
| Steady | 700 requests/minute |

### Handle Rate Limiting

```typescript
/**
 * Call the Klaviyo API, retrying while the request is rate limited (HTTP 429).
 *
 * @param endpoint - Path appended to `KLAVIYO_BASE_URL`, e.g. `/profiles`.
 * @param method - HTTP method; defaults to `GET`.
 * @param body - Optional JSON-serializable request body.
 * @param retries - Maximum number of attempts in total; defaults to 3.
 * @returns The parsed JSON response, or `undefined` for a 204 response.
 * @throws Error `Klaviyo error: <status>` for any non-429 failure, or
 *         `Max retries exceeded` when every attempt was rate limited.
 */
async function klaviyoRequestWithRetry(
  endpoint: string,
  method: "GET" | "POST" | "PATCH" | "DELETE" = "GET",
  body?: object,
  retries = 3
): Promise<any> {
  for (let attempt = 0; attempt < retries; attempt++) {
    const response = await fetch(`${KLAVIYO_BASE_URL}${endpoint}`, {
      method,
      headers: {
        Authorization: `Klaviyo-API-Key ${process.env.KLAVIYO_PRIVATE_KEY}`,
        "Content-Type": "application/json",
        revision: "2024-10-15",
      },
      body: body ? JSON.stringify(body) : undefined,
    });

    if (response.status === 429) {
      // No point sleeping if there is no attempt left to use afterwards.
      if (attempt === retries - 1) {
        break;
      }

      // Retry-After may be missing or non-numeric; parseInt then yields NaN,
      // which setTimeout treats as "no delay". Fall back to exponential
      // backoff (5s, 10s, 20s, ... capped at 60s) in that case.
      const headerSeconds = Number.parseInt(
        response.headers.get("Retry-After") ?? "",
        10
      );
      const delaySeconds =
        Number.isFinite(headerSeconds) && headerSeconds >= 0
          ? headerSeconds
          : Math.min(5 * 2 ** attempt, 60);

      await new Promise(r => setTimeout(r, delaySeconds * 1000));
      continue;
    }

    if (!response.ok) {
      throw new Error(`Klaviyo error: ${response.status}`);
    }

    // A 204 No Content response has no body, so response.json() would throw.
    if (response.status === 204) {
      return undefined;
    }

    return response.json();
  }

  throw new Error("Max retries exceeded");
}
```

---

## Pagination

```typescript
/**
 * Fetch every profile by following cursor pagination to the end.
 *
 * @returns All profile resources, accumulated in page order.
 */
async function getAllProfiles() {
  const profiles: any[] = [];
  let cursor: string | undefined;

  do {
    const params = new URLSearchParams({ "page[size]": "100" });
    if (cursor) {
      params.set("page[cursor]", cursor);
    }

    const response = await klaviyoRequest(`/profiles?${params}`);
    profiles.push(...(response.data ?? []));

    // searchParams.get() returns null when the parameter is absent;
    // normalize to undefined so the loop ends and the types line up.
    const nextLink: string | undefined = response.links?.next ?? undefined;
    cursor = nextLink
      ? new URL(nextLink).searchParams.get("page[cursor]") ?? undefined
      : undefined;
  } while (cursor);

  return profiles;
}
```

---

## Filtering & Sorting

```typescript
// Filter by date
const recentEvents = await klaviyoRequest(
  `/events?filter=greater-than(datetime,2024-01-01T00:00:00Z)`
);

// Filter by property
const vipProfiles = await klaviyoRequest(
  `/profiles?filter=equals(properties.vip_status,true)`
);

// Multiple filters (AND)
const filtered = await klaviyoRequest(
  `/profiles?filter=and(equals(properties.plan,"pro"),greater-than(properties.ltv,1000))`
);

// Sorting
const sorted = await klaviyoRequest(
  `/profiles?sort=-created`  // Descending by created date
);

// Sparse fieldsets (only return specific fields)
const sparse = await klaviyoRequest(
  `/profiles?fields[profile]=email,first_name,properties`
);
```

---

## Integration Patterns

### E-Commerce Order Sync

```typescript
// After order is placed
/**
 * Push a placed order to Klaviyo: upsert the customer, refresh their
 * lifetime metrics, then record the "Placed Order" event.
 *
 * @param order - The order that was just placed.
 */
async function syncOrderToKlaviyo(order: Order) {
  const email = order.customerEmail;

  // 1. Upsert customer profile
  const customer = {
    email,
    firstName: order.customerFirstName,
    lastName: order.customerLastName,
    phone: order.customerPhone,
  };
  await upsertProfile(customer);

  // 2. Update lifetime metrics
  const profileId = await getProfileIdByEmail(email);
  await updateProfileProperties(profileId, {
    last_order_date: new Date().toISOString(),
    total_orders: order.customerOrderCount,
    lifetime_value: order.customerLifetimeValue,
  });

  // 3. Track order event (uniqueId guards against duplicate orders)
  const orderEvent = {
    email,
    eventName: "Placed Order",
    properties: {
      OrderId: order.id,
      Items: order.items,
      // ... other properties
    },
    value: order.total,
    uniqueId: order.id,
  };
  await trackEvent(orderEvent);
}
```

### Subscription Status Sync

```typescript
// When subscription changes
/**
 * Mirror a subscription change onto the user's profile and record it as
 * an event.
 *
 * @param user - User whose subscription changed.
 * @param status - New subscription status; also used in the event name.
 */
async function syncSubscriptionStatus(user: User, status: string) {
  const profileUpdate = {
    subscription_status: status,
    subscription_plan: user.plan,
    subscription_updated_at: new Date().toISOString(),
  };
  await updateProfileProperties(user.klaviyoProfileId, profileUpdate);

  // A cancellation is recorded as negative revenue.
  let value = user.mrr;
  if (status === "cancelled") {
    value = -user.mrr;
  }

  await trackEvent({
    email: user.email,
    eventName: `Subscription ${status}`,
    properties: {
      plan: user.plan,
      mrr: user.mrr,
    },
    value,
  });
}
```

---

## Environment Variables

```bash
# .env
KLAVIYO_PRIVATE_KEY=pk_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
KLAVIYO_PUBLIC_KEY=XXXXXX
KLAVIYO_WEBHOOK_SECRET=your_webhook_secret
```

Add to `credentials.md`:
```python
'KLAVIYO_PRIVATE_KEY': r'pk_[a-f0-9]{32}',
'KLAVIYO_PUBLIC_KEY': r'[A-Z0-9]{6}',
```

---

## Checklist

### Setup

- [ ] Klaviyo account created
- [ ] Private API key generated
- [ ] Public API key noted (company ID)
- [ ] API revision set in headers

### Integration

- [ ] Profile sync on signup/update
- [ ] Key events tracked (view, cart, order)
- [ ] Order events include Items array
- [ ] Revenue tracked with $value
- [ ] Unique IDs for deduplication

### Testing

- [ ] Test profile creation
- [ ] Test event tracking
- [ ] Verify events in Klaviyo dashboard
- [ ] Test webhook delivery
- [ ] Test rate limit handling

---

## Anti-Patterns

- **Missing email/phone** - Every profile needs at least one identifier
- **Duplicate events** - Use unique_id for orders/transactions
- **Missing Items array** - Required for product recommendations
- **Client-side only** - Server-side tracking is more reliable
- **Ignoring rate limits** - Implement exponential backoff
- **Hardcoded API keys** - Use environment variables
- **Missing revenue tracking** - Include $value for ROI attribution


================================================
FILE: skills/llm-patterns/SKILL.md
================================================
---
name: llm-patterns
description: AI-first application patterns, LLM testing, prompt management
when-to-use: When building apps where LLMs handle core logic - classification, extraction, generation
user-invocable: false
effort: medium
---

# LLM Patterns Skill


For AI-first applications where LLMs handle logical operations.

---

## Core Principle

**LLM for logic, code for plumbing.**

Use LLMs for:
- Classification, extraction, summarization
- Decision-making with natural language reasoning
- Content generation and transformation
- Complex conditional logic that would be brittle in code

Use traditional code for:
- Data validation (Zod/Pydantic)
- API routing and HTTP handling
- Database operations
- Authentication/authorization
- Orchestration and error handling

---

## Project Structure

```
project/
├── src/
│   ├── core/
│   │   ├── prompts/           # Prompt templates
│   │   │   ├── classify.ts
│   │   │   └── extract.ts
│   │   ├── llm/               # LLM client and utilities
│   │   │   ├── client.ts      # LLM client wrapper
│   │   │   ├── schemas.ts     # Response schemas (Zod)
│   │   │   └── index.ts
│   │   └── services/          # Business logic using LLM
│   ├── infra/
│   └── ...
├── tests/
│   ├── unit/
│   ├── integration/
│   └── llm/                   # LLM-specific tests
│       ├── fixtures/          # Saved responses for deterministic tests
│       ├── evals/             # Evaluation test suites
│       └── mocks/             # Mock LLM responses
└── _project_specs/
    └── prompts/               # Prompt specifications
```

---

## LLM Client Pattern

### Typed LLM Wrapper
```typescript
// core/llm/client.ts
import Anthropic from '@anthropic-ai/sdk';
import { z } from 'zod';

const client = new Anthropic();

interface LLMCallOptions<T> {
  prompt: string;
  schema: z.ZodSchema<T>;
  model?: string;
  maxTokens?: number;
}

/**
 * Send a single-turn prompt and return the reply parsed and validated
 * against `schema`.
 *
 * @param prompt - User message content.
 * @param schema - Zod schema the JSON reply must satisfy.
 * @param model - Model ID; defaults to `claude-sonnet-4-20250514`.
 * @param maxTokens - Output token cap; defaults to 1024.
 * @returns The validated, typed reply.
 * @throws SyntaxError when the reply has no text or is not valid JSON.
 * @throws ZodError (from `schema.parse`) when the JSON has the wrong shape.
 */
export async function llmCall<T>({
  prompt,
  schema,
  model = 'claude-sonnet-4-20250514',
  maxTokens = 1024,
}: LLMCallOptions<T>): Promise<T> {
  const response = await client.messages.create({
    model,
    max_tokens: maxTokens,
    messages: [{ role: 'user', content: prompt }],
  });

  // The content array may be empty or start with a non-text block, so look
  // for the first text block instead of assuming index 0.
  let text = '';
  for (const block of response.content) {
    if (block.type === 'text') {
      text = block.text.trim();
      break;
    }
  }
  if (!text) {
    throw new SyntaxError('LLM response contained no text content');
  }

  // Models frequently wrap JSON in a Markdown code fence; unwrap it.
  const json = text
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/\s*```$/, '');

  // Parse and validate response
  let parsed: unknown;
  try {
    parsed = JSON.parse(json);
  } catch {
    throw new SyntaxError(
      `LLM response was not valid JSON: ${text.slice(0, 200)}`
    );
  }
  return schema.parse(parsed);
}
```

### Structured Outputs
```typescript
// core/llm/schemas.ts
import { z } from 'zod';

export const ClassificationSchema = z.object({
  category: z.enum(['support', 'sales', 'feedback', 'other']),
  confidence: z.number().min(0).max(1),
  reasoning: z.string(),
});

export type Classification = z.infer<typeof ClassificationSchema>;
```

---

## Prompt Patterns

### Template Functions
```typescript
// core/prompts/classify.ts
/**
 * Build the classification prompt for a support ticket.
 *
 * @param ticket - Raw ticket text, appended verbatim after the instructions.
 * @returns The full prompt, asking for a JSON reply.
 */
export function classifyTicketPrompt(ticket: string): string {
  const categories = [
    '- support: Technical issues or help requests',
    '- sales: Pricing, plans, or purchase inquiries',
    '- feedback: Suggestions or complaints',
    '- other: Anything else',
  ];

  const responseShape = [
    '{',
    '  "category": "...",',
    '  "confidence": 0.0-1.0,',
    '  "reasoning": "brief explanation"',
    '}',
  ];

  return [
    'Classify this support ticket into one of these categories:',
    ...categories,
    '',
    'Respond with JSON:',
    ...responseShape,
    '',
    'Ticket:',
    ticket,
  ].join('\n');
}
```

### Prompt Versioning
```typescript
// core/prompts/index.ts
export const PROMPTS = {
  classify: {
    v1: classifyTicketPromptV1,
    v2: classifyTicketPromptV2,  // improved accuracy
    current: classifyTicketPromptV2,
  },
} as const;
```

---

## Testing LLM Calls

### 1. Unit Tests with Mocks (Fast, Deterministic)
```typescript
// tests/llm/mocks/classify.mock.ts
export const mockClassifyResponse = {
  category: 'support',
  confidence: 0.95,
  reasoning: 'User is asking for help with login',
};

// tests/unit/services/ticket.test.ts
import { classifyTicket } from '../../../src/core/services/ticket';
import { mockClassifyResponse } from '../../llm/mocks/classify.mock';

// Mock the LLM client
vi.mock('../../../src/core/llm/client', () => ({
  llmCall: vi.fn().mockResolvedValue(mockClassifyResponse),
}));

describe('classifyTicket', () => {
  it('returns classification for ticket', async () => {
    const result = await classifyTicket('I cannot log in');

    expect(result.category).toBe('support');
    expect(result.confidence).toBeGreaterThan(0.9);
  });
});
```

### 2. Fixture Tests (Deterministic, Tests Parsing)
```typescript
// tests/llm/fixtures/classify.fixtures.json
{
  "support_ticket": {
    "input": "I can't reset my password",
    "expected_category": "support",
    "raw_response": "{\"category\":\"support\",\"confidence\":0.98,\"reasoning\":\"Password reset is a support issue\"}"
  }
}

// tests/llm/classify.fixture.test.ts
import fixtures from './fixtures/classify.fixtures.json';
import { ClassificationSchema } from '../../src/core/llm/schemas';

describe('Classification Response Parsing', () => {
  Object.entries(fixtures).forEach(([name, fixture]) => {
    it(`parses ${name} correctly`, () => {
      const parsed = JSON.parse(fixture.raw_response);
      const result = ClassificationSchema.parse(parsed);

      expect(result.category).toBe(fixture.expected_category);
    });
  });
});
```

### 3. Evaluation Tests (Slow, Run in CI nightly)
```typescript
// tests/llm/evals/classify.eval.test.ts
import { classifyTicket } from '../../../src/core/services/ticket';

const TEST_CASES = [
  { input: 'How much does the pro plan cost?', expected: 'sales' },
  { input: 'The app crashes when I click save', expected: 'support' },
  { input: 'You should add dark mode', expected: 'feedback' },
  { input: 'What time is it in Tokyo?', expected: 'other' },
];

describe('Classification Accuracy (Eval)', () => {
  // Skip in regular CI, run nightly
  const runEvals = process.env.RUN_LLM_EVALS === 'true';

  it.skipIf(!runEvals)('achieves >90% accuracy on test set', async () => {
    let correct = 0;

    for (const testCase of TEST_CASES) {
      const result = await classifyTicket(testCase.input);
      if (result.category === testCase.expected) correct++;
    }

    const accuracy = correct / TEST_CASES.length;
    expect(accuracy).toBeGreaterThan(0.9);
  }, 60000); // 60s timeout for LLM calls
});
```

---

## GitHub Actions for LLM Tests

```yaml
# .github/workflows/quality.yml (add to existing)
on:
  # ... existing triggers ...
  schedule:
    - cron: '0 3 * * *'    # nightly; without this the llm-evals job never runs
  workflow_dispatch:        # lets you start the evals on demand

jobs:
  quality:
    steps:
      # ... existing steps ...

      - name: Run Tests (with LLM mocks)
        run: npm run test:coverage

  llm-evals:
    runs-on: ubuntu-latest
    # Run nightly or on-demand
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install dependencies
        run: npm ci

      - name: Run LLM Evals
        run: npm run test:evals
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          RUN_LLM_EVALS: 'true'
```

---

## Cost & Performance Tracking

```typescript
// core/llm/client.ts - add tracking
interface LLMMetrics {
  model: string;
  inputTokens: number;
  outputTokens: number;
  latencyMs: number;
  cost: number;
}

/**
 * Same contract as `llmCall`, but also reports token usage, latency and cost.
 *
 * @param options - Prompt, schema and optional model / token cap. The
 *                  defaults match `llmCall`.
 * @returns The validated result together with the metrics for this call.
 * @throws SyntaxError / ZodError when the reply cannot be parsed or
 *         validated. Metrics are logged before parsing, so failed calls
 *         are still accounted for.
 */
export async function llmCallWithMetrics<T>(
  options: LLMCallOptions<T>
): Promise<{ result: T; metrics: LLMMetrics }> {
  const {
    prompt,
    schema,
    model = 'claude-sonnet-4-20250514',
    maxTokens = 1024,
  } = options;

  const start = Date.now();

  const response = await client.messages.create({
    model,
    max_tokens: maxTokens,
    messages: [{ role: 'user', content: prompt }],
  });

  // Use the resolved `model` (never undefined) rather than `options.model`.
  const metrics: LLMMetrics = {
    model,
    inputTokens: response.usage.input_tokens,
    outputTokens: response.usage.output_tokens,
    latencyMs: Date.now() - start,
    cost: calculateCost(response.usage, model),
  };

  // Log or send to monitoring
  console.log('[LLM]', metrics);

  const first = response.content[0];
  const text = first && first.type === 'text' ? first.text : '';
  const result = schema.parse(JSON.parse(text));

  return { result, metrics };
}
```

---

## LLM Anti-Patterns

- ❌ Hardcoded prompts in business logic - use prompt templates
- ❌ No schema validation on LLM responses - always use Zod
- ❌ Testing with live LLM calls in CI - use mocks for unit tests
- ❌ No cost tracking - monitor token usage
- ❌ Ignoring latency - LLM calls are slow, design for async
- ❌ No fallback for LLM failures - handle timeouts and errors
- ❌ Prompts without version control - track prompt changes
- ❌ No evaluation suite - measure accuracy over time
- ❌ Using LLM for deterministic logic - use code for validation, auth, math
- ❌ Giant monolithic prompts - compose smaller focused prompts


================================================
FILE: skills/maggy/SKILL.md
================================================
---
name: maggy
description: Maggy is a local AI engineering command center. AI-prioritized inbox across issue trackers (GitHub Issues/Asana), one-click TDD execute with iCPG context enrichment, daily competitor intelligence briefing.
when-to-use: "When you want a persistent dashboard to triage tickets and spawn Claude Code runs against any repo"
user-invocable: true
effort: medium
---

# Maggy Skill

**Maggy** is a generic, local AI engineering command center. Install once, point it at your team's issue tracker and codebases, and get:

- **AI-prioritized inbox** — ranks open issues by urgency, OKR alignment, and recency
- **One-click Execute** — spawns Claude Code locally with iCPG context injected
- **Competitor intelligence** — daily AI briefing on your competitive landscape
- **No hardcoding** — works for any team, any stack, any issue tracker

### ⚠️ Execute permission model (important)

Execute currently runs `claude -p --dangerously-skip-permissions` so the TDD
pipeline isn't blocked waiting on approval prompts (subprocess has no terminal).
That flag **grants Claude full permission to write/edit files and run shell
commands** inside the target codebase, and the prompt it receives includes
content from the issue tracker (which any team member can author).

**Hardening already in place:**
- `working_dir` is validated against the list of codebase roots in
  `~/.maggy/config.yaml` — Claude can't be pointed at arbitrary filesystem paths.
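- Only tickets from your configured trackers reach Execute; nothing outside
  those trackers flows into the prompt. (On a public repository anyone can
  open an issue, so still check the ticket author before executing.)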

**Roadmap:** move the unconditional flag behind per-codebase config
(`auto_approve: true|false`) so privileged execution becomes opt-in.
Until then, treat clicking Execute on a ticket like running `git pull && make`
on someone else's branch — only run it on repos you own, against tickets from
authors you trust.

```
┌──────────────────────────────────────────────────────────────┐
│  maggy               ──────────────┐                          │
│  ├── skills/         ← installed globally → ~/.claude/       │
│  ├── commands/       ← installed globally → ~/.claude/       │
│  ├── scripts/icpg/   ← used by Maggy for context enrichment  │
│  └── maggy/          ← dashboard: run `./install.sh` to use  │
│      ├── src/                                                │
│      │   ├── providers/   ← GitHub / Asana / Linear          │
│      │   ├── services/    ← inbox, competitor, executor      │
│      │   └── api/         ← FastAPI routes                   │
│      └── install.sh                                          │
└──────────────────────────────────────────────────────────────┘
```

---

## When Maggy Helps

| Scenario                                 | How Maggy helps                               |
|------------------------------------------|-----------------------------------------------|
| Morning triage of 50 open issues         | AI ranks them; top items stay top             |
| Implementing a ticket                    | `Execute` → iCPG-enriched TDD pipeline        |
| "What are competitors shipping?"         | Daily briefing + filterable news feed         |
| Multiple repos per team                  | Auto-picks right repo based on ticket content |
| New team onboarding                      | Configure via `/maggy-init`, no code writing  |

---

## Install and Configure

```bash
# One-time install
# (quoted so a bootstrap path containing spaces still resolves)
cd "$(cat ~/.claude/.bootstrap-dir)/maggy"
./install.sh

# Configure
# Edit ~/.maggy/config.yaml — see maggy/config.example.yaml for the schema

# Credentials
export GITHUB_TOKEN=ghp_...
export ANTHROPIC_API_KEY=sk-ant-...

# Run
python3 -m src.main

# Or from Claude Code:
#   /maggy-init    # interactive wizard
#   /maggy         # launch dashboard
```

---

## Provider Abstraction

Maggy services never see GitHub/Asana directly — they talk to an `IssueTrackerProvider` Protocol. Drop-in swap between:

- `GitHubIssuesProvider` — scans multiple repos, aggregates open issues, maps "done" → closed
- `AsanaProvider` — queries projects, respects workspace scope
- `LinearProvider` — stub for future

The same inbox, Execute pipeline, and Competitor features work with any provider.

---

## Execute Pipeline

When you click Execute on a ticket:

1. Maggy queries the configured iCPG for relevant symbols, blast radius, and prior intents
2. Picks the right working directory based on ticket keywords + configured codebases
3. Spawns `claude -p --dangerously-skip-permissions` in that directory
4. Runs analyze → write failing tests → implement
5. Captures output in a session you can follow in the Sessions tab

Because the spawned Claude Code runs in the target repo, it picks up:
- That repo's `CLAUDE.md`
- Your global `~/.claude/CLAUDE.md`
- All bootstrap skills
- `.claude/hooks/`, `.mcp.json`

So Execute gets the full bootstrap experience — not a stripped-down version.

---

## Competitor Intelligence

Generic — works for any domain:

1. Configure `competitors.categories: ["fintech", "embedded-finance"]` in `~/.maggy/config.yaml`
2. Click Discover — Claude identifies 12-18 competitors (market leaders, AI-first challengers, vertical specialists)
3. Maggy monitors their RSS blogs + Google News daily
4. Daily briefing is generated once per day (cached), regeneratable on demand

---

## Not Included

Maggy MVP is focused. Not shipped:

- Meeting bot (voice)
- Slack integration
- P2P network + session handoff
- Self-improvement (`/improve-maggy`)
- Linear provider (stub only)

These are v2 work.

---

## Files

- `maggy/PLAN.md` — architecture rationale
- `maggy/README.md` — user docs
- `maggy/src/providers/base.py` — IssueTrackerProvider Protocol
- `maggy/src/services/executor.py` — TDD pipeline
- `maggy/src/services/competitor.py` — discovery + briefing
- `maggy/src/services/inbox.py` — AI prioritization
- `commands/maggy.md` — `/maggy` launcher
- `commands/maggy-init.md` — `/maggy-init` setup wizard


================================================
FILE: skills/medusa/SKILL.md
================================================
---
name: medusa
description: Medusa headless commerce - modules, workflows, API routes, admin UI
when-to-use: When building with Medusa commerce platform
user-invocable: false
effort: medium
---

# Medusa E-Commerce Skill


For building headless e-commerce with Medusa - open-source, Node.js native, fully customizable.

**Sources:** [Medusa Docs](https://docs.medusajs.com) | [API Reference](https://docs.medusajs.com/api/store) | [GitHub](https://github.com/medusajs/medusa)

---

## Why Medusa

| Feature | Benefit |
|---------|---------|
| **Open Source** | Self-host, no vendor lock-in, MIT license |
| **Node.js Native** | TypeScript, familiar stack, easy to customize |
| **Headless** | Any frontend (Next.js, Remix, mobile) |
| **Modular** | Use only what you need, extend anything |
| **Built-in Admin** | Dashboard included, customizable |

---

## Quick Start

### Prerequisites

```bash
# Required
node --version  # v20+ LTS
git --version
# PostgreSQL running locally or remote
```

### Create New Project

```bash
# Scaffold new Medusa application
npx create-medusa-app@latest my-store

# This creates:
# - Medusa backend
# - PostgreSQL database (auto-configured)
# - Admin dashboard
# - Optional: Next.js storefront

cd my-store
npm run dev
```

### Access Points

| URL | Purpose |
|-----|---------|
| `http://localhost:9000` | Backend API |
| `http://localhost:9000/app` | Admin dashboard |
| `http://localhost:8000` | Storefront (if installed) |

### Create Admin User

```bash
npx medusa user -e admin@example.com -p supersecret
```

---

## Project Structure

```
medusa-store/
├── src/
│   ├── admin/                    # Admin UI customizations
│   │   ├── widgets/              # Dashboard widgets
│   │   └── routes/               # Custom admin pages
│   ├── api/                      # Custom API routes
│   │   ├── store/                # Public storefront APIs
│   │   │   └── custom/
│   │   │       └── route.ts
│   │   └── admin/                # Admin APIs
│   │       └── custom/
│   │           └── route.ts
│   ├── jobs/                     # Scheduled tasks
│   ├── modules/                  # Custom business logic
│   ├── workflows/                # Multi-step processes
│   ├── subscribers/              # Event listeners
│   └── links/                    # Module relationships
├── .medusa/                      # Auto-generated (don't edit)
├── medusa-config.ts              # Configuration
├── package.json
└── tsconfig.json
```

---

## Configuration

### medusa-config.ts

```typescript
import { defineConfig, loadEnv } from "@medusajs/framework/utils";

loadEnv(process.env.NODE_ENV || "development", process.cwd());

export default defineConfig({
  projectConfig: {
    databaseUrl: process.env.DATABASE_URL,
    http: {
      storeCors: process.env.STORE_CORS || "http://localhost:8000",
      adminCors: process.env.ADMIN_CORS || "http://localhost:9000",
      authCors: process.env.AUTH_CORS || "http://localhost:9000",
    },
    redisUrl: process.env.REDIS_URL,
  },
  admin: {
    disable: false,
    backendUrl: process.env.MEDUSA_BACKEND_URL || "http://localhost:9000",
  },
  modules: [
    // Add custom modules here
  ],
});
```

### Environment Variables

```bash
# .env
DATABASE_URL=postgresql://user:pass@localhost:5432/medusa
REDIS_URL=redis://localhost:6379

# CORS (comma-separated for multiple origins)
STORE_CORS=http://localhost:8000
ADMIN_CORS=http://localhost:9000
# Read by medusa-config.ts as http.authCors
AUTH_CORS=http://localhost:9000

# Backend URL
MEDUSA_BACKEND_URL=http://localhost:9000

# JWT Secrets
JWT_SECRET=your-super-secret-jwt-key
COOKIE_SECRET=your-super-secret-cookie-key
```

---

## Custom API Routes

### Store API (Public)

```typescript
// src/api/store/hello/route.ts
import type { MedusaRequest, MedusaResponse } from "@medusajs/framework/http";

/** Respond with a static greeting payload. */
export async function GET(req: MedusaRequest, res: MedusaResponse) {
  const payload = { message: "Hello from custom store API!" };

  res.json(payload);
}

// Accessible at: GET /store/hello
```

### Admin API (Protected)

```typescript
// src/api/admin/analytics/route.ts
import type { MedusaRequest, MedusaResponse } from "@medusajs/framework/http";
import { Modules } from "@medusajs/framework/utils";

/**
 * Report the order count and summed `total` for orders created in the
 * last 30 days.
 */
export async function GET(req: MedusaRequest, res: MedusaResponse) {
  const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;

  const orderModule = req.scope.resolve(Modules.ORDER);
  const since = new Date(Date.now() - THIRTY_DAYS_MS); // Last 30 days

  const recentOrders = await orderModule.listOrders({
    created_at: { $gte: since },
  });

  // Orders without a total count as zero.
  let totalRevenue = 0;
  for (const order of recentOrders) {
    totalRevenue = totalRevenue + (order.total || 0);
  }

  res.json({
    orderCount: recentOrders.length,
    totalRevenue,
  });
}

// Accessible at: GET /admin/analytics (requires auth)
```

### Route with Parameters

```typescript
// src/api/store/products/[id]/reviews/route.ts
import type { MedusaRequest, MedusaResponse } from "@medusajs/framework/http";

/** Return the reviews for the product named by the `:id` route parameter. */
export async function GET(req: MedusaRequest, res: MedusaResponse) {
  const productId = req.params.id;

  // Fetch reviews for product
  const reviews = await getReviewsForProduct(productId);

  res.json({ reviews });
}

/**
 * Create a review for the product named by the `:id` route parameter.
 *
 * The body type is given explicitly because a bare `MedusaRequest` types
 * `req.body` as `unknown`, which cannot be destructured.
 *
 * NOTE(review): `customerId` is taken from the request body, i.e. it is
 * client-supplied — confirm whether it should come from the authenticated
 * session instead.
 */
export async function POST(
  req: MedusaRequest<{ rating: number; comment: string; customerId: string }>,
  res: MedusaResponse
) {
  const { id } = req.params;
  const { rating, comment, customerId } = req.body;

  const review = await createReview({
    productId: id,
    rating,
    comment,
    customerId,
  });

  res.status(201).json({ review });
}

// Accessible at:
// GET  /store/products/:id/reviews
// POST /store/products/:id/reviews
```

### Middleware

```typescript
// src/api/middlewares.ts
import { authenticate, defineMiddlewares } from "@medusajs/framework/http";

// Customer-only storefront routes
const protectedStoreRoutes = {
  matcher: "/store/protected/*",
  middlewares: [authenticate("customer", ["session", "bearer"])],
};

// Admin routes, restricted to admin users
const adminRoutes = {
  matcher: "/admin/*",
  middlewares: [authenticate("user", ["session", "bearer"])],
};

export default defineMiddlewares({
  routes: [protectedStoreRoutes, adminRoutes],
});
```

---

## Modules (Custom Business Logic)

### Create Custom Module

```typescript
// src/modules/reviews/index.ts
import { Module } from "@medusajs/framework/utils";
import ReviewModuleService from "./service";

export const REVIEW_MODULE = "reviewModuleService";

export default Module(REVIEW_MODULE, {
  service: ReviewModuleService,
});
```

```typescript
// src/modules/reviews/service.ts
import { MedusaService } from "@medusajs/framework/utils";

/**
 * Service class for the reviews module. The method bodies are placeholders
 * to be filled in with the module's actual logic.
 */
class ReviewModuleService extends MedusaService({}) {
  /** Create a review from `data`. Placeholder — not yet implemented. */
  async createReview(data: CreateReviewInput) {
    // Implementation
  }

  /** Look up reviews for `productId`. Placeholder — not yet implemented. */
  async getProductReviews(productId: string) {
    // Implementation
  }

  /** Compute the average rating for `productId`. Placeholder — not yet implemented. */
  async getAverageRating(productId: string) {
    // Implementation
  }
}

export default ReviewModuleService;
```

### Register Module

```typescript
// medusa-config.ts
import { REVIEW_MODULE } from "./src/modules/reviews";

export default defineConfig({
  // ...
  modules: [
    {
      resolve: "./src/modules/reviews",
      options: {},
    },
  ],
});
```

### Use Module in API

```typescript
// src/api/store/products/[id]/reviews/route.ts
import type { MedusaRequest, MedusaResponse } from "@medusajs/framework/http";
// This file sits five directories below src/, so the module lives at
// ../../../../../modules/reviews (three levels up would stop at src/api/store).
import { REVIEW_MODULE } from "../../../../../modules/reviews";

/** Return a product's reviews together with its average rating. */
export async function GET(req: MedusaRequest, res: MedusaResponse) {
  const { id } = req.params;
  const reviewService = req.scope.resolve(REVIEW_MODULE);

  const reviews = await reviewService.getProductReviews(id);
  const averageRating = await reviewService.getAverageRating(id);

  res.json({ reviews, averageRating });
}
```

---

## Workflows

### Define Workflow

```typescript
// src/workflows/create-order-with-notification/index.ts
import {
  createWorkflow,
  createStep,
  StepResponse,
  WorkflowResponse,
} from "@medusajs/framework/workflows-sdk";
import { Modules } from "@medusajs/framework/utils";

// Step 1: create the order. The second StepResponse argument (the order id)
// is the value handed to the compensation function below.
const createOrderStep = createStep(
  "create-order",
  async (input: CreateOrderInput, { container }) => {
    const orderService = container.resolve(Modules.ORDER);

    const order = await orderService.createOrders(input);

    return new StepResponse(order, order.id);
  },
  // Compensation (rollback) function
  async (orderId, { container }) => {
    // If the step failed before an order existed there is nothing to undo.
    if (!orderId) {
      return;
    }

    const orderService = container.resolve(Modules.ORDER);
    await orderService.deleteOrders([orderId]);
  }
);

// Step 2: send the confirmation. No compensation: a sent message cannot be recalled.
const sendNotificationStep = createStep(
  "send-notification",
  async (order: Order, { container }) => {
    const notificationService = container.resolve("notificationService");

    await notificationService.send({
      to: order.email,
      template: "order-confirmation",
      data: { order },
    });

    return new StepResponse({ sent: true });
  }
);

export const createOrderWithNotificationWorkflow = createWorkflow(
  "create-order-with-notification",
  (input: CreateOrderInput) => {
    const order = createOrderStep(input);
    const notification = sendNotificationStep(order);

    // The constructor function must return a WorkflowResponse, not a plain object.
    return new WorkflowResponse({ order, notification });
  }
);
```

### Execute Workflow

```typescript
// In an API route
import { createOrderWithNotificationWorkflow } from "../../../workflows/create-order-with-notification";

export async function POST(req: MedusaRequest, res: MedusaResponse) {
  const { result } = await createOrderWithNotificationWorkflow(req.scope).run({
    input: req.body,
  });

  res.json(result);
}
```

---

## Subscribers (Event Listeners)

### Create Subscriber

```typescript
// src/subscribers/order-placed.ts
import type { SubscriberArgs, SubscriberConfig } from "@medusajs/framework";

export default async function orderPlacedHandler({
  event,
  container,
}: SubscriberArgs<{ id: string }>) {
  const orderId = event.data.id;

  console.log(`Order placed: ${orderId}`);

  // Send notification, update analytics, etc.
  const notificationService = container.resolve("notificationService");
  await notificationService.sendOrderConfirmation(orderId);
}

export const config: SubscriberConfig = {
  event: "order.placed",
};
```

### Common Events

| Event | Trigger |
|-------|---------|
| `order.placed` | New order created |
| `order.updated` | Order modified |
| `order.canceled` | Order cancelled |
| `order.completed` | Order fulfilled |
| `customer.created` | New customer registered |
| `product.created` | New product added |
| `product.updated` | Product modified |
| `inventory.updated` | Stock changed |

---

## Scheduled Jobs

```typescript
// src/jobs/sync-inventory.ts
import type { MedusaContainer } from "@medusajs/framework";

export default async function syncInventoryJob(container: MedusaContainer) {
  const inventoryService = container.resolve("inventoryService");

  console.log("Running inventory sync...");

  await inventoryService.syncFromExternalSource();

  console.log("Inventory sync complete");
}

export const config = {
  name: "sync-inventory",
  schedule: "0 */6 * * *", // Every 6 hours
};
```

---

## Admin UI Customization

### Custom Widget

```tsx
// src/admin/widgets/sales-overview.tsx
import { defineWidgetConfig } from "@medusajs/admin-sdk";
import { Container, Heading, Text } from "@medusajs/ui";

const SalesOverviewWidget = () => {
  return (
    <Container>
      <Heading level="h2">Sales Overview</Heading>
      <Text>Your custom sales data here...</Text>
    </Container>
  );
};

export const config = defineWidgetConfig({
  zone: "order.list.before", // Where to show the widget
});

export default SalesOverviewWidget;
```

### Widget Zones

| Zone | Location |
|------|----------|
| `order.list.before` | Before order list |
| `order.details.after` | After order details |
| `product.list.before` | Before product list |
| `product.details.after` | After product details |
| `customer.list.before` | Before customer list |

### Custom Admin Route

```tsx
// src/admin/routes/analytics/page.tsx
import { defineRouteConfig } from "@medusajs/admin-sdk";
import { Container, Heading } from "@medusajs/ui";
import { ChartBar } from "@medusajs/icons";

const AnalyticsPage = () => {
  return (
    <Container>
      <Heading level="h1">Analytics Dashboard</Heading>
      {/* Your analytics charts */}
    </Container>
  );
};

export const config = defineRouteConfig({
  label: "Analytics",
  icon: ChartBar,
});

export default AnalyticsPage;
```

---

## Store API (Built-in)

### Products

```typescript
// Frontend: Fetch products
const response = await fetch("http://localhost:9000/store/products");
const { products } = await response.json();

// With filters
// (distinct names: a `const` binding cannot be redeclared in the same scope)
const filteredResponse = await fetch(
  "http://localhost:9000/store/products?" +
  new URLSearchParams({
    category_id: "cat_123",
    limit: "20",
    offset: "0",
  })
);
const { products: filteredProducts } = await filteredResponse.json();
```

### Cart

```typescript
// Create cart
const { cart } = await fetch("http://localhost:9000/store/carts", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    region_id: "reg_123",
  }),
}).then(r => r.json());

// Add item
await fetch(`http://localhost:9000/store/carts/${cart.id}/line-items`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    variant_id: "variant_123",
    quantity: 1,
  }),
});

// Complete cart (create order)
const { order } = await fetch(
  `http://localhost:9000/store/carts/${cart.id}/complete`,
  { method: "POST" }
).then(r => r.json());
```

### Customer Authentication

```typescript
// Register
await fetch("http://localhost:9000/store/customers", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    email: "customer@example.com",
    password: "password123",
    first_name: "John",
    last_name: "Doe",
  }),
});

// Login
const { token } = await fetch("http://localhost:9000/store/auth/token", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    email: "customer@example.com",
    password: "password123",
  }),
}).then(r => r.json());

// Authenticated request
await fetch("http://localhost:9000/store/customers/me", {
  headers: {
    Authorization: `Bearer ${token}`,
  },
});
```

---

## Payment Integration

### Stripe Setup

```bash
npm install @medusajs/payment-stripe
```

```typescript
// medusa-config.ts
export default defineConfig({
  modules: [
    {
      // Stripe is a payment *provider*: it is registered under the Payment
      // Module's `providers` option rather than as a standalone module.
      resolve: "@medusajs/medusa/payment",
      options: {
        providers: [
          {
            resolve: "@medusajs/medusa/payment-stripe",
            id: "stripe",
            options: {
              apiKey: process.env.STRIPE_API_KEY,
            },
          },
        ],
      },
    },
  ],
});
```

### In Admin

1. Go to Settings → Regions
2. Add Stripe as payment provider
3. Configure for each region

---

## Deployment

### Railway

```bash
# Install Railway CLI
npm install -g @railway/cli

# Login and deploy
railway login
railway init
railway up
```

### Render

```yaml
# render.yaml
services:
  - type: web
    name: medusa-backend
    runtime: node
    plan: starter
    buildCommand: npm install && npm run build
    startCommand: npm run start
    envVars:
      - key: NODE_ENV
        value: production
      - key: DATABASE_URL
        fromDatabase:
          name: medusa-db
          property: connectionString
      - key: JWT_SECRET
        generateValue: true
      - key: COOKIE_SECRET
        generateValue: true

databases:
  - name: medusa-db
    plan: starter
```

### Docker

```dockerfile
FROM node:20-alpine

WORKDIR /app

# Install ALL dependencies first: `npm run build` needs the devDependencies
# (compiler, build tooling), so a production-only install would break the build.
COPY package*.json ./
RUN npm ci

COPY . .
RUN npm run build

# Drop devDependencies now that the build output exists.
RUN npm prune --omit=dev

EXPOSE 9000
CMD ["npm", "run", "start"]
```

---

## CLI Commands

```bash
# Development
npm run dev                    # Start dev server

# Database
npx medusa db:migrate          # Run migrations
npx medusa db:sync-links       # Sync module link tables

# Users
npx medusa user -e email -p pass  # Create admin user

# Build
npm run build                  # Build for production
npm run start                  # Start production server
```

---

## Checklist

### Setup

- [ ] PostgreSQL database configured
- [ ] Redis configured (optional but recommended)
- [ ] Admin user created
- [ ] CORS origins configured
- [ ] JWT/Cookie secrets set

### Customization

- [ ] Custom modules for business logic
- [ ] Custom API routes for frontend
- [ ] Subscribers for event handling
- [ ] Workflows for complex operations

### Deployment

- [ ] Environment variables configured
- [ ] Database migrations run
- [ ] HTTPS enabled
- [ ] Admin URL secured

---

## Anti-Patterns

- **Editing .medusa folder** - Auto-generated, will be overwritten
- **Direct database access** - Use services and modules
- **Skipping workflows for complex ops** - Workflows provide rollback
- **Hardcoding URLs** - Use environment variables
- **Ignoring TypeScript errors** - Framework relies on types


================================================
FILE: skills/mnemos/SKILL.md
================================================
---
name: mnemos
description: Task-scoped memory lifecycle — typed MnemoGraph prevents lossy context compaction by treating facts/decisions/code-refs/handoffs as distinct node types with per-type eviction policies
when-to-use: "When you need durable working memory across compactions — checkpoint decisions, preserve task handoffs, or audit what was remembered"
user-invocable: false
effort: high
---

# Mnemos — Task-Scoped Memory Lifecycle

## What It Does

Mnemos prevents lossy context compaction from destroying the structured knowledge you need most. It treats your working memory as a **typed graph** (MnemoGraph) where different types of knowledge have different eviction policies:

- **GoalNodes** and **ConstraintNodes** are NEVER evicted — they survive all compaction
- **ResultNodes** are compressed (summary kept) before eviction
- **ContextNodes** are evictable when their activation weight drops
- **CheckpointNodes** persist to disk for session resume

## Fatigue Model

Mnemos monitors 4 dimensions of "agent fatigue" — all passively observed from hook data, no manual input needed:

| Dimension | Weight | Signal Source | What It Measures |
|-----------|--------|--------------|-----------------|
| Token utilization | 0.40 | Statusline JSON | How full the context window is |
| Scope scatter | 0.25 | PreToolUse file paths | How many directories the agent is bouncing between |
| Re-read ratio | 0.20 | PreToolUse Read calls | How often the agent re-reads files it already read (context loss) |
| Error density | 0.15 | PostToolUse outcomes | What fraction of tool calls are failing (agent struggling) |

Fatigue states and actions:

| State | Score | Action |
|-------|-------|--------|
| FLOW | 0.0–0.4 | Normal operation |
| COMPRESS | 0.4–0.6 | Micro-consolidation runs (compress 3 ResultNodes, evict 1 cold ContextNode) |
| PRE-SLEEP | 0.6–0.75 | Checkpoint written, consolidation runs |
| REM | 0.75–0.9 | Emergency checkpoint, consider wrapping up |
| EMERGENCY | 0.9+ | Checkpoint written, hand off immediately |

## How To Use

### Automatic (hooks handle everything):
1. **Statusline** writes `fatigue.json` on every API call
2. **PreToolUse** hook reads fatigue before every edit, auto-checkpoints at 0.60+
3. **PreCompact** hook writes emergency checkpoint, compaction marker, and tells summarizer what to preserve
4. **Post-Compaction Injection** (PreToolUse, no matcher) detects the compaction marker on the first tool call after compaction and re-injects the full checkpoint into context
5. **SessionStart** hook loads last checkpoint on new session resume

### Post-Compaction Recovery (Two-Layer Defense):
When Claude Code compacts the context (~83% full), Mnemos uses two layers:
- **Layer 1**: PreCompact outputs strong preservation instructions with inline checkpoint content for the summarizer
- **Layer 2**: After compaction, the first tool call triggers `mnemos-post-compact-inject.sh` which detects the `.mnemos/just-compacted` marker and re-injects the full checkpoint. This is the guaranteed path — it doesn't depend on the summarizer.

The result: after compaction, you'll see a "CONTEXT RESTORED AFTER COMPACTION" block with your goal, constraints, what you were working on, and progress. Resume from there.

### Manual CLI:
```bash
mnemos init                    # Initialize .mnemos/
mnemos status                  # Show node counts + fatigue
mnemos fatigue                 # Detailed fatigue breakdown
mnemos checkpoint --force      # Write checkpoint now
mnemos resume                  # Output checkpoint for context
mnemos consolidate             # Run micro-consolidation
mnemos nodes --type goal       # List active GoalNodes
mnemos add goal "Build auth"   # Add a GoalNode
mnemos bridge-icpg             # Import iCPG ReasonNodes
```

## Agent Instructions

When working on a task:

1. **Create a GoalNode** at the start: `mnemos add goal "what you're trying to achieve" --task-id session-1`
2. **Add ConstraintNodes** for invariants: `mnemos add constraint "API backward compatibility" --scope src/api/`
3. **Check fatigue** before long operations: `mnemos fatigue`
4. **Checkpoint at sub-goal boundaries**: `mnemos checkpoint`
5. **On session resume**: the SessionStart hook automatically loads your checkpoint

## iCPG Integration

Mnemos bridges with iCPG (Intent-Augmented Code Property Graph):
- `mnemos bridge-icpg` imports active ReasonNodes as GoalNodes
- Postconditions/invariants become ConstraintNodes
- Checkpoint includes iCPG state (active intent, unresolved drift)

## Storage

Everything lives in `.mnemos/` (gitignored):
- `mnemo.db` — SQLite MnemoGraph
- `fatigue.json` — Live token metrics (updated per API call by statusline)
- `signals.jsonl` — Behavioral signal log (appended by PreToolUse + PostToolUse hooks)
- `checkpoint-latest.json` — Most recent checkpoint
- `checkpoints/` — Archived checkpoints


================================================
FILE: skills/ms-teams-apps/SKILL.md
================================================
---
name: ms-teams-apps
description: Microsoft Teams bots and AI agents - Claude/OpenAI, Adaptive Cards, Graph API
when-to-use: When building Microsoft Teams bots, tabs, or message extensions
user-invocable: false
effort: medium
---

# Microsoft Teams Apps Skill


**Purpose:** Build AI-powered agents and apps for Microsoft Teams. Create conversational bots, message extensions, and intelligent assistants that integrate with LLMs like OpenAI and Claude.

---

## Architecture Overview

```
┌─────────────────────────────────────────────────────────────────┐
│  TEAMS APP TYPES                                                 │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  1. AI AGENTS (Bots)                                            │
│     Conversational apps powered by LLMs                         │
│     Handle messages, commands, and actions                      │
│                                                                 │
│  2. MESSAGE EXTENSIONS                                          │
│     Search external systems, insert cards into messages         │
│     Action commands with modal dialogs                          │
│                                                                 │
│  3. TABS                                                        │
│     Embedded web applications inside Teams                      │
│     Personal, channel, or meeting tabs                          │
│                                                                 │
│  4. WEBHOOKS & CONNECTORS                                       │
│     Incoming: Post messages to channels                         │
│     Outgoing: Respond to @mentions                              │
├─────────────────────────────────────────────────────────────────┤
│  SDK LANDSCAPE (2025)                                           │
│  ─────────────────────────────────────────────────────────────  │
│  Teams SDK v2: Primary SDK for Teams-only apps                  │
│  M365 Agents SDK: Multi-channel (Teams, Outlook, Copilot)       │
│  Teams Toolkit: VS Code extension for development               │
└─────────────────────────────────────────────────────────────────┘
```

---

## Quick Start

### Install Teams CLI

```bash
npm install -g @microsoft/teams.cli
```

### Create New Project

```bash
# TypeScript (Recommended)
npx @microsoft/teams.cli new typescript my-agent --template echo

# Python
npx @microsoft/teams.cli new python my-agent --template echo

# C#
npx @microsoft/teams.cli new csharp my-agent --template echo
```

### Project Structure

```
my-agent/
├── src/
│   ├── index.ts              # Entry point
│   ├── app.ts                # App configuration
│   └── handlers/
│       ├── message.ts        # Message handlers
│       └── commands.ts       # Command handlers
├── appPackage/
│   ├── manifest.json         # App manifest
│   ├── color.png             # App icon (192x192)
│   └── outline.png           # Outline icon (32x32)
├── .env                      # Environment variables
├── teamsapp.yml              # Teams Toolkit config
└── package.json
```

---

## App Manifest

### Basic Manifest Structure

```json
{
  "$schema": "https://developer.microsoft.com/json-schemas/teams/v1.17/MicrosoftTeams.schema.json",
  "manifestVersion": "1.17",
  "version": "1.0.0",
  "id": "{{APP_ID}}",
  "developer": {
    "name": "Your Company",
    "websiteUrl": "https://yourcompany.com",
    "privacyUrl": "https://yourcompany.com/privacy",
    "termsOfUseUrl": "https://yourcompany.com/terms"
  },
  "name": {
    "short": "AI Assistant",
    "full": "AI Assistant for Teams"
  },
  "description": {
    "short": "Your AI-powered assistant",
    "full": "An intelligent assistant that helps you with tasks using AI."
  },
  "icons": {
    "color": "color.png",
    "outline": "outline.png"
  },
  "accentColor": "#5558AF",
  "bots": [
    {
      "botId": "{{BOT_ID}}",
      "scopes": ["personal", "team", "groupChat"],
      "supportsFiles": false,
      "isNotificationOnly": false,
      "commandLists": [
        {
          "scopes": ["personal", "team", "groupChat"],
          "commands": [
            {
              "title": "help",
              "description": "Show available commands"
            },
            {
              "title": "ask",
              "description": "Ask the AI a question"
            }
          ]
        }
      ]
    }
  ],
  "permissions": ["identity", "messageTeamMembers"],
  "validDomains": ["*.azurewebsites.net"]
}
```

### Manifest with Message Extensions

```json
{
  "composeExtensions": [
    {
      "botId": "{{BOT_ID}}",
      "commands": [
        {
          "id": "searchQuery",
          "type": "query",
          "title": "Search",
          "description": "Search for information",
          "initialRun": true,
          "parameters": [
            {
              "name": "query",
              "title": "Search query",
              "description": "Enter your search terms",
              "inputType": "text"
            }
          ]
        },
        {
          "id": "createTask",
          "type": "action",
          "title": "Create Task",
          "description": "Create a new task",
          "fetchTask": true,
          "context": ["compose", "commandBox", "message"]
        }
      ]
    }
  ]
}
```

---

## AI Agent Development

### Basic Bot with Teams SDK v2

```typescript
// src/app.ts
import { App, HttpPlugin, DevtoolsPlugin } from '@microsoft/teams.ai';
import { OpenAIModel, ActionPlanner, PromptManager } from '@microsoft/teams.ai';

// Configure the AI model
const model = new OpenAIModel({
  azureApiKey: process.env.AZURE_OPENAI_API_KEY!,
  azureDefaultDeployment: process.env.AZURE_OPENAI_DEPLOYMENT!,
  azureEndpoint: process.env.AZURE_OPENAI_ENDPOINT!,
  // Or use OpenAI directly:
  // apiKey: process.env.OPENAI_API_KEY!,
  // defaultModel: 'gpt-4'
});

// Configure prompts
const prompts = new PromptManager({
  promptsFolder: './src/prompts'
});

// Create action planner
const planner = new ActionPlanner({
  model,
  prompts,
  defaultPrompt: 'chat'
});

// Create the app
const app = new App({
  plugins: [
    new HttpPlugin(),
    new DevtoolsPlugin()
  ],
  ai: {
    planner
  }
});

// Handle messages
app.on('message', async (context, state) => {
  // AI automatically handles the conversation
  // The planner uses the 'chat' prompt to generate responses
});

// Handle specific commands
app.message('/help', async (context, state) => {
  await context.sendActivity({
    type: 'message',
    text: 'Available commands:\n- /help - Show this message\n- /ask [question] - Ask me anything'
  });
});

// Start the app
app.start();
```

### Prompt Configuration

```jsonc
// src/prompts/chat/config.json
{
  "schema": 1.1,
  "description": "AI Assistant for Teams",
  "type": "completion",
  "completion": {
    "model": "gpt-4",
    "max_tokens": 1000,
    "temperature": 0.7,
    "top_p": 1
  }
}
```

```text
# src/prompts/chat/skprompt.txt
You are an AI assistant for Microsoft Teams. You help users with their questions and tasks.

Current conversation:
{{$history}}

User: {{$input}}
Assistant:
```

---

## Integrating Claude/Anthropic

### Claude-Powered Teams Bot

```typescript
// src/claude-bot.ts
import { App, HttpPlugin } from '@microsoft/teams.ai';
import Anthropic from '@anthropic-ai/sdk';

const anthropic = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY!
});

const app = new App({
  plugins: [new HttpPlugin()]
});

// Conversation history store
const conversations = new Map<string, Anthropic.MessageParam[]>();

// Main message handler: forwards the user's text to Claude together with the
// per-user conversation history and replies with Claude's answer.
app.on('message', async (context, state) => {
  const userId = context.activity.from.id;

  // Activities without text have nothing to forward to Claude.
  const userMessage = context.activity.text?.trim();
  if (!userMessage) {
    return;
  }

  // Get or initialize conversation history
  let history = conversations.get(userId);
  if (!history) {
    history = [];
    conversations.set(userId, history);
  }

  // Show typing indicator
  await context.sendActivity({ type: 'typing' });

  // Add user message to history
  history.push({ role: 'user', content: userMessage });
  // Tracks whether this user turn got an assistant reply recorded, so the
  // error path knows if the turn must be rolled back.
  let turnAnswered = false;

  try {
    // Call Claude API
    const response = await anthropic.messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 1024,
      system: `You are an AI assistant integrated into Microsoft Teams.
        Help users with their questions and tasks.
        Be concise and helpful. Use markdown formatting when appropriate.
        Current user: ${context.activity.from.name}`,
      messages: history
    });

    // Join every text block instead of reading only the first content block.
    const assistantMessage = response.content
      .map(block => (block.type === 'text' ? block.text : ''))
      .join('');
    if (!assistantMessage) {
      // An empty assistant turn must not be stored or sent; treat it as a failure.
      throw new Error('Claude returned no text content');
    }

    // Add assistant response to history
    history.push({ role: 'assistant', content: assistantMessage });
    turnAnswered = true;

    // Keep history manageable (last 20 messages). History holds complete
    // user/assistant pairs here, so dropping from the front keeps it
    // starting with a user turn.
    if (history.length > 20) {
      history.splice(0, history.length - 20);
    }

    // Send response
    await context.sendActivity({
      type: 'message',
      text: assistantMessage
    });

  } catch (error) {
    // Roll back the unanswered user turn so the next request does not replay
    // it and the history keeps alternating user/assistant.
    if (!turnAnswered) {
      history.pop();
    }

    console.error('Claude API error:', error);
    await context.sendActivity({
      type: 'message',
      text: 'Sorry, I encountered an error processing your request.'
    });
  }
});

// Clear conversation command
app.message('/clear', async (context, state) => {
  const userId = context.activity.from.id;
  conversations.delete(userId);
  await context.sendActivity('Conversation cleared. Starting fresh!');
});

app.start();
```

### Claude with Tools/Function Calling

```typescript
// src/claude-agent.ts
import Anthropic from '@anthropic-ai/sdk';

const anthropic = new Anthropic();

// Define tools the agent can use
const tools: Anthropic.Tool[] = [
  {
    name: 'search_knowledge_base',
    description: 'Search the company knowledge base for information',
    input_schema: {
      type: 'object' as const,
      properties: {
        query: {
          type: 'string',
          description: 'The search query'
        }
      },
      required: ['query']
    }
  },
  {
    name: 'create_task',
    description: 'Create a new task in the task management system',
    input_schema: {
      type: 'object' as const,
      properties: {
        title: { type: 'string', description: 'Task title' },
        description: { type: 'string', description: 'Task description' },
        assignee: { type: 'string', description: 'Person to assign the task to' },
        due_date: { type: 'string', description: 'Due date in YYYY-MM-DD format' }
      },
      required: ['title']
    }
  },
  {
    name: 'get_calendar',
    description: 'Get calendar events for a user',
    input_schema: {
      type: 'object' as const,
      properties: {
        user: { type: 'string', description: 'User email or name' },
        days: { type: 'number', description: 'Number of days to look ahead' }
      },
      required: ['user']
    }
  }
];

// Tool implementations
/**
 * Runs the named tool with the given input and returns its textual result.
 *
 * @param toolName  Name of the tool requested by the model.
 * @param toolInput Input object for that tool (shape depends on the tool).
 * @returns The tool's result text, or 'Unknown tool' for an unrecognized name.
 */
async function executeTools(toolName: string, toolInput: any): Promise<string> {
  // A Map (not a plain object) so names such as "toString" never match.
  const handlers = new Map<string, (input: any) => string>([
    // Implement your search logic
    ['search_knowledge_base', (input) =>
      `Found 3 results for "${input.query}":\n1. Document A\n2. Document B\n3. Document C`],
    // Implement task creation (e.g., call Microsoft Graph API)
    ['create_task', (input) => `Task created: "${input.title}"`],
    // Implement calendar lookup
    ['get_calendar', (input) => `Calendar for ${input.user}: 2 meetings today`],
  ]);

  const handler = handlers.get(toolName);
  if (handler === undefined) {
    return 'Unknown tool';
  }
  return handler(toolInput);
}

// Agent loop with tool use
/**
 * Runs the tool-use loop for a single user message.
 *
 * Sends the message to Claude, executes any tools Claude asks for, feeds the
 * results back, and repeats until Claude produces a final answer.
 *
 * @param userMessage The user's request.
 * @returns The first text block of the final response, or 'No response'.
 * @throws Error if the model is still requesting tools after MAX_TURNS rounds.
 */
async function runAgent(userMessage: string): Promise<string> {
  // Upper bound on model round-trips so a misbehaving loop cannot run forever.
  const MAX_TURNS = 10;

  const messages: Anthropic.MessageParam[] = [
    { role: 'user', content: userMessage }
  ];

  for (let turn = 0; turn < MAX_TURNS; turn++) {
    const response = await anthropic.messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 1024,
      system: 'You are a helpful Teams assistant. Use tools when needed to help users.',
      tools,
      messages
    });

    // No tool call requested: return final text response
    if (response.stop_reason !== 'tool_use') {
      // Type predicate narrows the content-block union so `.text` is valid.
      const textContent = response.content.find(
        (block): block is Anthropic.TextBlock => block.type === 'text'
      );
      return textContent?.text || 'No response';
    }

    // Execute every requested tool and gather the results.
    const toolResults: Anthropic.ToolResultBlockParam[] = [];
    for (const block of response.content) {
      if (block.type === 'tool_use') {
        toolResults.push({
          type: 'tool_result',
          tool_use_id: block.id,
          content: await executeTools(block.name, block.input)
        });
      }
    }

    // All tool results go back in ONE user message directly after the
    // assistant turn that requested them.
    messages.push({ role: 'assistant', content: response.content });
    messages.push({ role: 'user', content: toolResults });
  }

  throw new Error(`Agent did not finish within ${MAX_TURNS} tool-use turns`);
}
```

---

## Adaptive Cards

### Basic Adaptive Card

```typescript
// src/cards/welcome-card.ts
import { CardFactory } from 'botbuilder';

/**
 * Builds the welcome Adaptive Card attachment shown to a user.
 *
 * @param userName Display name interpolated into the greeting line.
 * @returns The attachment produced by `CardFactory.adaptiveCard`.
 */
export function createWelcomeCard(userName: string) {
  // Headline greeting
  const greeting = {
    type: 'TextBlock',
    text: `Welcome, ${userName}!`,
    size: 'Large',
    weight: 'Bolder'
  };

  // Introductory line below the greeting
  const intro = {
    type: 'TextBlock',
    text: "I'm your AI assistant. How can I help you today?",
    wrap: true
  };

  // Buttons; each submits an `action` value back to the bot
  const quickActions = {
    type: 'ActionSet',
    actions: [
      { type: 'Action.Submit', title: 'Get Started', data: { action: 'getStarted' } },
      { type: 'Action.Submit', title: 'View Help', data: { action: 'help' } }
    ]
  };

  return CardFactory.adaptiveCard({
    type: 'AdaptiveCard',
    $schema: 'http://adaptivecards.io/schemas/adaptive-card.json',
    version: '1.5',
    body: [greeting, intro, quickActions]
  });
}
```

### AI Response Card with Actions

```typescript
// src/cards/ai-response-card.ts
/**
 * Builds an Adaptive Card that shows a question, the AI's answer, optional
 * sources, and feedback / follow-up buttons.
 *
 * @param question The user's original question.
 * @param answer   The AI-generated answer.
 * @param sources  Optional source labels; the "Sources" section is omitted
 *                 when this is missing or empty.
 * @returns The Adaptive Card payload as a plain object.
 */
export function createAIResponseCard(
  question: string,
  answer: string,
  sources?: string[]
) {
  return {
    type: 'AdaptiveCard',
    $schema: 'http://adaptivecards.io/schemas/adaptive-card.json',
    version: '1.5',
    body: [
      {
        type: 'Container',
        style: 'emphasis',
        items: [
          {
            type: 'TextBlock',
            text: 'Your Question',
            size: 'Small',
            weight: 'Bolder'
          },
          {
            type: 'TextBlock',
            text: question,
            wrap: true
          }
        ]
      },
      {
        type: 'Container',
        items: [
          {
            type: 'TextBlock',
            text: 'AI Response',
            size: 'Small',
            weight: 'Bolder'
          },
          {
            type: 'TextBlock',
            text: answer,
            wrap: true
          }
        ]
      },
      // Only add the "Sources" section when there is at least one source.
      ...(sources && sources.length > 0 ? [{
        type: 'Container',
        items: [
          {
            type: 'TextBlock',
            text: 'Sources',
            size: 'Small',
            weight: 'Bolder'
          },
          ...sources.map(source => ({
            type: 'TextBlock',
            text: `• ${source}`,
            size: 'Small',
            // Sources are often long URLs or titles; without wrap they are clipped.
            wrap: true
          }))
        ]
      }] : [])
    ],
    actions: [
      {
        type: 'Action.Submit',
        title: '👍 Helpful',
        data: { action: 'feedback', value: 'positive' }
      },
      {
        type: 'Action.Submit',
        title: '👎 Not Helpful',
        data: { action: 'feedback', value: 'negative' }
      },
      {
        type: 'Action.Submit',
        title: 'Ask Follow-up',
        data: { action: 'followUp' }
      }
    ]
  };
}
```

### Form Card for User Input

```typescript
// src/cards/task-form-card.ts
/**
 * Builds the "Create New Task" Adaptive Card form.
 *
 * The form has title, description, priority and due-date inputs, plus
 * "Create Task" and "Cancel" submit actions.
 *
 * @returns The Adaptive Card payload as a plain object.
 */
export function createTaskFormCard() {
  // Priority options; 'medium' is the preselected value below.
  const priorityChoices = [
    { title: 'High', value: 'high' },
    { title: 'Medium', value: 'medium' },
    { title: 'Low', value: 'low' }
  ];

  // Heading followed by the input fields, in display order.
  const formBody = [
    { type: 'TextBlock', text: 'Create New Task', size: 'Large', weight: 'Bolder' },
    {
      type: 'Input.Text',
      id: 'taskTitle',
      label: 'Task Title',
      isRequired: true,
      placeholder: 'Enter task title'
    },
    {
      type: 'Input.Text',
      id: 'taskDescription',
      label: 'Description',
      isMultiline: true,
      placeholder: 'Enter task description'
    },
    {
      type: 'Input.ChoiceSet',
      id: 'priority',
      label: 'Priority',
      choices: priorityChoices,
      value: 'medium'
    },
    { type: 'Input.Date', id: 'dueDate', label: 'Due Date' }
  ];

  // Both buttons submit; the `action` value tells the handler which was pressed.
  const formActions = [
    { type: 'Action.Submit', title: 'Create Task', data: { action: 'createTask' } },
    { type: 'Action.Submit', title: 'Cancel', data: { action: 'cancel' } }
  ];

  return {
    type: 'AdaptiveCard',
    $schema: 'http://adaptivecards.io/schemas/adaptive-card.json',
    version: '1.5',
    body: formBody,
    actions: formActions
  };
}
```

---

## Microsoft Graph Integration

### Setup Graph Client

```typescript
// src/graph/client.ts
import { Client } from '@microsoft/microsoft-graph-client';
import { TokenCredentialAuthenticationProvider } from '@microsoft/microsoft-graph-client/authProviders/azureTokenCredentials';
import { ClientSecretCredential } from '@azure/identity';

/**
 * Creates a Microsoft Graph client that authenticates with a client-secret
 * credential built from AZURE_TENANT_ID, AZURE_CLIENT_ID and
 * AZURE_CLIENT_SECRET, requesting the Graph `.default` scope.
 */
export function createGraphClient() {
  const { AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET } = process.env;

  const authProvider = new TokenCredentialAuthenticationProvider(
    new ClientSecretCredential(
      AZURE_TENANT_ID!,
      AZURE_CLIENT_ID!,
      AZURE_CLIENT_SECRET!
    ),
    { scopes: ['https://graph.microsoft.com/.default'] }
  );

  return Client.initWithMiddleware({ authProvider });
}
```

### Common Graph Operations

```typescript
// src/graph/operations.ts
import { Client } from '@microsoft/microsoft-graph-client';

/**
 * Convenience wrapper around a Microsoft Graph `Client`.
 *
 * Every method builds one Graph request and returns the promise produced by
 * the client, so callers receive the raw Graph response.
 */
export class GraphOperations {
  constructor(private client: Client) {}

  /** Fetches the profile of the user identified by `userId`. */
  async getUserProfile(userId: string) {
    const request = this.client.api(`/users/${userId}`);
    return request.get();
  }

  /**
   * Lists the user's calendar events from now until `days` days ahead
   * (7 by default), ordered by start time.
   */
  async getCalendarEvents(userId: string, days: number = 7) {
    const windowMs = days * 24 * 60 * 60 * 1000;
    const startDateTime = new Date().toISOString();
    const endDateTime = new Date(Date.now() + windowMs).toISOString();

    const request = this.client
      .api(`/users/${userId}/calendarView`)
      .query({ startDateTime, endDateTime })
      .select('subject,start,end,location')
      .orderby('start/dateTime');

    return request.get();
  }

  /** Sends an HTML email from `fromUserId` to the single recipient `to`. */
  async sendEmail(
    fromUserId: string,
    to: string,
    subject: string,
    body: string
  ) {
    const message = {
      subject,
      body: { contentType: 'HTML', content: body },
      toRecipients: [{ emailAddress: { address: to } }]
    };

    return this.client.api(`/users/${fromUserId}/sendMail`).post({ message });
  }

  /**
   * Creates an online meeting for `userId`; each entry of `attendees` is
   * sent as the attendee's UPN with the "attendee" role.
   */
  async createMeeting(
    userId: string,
    subject: string,
    startTime: string,
    endTime: string,
    attendees: string[]
  ) {
    const participants = {
      attendees: attendees.map((upn) => ({ upn, role: 'attendee' }))
    };

    return this.client.api(`/users/${userId}/onlineMeetings`).post({
      subject,
      startDateTime: startTime,
      endDateTime: endTime,
      participants
    });
  }

  /** Posts `message` as a new message in the given team channel. */
  async postToChannel(teamId: string, channelId: string, message: string) {
    const payload = { body: { content: message } };
    const endpoint = `/teams/${teamId}/channels/${channelId}/messages`;

    return this.client.api(endpoint).post(payload);
  }
}
```

---

## Authentication

### SSO with Teams SDK

```typescript
// src/auth.ts
import { App } from '@microsoft/teams.ai';

const app = new App({
  // ... other config
});

app.on('message', async ({ userGraph, isSignedIn, send, signin }) => {
  // Check if user is signed in
  if (!isSignedIn) {
    // Initiate sign-in flow
    await signin();
    return;
  }

  // User is signed in, access Graph API
  const me = await userGraph.call({
    method: 'GET',
    path: '/me'
  });

  await send(`Hello, ${me.displayName}!`);
});
```

### Manual OAuth Flow

```typescript
// src/auth/oauth.ts
import { CardFactory } from 'botbuilder';
import { OAuthPrompt, OAuthPromptSettings } from 'botbuilder-dialogs';

const oauthSettings: OAuthPromptSettings = {
  connectionName: process.env.OAUTH_CONNECTION_NAME!,
  text: 'Please sign in to continue',
  title: 'Sign In',
  timeout: 300000 // 5 minutes
};

/**
 * Returns the user's cached OAuth token for the configured connection, or
 * prompts the user to sign in when no token is available.
 *
 * @param context Turn context of the incoming activity; its adapter is asked
 *                for the stored token and it is used to send the sign-in card.
 * @param state   Not read by this function; kept so the dialog call
 *                signature stays unchanged.
 * @returns The access token string, or `null` after a sign-in card was sent.
 */
// In your dialog
async function handleAuth(context, state) {
  const tokenResponse = await context.adapter.getUserToken(
    context,
    oauthSettings.connectionName
  );

  if (!tokenResponse?.token) {
    // No token, show sign-in card
    await context.sendActivity({
      attachments: [
        CardFactory.oauthCard(
          oauthSettings.connectionName,
          oauthSettings.title,
          oauthSettings.text
        )
      ]
    });
    return null;
  }

  return tokenResponse.token;
}
```

---

## RAG (Retrieval-Augmented Generation)

### Vector Search with Azure AI Search

```typescript
// src/rag/azure-search.ts
import { SearchClient, AzureKeyCredential } from '@azure/search-documents';

const searchClient = new SearchClient(
  process.env.AZURE_SEARCH_ENDPOINT!,
  process.env.AZURE_SEARCH_INDEX!,
  new AzureKeyCredential(process.env.AZURE_SEARCH_KEY!)
);

/**
 * Runs a semantic query against the configured search index and formats
 * each hit as "<title>: <content>".
 *
 * @param query Free-text search query.
 * @param topK  Maximum number of hits to request (defaults to 5).
 * @returns One formatted string per returned document, in result order.
 */
export async function searchKnowledgeBase(
  query: string,
  topK: number = 5
): Promise<string[]> {
  const { results: hits } = await searchClient.search(query, {
    top: topK,
    select: ['content', 'title', 'source'],
    queryType: 'semantic',
    semanticConfiguration: 'default'
  });

  // The hits arrive as an async iterator, so they are drained one by one
  const formatted: string[] = [];
  for await (const { document } of hits) {
    formatted.push(`${document.title}: ${document.content}`);
  }

  return formatted;
}
```

### RAG-Enhanced Claude Response

```typescript
// src/rag/claude-rag.ts
import Anthropic from '@anthropic-ai/sdk';
import { searchKnowledgeBase } from './azure-search';

const anthropic = new Anthropic();

/**
 * Answers `userQuery` with Claude, grounding the reply in documents
 * retrieved from the knowledge base.
 *
 * @param userQuery The user's question.
 * @returns The text of the first content block, or '' when that block is
 *          not a text block.
 */
export async function getRAGResponse(userQuery: string): Promise<string> {
  // Retrieve supporting documents and merge them into one context string
  const relevantDocs = await searchKnowledgeBase(userQuery);
  const context = relevantDocs.join('\n\n---\n\n');
  const prompt = `Context:\n${context}\n\nQuestion: ${userQuery}`;

  // Ask the model, passing the retrieved context alongside the question
  const response = await anthropic.messages.create({
    model: 'claude-sonnet-4-20250514',
    max_tokens: 1024,
    system: `You are a helpful assistant for Teams. Answer questions based on the provided context.
If the context doesn't contain relevant information, say so and provide a general response.
Always cite your sources when using information from the context.`,
    messages: [{ role: 'user', content: prompt }]
  });

  const [firstBlock] = response.content;
  if (firstBlock.type === 'text') {
    return firstBlock.text;
  }
  return '';
}
```

---

## Deployment

### Azure Bot Service Setup

```bash
# Names shared by every resource created below
RESOURCE_GROUP="rg-teams-bot"
PLAN_NAME="asp-teams-bot"
BOT_NAME="my-teams-bot"

# Resource group that holds all bot resources
az group create --name "$RESOURCE_GROUP" --location eastus

# Linux App Service plan (B1 tier)
az appservice plan create \
  --name "$PLAN_NAME" \
  --resource-group "$RESOURCE_GROUP" \
  --sku B1 \
  --is-linux

# Web App that hosts the bot code
az webapp create \
  --name "$BOT_NAME" \
  --resource-group "$RESOURCE_GROUP" \
  --plan "$PLAN_NAME" \
  --runtime "NODE:18-lts"

# Bot registration pointing at the Web App's messaging endpoint
az bot create \
  --resource-group "$RESOURCE_GROUP" \
  --name "$BOT_NAME" \
  --kind registration \
  --endpoint "https://${BOT_NAME}.azurewebsites.net/api/messages" \
  --sku F0

# Connect the bot to the Teams channel
az bot msteams create \
  --name "$BOT_NAME" \
  --resource-group "$RESOURCE_GROUP"
```

### Environment Variables

```bash
# .env
# Azure Bot
BOT_ID=your-bot-id
BOT_PASSWORD=your-bot-password
BOT_TENANT_ID=your-tenant-id

# Azure OpenAI
AZURE_OPENAI_API_KEY=your-key
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_DEPLOYMENT=gpt-4

# Or OpenAI
OPENAI_API_KEY=sk-xxx

# Or Anthropic
ANTHROPIC_API_KEY=sk-ant-xxx

# Microsoft Graph
AZURE_CLIENT_ID=your-client-id
AZURE_CLIENT_SECRET=your-client-secret
AZURE_TENANT_ID=your-tenant-id

# Azure AI Search (for RAG)
AZURE_SEARCH_ENDPOINT=https://your-search.search.windows.net
AZURE_SEARCH_KEY=your-key
AZURE_SEARCH_INDEX=knowledge-base
```

### Docker Deployment

```dockerfile
# Dockerfile

# ---- Build stage ----
# Installs ALL dependencies: `npm run build` usually relies on devDependencies
# (compiler, bundler), which a production-only install would leave out.
FROM node:18-alpine AS build

WORKDIR /app

COPY package*.json ./
RUN npm ci

COPY . .
RUN npm run build

# ---- Runtime stage ----
# Ships only production dependencies plus the compiled output.
FROM node:18-alpine

WORKDIR /app

COPY package*.json ./
# --omit=dev replaces the deprecated --only=production flag
RUN npm ci --omit=dev

# Copy any additional runtime assets here if the app reads files outside dist/
COPY --from=build /app/dist ./dist

EXPOSE 3978

CMD ["node", "dist/index.js"]
```

```yaml
# docker-compose.yml
version: '3.8'

services:
  teams-bot:
    build: .
    ports:
      - "3978:3978"
    environment:
      - BOT_ID=${BOT_ID}
      - BOT_PASSWORD=${BOT_PASSWORD}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
    restart: unless-stopped
```

### Teams Toolkit Deployment

```bash
# Login to Azure
npx teamsfx account login azure

# Provision resources
npx teamsfx provision --env dev

# Deploy
npx teamsfx deploy --env dev

# Publish to Teams
npx teamsfx publish --env dev
```

---

## Testing

### Local Testing with ngrok

```bash
# Start ngrok tunnel
ngrok http 3978

# Update manifest with ngrok URL
# Bot endpoint: https://xxxx.ngrok.io/api/messages
```

### Teams Toolkit Local Debug

```bash
# Start local debugging (opens Teams with your app)
npx teamsfx preview --local
```

### Unit Testing

```typescript
// tests/bot.test.ts
import { TestAdapter, TurnContext } from 'botbuilder';
import { createWelcomeCard } from '../src/cards/welcome-card';

// Example test suite: one conversational test driven through TestAdapter
// and one plain unit test of the welcome-card factory.
describe('Bot Tests', () => {
  let adapter: TestAdapter;

  // A fresh adapter is created before every test.
  beforeEach(() => {
    // NOTE(review): no bot logic is passed to TestAdapter here, so nothing
    // visible in this snippet produces the reply asserted below — presumably
    // the bot's turn handler should be supplied to the constructor; confirm.
    adapter = new TestAdapter();
  });

  // Sends "hello" and expects the reply text to contain "Hello".
  test('should respond to hello', async () => {
    await adapter
      .send('hello')
      .assertReply((activity) => {
        expect(activity.text).toContain('Hello');
      });
  });

  // The first body element of the card content should mention the user name.
  test('should create welcome card', () => {
    const card = createWelcomeCard('John');
    expect(card.content.body[0].text).toContain('John');
  });
});
```

---

## Best Practices

### Conversation Design

```
┌─────────────────────────────────────────────────────────────────┐
│  CONVERSATION UX GUIDELINES                                     │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  1. GREET INTELLIGENTLY                                         │
│     - Welcome new users with onboarding card                    │
│     - Returning users get quick access to recent actions        │
│                                                                 │
│  2. HANDLE ERRORS GRACEFULLY                                    │
│     - Never show stack traces to users                          │
│     - Provide clear recovery options                            │
│     - Log errors for debugging                                  │
│                                                                 │
│  3. USE CARDS FOR RICH CONTENT                                  │
│     - Adaptive Cards for forms and structured data              │
│     - Hero Cards for simple actions                             │
│     - Keep cards concise and actionable                         │
│                                                                 │
│  4. TYPING INDICATORS                                           │
│     - Show typing for long operations                           │
│     - Provide progress updates for very long tasks              │
│                                                                 │
│  5. CONTEXT AWARENESS                                           │
│     - Remember conversation history                             │
│     - Personalize based on user preferences                     │
│     - Respect team/channel context                              │
└─────────────────────────────────────────────────────────────────┘
```

### Security Checklist

- [ ] Validate all incoming messages
- [ ] Use App-Only auth for Graph API when possible
- [ ] Never log sensitive user data
- [ ] Implement rate limiting
- [ ] Use managed identity in Azure
- [ ] Rotate secrets regularly
- [ ] Enable audit logging

### Performance Tips

| Tip | Description |
|-----|-------------|
| Cache Graph tokens | Token refresh is expensive |
| Stream long responses | Use typing indicator + chunked responses |
| Index knowledge base | Pre-embed documents for RAG |
| Use connection pooling | Reuse HTTP connections |
| Compress payloads | Gzip large card responses |

---

## Project Templates

### AI Assistant Template

```typescript
// Complete AI assistant with Claude
import { App, HttpPlugin } from '@microsoft/teams.ai';
import Anthropic from '@anthropic-ai/sdk';
import { createWelcomeCard } from './cards/welcome-card';
import { createAIResponseCard } from './cards/ai-response-card';

const anthropic = new Anthropic();
const app = new App({ plugins: [new HttpPlugin()] });
const conversations = new Map<string, Anthropic.MessageParam[]>();

// Welcome new users
app.conversationUpdate('membersAdded', async (context) => {
  for (const member of context.activity.membersAdded || []) {
    if (member.id !== context.activity.recipient.id) {
      await context.sendActivity({
        attachments: [createWelcomeCard(member.name || 'User')]
      });
    }
  }
});

// Handle messages
/**
 * Message handler: keeps a per-user conversation history, asks Claude for a
 * reply and sends it back as an Adaptive Card.
 *
 * - Activities without text are ignored (nothing to send to the model).
 * - If the model call fails or returns no text, the pending user turn is
 *   rolled back so the stored history stays a clean user/assistant sequence,
 *   and the user gets a short apology instead of an unhandled error.
 * - History is capped so memory use and prompt size stay bounded.
 */
app.on('message', async (context) => {
  // Maximum number of user/assistant pairs kept per user
  const MAX_TURNS = 20;

  const userId = context.activity.from.id;
  const userMessage = context.activity.text;

  // An empty user message cannot be answered
  if (!userMessage || !userMessage.trim()) {
    return;
  }

  // Initialize or get conversation
  if (!conversations.has(userId)) {
    conversations.set(userId, []);
  }
  const history = conversations.get(userId)!;
  history.push({ role: 'user', content: userMessage });

  // Show typing
  await context.sendActivity({ type: 'typing' });

  // Get AI response
  let answer = '';
  try {
    const response = await anthropic.messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 1024,
      system: 'You are a helpful Teams assistant.',
      messages: history
    });

    const firstBlock = response.content[0];
    answer = firstBlock && firstBlock.type === 'text' ? firstBlock.text : '';
    if (!answer) {
      throw new Error('Model returned no text content');
    }
  } catch (error) {
    // Drop the unanswered user turn so the next request starts from a
    // consistent history
    history.pop();
    console.error('AI request failed:', error);
    await context.sendActivity('Sorry, something went wrong. Please try again.');
    return;
  }

  history.push({ role: 'assistant', content: answer });

  // History grows by whole pairs, so trimming an even number of entries from
  // the front keeps a user message first
  if (history.length > MAX_TURNS * 2) {
    history.splice(0, history.length - MAX_TURNS * 2);
  }

  // Send rich card response
  await context.sendActivity({
    attachments: [{
      contentType: 'application/vnd.microsoft.card.adaptive',
      content: createAIResponseCard(userMessage, answer)
    }]
  });
});

// Handle card actions
app.on('adaptiveCard/action', async (context) => {
  const action = context.activity.value?.action;

  switch (action) {
    case 'feedback':
      // Log feedback
      console.log('Feedback:', context.activity.value);
      await context.sendActivity('Thanks for your feedback!');
      break;
    case 'followUp':
      await context.sendActivity('What would you like to know more about?');
      break;
  }
});

app.start();
```

---

## Troubleshooting

| Issue | Cause | Fix |
|-------|-------|-----|
| Bot not responding | Endpoint unreachable | Check ngrok/Azure URL in manifest |
| Auth failures | Token expired/invalid | Refresh OAuth connection |
| Cards not rendering | Invalid schema | Validate at adaptivecards.io/designer |
| Graph 403 errors | Missing permissions | Check app registration permissions |
| Slow responses | API latency | Add typing indicator, consider streaming |

---

## Resources

- [Teams SDK Documentation](https://microsoft.github.io/teams-sdk/)
- [Teams Platform Docs](https://learn.microsoft.com/en-us/microsoftteams/platform/)
- [Adaptive Cards Designer](https://adaptivecards.io/designer/)
- [Microsoft Graph Explorer](https://developer.microsoft.com/en-us/graph/graph-explorer)
- [Teams Toolkit](https://learn.microsoft.com/en-us/microsoftteams/platform/toolkit/teams-toolkit-fundamentals)
- [Bot Framework Emulator](https://github.com/Microsoft/BotFramework-Emulator)


================================================
FILE: skills/nodejs-backend/SKILL.md
================================================
---
name: nodejs-backend
description: Node.js backend patterns with Express/Fastify, repositories
when-to-use: When working on Node.js backend code - API routes, middleware, server setup
user-invocable: false
paths: ["src/api/**", "src/routes/**", "src/server/**", "src/middleware/**", "server/**", "api/**"]
effort: medium
---

# Node.js Backend Skill


---

## Project Structure

```
project/
├── src/
│   ├── core/                   # Pure business logic
│   │   ├── types.ts            # Domain types
│   │   ├── errors.ts           # Domain errors
│   │   └── services/           # Pure functions
│   │       ├── user.ts
│   │       └── order.ts
│   ├── infra/                  # Side effects
│   │   ├── http/               # HTTP layer
│   │   │   ├── server.ts       # Server setup
│   │   │   ├── routes/         # Route handlers
│   │   │   └── middleware/     # Express middleware
│   │   ├── db/                 # Database
│   │   │   ├── client.ts       # DB connection
│   │   │   ├── repositories/   # Data access
│   │   │   └── migrations/     # Schema migrations
│   │   └── external/           # Third-party APIs
│   ├── config/                 # Configuration
│   │   └── index.ts            # Env vars, validated
│   └── index.ts                # Entry point
├── tests/
│   ├── unit/
│   └── integration/
├── package.json
└── CLAUDE.md
```

---

## API Design

### Route Handler Pattern
```typescript
// routes/users.ts (located at src/infra/http/routes/users.ts)
import { Router } from 'express';
import { z } from 'zod';
// Paths are relative to src/infra/http/routes/, matching the project layout
import { createUser } from '../../../core/services/user';
import { UserRepository } from '../../db/repositories/user';

// Request body contract for POST /
const CreateUserSchema = z.object({
  email: z.string().email(),
  name: z.string().min(1).max(100),
});

/**
 * Builds the router for user endpoints.
 *
 * @param userRepo Repository injected by the composition root and handed to
 *                 the `createUser` service.
 * @returns A router exposing `POST /`, which validates the body, creates the
 *          user and responds with 201 and the created user. Validation and
 *          service errors are forwarded to the error-handling middleware.
 */
export function createUserRoutes(userRepo: UserRepository): Router {
  const router = Router();

  router.post('/', async (req, res, next) => {
    try {
      const input = CreateUserSchema.parse(req.body);
      const user = await createUser(input, userRepo);
      res.status(201).json(user);
    } catch (error) {
      // Forward rejections from the async handler to the error middleware
      next(error);
    }
  });

  return router;
}
```

### Dependency Injection at Composition Root
```typescript
// index.ts
import { createApp } from './infra/http/server';
import { createDbClient } from './infra/db/client';
import { UserRepository } from './infra/db/repositories/user';
import { createUserRoutes } from './infra/http/routes/users';

/**
 * Composition root: creates the database client, wires the repository into
 * the route factory, builds the app and starts listening on port 3000.
 */
async function main(): Promise<void> {
  const db = await createDbClient();
  const userRepo = new UserRepository(db);
  
  const app = createApp({
    userRoutes: createUserRoutes(userRepo),
  });
  
  app.listen(3000);
}

// Actually start the application; a startup failure (e.g. the database
// client cannot be created) is logged and ends the process with a non-zero
// exit code instead of becoming an unhandled rejection.
main().catch((error) => {
  console.error('Failed to start server:', error);
  process.exit(1);
});
```

---

## Error Handling

### Domain Errors
```typescript
// core/errors.ts
/**
 * Base class for errors raised by domain logic. Carries a machine-readable
 * `code` and the HTTP status the error maps to (400 unless specified).
 */
export class DomainError extends Error {
  public readonly code: string;
  public readonly statusCode: number;

  constructor(message: string, code: string, statusCode: number = 400) {
    super(message);
    this.code = code;
    this.statusCode = statusCode;
    this.name = 'DomainError';
  }
}

/** Raised when a resource cannot be found by id; maps to HTTP 404. */
export class NotFoundError extends DomainError {
  constructor(resource: string, id: string) {
    const message = `${resource} with id ${id} not found`;
    super(message, 'NOT_FOUND', 404);
  }
}

/** Raised when input fails domain validation; maps to HTTP 400. */
export class ValidationError extends DomainError {
  constructor(message: string) {
    super(message, 'VALIDATION_ERROR', 400);
  }
}
```

### Global Error Handler
```typescript
// middleware/errorHandler.ts (located at src/infra/http/middleware/errorHandler.ts)
import { ErrorRequestHandler } from 'express';
// Path is relative to src/infra/http/middleware/, matching the project layout
import { DomainError } from '../../../core/errors';
import { ZodError } from 'zod';

/**
 * Express error-handling middleware that turns errors into JSON responses:
 *
 * - `DomainError` → the error's own status code with `{ code, message }`
 * - `ZodError`    → 400 with code `VALIDATION_ERROR` and the issue list
 * - anything else → logged, then a generic 500 that reveals no internals
 *
 * All four parameters are declared even though `_req` and `_next` are not
 * used: Express recognises error handlers by their four-argument signature.
 */
export const errorHandler: ErrorRequestHandler = (err, _req, res, _next) => {
  if (err instanceof DomainError) {
    return res.status(err.statusCode).json({
      error: { code: err.code, message: err.message },
    });
  }

  if (err instanceof ZodError) {
    return res.status(400).json({
      error: { code: 'VALIDATION_ERROR', details: err.errors },
    });
  }

  console.error('Unexpected error:', err);
  return res.status(500).json({
    error: { code: 'INTERNAL_ERROR', message: 'Something went wrong' },
  });
};
```

---

## Database Patterns

### Repository Pattern
```typescript
// db/repositories/user.ts
import { Kysely } from 'kysely';
import { Database, User } from '../types';

/**
 * Data-access object for the `users` table.
 */
export class UserRepository {
  constructor(private db: Kysely<Database>) {}

  /**
   * Looks up a user by primary key.
   *
   * @returns The matching row, or `null` when no user has that id.
   */
  async findById(id: string): Promise<User | null> {
    // Await first: `??` must apply to the resolved row, not to the Promise
    // (a Promise is never nullish, so a miss would resolve to `undefined`).
    const user = await this.db
      .selectFrom('users')
      .where('id', '=', id)
      .selectAll()
      .executeTakeFirst();

    return user ?? null;
  }

  /**
   * Inserts a new user and returns the stored row.
   *
   * @param data User fields other than `id` and `createdAt`.
   * @throws If the insert returns no row (`executeTakeFirstOrThrow`).
   */
  async create(data: Omit<User, 'id' | 'createdAt'>): Promise<User> {
    return this.db
      .insertInto('users')
      .values(data)
      .returningAll()
      .executeTakeFirstOrThrow();
  }
}
```

### Transactions
```typescript
/**
 * Moves `amount` from one account to another inside a single transaction.
 *
 * @param fromId Account to debit.
 * @param toId   Account to credit.
 * @param amount Amount to move; must be a positive, finite number.
 * @param db     Kysely instance used to open the transaction.
 * @throws Error if `amount` is not positive and finite, or if either account
 *         id matches no row. Throwing inside the callback rolls the whole
 *         transaction back, so a one-sided transfer is never committed.
 *
 * NOTE: the source account's balance is not checked here; enforce
 * sufficient funds separately (e.g. with a database constraint).
 */
async function transferFunds(
  fromId: string,
  toId: string,
  amount: number,
  db: Kysely<Database>
): Promise<void> {
  // A zero, negative or NaN amount would be a no-op or reverse the transfer
  if (!Number.isFinite(amount) || amount <= 0) {
    throw new Error('Transfer amount must be a positive, finite number');
  }

  await db.transaction().execute(async (trx) => {
    const debit = await trx
      .updateTable('accounts')
      .set((eb) => ({ balance: eb('balance', '-', amount) }))
      .where('id', '=', fromId)
      .executeTakeFirst();

    if (Number(debit.numUpdatedRows) === 0) {
      throw new Error(`Source account ${fromId} not found`);
    }

    const credit = await trx
      .updateTable('accounts')
      .set((eb) => ({ balance: eb('balance', '+', amount) }))
      .where('id', '=', toId)
      .executeTakeFirst();

    if (Number(credit.numUpdatedRows) === 0) {
      throw new Error(`Destination account ${toId} not found`);
    }
  });
}
```

---

## Configuration

### Validated Config
```typescript
// config/index.ts
import { z } from 'zod';

const ConfigSchema = z.object({
  NODE_ENV: z.enum(['development', 'production', 'test']),
  PORT: z.coerce.number().default(3000),
  DATABASE_URL: z.string().url(),
  API_KEY: z.string().min(1),
});

export type Config = z.infer<typeof ConfigSchema>;

export function loadConfig(): Config {
  return ConfigSchema.parse(process.env);
}
```

---

## Testing

### Unit Tests (Core)
```typescript
// tests/unit/services/user.test.ts
import { createUser } from '../../../src/core/services/user';

describe('createUser', () => {
  it('creates user with valid data', async () => {
    const mockRepo = {
      create: jest.fn().mockResolvedValue({ id: '1', email: 'test@example.com' }),
      findByEmail: jest.fn().mockResolvedValue(null),
    };

    const result = await createUser({ email: 'test@example.com', name: 'Test' }, mockRepo);

    expect(result.email).toBe('test@example.com');
    expect(mockRepo.create).toHaveBeenCalledTimes(1);
  });
});
```

### Integration Tests (API)
```typescript
// tests/integration/users.test.ts
import request from 'supertest';
import type { Express } from 'express';
import { createTestApp, createTestDb } from '../helpers';

// End-to-end check of user creation through the HTTP layer, using the
// test database created by the helpers.
describe('POST /users', () => {
  let app: Express;
  // Derive the handle type from the helper so no extra type export is needed
  let db: Awaited<ReturnType<typeof createTestDb>>;

  // One database and one app instance are shared by the whole suite
  beforeAll(async () => {
    db = await createTestDb();
    app = createTestApp(db);
  });

  // Release the database once the suite has finished
  afterAll(async () => {
    await db.destroy();
  });

  it('creates user and returns 201', async () => {
    const response = await request(app)
      .post('/users')
      .send({ email: 'new@example.com', name: 'New User' });

    expect(response.status).toBe(201);
    expect(response.body.email).toBe('new@example.com');
  });
});
```

---

## Node.js Anti-Patterns

- ❌ Callback hell - use async/await
- ❌ Unhandled promise rejections - always catch or let error handler catch
- ❌ Blocking the event loop - offload heavy computation
- ❌ Secrets in code - use environment variables
- ❌ SQL string concatenation - use parameterized queries
- ❌ No input validation - validate at API boundary
- ❌ `console.log` in production - use a proper logger
- ❌ No graceful shutdown - handle SIGTERM
- ❌ Monolithic route files - split by resource


================================================
FILE: skills/playwright-testing/SKILL.md
================================================
---
name: playwright-testing
description: E2E testing with Playwright - Page Objects, cross-browser, CI/CD
when-to-use: When writing or debugging E2E tests with Playwright
user-invocable: true
paths: ["**/e2e/**", "**/*.spec.ts", "**/playwright/**", "playwright.config.*"]
effort: medium
---

# Playwright E2E Testing Skill


For end-to-end testing of web applications with Playwright - cross-browser, fast, reliable.

**Sources:** [Playwright Best Practices](https://playwright.dev/docs/best-practices) | [Playwright Docs](https://playwright.dev/docs/intro) | [Better Stack Guide](https://betterstack.com/community/guides/testing/playwright-best-practices/)

---

## Setup

### Installation

```bash
# New project
npm init playwright@latest

# Existing project
npm install -D @playwright/test
npx playwright install
```

### Configuration

```typescript
// playwright.config.ts
import { defineConfig, devices } from '@playwright/test';

export default defineConfig({
  testDir: './e2e',
  fullyParallel: true,
  forbidOnly: !!process.env.CI,
  retries: process.env.CI ? 2 : 0,
  workers: process.env.CI ? 1 : undefined,
  reporter: [
    ['html'],
    ['list'],
    process.env.CI ? ['github'] : ['line'],
  ],

  use: {
    baseURL: process.env.BASE_URL || 'http://localhost:3000',
    trace: 'on-first-retry',
    screenshot: 'only-on-failure',
    video: 'retain-on-failure',
  },

  projects: [
    // Auth setup - runs once before all tests
    { name: 'setup', testMatch: /.*\.setup\.ts/ },

    {
      name: 'chromium',
      use: { ...devices['Desktop Chrome'] },
      dependencies: ['setup'],
    },
    {
      name: 'firefox',
      use: { ...devices['Desktop Firefox'] },
      dependencies: ['setup'],
    },
    {
      name: 'webkit',
      use: { ...devices['Desktop Safari'] },
      dependencies: ['setup'],
    },
    // Mobile viewports
    {
      name: 'mobile-chrome',
      use: { ...devices['Pixel 5'] },
      dependencies: ['setup'],
    },
    {
      name: 'mobile-safari',
      use: { ...devices['iPhone 12'] },
      dependencies: ['setup'],
    },
  ],

  // Start dev server before tests
  webServer: {
    command: 'npm run dev',
    url: 'http://localhost:3000',
    reuseExistingServer: !process.env.CI,
    timeout: 120 * 1000,
  },
});
```

---

## Project Structure

```
project/
├── e2e/
│   ├── fixtures/
│   │   ├── auth.fixture.ts      # Auth fixtures
│   │   └── test.fixture.ts      # Extended test with fixtures
│   ├── pages/
│   │   ├── base.page.ts         # Base page object
│   │   ├── login.page.ts        # Login page object
│   │   ├── dashboard.page.ts    # Dashboard page object
│   │   └── index.ts             # Export all pages
│   ├── tests/
│   │   ├── auth.spec.ts         # Auth tests
│   │   ├── dashboard.spec.ts    # Dashboard tests
│   │   └── checkout.spec.ts     # Checkout flow tests
│   ├── utils/
│   │   ├── helpers.ts           # Test helpers
│   │   └── test-data.ts         # Test data factories
│   └── auth.setup.ts            # Global auth setup
├── playwright.config.ts
└── .auth/                        # Stored auth state (gitignored)
```

---

## Locator Strategy (Priority Order)

Use locators that mirror how users interact with the page:

```typescript
// ✅ BEST: Role-based (accessible, resilient)
page.getByRole('button', { name: 'Submit' })
page.getByRole('textbox', { name: 'Email' })
page.getByRole('link', { name: 'Sign up' })
page.getByRole('heading', { name: 'Welcome' })

// ✅ GOOD: User-facing text
page.getByLabel('Email address')
page.getByPlaceholder('Enter your email')
page.getByText('Welcome back')
page.getByTitle('Profile settings')

// ✅ GOOD: Test IDs (stable, explicit)
page.getByTestId('submit-button')
page.getByTestId('user-avatar')

// ⚠️ AVOID: CSS selectors (brittle)
page.locator('.btn-primary')
page.locator('#submit')

// ❌ NEVER: XPath (extremely brittle)
page.locator('//div[@class="container"]/button[1]')
```

### Chaining Locators

```typescript
// Narrow down to specific section
const form = page.getByRole('form', { name: 'Login' });
await form.getByRole('textbox', { name: 'Email' }).fill('user@example.com');
await form.getByRole('button', { name: 'Submit' }).click();

// Filter within a list
const productCard = page.getByTestId('product-card')
  .filter({ hasText: 'Pro Plan' });
await productCard.getByRole('button', { name: 'Buy' }).click();
```

---

## Page Object Model

### Base Page

```typescript
// e2e/pages/base.page.ts
import { Page, Locator } from '@playwright/test';

export abstract class BasePage {
  constructor(protected page: Page) {}

  async navigate(path: string = '/') {
    await this.page.goto(path);
  }

  async waitForPageLoad() {
    await this.page.waitForLoadState('networkidle');
  }

  // Common elements
  get header() {
    return this.page.getByRole('banner');
  }

  get footer() {
    return this.page.getByRole('contentinfo');
  }

  // Common actions
  async clickNavLink(name: string) {
    await this.header.getByRole('link', { name }).click();
  }
}
```

### Page Implementation

```typescript
// e2e/pages/login.page.ts
import { Page, Locator, expect } from '@playwright/test';
import { BasePage } from './base.page';

/**
 * Page object for the login screen: exposes the form's locators plus the
 * actions and expectations tests need.
 */
export class LoginPage extends BasePage {
  readonly emailInput: Locator;
  readonly passwordInput: Locator;
  readonly submitButton: Locator;
  readonly errorMessage: Locator;

  constructor(page: Page) {
    super(page);
    this.emailInput = page.getByLabel('Email');
    this.passwordInput = page.getByLabel('Password');
    this.submitButton = page.getByRole('button', { name: 'Sign in' });
    this.errorMessage = page.getByRole('alert');
  }

  /** Opens the login page. */
  async goto() {
    await this.navigate('/login');
  }

  /** Fills in the credentials and submits the form. */
  async login(email: string, password: string) {
    await this.emailInput.fill(email);
    await this.passwordInput.fill(password);
    await this.submitButton.click();
  }

  /** Asserts that the alert element contains `message`. */
  async expectError(message: string) {
    await expect(this.errorMessage).toContainText(message);
  }

  /** Asserts that the browser ended up on a dashboard URL. */
  async expectLoggedIn() {
    await expect(this.page).toHaveURL(/.*dashboard/);
  }
}
```

```typescript
// e2e/pages/dashboard.page.ts
import { Page, Locator, expect } from '@playwright/test';
import { BasePage } from './base.page';

export class DashboardPage extends BasePage {
  readonly welcomeHeading: Locator;
  readonly userMenu: Locator;
  readonly logoutButton: Locator;

  constructor(page: Page) {
    super(page);
    this.welcomeHeading = page.getByRole('heading', { name: /welcome/i });
    this.userMenu = page.getByTestId('user-menu');
    this.logoutButton = page.getByRole('button', { name: 'Logout' });
  }

  async goto() {
    await this.navigate('/dashboard');
  }

  async logout() {
    await this.userMenu.click();
    await this.logoutButton.click();
  }

  async expectWelcome(name: string) {
    await expect(this.welcomeHeading).toContainText(name);
  }
}
```

### Export All Pages

```typescript
// e2e/pages/index.ts
export { BasePage } from './base.page';
export { LoginPage } from './login.page';
export { DashboardPage } from './dashboard.page';
```

---

## Authentication

### Global Auth Setup

```typescript
// e2e/auth.setup.ts
import { test as setup, expect } from '@playwright/test';
import path from 'path';

const authFile = path.join(__dirname, '../.auth/user.json');

setup('authenticate', async ({ page }) => {
  // Go to login page
  await page.goto('/login');

  // Login with test credentials
  await page.getByLabel('Email').fill(process.env.TEST_USER_EMAIL!);
  await page.getByLabel('Password').fill(process.env.TEST_USER_PASSWORD!);
  await page.getByRole('button', { name: 'Sign in' }).click();

  // Wait for auth to complete
  await expect(page).toHaveURL(/.*dashboard/);

  // Save auth state for reuse
  await page.context().storageState({ path: authFile });
});
```

### Using Auth in Tests

```typescript
// playwright.config.ts
import { defineConfig, devices } from '@playwright/test';

export default defineConfig({
  projects: [
    // Runs the *.setup.ts files first
    { name: 'setup', testMatch: /.*\.setup\.ts/ },
    {
      name: 'chromium',
      use: {
        ...devices['Desktop Chrome'],
        // Reuse the signed-in state saved by the setup project
        storageState: '.auth/user.json',
      },
      dependencies: ['setup'],
    },
  ],
});
```

### Tests Without Auth

```typescript
// e2e/tests/public.spec.ts
import { test, expect } from '@playwright/test';

// Override to skip auth: start from an empty storage state instead of the
// signed-in state configured for the project
test.use({ storageState: { cookies: [], origins: [] } });

test('homepage loads for anonymous users', async ({ page }) => {
  await page.goto('/');
  await expect(page.getByRole('heading', { name: 'Welcome' })).toBeVisible();
});
```

---

## Writing Tests

### Basic Test Structure

```typescript
// e2e/tests/auth.spec.ts
import { test, expect } from '@playwright/test';
import { LoginPage } from '../pages';

// Login-form behaviour: success, wrong credentials and empty submission.
test.describe('Authentication', () => {
  // Skip stored auth for login tests. An empty storage state resets cookies
  // AND per-origin storage; clearing cookies alone would leave any
  // localStorage-based session from the saved state in place.
  test.use({ storageState: { cookies: [], origins: [] } });

  test('successful login redirects to dashboard', async ({ page }) => {
    const loginPage = new LoginPage(page);

    await loginPage.goto();
    await loginPage.login('user@example.com', 'password123');
    await loginPage.expectLoggedIn();
  });

  test('invalid credentials show error', async ({ page }) => {
    const loginPage = new LoginPage(page);

    await loginPage.goto();
    await loginPage.login('wrong@example.com', 'wrongpass');
    await loginPage.expectError('Invalid email or password');
  });

  test('empty form shows validation errors', async ({ page }) => {
    const loginPage = new LoginPage(page);

    await loginPage.goto();
    await loginPage.submitButton.click();

    await expect(page.getByText('Email is required')).toBeVisible();
    await expect(page.getByText('Password is required')).toBeVisible();
  });
});
```

### User Flow Tests

```typescript
// e2e/tests/checkout.spec.ts
import { test, expect } from '@playwright/test';

test.describe('Checkout Flow', () => {
  test('complete purchase flow', async ({ page }) => {
    // 1. Browse products
    await page.goto('/products');
    await page.getByTestId('product-card')
      .filter({ hasText: 'Pro Plan' })
      .getByRole('button', { name: 'Add to cart' })
      .click();

    // 2. View cart
    await page.getByRole('link', { name: 'Cart' }).click();
    await expect(page.getByText('Pro Plan')).toBeVisible();
    await expect(page.getByTestId('cart-total')).toContainText('$29.99');

    // 3. Checkout
    await page.getByRole('button', { name: 'Checkout' }).click();

    // 4. Fill payment (use Stripe test card)
    const stripeFrame = page.frameLocator('iframe[name*="stripe"]');
    await stripeFrame.getByPlaceholder('Card number').fill('4242424242424242');
    await stripeFrame.getByPlaceholder('MM / YY').fill('12/30');
    await stripeFrame.getByPlaceholder('CVC').fill('123');

    // 5. Complete purchase
    await page.getByRole('button', { name: 'Pay now' }).click();

    // 6. Verify success
    await expect(page).toHaveURL(/.*success/);
    await expect(page.getByRole('heading', { name: 'Thank you' })).toBeVisible();
  });
});
```

---

## Assertions

### Web-First Assertions (Auto-Wait)

```typescript
// ✅ These wait and retry automatically
await expect(page.getByRole('button')).toBeVisible();
await expect(page.getByRole('button')).toBeEnabled();
await expect(page.getByRole('button')).toHaveText('Submit');
await expect(page).toHaveURL('/dashboard');
await expect(page).toHaveTitle(/Dashboard/);

// ❌ Avoid manual waits
await page.waitForTimeout(3000);  // NEVER do this
```

### Soft Assertions

```typescript
// Continue test even if assertion fails
await expect.soft(page.getByTestId('price')).toHaveText('$29.99');
await expect.soft(page.getByTestId('stock')).toHaveText('In Stock');

// Fail at end if any soft assertions failed
```

### Common Assertions

```typescript
// Visibility
await expect(locator).toBeVisible();
await expect(locator).toBeHidden();
await expect(locator).toBeAttached();

// Text content
await expect(locator).toHaveText('exact text');
await expect(locator).toContainText('partial');
await expect(locator).toHaveValue('input value');

// State
await expect(locator).toBeEnabled();
await expect(locator).toBeDisabled();
await expect(locator).toBeChecked();
await expect(locator).toBeFocused();

// Count
await expect(locator).toHaveCount(5);

// Page
await expect(page).toHaveURL('/dashboard');
await expect(page).toHaveTitle('Dashboard | App');
await expect(page).toHaveScreenshot('dashboard.png');
```

---

## Mocking & Network

### Mock API Responses

```typescript
test('shows error when API fails', async ({ page }) => {
  // Mock API to return error. The handler is async and awaits fulfill() so the
  // promise is not left floating (a rejection would otherwise go unhandled).
  await page.route('**/api/users', async (route) => {
    await route.fulfill({
      status: 500,
      contentType: 'application/json',
      body: JSON.stringify({ error: 'Server error' }),
    });
  });

  await page.goto('/users');
  await expect(page.getByText('Failed to load users')).toBeVisible();
});

test('displays user data from API', async ({ page }) => {
  // Mock successful response. Await fulfill() inside an async handler so the
  // promise is not left floating.
  await page.route('**/api/users', async (route) => {
    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify([
        { id: 1, name: 'John Doe', email: 'john@example.com' },
        { id: 2, name: 'Jane Doe', email: 'jane@example.com' },
      ]),
    });
  });

  await page.goto('/users');
  await expect(page.getByText('John Doe')).toBeVisible();
  await expect(page.getByText('Jane Doe')).toBeVisible();
});
```

### Wait for API Calls

```typescript
test('submits form and shows success', async ({ page }) => {
  await page.goto('/contact');

  // Fill form
  await page.getByLabel('Name').fill('John');
  await page.getByLabel('Email').fill('john@example.com');
  await page.getByLabel('Message').fill('Hello!');

  // Wait for API call on submit
  const responsePromise = page.waitForResponse('**/api/contact');
  await page.getByRole('button', { name: 'Send' }).click();

  const response = await responsePromise;
  expect(response.status()).toBe(200);

  await expect(page.getByText('Message sent!')).toBeVisible();
});
```

---

## Visual Testing

```typescript
// Full page screenshot
await expect(page).toHaveScreenshot('homepage.png');

// Element screenshot
await expect(page.getByTestId('chart')).toHaveScreenshot('chart.png');

// With options
await expect(page).toHaveScreenshot('dashboard.png', {
  maxDiffPixels: 100,
  mask: [page.getByTestId('timestamp')], // Ignore dynamic content
});
```

---

## CI/CD Integration

### GitHub Actions

```yaml
# .github/workflows/e2e.yml
name: E2E Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Install Playwright browsers
        run: npx playwright install --with-deps chromium

      - name: Run E2E tests
        run: npx playwright test --project=chromium
        env:
          BASE_URL: ${{ secrets.STAGING_URL }}
          TEST_USER_EMAIL: ${{ secrets.TEST_USER_EMAIL }}
          TEST_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}

      - uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: playwright-report
          path: playwright-report/
          retention-days: 7
```

### Run Specific Tests

```bash
# Run all tests
npx playwright test

# Run specific file
npx playwright test e2e/tests/auth.spec.ts

# Run tests with tag
npx playwright test --grep @critical

# Run in headed mode (debug)
npx playwright test --headed

# Run specific browser
npx playwright test --project=chromium

# Debug mode
npx playwright test --debug

# Show HTML report
npx playwright show-report
```

---

## Test Data

### Factories

```typescript
// e2e/utils/test-data.ts
import { faker } from '@faker-js/faker';

export const createUser = (overrides = {}) => ({
  email: faker.internet.email(),
  password: faker.internet.password({ length: 12 }),
  name: faker.person.fullName(),
  ...overrides,
});

export const createProduct = (overrides = {}) => ({
  name: faker.commerce.productName(),
  price: faker.commerce.price({ min: 10, max: 100 }),
  description: faker.commerce.productDescription(),
  ...overrides,
});
```

### Environment Variables

```bash
# .env.test
BASE_URL=http://localhost:3000
TEST_USER_EMAIL=test@example.com
TEST_USER_PASSWORD=testpassword123
```

---

## Debugging

### Trace Viewer

```typescript
// Enable in config for failures
use: {
  trace: 'on-first-retry',
}

// View traces (run this in a terminal, not in the config file):
//   npx playwright show-trace trace.zip
```

### Debug Mode

```bash
# Step through test
npx playwright test --debug

# Pause at a specific point: add this line to your test code (TypeScript, not shell)
#   await page.pause();
```

### VS Code Extension

Install "Playwright Test for VS Code" for:
- Run tests from editor
- Debug with breakpoints
- Pick locators visually
- Watch mode

---

## Dead Link Detection (REQUIRED)

**Every project MUST include dead link detection tests.** Run these on every deployment.

### Link Validator Test

```typescript
// e2e/tests/links.spec.ts
import { test, expect } from '@playwright/test';

const PAGES_TO_CHECK = ['/', '/about', '/pricing', '/blog', '/contact'];

// One test per entry in PAGES_TO_CHECK: load the page, then request every
// distinct HTTP(S) link on it and fail on the first non-2xx response.
test.describe('Dead Link Detection', () => {
  for (const pagePath of PAGES_TO_CHECK) {
    test(`no dead links on ${pagePath}`, async ({ page, request }) => {
      await page.goto(pagePath);

      // Get all links on the page
      const links = await page.locator('a[href]').all();
      const hrefs = await Promise.all(
        links.map(link => link.getAttribute('href'))
      );

      // Drop empty hrefs and de-duplicate so each target is requested once
      const uniqueLinks = [...new Set(hrefs.filter(Boolean))] as string[];

      for (const href of uniqueLinks) {
        // Same-page anchors never trigger a request
        if (href.startsWith('#')) {
          continue;
        }

        // Resolve against the current page; absolute hrefs pass through unchanged.
        // A malformed href makes the URL constructor throw, which fails the test.
        const url = new URL(href, page.url());

        // Only HTTP(S) targets can be fetched; skip mailto:, tel:, javascript:, data:, ...
        if (url.protocol !== 'http:' && url.protocol !== 'https:') {
          continue;
        }

        // Check link status
        const response = await request.get(url.href, {
          timeout: 10000,
          ignoreHTTPSErrors: true,
        });

        expect(
          response.ok(),
          `Dead link found on ${pagePath}: ${href} returned ${response.status()}`
        ).toBeTruthy();
      }
    });
  }
});
```

### Comprehensive Link Crawler

```typescript
// e2e/tests/site-links.spec.ts
import { test, expect, Page, APIRequestContext } from '@playwright/test';

interface LinkResult {
  url: string;
  status: number;
  foundOn: string;
}

/**
 * Crawls every same-origin page reachable from `startUrl` and checks each link found.
 *
 * @param page     Playwright page used to open each internal page and read its anchors.
 * @param request  API request context used to fetch each link target.
 * @param startUrl Absolute URL where the crawl starts; its origin defines "internal".
 * @returns One entry per link occurrence (target URL, HTTP status, page it was found on).
 *          Status 0 means no response was obtained (request error, malformed href,
 *          or a failed page navigation, reported with `foundOn: 'navigation'`).
 */
async function checkAllLinks(
  page: Page,
  request: APIRequestContext,
  startUrl: string
): Promise<LinkResult[]> {
  const visited = new Set<string>();
  const probed = new Map<string, { status: number; isHtml: boolean }>();
  const results: LinkResult[] = [];
  const toVisit = [startUrl];
  const baseOrigin = new URL(startUrl).origin;

  // Fetch each distinct URL at most once. A request that throws (timeout, DNS, ...)
  // is recorded as status 0 instead of aborting the rest of the page.
  const probe = async (url: string) => {
    const cached = probed.get(url);
    if (cached) return cached;

    let outcome = { status: 0, isHtml: false };
    try {
      const response = await request.get(url, {
        timeout: 10000,
        ignoreHTTPSErrors: true,
      });
      const contentType = response.headers()['content-type'] ?? '';
      outcome = { status: response.status(), isHtml: contentType.includes('text/html') };
    } catch {
      // keep the status-0 outcome
    }
    probed.set(url, outcome);
    return outcome;
  };

  while (toVisit.length > 0) {
    const currentUrl = toVisit.pop()!;
    if (visited.has(currentUrl)) continue;
    visited.add(currentUrl);

    // Only navigation and href collection share this try block, so a failure here
    // really is a navigation problem with currentUrl.
    let hrefs: (string | null)[];
    try {
      await page.goto(currentUrl);
      const links = await page.locator('a[href]').all();
      hrefs = await Promise.all(links.map(link => link.getAttribute('href')));
    } catch {
      results.push({
        url: currentUrl,
        status: 0,
        foundOn: 'navigation',
      });
      continue;
    }

    for (const href of hrefs) {
      if (!href || href.startsWith('#')) continue;

      let target: URL;
      try {
        target = new URL(href, currentUrl);
      } catch {
        // Malformed href: report it against the page that contains it
        results.push({ url: href, status: 0, foundOn: currentUrl });
        continue;
      }

      // Skip mailto:, tel:, javascript:, data:, ... — only HTTP(S) can be fetched
      if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;

      // "/page" and "/page#section" are the same resource; drop the fragment
      target.hash = '';
      const fullUrl = target.href;

      const { status, isHtml } = await probe(fullUrl);
      results.push({
        url: fullUrl,
        status,
        foundOn: currentUrl,
      });

      // Queue only healthy, same-origin HTML pages. Comparing origins (not string
      // prefixes) keeps hosts like "https://site.com.evil.com" out of the crawl.
      const isInternal = target.origin === baseOrigin;
      const isHealthy = status > 0 && status < 400;
      if (isInternal && isHealthy && isHtml && !visited.has(fullUrl)) {
        toVisit.push(fullUrl);
      }
    }
  }

  return results;
}

test('no dead links on entire site', async ({ page, request, baseURL }) => {
  const results = await checkAllLinks(page, request, baseURL!);
  const deadLinks = results.filter(r => r.status >= 400 || r.status === 0);

  if (deadLinks.length > 0) {
    console.error('Dead links found:');
    deadLinks.forEach(link => {
      console.error(`  ${link.url} (${link.status}) - found on ${link.foundOn}`);
    });
  }

  expect(deadLinks, `Found ${deadLinks.length} dead links`).toHaveLength(0);
});
```

### Image Link Validation

```typescript
// e2e/tests/images.spec.ts
import { test, expect } from '@playwright/test';

test('no broken images on homepage', async ({ page, request }) => {
  await page.goto('/');

  const images = await page.locator('img[src]').all();

  for (const img of images) {
    const src = await img.getAttribute('src');
    if (!src) continue;

    const url = src.startsWith('http') ? src : new URL(src, page.url()).href;

    // Skip data URLs
    if (url.startsWith('data:')) continue;

    const response = await request.get(url);
    expect(
      response.ok(),
      `Broken image: ${src}`
    ).toBeTruthy();

    // Verify it's actually an image
    const contentType = response.headers()['content-type'];
    expect(
      contentType?.startsWith('image/'),
      `${src} is not an image (${contentType})`
    ).toBeTruthy();
  }
});
```

### CI Integration for Link Checking

```yaml
# .github/workflows/link-check.yml
# Runs the dead-link spec weekly and on every push to main.
name: Link Check

on:
  schedule:
    - cron: '0 6 * * 1'  # Weekly on Monday
  push:
    branches: [main]

jobs:
  link-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
      - run: npm ci
      # --with-deps also installs the OS libraries the browser needs on a fresh runner
      - run: npx playwright install --with-deps chromium
      - run: npx playwright test e2e/tests/links.spec.ts --project=chromium
        env:
          BASE_URL: ${{ secrets.PRODUCTION_URL }}

---

## Anti-Patterns

- **Hardcoded waits** - Use auto-waiting assertions instead
- **CSS/XPath selectors** - Use role/text/testid locators
- **Testing third-party sites** - Mock external dependencies
- **Shared state between tests** - Each test must be isolated
- **Missing awaits** - Use ESLint rule `no-floating-promises`
- **Flaky time-based tests** - Mock dates/times
- **Testing implementation details** - Test user-visible behavior
- **Huge test files** - Split by feature/page

---

## Quick Reference

```bash
# Install
npm init playwright@latest

# Run tests
npx playwright test
npx playwright test --headed
npx playwright test --project=chromium
npx playwright test --grep @smoke

# Debug
npx playwright test --debug
npx playwright show-report
npx playwright show-trace trace.zip

# Generate tests
npx playwright codegen localhost:3000
```

### Package.json Scripts

```json
{
  "scripts": {
    "test:e2e": "playwright test",
    "test:e2e:headed": "playwright test --headed",
    "test:e2e:debug": "playwright test --debug",
    "test:e2e:report": "playwright show-report",
    "test:e2e:codegen": "playwright codegen"
  }
}
```


================================================
FILE: skills/polyphony/SKILL.md
================================================
---
name: polyphony
description: Multi-agent orchestration with container-isolated workspaces — each agent session runs in its own Docker container with independent git branches
when-to-use: Always loaded when container isolation is available (Docker/OrbStack installed). Default for /spawn-team.
user-invocable: false
effort: high
---

# Polyphony — Multi-Agent Orchestration

Container-isolated workspaces for parallel agent execution. Each agent gets its own Docker container with a full git clone on its own branch. No conflicts, independent tests, clean PRs.

---

## Architecture (6 Layers)

1. **Work Source** — Tasks from GitHub Issues (`gh api`) or local SQLite queue
2. **Orchestrator** — Supervisor loop: discover -> claim -> route -> provision -> run -> verify -> land
3. **Router** — Pure function: Task x Policy -> RunSpec (5-dimension complexity scoring)
4. **Identity Broker** — Resolves named credentials to volume mounts + env overlays
5. **Workspace Manager** — Per-task `git clone --reference`, branch checkout, cleanup
6. **Worker Runtime** — Docker container create/start/stop/logs lifecycle

---

## Task Lifecycle

```
DISCOVERED -> CLAIMED -> ROUTED -> PROVISIONED -> RUNNING -> VERIFYING -> LANDED
                                                     |           |
                                                     v           v
                                                   FAILED --> BLOCKED
                                                     |
                                                     v
                                                   CLAIMED (retry)
```

---

## Prerequisites

- Docker or OrbStack installed and running
- At least one agent CLI available (Claude, Codex, or Kimi)
- CLI subscriptions configured (not API keys)

Check:
```bash
command -v docker &>/dev/null || command -v orbctl &>/dev/null
```

---

## Configuration

All config lives in `~/.polyphony/`:

| File | Purpose |
|------|---------|
| `config.yaml` | Workspace root, poll interval, max concurrency |
| `identities.yaml` | Named credential bundles with volume paths |
| `agents.yaml` | Agent profiles (CLI commands, strengths) |
| `routing.yaml` | Routing rules and fallback chains |

Initialize with: `polyphony init`

---

## Routing Rules

Rules are evaluated top-down; first match wins. Each rule has a `match` predicate and an `agent` target.

```yaml
rules:
  - match: { task_type: docs, risk: low }
    agent: kimi
  - match: { task_type: bugfix }
    agent: codex
  - match: { risk: high }
    agent: claude
default:
  agent: claude
  fallback: [codex, kimi]
```

---

## Complexity Scoring (5 Dimensions)

Each dimension scores 0-2. Total 0-10.

| Dimension | Source |
|-----------|--------|
| Cyclomatic depth | LOC + scope size |
| Fan-out | Number of callers |
| Security boundary | Auth/PII keywords |
| Concurrency | Lock/transaction keywords |
| Domain invariants | Risk level + task type |

Routing thresholds:
- **0-3**: Delegate to Kimi solo
- **4-6**: Kimi + Codex review
- **7-10**: Claude direct

---

## Container Isolation

Each task gets:
- Its own Docker container from `polyphony-worker:latest`
- A full git clone at `/workspace` (not a worktree)
- Auth volumes mounted read-only (e.g., `~/.claude:/home/worker/.claude:ro`)
- Independent test execution
- Its own branch for PRs

---

## CLI Commands

```bash
polyphony init                    # Create ~/.polyphony/ with config templates
polyphony spawn "Fix auth bug"    # Create and route a task
polyphony status                  # Show task states
polyphony cleanup                 # Remove completed workspaces
```

---

## Integration with Existing Skills

- **cross-agent-delegation**: Uses Polyphony's complexity scoring for routing decisions
- **agent-teams**: Uses Polyphony's workspace isolation instead of shared directories
- **spawn-team**: Uses Polyphony's container provisioning for feature agents


================================================
FILE: skills/posthog-analytics/SKILL.md
================================================
---
name: posthog-analytics
description: PostHog analytics, event tracking, feature flags, dashboards
when-to-use: When adding analytics, feature flags, or event tracking with PostHog
user-invocable: false
effort: medium
---

# PostHog Analytics Skill


For implementing product analytics with PostHog - event tracking, user identification, feature flags, and project-specific dashboards.

**Sources:** [PostHog Docs](https://posthog.com/docs) | [Product Analytics](https://posthog.com/docs/product-analytics) | [Feature Flags](https://posthog.com/docs/feature-flags)

---

## Philosophy

**Measure what matters, not everything.**

Analytics should answer specific questions:
- Are users getting value? (activation, retention)
- Where do users struggle? (funnels, drop-offs)
- What features drive engagement? (feature usage)
- Is the product growing? (acquisition, referrals)

Don't track everything. Track what informs decisions.

---

## Installation

### Next.js (App Router)

```bash
npm install posthog-js
```

```typescript
// lib/posthog.ts
import posthog from 'posthog-js';

export function initPostHog() {
  if (typeof window !== 'undefined' && !posthog.__loaded) {
    posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY!, {
      api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST || 'https://us.i.posthog.com',
      person_profiles: 'identified_only', // Only create profiles for identified users
      capture_pageview: false, // We'll handle this manually for SPA
      capture_pageleave: true,
      loaded: (posthog) => {
        if (process.env.NODE_ENV === 'development') {
          posthog.debug();
        }
      },
    });
  }
  return posthog;
}

export { posthog };
```

```typescript
// app/providers.tsx
'use client';

import { Suspense, useEffect } from 'react';
import { usePathname, useSearchParams } from 'next/navigation';
import { initPostHog, posthog } from '@/lib/posthog';

// Captures a $pageview whenever the path or query string changes.
// Lives in its own component so useSearchParams() can sit inside a Suspense
// boundary instead of opting the whole provider tree out of static rendering.
function PostHogPageView() {
  const pathname = usePathname();
  const searchParams = useSearchParams();

  useEffect(() => {
    if (!pathname) return;

    // Child effects run before the provider's effect on first mount, so make
    // sure the client is initialised here too (initPostHog is a no-op once loaded).
    initPostHog();

    let url = window.origin + pathname;
    const query = searchParams?.toString();
    if (query) {
      url += `?${query}`;
    }
    posthog.capture('$pageview', { $current_url: url });
  }, [pathname, searchParams]);

  return null;
}

// Initialises PostHog once on the client and tracks pageviews for all children.
export function PostHogProvider({ children }: { children: React.ReactNode }) {
  useEffect(() => {
    initPostHog();
  }, []);

  return (
    <>
      <Suspense fallback={null}>
        <PostHogPageView />
      </Suspense>
      {children}
    </>
  );
}

```typescript
// app/layout.tsx
import { PostHogProvider } from './providers';

export default function RootLayout({ children }: { children: React.ReactNode }) {
  return (
    <html lang="en">
      <body>
        <PostHogProvider>
          {children}
        </PostHogProvider>
      </body>
    </html>
  );
}
```

### React (Vite/CRA)

```typescript
// src/posthog.ts
import posthog from 'posthog-js';

posthog.init(import.meta.env.VITE_POSTHOG_KEY, {
  api_host: import.meta.env.VITE_POSTHOG_HOST || 'https://us.i.posthog.com',
  person_profiles: 'identified_only',
});

export { posthog };
```

```typescript
// src/main.tsx
import { PostHogProvider } from 'posthog-js/react';
import { posthog } from './posthog';

ReactDOM.createRoot(document.getElementById('root')!).render(
  <PostHogProvider client={posthog}>
    <App />
  </PostHogProvider>
);
```

### Python (FastAPI/Flask)

```bash
pip install posthog
```

```python
# analytics/posthog_client.py
import os
from functools import lru_cache
from typing import Optional

import posthog


@lru_cache()
def get_posthog():
    """Configure the posthog module from environment variables and return it.

    Cached, so the configuration runs only on the first call.

    Raises:
        KeyError: if POSTHOG_API_KEY is not set.
    """
    posthog.project_api_key = os.environ["POSTHOG_API_KEY"]
    posthog.host = os.environ.get("POSTHOG_HOST", "https://us.i.posthog.com")
    posthog.debug = os.environ.get("ENV") == "development"
    return posthog


# Usage
def track_event(user_id: str, event: str, properties: Optional[dict] = None):
    """Capture `event` for `user_id`; `properties` defaults to an empty dict."""
    ph = get_posthog()
    ph.capture(
        distinct_id=user_id,
        event=event,
        properties=properties or {}
    )


def identify_user(user_id: str, properties: dict):
    """Attach `properties` to the person identified by `user_id`."""
    ph = get_posthog()
    ph.identify(user_id, properties)
```

### Node.js (Express/Hono)

```bash
npm install posthog-node
```

```typescript
// lib/posthog.ts
import { PostHog } from 'posthog-node';

const posthog = new PostHog(process.env.POSTHOG_API_KEY!, {
  host: process.env.POSTHOG_HOST || 'https://us.i.posthog.com',
});

// Flush on shutdown
process.on('SIGTERM', () => posthog.shutdown());

export { posthog };

// Usage
export function trackEvent(userId: string, event: string, properties?: Record<string, any>) {
  posthog.capture({
    distinctId: userId,
    event,
    properties,
  });
}

export function identifyUser(userId: string, properties: Record<string, any>) {
  posthog.identify({
    distinctId: userId,
    properties,
  });
}
```

---

## Environment Variables

```bash
# .env.local (Next.js) - SAFE: These are meant to be public
NEXT_PUBLIC_POSTHOG_KEY=phc_xxxxxxxxxxxxxxxxxxxx
NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com

# .env (Backend) - Keep private
POSTHOG_API_KEY=phc_xxxxxxxxxxxxxxxxxxxx
POSTHOG_HOST=https://us.i.posthog.com
```

Add to `credentials.md` patterns:
```python
'POSTHOG_API_KEY': r'phc_[A-Za-z0-9]+',
```

---

## User Identification

### When to Identify

```typescript
// Identify on signup
async function handleSignup(email: string, name: string) {
  const user = await createUser(email, name);

  posthog.identify(user.id, {
    email: user.email,
    name: user.name,
    created_at: user.createdAt,
    plan: 'free',
  });

  posthog.capture('user_signed_up', {
    signup_method: 'email',
  });
}

// Identify on login
async function handleLogin(email: string) {
  const user = await authenticateUser(email);

  posthog.identify(user.id, {
    email: user.email,
    name: user.name,
    plan: user.plan,
    last_login: new Date().toISOString(),
  });

  posthog.capture('user_logged_in');
}

// Reset on logout
function handleLogout() {
  posthog.capture('user_logged_out');
  posthog.reset(); // Clears identity
}
```

### User Properties

```typescript
// Standard properties to track
interface UserProperties {
  // Identity
  email: string;
  name: string;

  // Lifecycle
  created_at: string;
  plan: 'free' | 'pro' | 'enterprise';

  // Engagement
  onboarding_completed: boolean;
  feature_count: number;

  // Business
  company_name?: string;
  company_size?: string;
  industry?: string;
}

// Update properties when they change
posthog.capture('$set', {
  $set: { plan: 'pro' },
});
```

---

## Event Tracking Patterns

### Event Naming Convention

```typescript
// Format: [object]_[action]
// Use snake_case, past tense for actions

// ✅ Good event names
'user_signed_up'
'feature_created'
'subscription_upgraded'
'onboarding_completed'
'invite_sent'
'file_uploaded'
'search_performed'
'checkout_started'
'payment_completed'

// ❌ Bad event names
'click'           // Too vague
'ButtonClick'     // Not snake_case
'user signup'     // Spaces
'creatingFeature' // Not past tense
```

### Core Events by Category

```typescript
// === AUTHENTICATION ===
posthog.capture('user_signed_up', {
  signup_method: 'google' | 'email' | 'github',
  referral_source: 'organic' | 'paid' | 'referral',
});

posthog.capture('user_logged_in', {
  login_method: 'google' | 'email' | 'magic_link',
});

posthog.capture('user_logged_out');

posthog.capture('password_reset_requested');

// === ONBOARDING ===
posthog.capture('onboarding_started');

posthog.capture('onboarding_step_completed', {
  step_name: 'profile' | 'preferences' | 'first_action',
  step_number: 1,
  total_steps: 3,
});

posthog.capture('onboarding_completed', {
  duration_seconds: 120,
  steps_skipped: 0,
});

posthog.capture('onboarding_skipped', {
  skipped_at_step: 2,
});

// === FEATURE USAGE ===
posthog.capture('feature_used', {
  feature_name: 'export' | 'share' | 'duplicate',
  context: 'dashboard' | 'editor',
});

posthog.capture('[resource]_created', {
  resource_type: 'project' | 'document' | 'team',
  // Resource-specific properties
});

posthog.capture('[resource]_updated', {
  resource_type: 'project',
  fields_changed: ['name', 'description'],
});

posthog.capture('[resource]_deleted', {
  resource_type: 'project',
});

// === BILLING ===
posthog.capture('pricing_page_viewed', {
  current_plan: 'free',
});

posthog.capture('checkout_started', {
  plan: 'pro',
  billing_period: 'monthly' | 'annual',
  price: 29,
});

posthog.capture('subscription_upgraded', {
  from_plan: 'free',
  to_plan: 'pro',
  mrr_change: 29,
});

posthog.capture('subscription_downgraded', {
  from_plan: 'pro',
  to_plan: 'free',
  reason: 'too_expensive' | 'missing_features' | 'not_using',
});

posthog.capture('subscription_cancelled', {
  plan: 'pro',
  reason: 'string',
  feedback: 'string',
});

// === ERRORS ===
posthog.capture('error_occurred', {
  error_type: 'api_error' | 'validation_error' | 'network_error',
  error_message: 'string',
  error_code: 'string',
  page: '/dashboard',
});
```

### React Hook for Tracking

```typescript
// hooks/useTrack.ts
import { useCallback } from 'react';
import { posthog } from '@/lib/posthog';

export function useTrack() {
  const track = useCallback((event: string, properties?: Record<string, any>) => {
    posthog.capture(event, {
      ...properties,
      timestamp: new Date().toISOString(),
    });
  }, []);

  return { track };
}

// Usage
// Button that creates a project and tracks the start, success and failure of the attempt.
function CreateProjectButton() {
  const { track } = useTrack();

  const handleCreate = async () => {
    track('project_creation_started');

    try {
      const project = await createProject();
      track('project_created', {
        project_id: project.id,
        template_used: project.template,
      });
    } catch (error) {
      // A caught value is `unknown` under strict TypeScript and may not be an
      // Error at all, so narrow it before reading `.message`.
      track('project_creation_failed', {
        error_message: error instanceof Error ? error.message : String(error),
      });
    }
  };

  return <button onClick={handleCreate}>Create Project</button>;
}
```

---

## Feature Flags

### Setup

```typescript
// Check feature flag (client-side)
import { useFeatureFlagEnabled } from 'posthog-js/react';

function NewFeature() {
  const showNewUI = useFeatureFlagEnabled('new-dashboard-ui');

  if (showNewUI) {
    return <NewDashboard />;
  }
  return <OldDashboard />;
}

// With payload
import { useFeatureFlagPayload } from 'posthog-js/react';

function PricingPage() {
  const pricingConfig = useFeatureFlagPayload('pricing-experiment');
  // pricingConfig = { price: 29, showAnnual: true }

  return <Pricing config={pricingConfig} />;
}
```

### Server-Side (Next.js)

```typescript
// app/dashboard/page.tsx
import { PostHog } from 'posthog-node';
import { cookies } from 'next/headers';

// Fetches all feature flag values for `userId` using a short-lived server-side client.
async function getFeatureFlags(userId: string) {
  const posthog = new PostHog(process.env.POSTHOG_API_KEY!);

  try {
    return await posthog.getAllFlags(userId);
  } finally {
    // Always shut the client down, even when the flag request throws
    await posthog.shutdown();
  }
}

// Server component: reads the user id cookie and renders flag-gated features.
export default async function Dashboard() {
  // `await` works whether cookies() returns the store directly or a Promise of it
  const cookieStore = await cookies();
  const userId = cookieStore.get('user_id')?.value;

  // Visitors without a user_id cookie have no distinct ID to evaluate flags for;
  // treat every flag as off instead of passing `undefined` to PostHog.
  const flags: Record<string, string | boolean> = userId
    ? await getFeatureFlags(userId)
    : {};

  return (
    <div>
      {flags['new-dashboard'] && <NewFeature />}
    </div>
  );
}
```

### A/B Testing

```typescript
// Track experiment exposure
// Renders the checkout variant chosen by the flag and records the exposure once it is known.
function ExperimentComponent() {
  const variant = useFeatureFlagEnabled('checkout-experiment');

  useEffect(() => {
    // The hook returns undefined until flags have loaded. Recording an exposure
    // then would wrongly count every user as "control" first.
    if (variant === undefined) return;

    posthog.capture('experiment_viewed', {
      experiment: 'checkout-experiment',
      variant: variant ? 'test' : 'control',
    });
  }, [variant]);

  return variant ? <NewCheckout /> : <OldCheckout />;
}
```

---

## Project-Specific Dashboards

### SaaS Product

```markdown
## Essential SaaS Dashboards

### 1. Acquisition Dashboard
**Questions answered:** Where do users come from? What converts?

Insights to create:
- [ ] Signups by source (daily/weekly trend)
- [ ] Signup conversion rate by landing page
- [ ] Time from first visit to signup
- [ ] Signup funnel: Visit → Signup Page → Form Start → Complete

### 2. Activation Dashboard
**Questions answered:** Are new users getting value?

Insights to create:
- [ ] Onboarding completion rate
- [ ] Time to first key action
- [ ] Activation rate (% reaching "aha moment" in first 7 days)
- [ ] Drop-off by onboarding step
- [ ] Feature adoption in first session

### 3. Engagement Dashboard
**Questions answered:** How are users using the product?

Insights to create:
- [ ] DAU/WAU/MAU trends
- [ ] Feature usage heatmap
- [ ] Session duration distribution
- [ ] Actions per session
- [ ] Power users vs casual users

### 4. Retention Dashboard
**Questions answered:** Are users coming back?

Insights to create:
- [ ] Retention cohorts (D1, D7, D30)
- [ ] Churn rate by plan
- [ ] Reactivation rate
- [ ] Last action before churn
- [ ] Features correlated with retention

### 5. Revenue Dashboard
**Questions answered:** Is the business growing?

Insights to create:
- [ ] MRR trend
- [ ] Upgrades vs downgrades
- [ ] Trial to paid conversion
- [ ] Revenue by plan
- [ ] LTV by acquisition source
```

### E-Commerce

```markdown
## Essential E-Commerce Dashboards

### 1. Conversion Funnel
Insights to create:
- [ ] Full funnel: Browse → PDP → Add to Cart → Checkout → Purchase
- [ ] Cart abandonment rate
- [ ] Checkout drop-off by step
- [ ] Payment failure rate

### 2. Product Performance
Insights to create:
- [ ] Product views → purchases (by product)
- [ ] Add to cart rate by category
- [ ] Search → purchase correlation
- [ ] Cross-sell effectiveness

### 3. Customer Dashboard
Insights to create:
- [ ] Repeat purchase rate
- [ ] Average order value trend
- [ ] Customer lifetime value
- [ ] Purchase frequency distribution
```

### Content/Media

```markdown
## Essential Content Dashboards

### 1. Consumption Dashboard
Insights to create:
- [ ] Content views by type
- [ ] Read/watch completion rate
- [ ] Time on content
- [ ] Scroll depth distribution

### 2. Engagement Dashboard
Insights to create:
- [ ] Shares by content
- [ ] Comments per article
- [ ] Save/bookmark rate
- [ ] Return visits to same content

### 3. Growth Dashboard
Insights to create:
- [ ] New vs returning visitors
- [ ] Email signup rate
- [ ] Referral traffic sources
```

### AI/LLM Application

```markdown
## Essential AI App Dashboards

### 1. Usage Dashboard
Insights to create:
- [ ] Queries per user per day
- [ ] Token usage distribution
- [ ] Response time p50/p95
- [ ] Error rate by query type

### 2. Quality Dashboard
Insights to create:
- [ ] User feedback (thumbs up/down)
- [ ] Regeneration rate (user asked for new response)
- [ ] Edit rate (user modified AI output)
- [ ] Follow-up query rate

### 3. Cost Dashboard
Insights to create:
- [ ] Token cost per user
- [ ] Cost by model
- [ ] Cost by feature
- [ ] Efficiency trends (value/cost)
```

---

## Creating Dashboards

### Using PostHog MCP

```markdown
When setting up analytics for a project:

1. First, check existing dashboards:
   - Use `dashboards-get-all` to list current dashboards

2. Create project-appropriate dashboards:
   - Use `dashboard-create` with descriptive name

3. Create insights for each dashboard:
   - Use `query-run` to test queries
   - Use `insight-create-from-query` to save
   - Use `add-insight-to-dashboard` to organize

4. Set up key funnels:
   - Signup funnel
   - Onboarding funnel
   - Purchase/conversion funnel
```

### Dashboard Creation Workflow

```typescript
// Example: Creating SaaS dashboards via MCP

// 1. Create dashboard
const dashboard = await mcp_posthog_dashboard_create({
  name: "Activation Metrics",
  description: "Track new user activation and onboarding",
  tags: ["saas", "activation"],
});

// 2. Create insights
const signupFunnel = await mcp_posthog_query_run({
  query: {
    kind: "InsightVizNode",
    source: {
      kind: "FunnelsQuery",
      series: [
        { kind: "EventsNode", event: "user_signed_up", name: "Signed Up" },
        { kind: "EventsNode", event: "onboarding_started", name: "Started Onboarding" },
        { kind: "EventsNode", event: "onboarding_completed", name: "Completed Onboarding" },
        { kind: "EventsNode", event: "first_value_action", name: "First Value" },
      ],
      dateRange: { date_from: "-30d" },
    },
  },
});

// 3. Save and add to dashboard
const insight = await mcp_posthog_insight_create_from_query({
  name: "Signup to Activation Funnel",
  query: signupFunnel.query,
  favorited: true,
});

await mcp_posthog_add_insight_to_dashboard({
  insightId: insight.id,
  dashboardId: dashboard.id,
});
```

---

## Privacy & Compliance

### GDPR Compliance

```typescript
// Opt-out handling
/**
 * Apply the visitor's cookie-consent decision to PostHog.
 *
 * @param consent - `true` opts the visitor in to capturing, `false` opts out.
 */
export function handleCookieConsent(consent: boolean) {
  if (!consent) {
    posthog.opt_out_capturing();
    return;
  }
  posthog.opt_in_capturing();
}

// Check consent status
const hasConsent = posthog.has_opted_in_capturing();

// Initialize with consent check
posthog.init(key, {
  opt_out_capturing_by_default: true, // Require explicit opt-in
  respect_dnt: true, // Respect Do Not Track
});
```

### Data to Never Track

```typescript
// ❌ NEVER track these
posthog.capture('event', {
  password: '...',           // Credentials
  credit_card: '...',        // Payment info
  ssn: '...',                // Government IDs
  medical_info: '...',       // Health data
  full_address: '...',       // Detailed location
});

// ✅ OK to track
posthog.capture('event', {
  country: 'US',             // General location
  plan: 'pro',               // Product info
  feature_used: 'export',    // Usage
});
```

### Property Sanitization

```typescript
// lib/analytics.ts
// Substrings that mark a property key as sensitive (matched case-insensitively).
const SENSITIVE_KEYS = ['password', 'token', 'secret', 'credit', 'ssn'];

/** True when `key` contains any SENSITIVE_KEYS entry, ignoring case. */
function isSensitiveKey(key: string): boolean {
  const lowered = key.toLowerCase();
  return SENSITIVE_KEYS.some(sensitive => lowered.includes(sensitive));
}

/**
 * Sanitize a single property value.
 * Arrays and plain objects are walked recursively; everything else
 * (primitives, Date, class instances, ...) is returned unchanged.
 * Note: cyclic structures are not supported.
 */
function sanitizeValue(value: any): any {
  if (Array.isArray(value)) {
    return value.map(sanitizeValue);
  }
  if (value !== null && typeof value === 'object') {
    const proto = Object.getPrototypeOf(value);
    if (proto === Object.prototype || proto === null) {
      return sanitizeProperties(value);
    }
  }
  return value;
}

/**
 * Return a copy of `props` without sensitive keys.
 *
 * A key is dropped when it contains any SENSITIVE_KEYS substring. The check is
 * applied at every nesting level, so `{ user: { password } }` is cleaned as
 * well as `{ password }`. The input object is not mutated.
 *
 * @param props - Event properties supplied by the caller.
 * @returns A new object that is safe to send to analytics.
 */
function sanitizeProperties(props: Record<string, any>): Record<string, any> {
  return Object.fromEntries(
    Object.entries(props)
      .filter(([key]) => !isSensitiveKey(key))
      .map(([key, value]) => [key, sanitizeValue(value)])
  );
}

/**
 * Capture `event` after stripping sensitive keys from `properties`.
 *
 * @param event - Event name.
 * @param properties - Optional event properties; treated as `{}` when omitted.
 */
export function safeCapture(event: string, properties?: Record<string, any>) {
  posthog.capture(event, sanitizeProperties(properties || {}));
}
```

---

## Testing Analytics

### Development Mode

```typescript
// Disable in development
if (process.env.NODE_ENV === 'development') {
  posthog.opt_out_capturing();
  // Or use debug mode.
  // NOTE(review): as written, both calls run; keep only the one you want.
  posthog.debug();
}
```

### E2E Testing

```typescript
// playwright/fixtures.ts
import { test as base } from '@playwright/test';

// Extended `test` whose `page` fixture installs a recording stub as
// window.posthog before handing the page to the test body.
export const test = base.extend({
  page: async ({ page }, use) => {
    // Mock PostHog to capture events
    await page.addInitScript(() => {
      // Every capture() call is appended here as { event, props }.
      window.capturedEvents = [];
      window.posthog = {
        capture: (event, props) => {
          window.capturedEvents.push({ event, props });
        },
        // identify() and reset() are stubbed as no-ops.
        identify: () => {},
        reset: () => {},
      };
    });
    await use(page);
  },
});

// In tests
// NOTE(review): `expect` is used below but is not imported in this snippet;
// a real test file needs to bring it into scope.
test('tracks signup event', async ({ page }) => {
  await page.goto('/signup');
  await page.fill('[name=email]', 'test@example.com');
  await page.click('button[type=submit]');

  // Read back the events recorded by the stub in window.capturedEvents.
  const events = await page.evaluate(() => window.capturedEvents);
  expect(events).toContainEqual({
    event: 'user_signed_up',
    props: expect.objectContaining({ signup_method: 'email' }),
  });
});
```

---

## Debugging

### PostHog Toolbar

```typescript
// Enable toolbar for debugging
posthog.init(key, {
  // ...
  loaded: (posthog) => {
    if (process.env.NODE_ENV === 'development') {
      posthog.debug();
      // Toolbar available via PostHog dashboard
    }
  },
});
```

### Event Debugging

```typescript
// Log all events in development
posthog.init(key, {
  _onCapture: (eventName, eventData) => {
    if (process.env.NODE_ENV === 'development') {
      console.log('PostHog Event:', eventName, eventData);
    }
  },
});
```

---

## Quick Reference

### Event Checklist by User Lifecycle

```markdown
## Must-Track Events

### Acquisition
- [ ] `$pageview` (captured automatically when `capture_pageview` is enabled)
- [ ] `user_signed_up`
- [ ] `user_logged_in`

### Activation
- [ ] `onboarding_started`
- [ ] `onboarding_step_completed`
- [ ] `onboarding_completed`
- [ ] `first_[key_action]` (your "aha moment")

### Engagement
- [ ] `[feature]_used`
- [ ] `[resource]_created`
- [ ] `search_performed`
- [ ] `invite_sent`

### Revenue
- [ ] `pricing_page_viewed`
- [ ] `checkout_started`
- [ ] `subscription_upgraded`
- [ ] `subscription_cancelled`

### Retention
- [ ] `session_started`
- [ ] `feature_[x]_used` (power features)
```

### Dashboard Templates

| Project Type | Key Dashboards |
|--------------|----------------|
| **SaaS** | Acquisition, Activation, Engagement, Retention, Revenue |
| **E-Commerce** | Conversion Funnel, Product Performance, Customer LTV |
| **Content** | Consumption, Engagement, Growth |
| **AI/LLM** | Usage, Quality, Cost |
| **Mobile App** | Installs, Onboarding, DAU/MAU, Crashes |

### Properties to Always Include

```typescript
// Auto-enriched by PostHog
$current_url
$browser
$device_type
$os

// Add these yourself
user_plan       // 'free' | 'pro' | 'enterprise'
user_role       // 'admin' | 'member'
company_id      // For B2B
feature_context // Where in the app
```


================================================
FILE: skills/project-tooling/SKILL.md
================================================
---
name: project-tooling
description: gh, vercel, supabase, render CLI and deployment platform setup
when-to-use: When setting up deployment, CI/CD, or when CLI tools are needed
user-invocable: false
effort: low
---

# Project Tooling Skill


Standard CLI tools for project infrastructure management.

---

## Required CLI Tools

Before starting any project, verify these tools are installed and authenticated:

### 1. GitHub CLI (gh)
```bash
# Verify installation
gh --version

# Verify authentication
gh auth status

# If not authenticated:
gh auth login
```

### 2. Vercel CLI
```bash
# Verify installation
vercel --version

# Verify authentication
vercel whoami

# If not authenticated:
vercel login
```

### 3. Supabase CLI
```bash
# Verify installation
supabase --version

# Verify authentication (check if linked to a project or logged in)
supabase projects list

# If not authenticated:
supabase login
```

### 4. Render CLI (optional - for Render deployments)
```bash
# Verify installation
render --version

# If using Render API instead:
# Ensure RENDER_API_KEY is set in environment
```

---

## Validation Script

Run this at project initialization to verify all tools:

```bash
#!/bin/bash
# scripts/verify-tooling.sh
#
# Verify that the GitHub, Vercel and Supabase CLIs are installed and
# authenticated. Every tool is checked before exiting, so a single run
# reports all problems instead of stopping at the first one.
#
# Exit status: 0 when every tool is ready, 1 otherwise.

set -euo pipefail

#######################################
# Check that one CLI is installed and authenticated.
# Arguments:
#   $1   - display name used in messages
#   $2   - executable looked up on PATH
#   $3   - install command shown when the executable is missing
#   $4   - login command shown when the auth probe fails
#   $5.. - auth probe command; exit status 0 means "authenticated"
# Outputs:
#   Success line on stdout, failure line on stderr.
# Returns:
#   0 when installed and authenticated, 1 otherwise.
#######################################
check_tool() {
  local name=$1
  local binary=$2
  local install_hint=$3
  local login_hint=$4
  shift 4

  if ! command -v "$binary" > /dev/null 2>&1; then
    echo "✗ ${name} not installed. Run: ${install_hint}" >&2
    return 1
  fi

  # Probe output is discarded on purpose: only the exit status matters.
  if ! "$@" > /dev/null 2>&1; then
    echo "✗ ${name} not authenticated. Run: ${login_hint}" >&2
    return 1
  fi

  echo "✓ ${name} authenticated"
}

main() {
  local failures=0

  echo "Verifying project tooling..."

  # `|| failures=...` keeps `set -e` from aborting on the first failed check.
  check_tool "GitHub CLI" gh \
    "brew install gh" "gh auth login" \
    gh auth status || failures=$((failures + 1))

  check_tool "Vercel CLI" vercel \
    "npm i -g vercel" "vercel login" \
    vercel whoami || failures=$((failures + 1))

  check_tool "Supabase CLI" supabase \
    "brew install supabase/tap/supabase" "supabase login" \
    supabase projects list || failures=$((failures + 1))

  if (( failures > 0 )); then
    echo "" >&2
    echo "${failures} tool check(s) failed." >&2
    exit 1
  fi

  echo ""
  echo "All tools verified!"
}

main
```

---

## GitHub Repository Setup

### Create New Repository
```bash
# Create and push in one command
gh repo create <repo-name> --private --source=. --remote=origin --push

# Or public:
gh repo create <repo-name> --public --source=. --remote=origin --push
```

### Connect Existing Repository
```bash
# If repo exists on GitHub but not linked locally
gh repo clone <owner>/<repo>

# Or add remote to existing local project
git remote add origin https://github.com/<owner>/<repo>.git
git push -u origin main
```

### Repository Settings
```bash
# Enable branch protection on main.
# The endpoint expects nested JSON objects and requires all four top-level
# keys (including "restrictions"). `-F key='{"..."}'` would send the JSON as a
# plain string, so the body is passed as real JSON on stdin instead.
# {owner} and {repo} are filled in by gh from the current repository.
gh api -X PUT repos/{owner}/{repo}/branches/main/protection --input - <<'JSON'
{
  "required_status_checks": { "strict": true, "contexts": ["quality"] },
  "enforce_admins": false,
  "required_pull_request_reviews": { "required_approving_review_count": 1 },
  "restrictions": null
}
JSON

# Set default branch
gh repo edit --default-branch main
```

---

## Vercel Deployment

### Link Project
```bash
# Link current directory to Vercel project
vercel link

# Or create new project
vercel
```

### Environment Variables
```bash
# Add environment variable
vercel env add ANTHROPIC_API_KEY production

# Pull env vars to local .env
vercel env pull .env.local
```

### Deploy
```bash
# Deploy to preview
vercel

# Deploy to production
vercel --prod
```

---

## Supabase Setup

### Create New Project
```bash
# Create project (interactive)
supabase projects create <project-name> --org-id <org-id>

# Link local to remote
supabase link --project-ref <project-ref>
```

### Local Development
```bash
# Start local Supabase
supabase start

# Stop local Supabase
supabase stop

# Reset database (apply all migrations fresh)
supabase db reset
```

### Migrations
```bash
# Create new migration
supabase migration new <migration-name>

# Apply migrations to remote
supabase db push

# Pull remote schema to local
supabase db pull
```

### Generate Types
```bash
# Generate TypeScript types from schema
supabase gen types typescript --local > src/types/database.ts

# Or from remote
supabase gen types typescript --project-id <ref> > src/types/database.ts
```

---

## Render Setup (API-based)

### Environment
```bash
# Set API key
export RENDER_API_KEY=<your-api-key>
```

### Common Operations via API
```bash
# List services
curl -H "Authorization: Bearer $RENDER_API_KEY" \
  https://api.render.com/v1/services

# Trigger deploy
curl -X POST -H "Authorization: Bearer $RENDER_API_KEY" \
  https://api.render.com/v1/services/<service-id>/deploys

# Get deploy status
curl -H "Authorization: Bearer $RENDER_API_KEY" \
  https://api.render.com/v1/services/<service-id>/deploys/<deploy-id>
```

---

## Package.json Scripts

Add these scripts for common operations:

```json
{
  "scripts": {
    "verify-tools": "./scripts/verify-tooling.sh",
    "deploy:preview": "vercel",
    "deploy:prod": "vercel --prod",
    "db:start": "supabase start",
    "db:stop": "supabase stop",
    "db:reset": "supabase db reset",
    "db:migrate": "supabase db push",
    "db:types": "supabase gen types typescript --local > src/types/database.ts"
  }
}
```

---

## CI/CD Integration

### GitHub Actions with Vercel
```yaml
# .github/workflows/deploy.yml
name: Deploy

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Deploy to Vercel
        uses: amondnet/vercel-action@v25
        with:
          vercel-token: ${{ secrets.VERCEL_TOKEN }}
          vercel-org-id: ${{ secrets.VERCEL_ORG_ID }}
          vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }}
          vercel-args: ${{ github.ref == 'refs/heads/main' && '--prod' || '' }}
```

### GitHub Actions with Supabase
```yaml
# .github/workflows/migrate.yml
# Applies pending Supabase migrations whenever migration files change on main.
# Required repository secrets:
#   SUPABASE_ACCESS_TOKEN - personal access token used by the CLI
#   SUPABASE_DB_PASSWORD  - database password of the target project
#   SUPABASE_PROJECT_ID   - project ref of the target project
name: Migrate Database

on:
  push:
    branches: [main]
    paths:
      - 'supabase/migrations/**'

jobs:
  migrate:
    runs-on: ubuntu-latest
    # Job-level env so both the link and the push step can authenticate.
    env:
      SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }}
      SUPABASE_DB_PASSWORD: ${{ secrets.SUPABASE_DB_PASSWORD }}
      SUPABASE_PROJECT_ID: ${{ secrets.SUPABASE_PROJECT_ID }}
    steps:
      - uses: actions/checkout@v4

      - name: Setup Supabase CLI
        uses: supabase/setup-cli@v1
        with:
          version: latest

      # A fresh checkout is not linked to any remote project, so `db push`
      # has no target until `supabase link` has run.
      - name: Link project
        run: supabase link --project-ref "$SUPABASE_PROJECT_ID"

      - name: Push migrations
        run: supabase db push
```

---

## Deployment Platform Setup

**REQUIRED**: When initializing a project, always create todos for deployment platform connection based on the stack.

### Platform Selection by Stack

| Stack | Default Platform | Action Required |
|-------|-----------------|-----------------|
| Next.js / Node.js | **Vercel** | Connect Git repo to Vercel |
| Python (FastAPI, Flask) | **Render** | Connect Git repo to Render, get API key |
| Static sites | **Vercel** or **Cloudflare Pages** | Connect Git repo |

### Vercel: Connect Git Repository

When Vercel is the deployment platform, create this todo:
```
TODO: Connect Git repository to Vercel for automatic deployments
```

Steps:
```bash
# Option 1: Via CLI
vercel link
vercel git connect

# Option 2: Via Dashboard (recommended for first setup)
# 1. Go to vercel.com/new
# 2. Import Git repository
# 3. Configure project settings
# 4. Deploy
```

After connecting:
- Push to `main` → Production deploy
- Push to other branches → Preview deploy
- PRs get deploy previews automatically

### Render: Connect Git Repository (Python)

When Render is the deployment platform for Python projects:

**Step 1: Ask user for Render API key**
```
Before proceeding, please provide your Render API key.
Get it from: https://dashboard.render.com/u/settings/api-keys

Store it securely - we'll add it to your environment.
```

**Step 2: Create todos**
```
TODO: Get Render API key from user
TODO: Connect Git repository to Render
TODO: Configure Render service (web service or background worker)
TODO: Set environment variables on Render
```

**Step 3: Connect via Dashboard (recommended)**
```bash
# 1. Go to dashboard.render.com/create
# 2. Select "Web Service" for APIs, "Background Worker" for async
# 3. Connect your GitHub/GitLab repository
# 4. Configure:
#    - Name: <project-name>
#    - Runtime: Python 3
#    - Build Command: pip install -r requirements.txt
#    - Start Command: uvicorn main:app --host 0.0.0.0 --port $PORT
```

**Step 4: Store API key for CI/CD**
```bash
# Add to GitHub secrets for CI/CD
gh secret set RENDER_API_KEY

# Or add to local env
echo "RENDER_API_KEY=<your-key>" >> .env
```

**Step 5: Configure render.yaml (optional - Infrastructure as Code)**
```yaml
# render.yaml
services:
  - type: web
    name: <project-name>-api
    runtime: python
    buildCommand: pip install -r requirements.txt
    startCommand: uvicorn main:app --host 0.0.0.0 --port $PORT
    envVars:
      - key: PYTHON_VERSION
        value: "3.11"
      - key: DATABASE_URL
        fromDatabase:
          name: <project-name>-db
          property: connectionString

databases:
  - name: <project-name>-db
    plan: free
```

### Deployment Checklist Template

Add to project todos when setting up deployment:

```markdown
## Deployment Setup
- [ ] Create Git repository (gh repo create)
- [ ] Choose deployment platform (Vercel/Render/other)
- [ ] Connect Git to deployment platform
- [ ] Configure environment variables
- [ ] Set up CI/CD workflow
- [ ] Verify preview deployments work
- [ ] Configure production domain
```

---

## Tooling Anti-Patterns

- ❌ Hardcoded secrets - use CLI env management or GitHub secrets
- ❌ Manual deployments - automate via CI/CD
- ❌ Skipping local Supabase - always develop locally first
- ❌ Direct production database changes - use migrations
- ❌ No branch protection - require PR reviews and CI checks
- ❌ Missing environment separation - keep dev/staging/prod separate


================================================
FILE: skills/pwa-development/SKILL.md
================================================
---
name: pwa-development
description: Progressive Web Apps - service workers, caching strategies, offline, Workbox
when-to-use: When building PWA features - service workers, caching, offline support
user-invocable: false
paths: ["**/sw.*", "**/service-worker.*", "**/workbox-config.*", "**/manifest.json"]
effort: medium
---

# PWA Development Skill


**Purpose:** Build Progressive Web Apps that work offline, install like native apps, and deliver fast, reliable experiences across all devices.

---

## Core PWA Requirements

```
┌─────────────────────────────────────────────────────────────────┐
│  THE THREE PILLARS OF PWA                                       │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  1. HTTPS                                                       │
│     Required for service workers and security.                  │
│     localhost allowed for development.                          │
│                                                                 │
│  2. SERVICE WORKER                                              │
│     JavaScript that runs in background.                         │
│     Enables offline, caching, push notifications.               │
│                                                                 │
│  3. WEB APP MANIFEST                                            │
│     JSON file describing app metadata.                          │
│     Enables installation and app-like experience.               │
├─────────────────────────────────────────────────────────────────┤
│  INSTALLABILITY CRITERIA (Chrome)                               │
│  ─────────────────────────────────────────────────────────────  │
│  • HTTPS (or localhost)                                         │
│  • Service worker with fetch handler                            │
│  • Web app manifest with: name, icons (192px + 512px),          │
│    start_url, display: standalone/fullscreen/minimal-ui         │
└─────────────────────────────────────────────────────────────────┘
```

---

## Web App Manifest

### Required Fields

```json
{
  "name": "My Progressive Web App",
  "short_name": "MyPWA",
  "description": "A description of what the app does",
  "start_url": "/",
  "display": "standalone",
  "background_color": "#ffffff",
  "theme_color": "#000000",
  "icons": [
    {
      "src": "/icons/icon-192.png",
      "sizes": "192x192",
      "type": "image/png"
    },
    {
      "src": "/icons/icon-512.png",
      "sizes": "512x512",
      "type": "image/png"
    },
    {
      "src": "/icons/icon-512-maskable.png",
      "sizes": "512x512",
      "type": "image/png",
      "purpose": "maskable"
    }
  ]
}
```

### Enhanced Manifest (Full Features)

```json
{
  "name": "My Progressive Web App",
  "short_name": "MyPWA",
  "description": "A full-featured PWA",
  "start_url": "/?source=pwa",
  "scope": "/",
  "display": "standalone",
  "orientation": "portrait-primary",
  "background_color": "#ffffff",
  "theme_color": "#3367D6",
  "dir": "ltr",
  "lang": "en",
  "categories": ["productivity", "utilities"],

  "icons": [
    { "src": "/icons/icon-72.png", "sizes": "72x72", "type": "image/png" },
    { "src": "/icons/icon-96.png", "sizes": "96x96", "type": "image/png" },
    { "src": "/icons/icon-128.png", "sizes": "128x128", "type": "image/png" },
    { "src": "/icons/icon-144.png", "sizes": "144x144", "type": "image/png" },
    { "src": "/icons/icon-152.png", "sizes": "152x152", "type": "image/png" },
    { "src": "/icons/icon-192.png", "sizes": "192x192", "type": "image/png" },
    { "src": "/icons/icon-384.png", "sizes": "384x384", "type": "image/png" },
    { "src": "/icons/icon-512.png", "sizes": "512x512", "type": "image/png" },
    { "src": "/icons/icon-maskable.png", "sizes": "512x512", "type": "image/png", "purpose": "maskable" }
  ],

  "screenshots": [
    {
      "src": "/screenshots/desktop.png",
      "sizes": "1280x720",
      "type": "image/png",
      "form_factor": "wide"
    },
    {
      "src": "/screenshots/mobile.png",
      "sizes": "750x1334",
      "type": "image/png",
      "form_factor": "narrow"
    }
  ],

  "shortcuts": [
    {
      "name": "New Item",
      "short_name": "New",
      "description": "Create a new item",
      "url": "/new?source=shortcut",
      "icons": [{ "src": "/icons/shortcut-new.png", "sizes": "192x192" }]
    }
  ],

  "share_target": {
    "action": "/share",
    "method": "POST",
    "enctype": "multipart/form-data",
    "params": {
      "title": "title",
      "text": "text",
      "url": "url",
      "files": [{ "name": "files", "accept": ["image/*"] }]
    }
  },

  "protocol_handlers": [
    {
      "protocol": "web+myapp",
      "url": "/handle?url=%s"
    }
  ],

  "file_handlers": [
    {
      "action": "/open-file",
      "accept": {
        "text/plain": [".txt"]
      }
    }
  ]
}
```

### Manifest Checklist

- [ ] `name` and `short_name` defined
- [ ] `start_url` set (use query param for analytics)
- [ ] `display` set to `standalone`, `fullscreen`, or `minimal-ui`
- [ ] Icons: 192x192 and 512x512 minimum
- [ ] Maskable icon included for Android adaptive icons
- [ ] `theme_color` matches app design
- [ ] `background_color` for splash screen
- [ ] Screenshots for richer install UI (optional)
- [ ] Shortcuts for quick actions (optional)

---

## Service Worker Patterns

### Basic Service Worker

```javascript
// sw.js
const CACHE_NAME = 'app-cache-v1';
const STATIC_ASSETS = [
  '/',
  '/index.html',
  '/styles/main.css',
  '/scripts/app.js',
  '/offline.html'
];

// Install: pre-cache the static assets, then skip the waiting phase.
self.addEventListener('install', (event) => {
  const precache = async () => {
    const cache = await caches.open(CACHE_NAME);
    await cache.addAll(STATIC_ASSETS);
    await self.skipWaiting();
  };
  event.waitUntil(precache());
});

// Activate: delete every cache except CACHE_NAME, then claim open clients.
self.addEventListener('activate', (event) => {
  const purgeStaleCaches = async () => {
    const names = await caches.keys();
    const stale = names.filter((name) => name !== CACHE_NAME);
    await Promise.all(stale.map((name) => caches.delete(name)));
    await self.clients.claim();
  };
  event.waitUntil(purgeStaleCaches());
});

// Fetch: answer from the cache, otherwise the network; if either step
// fails, answer with the cached offline page.
self.addEventListener('fetch', (event) => {
  const respond = async () => {
    try {
      const hit = await caches.match(event.request);
      return hit || (await fetch(event.request));
    } catch (err) {
      return caches.match('/offline.html');
    }
  };
  event.respondWith(respond());
});
```

### Registration

```javascript
// main.js
if ('serviceWorker' in navigator) {
  window.addEventListener('load', async () => {
    try {
      const registration = await navigator.serviceWorker.register('/sw.js', {
        scope: '/'
      });
      console.log('SW registered:', registration.scope);
    } catch (error) {
      console.error('SW registration failed:', error);
    }
  });
}
```

---

## Caching Strategies

### Strategy Selection Guide

| Strategy | Use Case | Description |
|----------|----------|-------------|
| **Cache First** | Static assets (CSS, JS, images) | Check cache, fall back to network |
| **Network First** | API responses, dynamic content | Try network, fall back to cache |
| **Stale While Revalidate** | Semi-static content (avatars, articles) | Serve cache immediately, update in background |
| **Network Only** | Non-cacheable requests (analytics) | Always use network |
| **Cache Only** | Offline-only assets | Only serve from cache |

### Cache First (Offline First)

```javascript
// Best for: Static assets that rarely change
// Serves images, styles and scripts from the cache when present; otherwise
// fetches them and stores a copy for next time.
self.addEventListener('fetch', (event) => {
  const { request } = event;
  const isStaticAsset =
    request.destination === 'image' ||
    request.destination === 'style' ||
    request.destination === 'script';

  // Not handled here: the browser performs its default fetch.
  if (!isStaticAsset) return;

  event.respondWith(
    caches.match(request).then((cached) => {
      if (cached) return cached;

      return fetch(request).then((response) => {
        // Only store usable responses. Caching a 404/500 would keep serving
        // the failure from the cache. Opaque (no-cors) responses report
        // ok === false, so they are allowed through explicitly.
        if (response.ok || response.type === 'opaque') {
          const copy = response.clone();
          // waitUntil keeps the worker alive until the write has finished.
          event.waitUntil(
            caches.open(CACHE_NAME).then((cache) => cache.put(request, copy))
          );
        }
        return response;
      });
    })
  );
});
```

### Network First (Fresh First)

```javascript
// Best for: API data, frequently updated content
// Tries the network first and keeps a copy of successful responses; when the
// network fails, answers from the cache.
self.addEventListener('fetch', (event) => {
  const { request } = event;

  // The Cache API only stores GET requests (cache.put rejects otherwise), so
  // writes such as POST/PUT/DELETE are left to the network untouched.
  if (request.method !== 'GET' || !request.url.includes('/api/')) return;

  event.respondWith(
    fetch(request)
      .then((response) => {
        // Do not overwrite a good cached copy with an error response.
        if (response.ok) {
          const copy = response.clone();
          // waitUntil keeps the worker alive until the write has finished.
          event.waitUntil(
            caches.open(CACHE_NAME).then((cache) => cache.put(request, copy))
          );
        }
        return response;
      })
      .catch(async () => {
        const cached = await caches.match(request);
        // respondWith must settle with a Response; report a network error
        // explicitly when nothing has been cached for this request.
        return cached || Response.error();
      })
  );
});
```

### Stale While Revalidate

```javascript
// Best for: Content that's okay to be slightly outdated
// Answers from the cache immediately when possible and refreshes the cached
// copy from the network in the background.
self.addEventListener('fetch', (event) => {
  const { request } = event;

  // The Cache API only stores GET requests, so other methods are not handled.
  if (request.method !== 'GET' || !request.url.includes('/articles/')) return;

  event.respondWith(
    caches.open(CACHE_NAME).then(async (cache) => {
      const cached = await cache.match(request);

      const refresh = fetch(request).then((response) => {
        // Keep the previous copy when the server answers with an error.
        if (response.ok) {
          event.waitUntil(cache.put(request, response.clone()));
        }
        return response;
      });

      if (cached) {
        // The refresh runs in the background. Swallow its failure (e.g. when
        // offline) so it does not surface as an unhandled rejection, and use
        // waitUntil so the worker stays alive until it settles.
        event.waitUntil(refresh.catch(() => undefined));
        return cached;
      }

      // Nothing cached yet: the network response (or its failure) is the answer.
      return refresh;
    })
  );
});
```

---

## Workbox (Recommended)

### Why Workbox?

- Battle-tested caching strategies
- Precaching with revision management
- Background sync for offline forms
- Automatic cache cleanup
- TypeScript support

### Installation

```bash
npm install workbox-webpack-plugin  # Webpack
# Package name matches the `vite-plugin-pwa` import used in vite.config.js.
npm install vite-plugin-pwa         # Vite
```

### Workbox with Vite

```javascript
// vite.config.js
import { VitePWA } from 'vite-plugin-pwa';

export default {
  plugins: [
    VitePWA({
      registerType: 'autoUpdate',
      includeAssets: ['favicon.ico', 'robots.txt', 'apple-touch-icon.png'],
      manifest: {
        name: 'My App',
        short_name: 'App',
        theme_color: '#ffffff',
        icons: [
          { src: 'pwa-192x192.png', sizes: '192x192', type: 'image/png' },
          { src: 'pwa-512x512.png', sizes: '512x512', type: 'image/png' }
        ]
      },
      workbox: {
        globPatterns: ['**/*.{js,css,html,ico,png,svg}'],
        runtimeCaching: [
          {
            urlPattern: /^https:\/\/api\.example\.com\/.*/i,
            handler: 'NetworkFirst',
            options: {
              cacheName: 'api-cache',
              expiration: {
                maxEntries: 100,
                maxAgeSeconds: 60 * 60 * 24 // 24 hours
              }
            }
          },
          {
            urlPattern: /\.(?:png|jpg|jpeg|svg|gif)$/,
            handler: 'CacheFirst',
            options: {
              cacheName: 'image-cache',
              expiration: {
                maxEntries: 50,
                maxAgeSeconds: 60 * 60 * 24 * 30 // 30 days
              }
            }
          }
        ]
      }
    })
  ]
};
```

### Workbox Manual Service Worker

```javascript
// sw.js
import { precacheAndRoute } from 'workbox-precaching';
import { registerRoute } from 'workbox-routing';
import { CacheFirst, NetworkFirst, StaleWhileRevalidate } from 'workbox-strategies';
import { ExpirationPlugin } from 'workbox-expiration';
import { CacheableResponsePlugin } from 'workbox-cacheable-response';

// Precache static assets (generated by build tool)
precacheAndRoute(self.__WB_MANIFEST);

// Cache images
registerRoute(
  ({ request }) => request.destination === 'image',
  new CacheFirst({
    cacheName: 'images',
    plugins: [
      new CacheableResponsePlugin({ statuses: [0, 200] }),
      new ExpirationPlugin({
        maxEntries: 60,
        maxAgeSeconds: 30 * 24 * 60 * 60 // 30 days
      })
    ]
  })
);

// Cache API responses
registerRoute(
  ({ url }) => url.pathname.startsWith('/api/'),
  new NetworkFirst({
    cacheName: 'api-responses',
    plugins: [
      new CacheableResponsePlugin({ statuses: [0, 200] }),
      new ExpirationPlugin({
        maxEntries: 100,
        maxAgeSeconds: 24 * 60 * 60 // 24 hours
      })
    ]
  })
);

// Cache page navigations
registerRoute(
  ({ request }) => request.mode === 'navigate',
  new NetworkFirst({
    cacheName: 'pages',
    plugins: [
      new CacheableResponsePlugin({ statuses: [0, 200] })
    ]
  })
);
```

---

## Offline Experience

### Offline Page

```html
<!-- offline.html -->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Offline - App Name</title>
  <style>
    body {
      font-family: system-ui, sans-serif;
      display: flex;
      align-items: center;
      justify-content: center;
      min-height: 100vh;
      margin: 0;
      background: #f5f5f5;
    }
    .offline-content {
      text-align: center;
      padding: 2rem;
    }
    .offline-icon { font-size: 4rem; }
    h1 { color: #333; }
    p { color: #666; }
    button {
      background: #3367D6;
      color: white;
      border: none;
      padding: 0.75rem 1.5rem;
      border-radius: 4px;
      cursor: pointer;
      font-size: 1rem;
    }
  </style>
</head>
<body>
  <div class="offline-content">
    <div class="offline-icon">📡</div>
    <h1>You're offline</h1>
    <p>Check your connection and try again.</p>
    <button onclick="location.reload()">Retry</button>
  </div>
</body>
</html>
```

### Offline Detection

```javascript
// Online/offline status handling
function updateOnlineStatus() {
  const status = navigator.onLine ? 'online' : 'offline';
  document.body.dataset.connectionStatus = status;

  if (!navigator.onLine) {
    showNotification('You are offline. Some features may be unavailable.');
  }
}

window.addEventListener('online', updateOnlineStatus);
window.addEventListener('offline', updateOnlineStatus);
updateOnlineStatus();
```

### Background Sync (Queue Offline Actions)

```javascript
// sw.js with Workbox
import { BackgroundSyncPlugin } from 'workbox-background-sync';
import { registerRoute } from 'workbox-routing';
import { NetworkOnly } from 'workbox-strategies';

// 'formQueue' is the name passed to the plugin.
// NOTE(review): 24 * 60 = 1440, which is 24 hours only if the option is
// expressed in minutes — confirm the unit against the Workbox docs.
const bgSyncPlugin = new BackgroundSyncPlugin('formQueue', {
  maxRetentionTime: 24 * 60 // Retry for 24 hours
});

// Route POST requests for /api/submit through NetworkOnly with the plugin
// attached; other methods and paths are not matched by this route.
registerRoute(
  ({ url }) => url.pathname === '/api/submit',
  new NetworkOnly({
    plugins: [bgSyncPlugin]
  }),
  'POST'
);
```

```javascript
// main.js - Queue form submission
/**
 * POST `data` as JSON to /api/submit.
 *
 * Returns the promise from response.json() once the request resolves;
 * response.ok is not checked, so the body is parsed whatever the HTTP status.
 * When the request throws, a notification is shown and the function resolves
 * to undefined (the catch block returns nothing).
 */
async function submitForm(data) {
  try {
    const response = await fetch('/api/submit', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(data)
    });
    // Returned without `await`: a rejection from json() goes to the caller
    // rather than to the catch block below.
    return response.json();
  } catch (error) {
    // Will be retried by background sync when online
    showNotification('Saved offline. Will sync when connected.');
  }
}
```

---

## App-Like Features

### Install Prompt

```javascript
let deferredPrompt;

window.addEventListener('beforeinstallprompt', (e) => {
  e.preventDefault();
  deferredPrompt = e;
  showInstallButton();
});

async function installApp() {
  if (!deferredPrompt) return;

  deferredPrompt.prompt();
  const { outcome } = await deferredPrompt.userChoice;

  console.log(`User ${outcome === 'accepted' ? 'accepted' : 'dismissed'} install`);
  deferredPrompt = null;
  hideInstallButton();
}

window.addEventListener('appinstalled', () => {
  console.log('App installed');
  deferredPrompt = null;
});
```

### Detecting Standalone Mode

```javascript
// Check if running as installed PWA
function isInstalledPWA() {
  return window.matchMedia('(display-mode: standalone)').matches ||
         window.navigator.standalone === true; // iOS
}

// Listen for display mode changes
window.matchMedia('(display-mode: standalone)')
  .addEventListener('change', (e) => {
    console.log('Display mode:', e.matches ? 'standalone' : 'browser');
  });
```

### Push Notifications

```javascript
// Request permission
async function requestNotificationPermission() {
  const permission = await Notification.requestPermission();
  if (permission === 'granted') {
    await subscribeToPush();
  }
  return permission;
}

// Subscribe to push
async function subscribeToPush() {
  const registration = await navigator.serviceWorker.ready;
  const subscription = await registration.pushManager.subscribe({
    userVisibleOnly: true,
    applicationServerKey: urlBase64ToUint8Array(VAPID_PUBLIC_KEY)
  });

  // Send subscription to server
  await fetch('/api/push/subscribe', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(subscription)
  });
}

// sw.js - Handle push events
self.addEventListener('push', (event) => {
  // event.data is null when the push message carries no payload
  const data = event.data ? event.data.json() : {};
  event.waitUntil(
    // Fall back to a generic title so a payload without one still shows
    self.registration.showNotification(data.title ?? 'New notification', {
      body: data.body,
      icon: '/icons/icon-192.png',
      badge: '/icons/badge-72.png',
      data: { url: data.url }
    })
  );
});

// Handle notification click
self.addEventListener('notificationclick', (event) => {
  event.notification.close();
  // Open the app root when the notification was shown without a target URL
  const targetUrl = event.notification.data?.url ?? '/';
  event.waitUntil(
    clients.openWindow(targetUrl)
  );
});
```

### Share Target

```javascript
// sw.js - Handle share target
self.addEventListener('fetch', (event) => {
  // Match on the path only so a query string does not defeat the check
  const { pathname } = new URL(event.request.url);
  if (event.request.method !== 'POST' || !pathname.endsWith('/share')) {
    return;
  }

  event.respondWith((async () => {
    const formData = await event.request.formData();
    // formData.get() returns null for an absent field; default to '' so the
    // redirect URL never contains the literal string "null"
    const title = formData.get('title') ?? '';
    const text = formData.get('text') ?? '';
    const url = formData.get('url') ?? '';

    // Store or process shared content
    // Redirect to app with shared data
    // 303 See Other makes the browser follow the redirect with GET
    return Response.redirect(`/?shared=true&title=${encodeURIComponent(title)}`, 303);
  })());
});
```

---

## Performance Optimization

### Critical Rendering Path

```html
<!-- Inline critical CSS -->
<style>
  /* Critical above-the-fold styles */
</style>

<!-- Preload important resources -->
<link rel="preload" href="/fonts/main.woff2" as="font" type="font/woff2" crossorigin>
<link rel="preload" href="/scripts/app.js" as="script">

<!-- Defer non-critical CSS -->
<link rel="stylesheet" href="/styles/main.css" media="print" onload="this.media='all'">
<noscript><link rel="stylesheet" href="/styles/main.css"></noscript>
```

### Image Optimization

```html
<!-- Responsive images -->
<img
  src="/images/hero-800.webp"
  srcset="
    /images/hero-400.webp 400w,
    /images/hero-800.webp 800w,
    /images/hero-1200.webp 1200w
  "
  sizes="(max-width: 600px) 400px, (max-width: 1200px) 800px, 1200px"
  alt="Hero image"
  loading="lazy"
  decoding="async"
>

<!-- Modern formats with fallback -->
<picture>
  <source srcset="/images/hero.avif" type="image/avif">
  <source srcset="/images/hero.webp" type="image/webp">
  <img src="/images/hero.jpg" alt="Hero image" loading="lazy">
</picture>
```

### Code Splitting

```javascript
// Route table: each entry lazily imports its page module on first use
const routes = {
  '/': () => import('./pages/Home.js'),
  '/about': () => import('./pages/About.js'),
  '/settings': () => import('./pages/Settings.js')
};

// Resolve a path to its page module's default export (undefined if unknown)
async function loadPage(path) {
  const loader = routes[path];
  if (!loader) {
    return undefined;
  }

  const { default: page } = await loader();
  return page;
}
```

---

## Testing PWA

### Lighthouse Audit

```bash
# Run Lighthouse from CLI
npx lighthouse https://your-app.com --view

# Key metrics to check:
# - PWA badge (installable, offline-ready)
# - Performance score
# - Best practices
# - Accessibility
```

### Manual Testing Checklist

- [ ] **Installability**
  - [ ] Install prompt appears on desktop Chrome
  - [ ] Can be added to home screen on mobile
  - [ ] App opens in standalone mode after install

- [ ] **Offline Support**
  - [ ] App loads when offline (airplane mode)
  - [ ] Cached pages display correctly
  - [ ] Offline fallback page shows for uncached routes
  - [ ] Background sync works when coming back online

- [ ] **Performance**
  - [ ] First Contentful Paint < 1.8s
  - [ ] Largest Contentful Paint < 2.5s
  - [ ] Time to Interactive < 3.8s
  - [ ] Cumulative Layout Shift < 0.1

- [ ] **Service Worker**
  - [ ] SW registers successfully
  - [ ] Static assets cached on install
  - [ ] SW updates correctly (new version)
  - [ ] No stale cache issues

- [ ] **Manifest**
  - [ ] All required fields present
  - [ ] Icons display correctly
  - [ ] Theme color applied
  - [ ] Splash screen shows on launch

### Testing Service Worker Updates

```javascript
if ('serviceWorker' in navigator) {
  // Force update check
  navigator.serviceWorker.ready.then((registration) => {
    registration.update();
  });

  // Listen for updates (kept inside the feature check so browsers without
  // service worker support do not throw on navigator.serviceWorker)
  let reloading = false;
  navigator.serviceWorker.addEventListener('controllerchange', () => {
    // New service worker activated
    if (reloading) return; // avoid reloading more than once
    reloading = true;
    window.location.reload();
  });
}
```

---

## Project Structure

```
project/
├── public/
│   ├── manifest.json           # Web app manifest
│   ├── sw.js                   # Service worker (if not bundled)
│   ├── offline.html            # Offline fallback page
│   ├── robots.txt
│   └── icons/
│       ├── icon-72.png
│       ├── icon-96.png
│       ├── icon-128.png
│       ├── icon-144.png
│       ├── icon-152.png
│       ├── icon-192.png
│       ├── icon-384.png
│       ├── icon-512.png
│       ├── icon-maskable.png   # For adaptive icons
│       ├── apple-touch-icon.png
│       └── favicon.ico
├── src/
│   ├── sw.js                   # Service worker source (if bundled)
│   ├── pwa/
│   │   ├── install.js          # Install prompt handling
│   │   ├── offline.js          # Offline detection
│   │   └── push.js             # Push notification handling
│   └── ...
└── tests/
    └── pwa/
        ├── manifest.test.js
        ├── sw.test.js
        └── offline.test.js
```

---

## Common Mistakes

| Mistake | Fix |
|---------|-----|
| Missing maskable icon | Add icon with `"purpose": "maskable"` |
| No offline fallback | Create `offline.html` and cache it |
| Cache never expires | Use `ExpirationPlugin` with Workbox |
| SW caches too aggressively | Use appropriate strategies per resource type |
| No update mechanism | Implement `skipWaiting()` + reload prompt |
| Broken install prompt | Ensure manifest meets all criteria |
| No HTTPS in production | Configure SSL certificate |
| Large cache size | Set `maxEntries` and `maxAgeSeconds` |
| Stale API responses | Use `NetworkFirst` for dynamic data |
| Missing start_url tracking | Add query param: `/?source=pwa` |

---

## PWA Development Checklist

### Before Launch

- [ ] HTTPS configured (production)
- [ ] Manifest complete with all required fields
- [ ] Icons in all required sizes (192, 512, maskable)
- [ ] Service worker registered and working
- [ ] Offline page created and cached
- [ ] Cache strategies defined for all resource types
- [ ] Install prompt handling implemented
- [ ] Lighthouse PWA audit passes

### After Launch

- [ ] Monitor cache sizes
- [ ] Test SW updates don't break app
- [ ] Track PWA installs via analytics
- [ ] Test on multiple devices/browsers
- [ ] Monitor Core Web Vitals
- [ ] Set up push notification flow (if needed)

---

## Framework-Specific Guides

### Next.js

```bash
npm install next-pwa
```

```javascript
// next.config.js
const withPWA = require('next-pwa')({
  dest: 'public',
  disable: process.env.NODE_ENV === 'development'
});

module.exports = withPWA({
  // Your Next.js config
});
```

### Create React App

```bash
# CRA 4+ ships PWA support as an opt-in template (no longer enabled by default)
npx create-react-app my-pwa --template cra-template-pwa
```

### Vite (Any Framework)

```bash
npm install vite-plugin-pwa -D
```

See the "Workbox with Vite" section above for configuration.

---

## Quick Reference

### Caching Strategy Cheat Sheet

```
Static Assets (CSS, JS, images)     → Cache First
API Responses                        → Network First
User-generated content              → Stale While Revalidate
Analytics, non-cacheable            → Network Only
Offline-only assets                 → Cache Only
```

### Manifest Minimum Requirements

```json
{
  "name": "App Name",
  "short_name": "App",
  "start_url": "/",
  "display": "standalone",
  "icons": [
    { "src": "/icon-192.png", "sizes": "192x192", "type": "image/png" },
    { "src": "/icon-512.png", "sizes": "512x512", "type": "image/png" }
  ]
}
```

### Service Worker Lifecycle

```
1. Register → 2. Install → 3. Activate → 4. Fetch
     ↓              ↓            ↓           ↓
  Load app    Cache assets  Clean old   Serve requests
                            caches      from cache/network
```


================================================
FILE: skills/python/SKILL.md
================================================
---
name: python
description: Python development with ruff, mypy, pytest - TDD and type safety
when-to-use: When working on Python files
user-invocable: false
paths: ["**/*.py", "pyproject.toml", "setup.py", "requirements*.txt"]
effort: medium
---

# Python Skill


---

## Type Hints

- Use type hints on all function signatures
- Use `typing` module for complex types
- Run `mypy --strict` in CI

```python
def process_user(user_id: int, options: dict[str, Any] | None = None) -> User:
    ...
```

---

## Project Structure

```
project/
├── src/
│   └── package_name/
│       ├── __init__.py
│       ├── core/           # Pure business logic
│       │   ├── __init__.py
│       │   ├── models.py   # Pydantic models / dataclasses
│       │   └── services.py # Pure functions
│       ├── infra/          # Side effects
│       │   ├── __init__.py
│       │   ├── api.py      # FastAPI routes
│       │   └── db.py       # Database operations
│       └── utils/          # Shared utilities
├── tests/
│   ├── unit/
│   └── integration/
├── pyproject.toml
└── CLAUDE.md
```

---

## Tooling (Required)

```toml
# pyproject.toml
[tool.ruff]
line-length = 100

# Rule selection belongs under the `lint` table; top-level `select` is deprecated
[tool.ruff.lint]
select = ["E", "F", "I", "N", "W", "UP"]

[tool.mypy]
strict = true

[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "--cov=src --cov-report=term-missing --cov-fail-under=80"
```

---

## Testing with Pytest

```python
# tests/unit/test_services.py
import pytest
from package_name.core.services import calculate_total

class TestCalculateTotal:
    def test_returns_sum_of_items(self):
        # Arrange
        items = [{"price": 10}, {"price": 20}]
        
        # Act
        result = calculate_total(items)
        
        # Assert
        assert result == 30

    def test_returns_zero_for_empty_list(self):
        assert calculate_total([]) == 0

    def test_raises_on_invalid_item(self):
        with pytest.raises(ValueError):
            calculate_total([{"invalid": "item"}])
```

---

## GitHub Actions

```yaml
name: Python Quality Gate

on: [push, pull_request]

jobs:
  quality:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          
      - name: Install dependencies
        run: |
          pip install -e ".[dev]"
          
      - name: Lint (Ruff)
        run: ruff check .
        
      - name: Format Check (Ruff)
        run: ruff format --check .
        
      - name: Type Check (mypy)
        run: mypy src/
        
      - name: Test with Coverage
        run: pytest
```

---

## Pre-Commit Hooks

```yaml
# .pre-commit-config.yaml
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.8.0
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.13.0
    hooks:
      - id: mypy
        additional_dependencies: [pydantic]
        args: [--strict]

  - repo: local
    hooks:
      - id: pytest
        name: pytest
        entry: pytest tests/unit -x --tb=short
        language: system
        pass_filenames: false
        always_run: true
```

Install and setup:
```bash
pip install pre-commit
pre-commit install
```

---

## Patterns

### Pydantic for Data Validation
```python
from pydantic import BaseModel, Field

class CreateUserRequest(BaseModel):
    email: str = Field(..., min_length=5)
    name: str = Field(..., max_length=100)
```

### Dependency Injection
```python
# Don't import dependencies directly in business logic
# Pass them in

# Bad
from .db import database
def get_user(user_id: int) -> User:
    return database.fetch(user_id)

# Good
def get_user(user_id: int, db: Database) -> User:
    return db.fetch(user_id)
```

### Result Pattern (No Exceptions in Core)
```python
from dataclasses import dataclass

@dataclass
class Result[T]:
    """Container pairing an optional value with an optional error message.

    A result counts as successful when ``error`` is ``None``; see ``is_ok``.
    """

    # Payload of the operation, or None when there is none
    value: T | None
    # Error description, or None when no error was recorded
    error: str | None
    
    @property
    def is_ok(self) -> bool:
        """Return True when ``error`` is None."""
        return self.error is None
```

---

## Python Anti-Patterns

- ❌ `from module import *`
- ❌ Mutable default arguments
- ❌ Bare `except:` clauses
- ❌ Using `type: ignore` without explanation
- ❌ Global variables for state
- ❌ Classes when functions suffice


================================================
FILE: skills/react-native/SKILL.md
================================================
---
name: react-native
description: React Native mobile patterns, platform-specific code
when-to-use: When working on React Native mobile app code
user-invocable: false
paths: ["**/*.tsx", "**/*.jsx", "ios/**", "android/**", "app.json"]
effort: medium
---

# React Native Skill


---

## Project Structure

```
project/
├── src/
│   ├── core/                   # Pure business logic (no React)
│   │   ├── types.ts
│   │   └── services/
│   ├── components/             # Reusable UI components
│   │   ├── Button/
│   │   │   ├── Button.tsx
│   │   │   ├── Button.test.tsx
│   │   │   └── index.ts
│   │   └── index.ts            # Barrel export
│   ├── screens/                # Screen components
│   │   ├── Home/
│   │   │   ├── HomeScreen.tsx
│   │   │   ├── useHome.ts      # Screen-specific hook
│   │   │   └── index.ts
│   │   └── index.ts
│   ├── navigation/             # Navigation configuration
│   ├── hooks/                  # Shared custom hooks
│   ├── store/                  # State management
│   └── utils/                  # Utilities
├── __tests__/
├── android/
├── ios/
└── CLAUDE.md
```

---

## Component Patterns

### Functional Components Only
```typescript
// Good - simple, testable
interface ButtonProps {
  label: string;
  onPress: () => void;
  disabled?: boolean;
}

export function Button({ label, onPress, disabled = false }: ButtonProps): JSX.Element {
  return (
    <Pressable onPress={onPress} disabled={disabled}>
      <Text>{label}</Text>
    </Pressable>
  );
}
```

### Extract Logic to Hooks
```typescript
// useHome.ts - all logic here
/**
 * Screen hook holding the item list and its loading flag.
 *
 * @returns `items`, `loading`, and `refresh`, which re-fetches the items.
 *   `refresh` still rejects when `fetchItems()` rejects.
 */
export function useHome() {
  const [items, setItems] = useState<Item[]>([]);
  const [loading, setLoading] = useState(false);

  const refresh = useCallback(async () => {
    setLoading(true);
    try {
      const data = await fetchItems();
      setItems(data);
    } finally {
      // Clear the flag even when fetchItems() rejects, otherwise the UI
      // would stay in the loading state forever
      setLoading(false);
    }
  }, []);

  return { items, loading, refresh };
}

// HomeScreen.tsx - pure presentation
export function HomeScreen(): JSX.Element {
  const { items, loading, refresh } = useHome();
  
  return (
    <ItemList items={items} loading={loading} onRefresh={refresh} />
  );
}
```

### Props Interface Always Explicit
```typescript
// Always define props interface, even if simple
interface ItemCardProps {
  item: Item;
  onPress: (id: string) => void;
}

export function ItemCard({ item, onPress }: ItemCardProps): JSX.Element {
  ...
}
```

---

## State Management

### Local State First
```typescript
// Start with useState, escalate only when needed
const [value, setValue] = useState('');
```

### Zustand for Global State (if needed)
```typescript
// store/useAppStore.ts
import { create } from 'zustand';

interface AppState {
  user: User | null;
  setUser: (user: User | null) => void;
}

export const useAppStore = create<AppState>((set) => ({
  user: null,
  setUser: (user) => set({ user }),
}));
```

### React Query for Server State
```typescript
// hooks/useItems.ts
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';

export function useItems() {
  return useQuery({
    queryKey: ['items'],
    queryFn: fetchItems,
  });
}

export function useCreateItem() {
  const queryClient = useQueryClient();
  
  return useMutation({
    mutationFn: createItem,
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['items'] });
    },
  });
}
```

---

## Testing

### Component Testing with React Native Testing Library
```typescript
import { render, fireEvent } from '@testing-library/react-native';
import { Button } from './Button';

describe('Button', () => {
  it('calls onPress when pressed', () => {
    const onPress = jest.fn();
    const { getByText } = render(<Button label="Click me" onPress={onPress} />);
    
    fireEvent.press(getByText('Click me'));
    
    expect(onPress).toHaveBeenCalledTimes(1);
  });

  it('does not call onPress when disabled', () => {
    const onPress = jest.fn();
    const { getByText } = render(<Button label="Click me" onPress={onPress} disabled />);
    
    fireEvent.press(getByText('Click me'));
    
    expect(onPress).not.toHaveBeenCalled();
  });
});
```

### Hook Testing
```typescript
// renderHook/act ship with React Native Testing Library itself; the separate
// @testing-library/react-hooks package is deprecated
import { renderHook, act } from '@testing-library/react-native';
import { useCounter } from './useCounter';

describe('useCounter', () => {
  it('increments counter', () => {
    const { result } = renderHook(() => useCounter());

    act(() => {
      result.current.increment();
    });

    expect(result.current.count).toBe(1);
  });
});
```

---

## Platform-Specific Code

### Use Platform.select Sparingly
```typescript
// StyleSheet must be imported alongside Platform: it is used below
import { Platform, StyleSheet } from 'react-native';

const styles = StyleSheet.create({
  // Shadows are configured differently per platform
  shadow: Platform.select({
    ios: {
      shadowColor: '#000',
      shadowOffset: { width: 0, height: 2 },
      shadowOpacity: 0.1,
    },
    android: {
      elevation: 2,
    },
  }),
});
```

### Separate Files for Complex Differences
```
Component/
├── Component.tsx          # Shared logic
├── Component.ios.tsx      # iOS-specific
├── Component.android.tsx  # Android-specific
└── index.ts
```

---

## React Native Anti-Patterns

- ❌ Inline styles - use StyleSheet.create
- ❌ Logic in render - extract to hooks
- ❌ Deep component nesting - flatten hierarchy
- ❌ Anonymous functions in props - use useCallback
- ❌ Index as key in lists - use stable IDs
- ❌ Direct state mutation - always use setter
- ❌ Mixing business logic with UI - keep core/ pure
- ❌ Ignoring TypeScript errors - fix them
- ❌ Large components - split into smaller pieces


================================================
FILE: skills/react-web/SKILL.md
================================================
---
name: react-web
description: React web development with hooks, React Query, Zustand
when-to-use: When working on React web components or pages
user-invocable: false
paths: ["**/*.tsx", "**/*.jsx", "src/components/**", "src/pages/**", "src/app/**"]
effort: medium
---

# React Web Skill


---

## Test-First Development (MANDATORY)

**CRITICAL: Tests MUST be written BEFORE implementation code. This is non-negotiable for frontend components.**

### The TFD Workflow

```
1. Write test file first → Defines expected behavior
2. Run test (it fails) → Confirms test is valid
3. Write minimal code → Just enough to pass
4. Run test (it passes) → Validates implementation
5. Refactor if needed → Tests catch regressions
```

### Component Development Order

```bash
# CORRECT ORDER - Test first
1. Create Button.test.tsx    # Write tests for expected behavior
2. Run tests (they fail)     # npm test -- Button
3. Create Button.tsx         # Implement to pass tests
4. Run tests (they pass)     # Verify implementation
5. Create Button.module.css  # Style after logic works

# WRONG ORDER - Never do this
1. Create Button.tsx         # ❌ No tests exist yet
2. Create Button.module.css  # ❌ Still no tests
3. "I'll add tests later"    # ❌ Tests never get written
```

### Test File Structure (Create First)

```typescript
// Button.test.tsx - CREATE THIS FIRST
import { render, screen, fireEvent } from '@testing-library/react';
import { Button } from './Button';

describe('Button', () => {
  // Define ALL expected behaviors upfront
  describe('rendering', () => {
    it('renders with label', () => {
      render(<Button label="Click me" onClick={() => {}} />);
      expect(screen.getByRole('button', { name: 'Click me' })).toBeInTheDocument();
    });

    it('applies variant class', () => {
      render(<Button label="Click" onClick={() => {}} variant="secondary" />);
      expect(screen.getByRole('button')).toHaveClass('secondary');
    });
  });

  describe('interactions', () => {
    it('calls onClick when clicked', () => {
      const onClick = vi.fn();
      render(<Button label="Click me" onClick={onClick} />);
      fireEvent.click(screen.getByRole('button'));
      expect(onClick).toHaveBeenCalledTimes(1);
    });

    it('does not call onClick when disabled', () => {
      const onClick = vi.fn();
      render(<Button label="Click me" onClick={onClick} disabled />);
      fireEvent.click(screen.getByRole('button'));
      expect(onClick).not.toHaveBeenCalled();
    });
  });

  describe('accessibility', () => {
    it('has correct aria attributes when disabled', () => {
      render(<Button label="Click" onClick={() => {}} disabled />);
      expect(screen.getByRole('button')).toHaveAttribute('aria-disabled', 'true');
    });
  });
});
```

### Hook Test First Pattern

```typescript
// useCounter.test.ts - CREATE THIS FIRST
import { renderHook, act } from '@testing-library/react';
import { useCounter } from './useCounter';

describe('useCounter', () => {
  it('starts at initial value', () => {
    const { result } = renderHook(() => useCounter(5));
    expect(result.current.count).toBe(5);
  });

  it('increments', () => {
    const { result } = renderHook(() => useCounter());
    act(() => result.current.increment());
    expect(result.current.count).toBe(1);
  });

  it('decrements', () => {
    const { result } = renderHook(() => useCounter(5));
    act(() => result.current.decrement());
    expect(result.current.count).toBe(4);
  });

  it('resets to initial value', () => {
    const { result } = renderHook(() => useCounter(10));
    act(() => result.current.increment());
    act(() => result.current.reset());
    expect(result.current.count).toBe(10);
  });
});
```

### Enforcement Checklist

Before writing ANY component/hook implementation:

- [ ] Test file exists: `Component.test.tsx`
- [ ] All expected behaviors have test cases
- [ ] Tests run and FAIL (proves tests are valid)
- [ ] Only THEN create implementation file

**If an implementation is requested before its test file exists, Claude MUST stop and respond:**
```
⚠️ TEST-FIRST VIOLATION

Cannot create [Component].tsx - no test file exists.

Creating [Component].test.tsx first with tests for:
- Rendering with required props
- User interactions
- Edge cases
- Accessibility
```

---

## Project Structure

```
project/
├── src/
│   ├── core/                   # Pure business logic (no React)
│   │   ├── types.ts
│   │   └── services/
│   ├── components/             # Reusable UI components
│   │   ├── Button/
│   │   │   ├── Button.tsx
│   │   │   ├── Button.test.tsx
│   │   │   ├── Button.module.css  # or .styles.ts
│   │   │   └── index.ts
│   │   └── index.ts            # Barrel export
│   ├── pages/                  # Route-level components
│   │   ├── Home/
│   │   │   ├── HomePage.tsx
│   │   │   ├── useHome.ts      # Page-specific hook
│   │   │   └── index.ts
│   │   └── index.ts
│   ├── hooks/                  # Shared custom hooks
│   ├── store/                  # State management
│   ├── api/                    # API client and queries
│   ├── utils/                  # Utilities
│   ├── App.tsx
│   └── main.tsx
├── tests/
│   ├── unit/
│   └── e2e/
├── public/
├── package.json
├── tsconfig.json
├── vite.config.ts              # or next.config.js
└── CLAUDE.md
```

---

## Component Patterns

### Functional Components Only
```typescript
// Good - simple, testable
interface ButtonProps {
  label: string;
  onClick: () => void;
  disabled?: boolean;
  variant?: 'primary' | 'secondary';
}

/**
 * Basic button component.
 *
 * @param label - Text rendered inside the button.
 * @param onClick - Click handler passed to the underlying <button>.
 * @param disabled - Disables the button; defaults to false.
 * @param variant - Key into the CSS module used as the class name; defaults to 'primary'.
 */
export function Button({
  label,
  onClick,
  disabled = false,
  variant = 'primary'
}: ButtonProps): JSX.Element {
  // aria-disabled mirrors `disabled` so the state is also exposed as an ARIA
  // attribute (the accessibility test asserts aria-disabled="true")
  return (
    <button
      className={styles[variant]}
      onClick={onClick}
      disabled={disabled}
      aria-disabled={disabled}
    >
      {label}
    </button>
  );
}
```

### Extract Logic to Hooks
```typescript
// useHome.ts - all logic here
/**
 * Page hook that loads the item list on mount and exposes a manual refresh.
 *
 * @returns `items`, `loading`, and `refresh`, which re-fetches the items.
 *   `refresh` still rejects when `fetchItems()` rejects.
 */
export function useHome() {
  const [items, setItems] = useState<Item[]>([]);
  const [loading, setLoading] = useState(false);

  const refresh = useCallback(async () => {
    setLoading(true);
    try {
      const data = await fetchItems();
      setItems(data);
    } finally {
      // Clear the flag even when fetchItems() rejects, otherwise the page
      // would show the spinner forever
      setLoading(false);
    }
  }, []);

  // Initial load; `refresh` is stable because its dependency list is empty
  useEffect(() => {
    refresh();
  }, [refresh]);

  return { items, loading, refresh };
}

// HomePage.tsx - pure presentation
export function HomePage(): JSX.Element {
  const { items, loading, refresh } = useHome();

  if (loading) return <Spinner />;

  return <ItemList items={items} onRefresh={refresh} />;
}
```

### Props Interface Always Explicit
```typescript
// Always define props interface, even if simple
interface ItemCardProps {
  item: Item;
  onClick: (id: string) => void;
}

export function ItemCard({ item, onClick }: ItemCardProps): JSX.Element {
  return (
    <div onClick={() => onClick(item.id)}>
      <h3>{item.title}</h3>
    </div>
  );
}
```

---

## State Management

### Local State First
```typescript
// Start with useState, escalate only when needed
const [value, setValue] = useState('');
```

### Zustand for Global State (if needed)
```typescript
// store/useAppStore.ts
import { create } from 'zustand';

interface AppState {
  user: User | null;
  theme: 'light' | 'dark';
  setUser: (user: User | null) => void;
  toggleTheme: () => void;
}

export const useAppStore = create<AppState>((set) => ({
  user: null,
  theme: 'light',
  setUser: (user) => set({ user }),
  toggleTheme: () => set((state) => ({
    theme: state.theme === 'light' ? 'dark' : 'light'
  })),
}));
```

### React Query for Server State
```typescript
// api/queries/useItems.ts
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { itemsApi } from '../client';

export function useItems() {
  return useQuery({
    queryKey: ['items'],
    queryFn: itemsApi.getAll,
    staleTime: 5 * 60 * 1000, // 5 minutes
  });
}

export function useCreateItem() {
  const queryClient = useQueryClient();

  return useMutation({
    mutationFn: itemsApi.create,
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['items'] });
    },
  });
}
```

---

## Routing

### React Router (Vite/CRA)
```typescript
// App.tsx
import { BrowserRouter, Routes, Route } from 'react-router-dom';

export function App(): JSX.Element {
  return (
    <BrowserRouter>
      <Routes>
        <Route path="/" element={<HomePage />} />
        <Route path="/items/:id" element={<ItemPage />} />
        <Route path="*" element={<NotFoundPage />} />
      </Routes>
    </BrowserRouter>
  );
}
```

### Protected Routes
```typescript
interface ProtectedRouteProps {
  children: JSX.Element;
}

function ProtectedRoute({ children }: ProtectedRouteProps): JSX.Element {
  const { user } = useAppStore();
  const location = useLocation();

  if (!user) {
    return <Navigate to="/login" state={{ from: location }} replace />;
  }

  return children;
}
```

---

## Styling

### CSS Modules (Preferred)
```typescript
// Button.module.css
.primary {
  background: var(--color-primary);
  color: white;
}

.secondary {
  background: transparent;
  border: 1px solid var(--color-primary);
}

// Button.tsx
import styles from './Button.module.css';

<button className={styles.primary}>Click</button>
```

### Tailwind (Alternative)
```typescript
// Use consistent patterns, extract repeated combinations
const buttonVariants = {
  primary: 'bg-blue-500 text-white hover:bg-blue-600',
  secondary: 'bg-transparent border border-blue-500 text-blue-500',
} as const;

<button className={buttonVariants[variant]}>{label}</button>
```

---

## Forms

### React Hook Form + Zod
```typescript
import { useForm } from 'react-hook-form';
import { zodResolver } from '@hookform/resolvers/zod';
import { z } from 'zod';

const schema = z.object({
  email: z.string().email('Invalid email'),
  password: z.string().min(8, 'Password must be at least 8 characters'),
});

type FormData = z.infer<typeof schema>;

export function LoginForm(): JSX.Element {
  const { register, handleSubmit, formState: { errors } } = useForm<FormData>({
    resolver: zodResolver(schema),
  });

  const onSubmit = (data: FormData) => {
    // handle submit
  };

  return (
    <form onSubmit={handleSubmit(onSubmit)}>
      <input {...register('email')} />
      {errors.email && <span>{errors.email.message}</span>}

      <input type="password" {...register('password')} />
      {errors.password && <span>{errors.password.message}</span>}

      <button type="submit">Login</button>
    </form>
  );
}
```

---

## Testing

### Component Testing with React Testing Library
```typescript
import { render, screen, fireEvent } from '@testing-library/react';
import { Button } from './Button';

describe('Button', () => {
  it('calls onClick when clicked', () => {
    const onClick = vi.fn();
    render(<Button label="Click me" onClick={onClick} />);

    fireEvent.click(screen.getByText('Click me'));

    expect(onClick).toHaveBeenCalledTimes(1);
  });

  it('does not call onClick when disabled', () => {
    const onClick = vi.fn();
    render(<Button label="Click me" onClick={onClick} disabled />);

    fireEvent.click(screen.getByText('Click me'));

    expect(onClick).not.toHaveBeenCalled();
  });

  it('applies correct variant class', () => {
    render(<Button label="Click" onClick={() => {}} variant="secondary" />);

    expect(screen.getByRole('button')).toHaveClass('secondary');
  });
});
```

### Hook Testing
```typescript
import { renderHook, act, waitFor } from '@testing-library/react';
import { useCounter } from './useCounter';

describe('useCounter', () => {
  it('increments counter', () => {
    const { result } = renderHook(() => useCounter());

    act(() => {
      result.current.increment();
    });

    expect(result.current.count).toBe(1);
  });
});
```

### E2E with Playwright
```typescript
// tests/e2e/login.spec.ts
import { test, expect } from '@playwright/test';

test('user can login', async ({ page }) => {
  await page.goto('/login');

  await page.fill('[name="email"]', 'test@example.com');
  await page.fill('[name="password"]', 'password123');
  await page.click('button[type="submit"]');

  await expect(page).toHaveURL('/dashboard');
  await expect(page.getByText('Welcome')).toBeVisible();
});
```

---

## Performance

### Memoization
```typescript
// Memoize expensive components
const ItemList = memo(function ItemList({ items }: ItemListProps) {
  return items.map(item => <ItemCard key={item.id} item={item} />);
});

// Memoize callbacks passed to children
const handleClick = useCallback((id: string) => {
  setSelectedId(id);
}, []);

// Memoize expensive computations
const sortedItems = useMemo(() => {
  return [...items].sort((a, b) => a.name.localeCompare(b.name));
}, [items]);
```

### Code Splitting
```typescript
// Lazy load routes (React.lazy expects the module to have a default export)
const ItemPage = lazy(() => import('./pages/Item'));

// <Routes> only accepts <Route> children, so wrap the lazy element — not the
// <Route> itself — in <Suspense>
<Route
  path="/items/:id"
  element={
    <Suspense fallback={<Spinner />}>
      <ItemPage />
    </Suspense>
  }
/>
```

---

## React Web Anti-Patterns

- ❌ Inline functions in JSX - use useCallback
- ❌ Logic in render - extract to hooks
- ❌ Deep component nesting - flatten hierarchy
- ❌ Index as key in lists - use stable IDs
- ❌ Direct state mutation - always use setter
- ❌ Prop drilling > 2 levels - use context or state management
- ❌ useEffect for derived state - use useMemo
- ❌ Fetching in useEffect - use React Query
- ❌ Mixing business logic with UI - keep core/ pure
- ❌ Large components (>100 lines) - split into smaller pieces
- ❌ CSS in JS objects - use CSS modules or Tailwind
- ❌ Ignoring TypeScript errors - fix them


================================================
FILE: skills/reddit-ads/SKILL.md
================================================
---
name: reddit-ads
description: Reddit Ads API - campaigns, targeting, conversions, agentic optimization
when-to-use: When building Reddit ad campaign management or optimization tools
user-invocable: false
effort: medium
---

# Reddit Ads API Skill


**Purpose:** Automate Reddit advertising campaigns using the Reddit Ads API. Create, manage, and optimize campaigns, ad groups, and ads programmatically.

---

## API Overview

```
┌─────────────────────────────────────────────────────────────────┐
│  REDDIT ADS API HIERARCHY                                        │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  Account                                                        │
│    └── Campaign (objective, budget, schedule)                   │
│         └── Ad Group (targeting, bidding, placement)            │
│              └── Ad (creative, headline, CTA)                   │
│                                                                 │
│  + Custom Audiences (customer lists, lookalikes)                │
│  + Conversions API (track events server-side)                   │
├─────────────────────────────────────────────────────────────────┤
│  BASE URL: https://ads-api.reddit.com/api/v2.0                  │
│  DOCS: https://ads-api.reddit.com/docs/                         │
│  RATE LIMIT: 1 request per second                               │
│  AUTH: OAuth 2.0 with Bearer token                              │
└─────────────────────────────────────────────────────────────────┘
```

---

## Authentication

### Step 1: Create Reddit Developer App

1. Go to https://www.reddit.com/prefs/apps/
2. Click "Create App" or "Create Another App"
3. Fill in:
   - **Name:** Your app name
   - **Type:** Select `script` for server-side automation
   - **Redirect URI:** Your callback URL (e.g., `https://yourapp.com/callback`)
4. Note your **Client ID** (under app name) and **Client Secret**

### Step 2: Authorization Flow

```javascript
// Node.js OAuth2 flow
const REDDIT_CLIENT_ID = process.env.REDDIT_ADS_CLIENT_ID;
const REDDIT_CLIENT_SECRET = process.env.REDDIT_ADS_CLIENT_SECRET;
const REDIRECT_URI = 'https://yourapp.com/callback';

// Step 1: Generate authorization URL
/**
 * Build the Reddit OAuth2 authorization URL the user is redirected to.
 *
 * @param {string} state - Opaque value echoed back on the callback; the
 *   caller compares it with the value it generated to reject forged callbacks.
 * @returns {string} Authorization URL with every dynamic value percent-encoded.
 */
function getAuthorizationUrl(state) {
  const scopes = 'adsread,adsedit,history';
  // Encode each dynamic value: a raw `&`, `=`, `#` or space inside `state`
  // (or the client id) would otherwise split or truncate the query string.
  return `https://www.reddit.com/api/v1/authorize?` +
    `client_id=${encodeURIComponent(REDDIT_CLIENT_ID)}` +
    `&response_type=code` +
    `&state=${encodeURIComponent(state)}` +
    `&redirect_uri=${encodeURIComponent(REDIRECT_URI)}` +
    `&duration=permanent` +
    `&scope=${scopes}`;
}

// Step 2: Exchange code for tokens
/**
 * Exchange an OAuth2 authorization code for tokens.
 *
 * @param {string} authorizationCode - The `code` value received on the callback.
 * @returns {Promise<object>} Parsed JSON body of the token endpoint response.
 */
async function getAccessToken(authorizationCode) {
  // HTTP Basic credentials: base64("<client id>:<client secret>")
  const basicAuth = Buffer
    .from(`${REDDIT_CLIENT_ID}:${REDDIT_CLIENT_SECRET}`)
    .toString('base64');

  const requestHeaders = {
    'Authorization': `Basic ${basicAuth}`,
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'YourApp/1.0.0'
  };

  const formBody = new URLSearchParams({
    grant_type: 'authorization_code',
    code: authorizationCode,
    redirect_uri: REDIRECT_URI
  });

  const tokenResponse = await fetch('https://www.reddit.com/api/v1/access_token', {
    method: 'POST',
    headers: requestHeaders,
    body: formBody
  });

  // Returns: { access_token, refresh_token, expires_in, scope }
  return tokenResponse.json();
}

// Step 3: Refresh token when expired
/**
 * Request a new access token using a refresh token.
 *
 * @param {string} refreshToken - Refresh token from an earlier token exchange.
 * @returns {Promise<object>} Parsed JSON body of the token endpoint response.
 */
async function refreshAccessToken(refreshToken) {
  // HTTP Basic credentials: base64("<client id>:<client secret>")
  const basicAuth = Buffer
    .from(`${REDDIT_CLIENT_ID}:${REDDIT_CLIENT_SECRET}`)
    .toString('base64');

  const requestHeaders = {
    'Authorization': `Basic ${basicAuth}`,
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'YourApp/1.0.0'
  };

  const formBody = new URLSearchParams({
    grant_type: 'refresh_token',
    refresh_token: refreshToken
  });

  const tokenResponse = await fetch('https://www.reddit.com/api/v1/access_token', {
    method: 'POST',
    headers: requestHeaders,
    body: formBody
  });

  return tokenResponse.json();
}
```

### Python OAuth2 Flow

```python
import requests
import base64
import os

REDDIT_CLIENT_ID = os.environ['REDDIT_ADS_CLIENT_ID']
REDDIT_CLIENT_SECRET = os.environ['REDDIT_ADS_CLIENT_SECRET']
REDIRECT_URI = 'https://yourapp.com/callback'
USER_AGENT = 'YourApp/1.0.0'

def get_authorization_url(state: str) -> str:
    """Generate the OAuth authorization URL the user is redirected to.

    Args:
        state: Opaque value echoed back on the callback; the caller compares
            it with the value it generated to reject forged callbacks.

    Returns:
        The authorization URL with every dynamic value percent-encoded.
    """
    from urllib.parse import quote

    scopes = 'adsread,adsedit,history'
    # safe='' also encodes '/', so the redirect URI becomes a single opaque
    # query value and characters such as '&' or '=' in `state` cannot split
    # the query string.
    return (
        f"https://www.reddit.com/api/v1/authorize?"
        f"client_id={quote(REDDIT_CLIENT_ID, safe='')}"
        f"&response_type=code"
        f"&state={quote(state, safe='')}"
        f"&redirect_uri={quote(REDIRECT_URI, safe='')}"
        f"&duration=permanent"
        f"&scope={scopes}"
    )

def get_access_token(authorization_code: str) -> dict:
    """Trade an OAuth authorization code for tokens.

    Args:
        authorization_code: The ``code`` value received on the callback.

    Returns:
        The parsed JSON body of the token endpoint response.
    """
    # HTTP Basic credentials: base64("<client id>:<client secret>")
    raw_pair = f"{REDDIT_CLIENT_ID}:{REDDIT_CLIENT_SECRET}"
    basic_token = base64.b64encode(raw_pair.encode()).decode()

    request_headers = {
        'Authorization': f'Basic {basic_token}',
        'User-Agent': USER_AGENT
    }
    form_fields = {
        'grant_type': 'authorization_code',
        'code': authorization_code,
        'redirect_uri': REDIRECT_URI
    }

    token_response = requests.post(
        'https://www.reddit.com/api/v1/access_token',
        headers=request_headers,
        data=form_fields
    )
    return token_response.json()

def refresh_access_token(refresh_token: str) -> dict:
    """Obtain a new access token from a refresh token.

    Args:
        refresh_token: Refresh token from an earlier token exchange.

    Returns:
        The parsed JSON body of the token endpoint response.
    """
    # HTTP Basic credentials: base64("<client id>:<client secret>")
    raw_pair = f"{REDDIT_CLIENT_ID}:{REDDIT_CLIENT_SECRET}"
    basic_token = base64.b64encode(raw_pair.encode()).decode()

    request_headers = {
        'Authorization': f'Basic {basic_token}',
        'User-Agent': USER_AGENT
    }
    form_fields = {
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token
    }

    token_response = requests.post(
        'https://www.reddit.com/api/v1/access_token',
        headers=request_headers,
        data=form_fields
    )
    return token_response.json()
```

### Required Scopes

| Scope | Access Level |
|-------|--------------|
| `adsread` | Read campaigns, ad groups, ads, reports |
| `adsedit` | Create/update campaigns, ad groups, ads |
| `history` | Access account history |

---

## Reddit Ads Client

### Node.js Client

```typescript
// lib/reddit-ads-client.ts
interface RedditAdsConfig {
  accessToken: string;
  accountId: string;
}

/**
 * Thin wrapper around the Reddit Ads REST API, scoped to a single ad account.
 *
 * Every public method issues one HTTP request and resolves with the parsed
 * JSON response. Failed requests reject with an `Error` that carries the HTTP
 * status code in a `status` property.
 */
class RedditAdsClient {
  private baseUrl = 'https://ads-api.reddit.com/api/v2.0';
  private accessToken: string;
  private accountId: string;

  constructor(config: RedditAdsConfig) {
    this.accessToken = config.accessToken;
    this.accountId = config.accountId;
  }

  /**
   * Send one authenticated request and parse the JSON response.
   *
   * `T` defaults to `any` so callers that do not supply a response type can
   * still read fields such as `id` from the result.
   *
   * @param method   HTTP verb.
   * @param endpoint Path appended to the base URL (starts with `/`).
   * @param body     Optional payload, serialized as JSON.
   * @throws Error with a numeric `status` property when the response is not 2xx.
   */
  private async request<T = any>(
    method: string,
    endpoint: string,
    body?: object
  ): Promise<T> {
    const url = `${this.baseUrl}${endpoint}`;

    const response = await fetch(url, {
      method,
      headers: {
        'Authorization': `Bearer ${this.accessToken}`,
        'Content-Type': 'application/json',
        'User-Agent': 'YourApp/1.0.0'
      },
      body: body ? JSON.stringify(body) : undefined
    });

    if (!response.ok) {
      // Read the body as text first: an error response is not guaranteed to
      // be JSON, and a parse failure here would hide the real HTTP error.
      const rawBody = await response.text();
      let details: unknown = rawBody;
      try {
        details = JSON.parse(rawBody);
      } catch (_parseError) {
        // Not JSON - keep the raw text as the error detail.
      }

      const error = new Error(
        `Reddit Ads API Error: ${JSON.stringify(details)}`
      ) as Error & { status?: number };
      // Expose the status so callers can tell a 429 from other failures.
      error.status = response.status;
      throw error;
    }

    return response.json();
  }

  // Account
  async getAccount() {
    return this.request('GET', `/accounts/${this.accountId}`);
  }

  // Campaigns
  async getCampaigns() {
    return this.request('GET', `/accounts/${this.accountId}/campaigns`);
  }

  async getCampaign(campaignId: string) {
    return this.request('GET', `/accounts/${this.accountId}/campaigns/${campaignId}`);
  }

  async createCampaign(campaign: CampaignCreate) {
    return this.request('POST', `/accounts/${this.accountId}/campaigns`, campaign);
  }

  async updateCampaign(campaignId: string, updates: Partial<CampaignCreate>) {
    return this.request('PUT', `/accounts/${this.accountId}/campaigns/${campaignId}`, updates);
  }

  // Ad Groups
  /** List ad groups, optionally restricted to one campaign. */
  async getAdGroups(campaignId?: string) {
    const endpoint = campaignId
      ? `/accounts/${this.accountId}/campaigns/${campaignId}/ad_groups`
      : `/accounts/${this.accountId}/ad_groups`;
    return this.request('GET', endpoint);
  }

  async getAdGroup(adGroupId: string) {
    return this.request('GET', `/accounts/${this.accountId}/ad_groups/${adGroupId}`);
  }

  async createAdGroup(adGroup: AdGroupCreate) {
    return this.request('POST', `/accounts/${this.accountId}/ad_groups`, adGroup);
  }

  async updateAdGroup(adGroupId: string, updates: Partial<AdGroupCreate>) {
    return this.request('PUT', `/accounts/${this.accountId}/ad_groups/${adGroupId}`, updates);
  }

  // Ads
  /** List ads, optionally restricted to one ad group. */
  async getAds(adGroupId?: string) {
    const endpoint = adGroupId
      ? `/accounts/${this.accountId}/ad_groups/${adGroupId}/ads`
      : `/accounts/${this.accountId}/ads`;
    return this.request('GET', endpoint);
  }

  async createAd(ad: AdCreate) {
    return this.request('POST', `/accounts/${this.accountId}/ads`, ad);
  }

  async updateAd(adId: string, updates: Partial<AdCreate>) {
    return this.request('PUT', `/accounts/${this.accountId}/ads/${adId}`, updates);
  }

  // Reports
  async getReport(reportRequest: ReportRequest) {
    return this.request('POST', `/accounts/${this.accountId}/reports`, reportRequest);
  }

  // Custom Audiences
  async getCustomAudiences() {
    return this.request('GET', `/accounts/${this.accountId}/custom_audiences`);
  }

  async createCustomAudience(audience: CustomAudienceCreate) {
    return this.request('POST', `/accounts/${this.accountId}/custom_audiences`, audience);
  }
}

export default RedditAdsClient;
```

### Python Client

```python
# lib/reddit_ads_client.py
import requests
from typing import Optional, Dict, Any, List
from dataclasses import dataclass

@dataclass
class RedditAdsConfig:
    access_token: str
    account_id: str

class RedditAdsClient:
    """Thin wrapper around the Reddit Ads REST API for a single ad account.

    Every public method issues one HTTP request through a shared
    ``requests.Session`` and returns the parsed JSON response. Non-2xx
    responses raise via ``response.raise_for_status()``.
    """

    BASE_URL = 'https://ads-api.reddit.com/api/v2.0'

    def __init__(self, config: RedditAdsConfig):
        self.access_token = config.access_token
        self.account_id = config.account_id
        # One session so the auth headers are set once for every request.
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {self.access_token}',
            'Content-Type': 'application/json',
            'User-Agent': 'YourApp/1.0.0'
        })

    def _request(
        self,
        method: str,
        endpoint: str,
        json: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP verb.
            endpoint: Path appended to ``BASE_URL`` (starts with ``/``).
            json: Optional payload, sent as the JSON request body.

        Raises:
            requests.HTTPError: If the response status is 4xx or 5xx.
        """
        url = f"{self.BASE_URL}{endpoint}"
        response = self.session.request(method, url, json=json)
        response.raise_for_status()
        return response.json()

    # Account
    def get_account(self) -> Dict:
        return self._request('GET', f'/accounts/{self.account_id}')

    # Campaigns
    def get_campaigns(self) -> List[Dict]:
        return self._request('GET', f'/accounts/{self.account_id}/campaigns')

    def get_campaign(self, campaign_id: str) -> Dict:
        return self._request('GET', f'/accounts/{self.account_id}/campaigns/{campaign_id}')

    def create_campaign(self, campaign: Dict) -> Dict:
        return self._request('POST', f'/accounts/{self.account_id}/campaigns', json=campaign)

    def update_campaign(self, campaign_id: str, updates: Dict) -> Dict:
        return self._request('PUT', f'/accounts/{self.account_id}/campaigns/{campaign_id}', json=updates)

    # Ad Groups
    def get_ad_groups(self, campaign_id: Optional[str] = None) -> List[Dict]:
        """List ad groups, optionally restricted to one campaign."""
        endpoint = (
            f'/accounts/{self.account_id}/campaigns/{campaign_id}/ad_groups'
            if campaign_id
            else f'/accounts/{self.account_id}/ad_groups'
        )
        return self._request('GET', endpoint)

    def get_ad_group(self, ad_group_id: str) -> Dict:
        """Fetch a single ad group by ID."""
        return self._request('GET', f'/accounts/{self.account_id}/ad_groups/{ad_group_id}')

    def create_ad_group(self, ad_group: Dict) -> Dict:
        return self._request('POST', f'/accounts/{self.account_id}/ad_groups', json=ad_group)

    def update_ad_group(self, ad_group_id: str, updates: Dict) -> Dict:
        return self._request('PUT', f'/accounts/{self.account_id}/ad_groups/{ad_group_id}', json=updates)

    # Ads
    def get_ads(self, ad_group_id: Optional[str] = None) -> List[Dict]:
        """List ads, optionally restricted to one ad group."""
        endpoint = (
            f'/accounts/{self.account_id}/ad_groups/{ad_group_id}/ads'
            if ad_group_id
            else f'/accounts/{self.account_id}/ads'
        )
        return self._request('GET', endpoint)

    def create_ad(self, ad: Dict) -> Dict:
        return self._request('POST', f'/accounts/{self.account_id}/ads', json=ad)

    def update_ad(self, ad_id: str, updates: Dict) -> Dict:
        """Update an existing ad by ID."""
        return self._request('PUT', f'/accounts/{self.account_id}/ads/{ad_id}', json=updates)

    # Reports
    def get_report(self, report_request: Dict) -> Dict:
        return self._request('POST', f'/accounts/{self.account_id}/reports', json=report_request)

    # Custom Audiences
    def get_custom_audiences(self) -> List[Dict]:
        return self._request('GET', f'/accounts/{self.account_id}/custom_audiences')

    def create_custom_audience(self, audience: Dict) -> Dict:
        return self._request('POST', f'/accounts/{self.account_id}/custom_audiences', json=audience)
```

---

## API Endpoints Reference

### Account Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | `/accounts/{account_id}` | Get account details |
| GET | `/accounts/{account_id}/funding` | Get funding information |

### Campaign Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | `/accounts/{account_id}/campaigns` | List all campaigns |
| GET | `/accounts/{account_id}/campaigns/{campaign_id}` | Get campaign by ID |
| POST | `/accounts/{account_id}/campaigns` | Create campaign |
| PUT | `/accounts/{account_id}/campaigns/{campaign_id}` | Update campaign |
| DELETE | `/accounts/{account_id}/campaigns/{campaign_id}` | Delete campaign |

### Ad Group Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | `/accounts/{account_id}/ad_groups` | List all ad groups |
| GET | `/accounts/{account_id}/ad_groups/{ad_group_id}` | Get ad group by ID |
| POST | `/accounts/{account_id}/ad_groups` | Create ad group |
| PUT | `/accounts/{account_id}/ad_groups/{ad_group_id}` | Update ad group |
| DELETE | `/accounts/{account_id}/ad_groups/{ad_group_id}` | Delete ad group |

### Ad Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | `/accounts/{account_id}/ads` | List all ads |
| GET | `/accounts/{account_id}/ads/{ad_id}` | Get ad by ID |
| POST | `/accounts/{account_id}/ads` | Create ad |
| PUT | `/accounts/{account_id}/ads/{ad_id}` | Update ad |
| DELETE | `/accounts/{account_id}/ads/{ad_id}` | Delete ad |

### Custom Audience Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | `/accounts/{account_id}/custom_audiences` | List custom audiences |
| POST | `/accounts/{account_id}/custom_audiences` | Create custom audience |
| PUT | `/accounts/{account_id}/custom_audiences/{audience_id}` | Update audience |
| DELETE | `/accounts/{account_id}/custom_audiences/{audience_id}` | Delete audience |

### Report Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| POST | `/accounts/{account_id}/reports` | Generate report |

---

## Campaign Creation

### Campaign Objectives

| Objective | Use Case |
|-----------|----------|
| `BRAND_AWARENESS` | Build brand recognition and reach |
| `TRAFFIC` | Drive clicks to website/landing page |
| `CONVERSIONS` | Track and optimize for conversions |
| `VIDEO_VIEWS` | Maximize video view engagement |
| `APP_INSTALLS` | Drive mobile app installations |
| `CATALOG_SALES` | Promote product catalog items |

### Budget Types

| Type | Description |
|------|-------------|
| `DAILY` | Average daily spend (may vary slightly) |
| `LIFETIME` | Total spend over campaign duration |

### Campaign Create Example

```typescript
/** Payload for creating a campaign (also used, partially, for updates). */
interface CampaignCreate {
  name: string;
  // Covers every objective listed in the Campaign Objectives table.
  objective:
    | 'BRAND_AWARENESS'
    | 'TRAFFIC'
    | 'CONVERSIONS'
    | 'VIDEO_VIEWS'
    | 'APP_INSTALLS'
    | 'CATALOG_SALES';
  is_enabled: boolean;
  budget_type: 'DAILY' | 'LIFETIME';
  budget_total_amount_micros: number; // Amount in micros (1 USD = 1,000,000 micros)
  start_time: string; // ISO 8601 format
  end_time?: string; // ISO 8601 format (optional)
}

// Create a traffic campaign with $50/day budget
const campaign: CampaignCreate = {
  name: 'Q1 2025 Traffic Campaign',
  objective: 'TRAFFIC',
  is_enabled: true,
  budget_type: 'DAILY',
  budget_total_amount_micros: 50_000_000, // $50
  start_time: '2025-01-15T00:00:00Z',
  end_time: '2025-03-31T23:59:59Z'
};

const result = await client.createCampaign(campaign);
```

```python
# Python example
campaign = {
    'name': 'Q1 2025 Traffic Campaign',
    'objective': 'TRAFFIC',
    'is_enabled': True,
    'budget_type': 'DAILY',
    'budget_total_amount_micros': 50_000_000,  # $50
    'start_time': '2025-01-15T00:00:00Z',
    'end_time': '2025-03-31T23:59:59Z'
}

result = client.create_campaign(campaign)
```

---

## Ad Group Creation

### Bidding Strategies

| Strategy | Description | Use Case |
|----------|-------------|----------|
| `LOWEST_COST` | Maximize conversions within budget | Best for most campaigns |
| `COST_CAP` | Set average CPC cap | Control cost per result |
| `MANUAL` | Set strict CPC/CPM bid | Maximum control |

### Targeting Options

| Targeting Type | Description |
|----------------|-------------|
| `communities` | Target specific subreddits |
| `interests` | Target by interest categories |
| `keywords` | Target by keyword engagement |
| `devices` | Target by device type |
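| `locations` | Target by geography (sent as `geo_locations` in the example below) |
| `custom_audiences` | Target uploaded customer lists (sent as `custom_audience_ids` in the example below) |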

### Ad Group Create Example

```typescript
interface AdGroupCreate {
  name: string;
  campaign_id: string;
  is_enabled: boolean;
  bid_strategy: 'LOWEST_COST' | 'COST_CAP' | 'MANUAL';
  bid_amount_micros?: number; // For COST_CAP or MANUAL
  goal_type: 'CLICKS' | 'IMPRESSIONS' | 'CONVERSIONS';
  goal_value_micros?: number;
  targeting: {
    communities?: string[]; // Subreddit names without r/
    interests?: string[];
    keywords?: string[];
    geo_locations?: {
      countries?: string[];
      regions?: string[];
      cities?: string[];
    };
    devices?: ('DESKTOP' | 'MOBILE' | 'TABLET')[];
    custom_audience_ids?: string[];
  };
  start_time?: string;
  end_time?: string;
}

// Create ad group targeting specific subreddits
const adGroup: AdGroupCreate = {
  name: 'Tech Enthusiasts - Subreddit Targeting',
  campaign_id: 'campaign_123',
  is_enabled: true,
  bid_strategy: 'LOWEST_COST',
  goal_type: 'CLICKS',
  targeting: {
    communities: [
      'technology',
      'gadgets',
      'programming',
      'webdev',
      'startups'
    ],
    geo_locations: {
      countries: ['US', 'CA', 'GB']
    },
    devices: ['DESKTOP', 'MOBILE']
  },
  start_time: '2025-01-15T00:00:00Z'
};

const result = await client.createAdGroup(adGroup);
```

```python
# Python example
ad_group = {
    'name': 'Tech Enthusiasts - Subreddit Targeting',
    'campaign_id': 'campaign_123',
    'is_enabled': True,
    'bid_strategy': 'LOWEST_COST',
    'goal_type': 'CLICKS',
    'targeting': {
        'communities': [
            'technology',
            'gadgets',
            'programming',
            'webdev',
            'startups'
        ],
        'geo_locations': {
            'countries': ['US', 'CA', 'GB']
        },
        'devices': ['DESKTOP', 'MOBILE']
    },
    'start_time': '2025-01-15T00:00:00Z'
}

result = client.create_ad_group(ad_group)
```

---

## Ad Creation

### Ad Types

| Type | Description |
|------|-------------|
| `LINK` | Link ad with image/video |
| `TEXT` | Text-only promoted post |
| `VIDEO` | Video ad |
| `CAROUSEL` | Multiple images/cards |
| `PRODUCT` | Product catalog ad |

### Call-to-Action Options

| CTA | Use Case |
|-----|----------|
| `SHOP_NOW` | E-commerce |
| `SIGN_UP` | Lead generation |
| `LEARN_MORE` | Information |
| `DOWNLOAD` | App/content download |
| `INSTALL` | App install |
| `GET_QUOTE` | Services |
| `CONTACT_US` | B2B/Services |
| `APPLY_NOW` | Jobs/Finance |
| `BOOK_NOW` | Travel/Services |
| `WATCH_NOW` | Video content |
| `SUBSCRIBE` | Newsletters/SaaS |
| `GET_OFFER` | Promotions |
| `SEE_MENU` | Restaurants |

### Ad Create Example

```typescript
interface AdCreate {
  name: string;
  ad_group_id: string;
  is_enabled: boolean;
  type: 'LINK' | 'TEXT' | 'VIDEO' | 'CAROUSEL';
  headline: string; // Max 300 characters
  body?: string;
  url: string;
  display_url?: string;
  call_to_action: string;
  thumbnail_url?: string; // For image/video ads
  video_url?: string; // For video ads
}

// Create a link ad
const ad: AdCreate = {
  name: 'Product Launch Ad - v1',
  ad_group_id: 'ad_group_456',
  is_enabled: true,
  type: 'LINK',
  headline: 'Introducing Our Revolutionary New Product',
  body: 'Discover how our latest innovation can transform your workflow. Join 10,000+ satisfied customers.',
  url: 'https://yoursite.com/product?utm_source=reddit&utm_medium=paid',
  display_url: 'yoursite.com/product',
  call_to_action: 'LEARN_MORE',
  thumbnail_url: 'https://yoursite.com/images/ad-creative.jpg'
};

const result = await client.createAd(ad);
```

```python
# Python example
ad = {
    'name': 'Product Launch Ad - v1',
    'ad_group_id': 'ad_group_456',
    'is_enabled': True,
    'type': 'LINK',
    'headline': 'Introducing Our Revolutionary New Product',
    'body': 'Discover how our latest innovation can transform your workflow. Join 10,000+ satisfied customers.',
    'url': 'https://yoursite.com/product?utm_source=reddit&utm_medium=paid',
    'display_url': 'yoursite.com/product',
    'call_to_action': 'LEARN_MORE',
    'thumbnail_url': 'https://yoursite.com/images/ad-creative.jpg'
}

result = client.create_ad(ad)
```

---

## Conversions API

### Event Types

| Event Type | Description |
|------------|-------------|
| `PAGE_VISIT` | Page view |
| `VIEW_CONTENT` | Product/content view |
| `SEARCH` | Search action |
| `ADD_TO_CART` | Add to cart |
| `ADD_TO_WISHLIST` | Add to wishlist |
| `PURCHASE` | Completed purchase |
| `LEAD` | Lead submission |
| `SIGN_UP` | Account creation |
| `CUSTOM` | Custom event |

### Conversion Event Structure

```typescript
interface ConversionEvent {
  event_at: number; // Unix timestamp in milliseconds
  event_type: {
    tracking_type: string;
    custom_event_name?: string; // For CUSTOM type
  };
  user: {
    email?: string; // SHA256 hashed, lowercase
    phone_number?: string; // SHA256 hashed, E.164 format
    external_id?: string;
    ip_address?: string;
    user_agent?: string;
    aaid?: string; // Android Advertising ID
    idfa?: string; // iOS IDFA
  };
  event_metadata?: {
    item_count?: number;
    value_decimal?: number;
    currency?: string;
    conversion_id: string; // Unique event ID
    products?: Array<{
      id: string;
      name?: string;
      category?: string;
    }>;
  };
  click_id?: string; // Reddit click ID for attribution
}
```

### Send Conversion Events

```typescript
import crypto from 'crypto';

/**
 * Normalize a PII value (lowercase, surrounding whitespace removed) and
 * return its SHA-256 digest as a hex string.
 */
function hashPII(value: string): string {
  const normalized = value.toLowerCase().trim();
  const hasher = crypto.createHash('sha256');
  hasher.update(normalized);
  return hasher.digest('hex');
}

/**
 * Send a single conversion event to the Reddit Conversions API.
 *
 * @param accessToken OAuth bearer token.
 * @param pixelId     Pixel ID used in the endpoint path.
 * @param event       The conversion event to report.
 * @returns Parsed JSON response body.
 * @throws Error with a numeric `status` property when the response is not 2xx.
 */
async function sendConversionEvent(
  accessToken: string,
  pixelId: string,
  event: ConversionEvent
) {
  const response = await fetch(
    `https://ads-api.reddit.com/api/v2.0/conversions/events/${pixelId}`,
    {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${accessToken}`,
        'Content-Type': 'application/json',
        // Same User-Agent as the other Reddit requests in this guide.
        'User-Agent': 'YourApp/1.0.0'
      },
      body: JSON.stringify({
        events: [event],
        test_mode: false // Set true for testing
      })
    }
  );

  if (!response.ok) {
    // Fail loudly rather than returning an error payload as if the event had
    // been accepted; a dropped conversion is otherwise invisible to the caller.
    const details = await response.text();
    const error = new Error(
      `Reddit Conversions API Error (${response.status}): ${details}`
    ) as Error & { status?: number };
    error.status = response.status;
    throw error;
  }

  return response.json();
}

// Example: Track a purchase
const purchaseEvent: ConversionEvent = {
  event_at: Date.now(),
  event_type: {
    tracking_type: 'PURCHASE'
  },
  user: {
    email: hashPII('customer@example.com'),
    ip_address: '192.168.1.1',
    user_agent: 'Mozilla/5.0...'
  },
  event_metadata: {
    conversion_id: 'order_12345',
    value_decimal: 99.99,
    currency: 'USD',
    item_count: 2,
    products: [
      { id: 'SKU001', name: 'Product A', category: 'Electronics' },
      { id: 'SKU002', name: 'Product B', category: 'Electronics' }
    ]
  },
  click_id: 'reddit_click_id_from_url' // From rdt_cid parameter
};

await sendConversionEvent(accessToken, 'pixel_123', purchaseEvent);
```

```python
import hashlib
import time
import requests

def hash_pii(value: str) -> str:
    """Return the SHA-256 hex digest of ``value`` after normalization.

    The value is lowercased and stripped of surrounding whitespace before
    hashing.
    """
    normalized = value.lower().strip()
    digest = hashlib.sha256()
    digest.update(normalized.encode())
    return digest.hexdigest()

def send_conversion_event(
    access_token: str,
    pixel_id: str,
    events: list[dict],
    test_mode: bool = False
) -> dict:
    """Send a batch of conversion events to Reddit.

    Args:
        access_token: OAuth bearer token.
        pixel_id: Pixel ID used in the endpoint path.
        events: Conversion event payloads; at most 500 per call.
        test_mode: When True, the batch is flagged as a test submission.

    Returns:
        The parsed JSON response body.

    Raises:
        ValueError: If more than 500 events are passed in one call.
        requests.HTTPError: If the response status is 4xx or 5xx.
    """
    # Batch limit stated in this guide's Conversions API notes; reject an
    # oversized batch before spending a request on it.
    max_events_per_batch = 500
    if len(events) > max_events_per_batch:
        raise ValueError(
            f'Too many events in one batch: {len(events)} '
            f'(maximum {max_events_per_batch}); split into smaller batches'
        )

    response = requests.post(
        f'https://ads-api.reddit.com/api/v2.0/conversions/events/{pixel_id}',
        headers={
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json'
        },
        json={
            'events': events,
            'test_mode': test_mode
        }
    )
    response.raise_for_status()
    return response.json()

# Example: Track a purchase
purchase_event = {
    'event_at': int(time.time() * 1000),
    'event_type': {
        'tracking_type': 'PURCHASE'
    },
    'user': {
        'email': hash_pii('customer@example.com'),
        'ip_address': '192.168.1.1',
        'user_agent': 'Mozilla/5.0...'
    },
    'event_metadata': {
        'conversion_id': 'order_12345',
        'value_decimal': 99.99,
        'currency': 'USD',
        'item_count': 2,
        'products': [
            {'id': 'SKU001', 'name': 'Product A', 'category': 'Electronics'},
            {'id': 'SKU002', 'name': 'Product B', 'category': 'Electronics'}
        ]
    },
    'click_id': 'reddit_click_id_from_url'
}

result = send_conversion_event(access_token, 'pixel_123', [purchase_event])
```

### Important Notes

- Events must have occurred within the **last 7 days** to be processed
- Maximum **500 events per batch** request
- Include `click_id` when available for better attribution
- Use `test_mode: true` for testing without affecting campaigns

---

## Custom Audiences

### Audience Types

| Type | Description |
|------|-------------|
| `CUSTOMER_LIST` | Upload hashed emails/phone/MAIDs |
| `WEBSITE_VISITORS` | Pixel-based retargeting |
| `LOOKALIKE` | Similar to source audience |

### Create Customer List Audience

```typescript
interface CustomAudienceCreate {
  name: string;
  type: 'CUSTOMER_LIST';
  description?: string;
  users: Array<{
    email_sha256?: string;
    maid_sha256?: string; // Mobile Advertising ID
  }>;
}

// Create audience from customer emails
const audience: CustomAudienceCreate = {
  name: 'High Value Customers Q4 2024',
  type: 'CUSTOMER_LIST',
  description: 'Customers with LTV > $500',
  users: customerEmails.map(email => ({
    email_sha256: hashPII(email)
  }))
};

const result = await client.createCustomAudience(audience);
```

### Minimum Audience Size

- **1,000 matched users** minimum to be usable for targeting
- Match rates displayed as ranges for privacy

---

## Reporting

### Report Request

```typescript
interface ReportRequest {
  start_date: string; // YYYY-MM-DD
  end_date: string; // YYYY-MM-DD
  level: 'ACCOUNT' | 'CAMPAIGN' | 'AD_GROUP' | 'AD';
  metrics: string[];
  dimensions?: string[];
  filters?: {
    campaign_ids?: string[];
    ad_group_ids?: string[];
  };
}

// Get campaign performance report
const report = await client.getReport({
  start_date: '2025-01-01',
  end_date: '2025-01-31',
  level: 'CAMPAIGN',
  metrics: [
    'impressions',
    'clicks',
    'spend',
    'ctr',
    'cpc',
    'conversions',
    'conversion_rate',
    'cpa'
  ],
  dimensions: ['date']
});
```

### Available Metrics

| Metric | Description |
|--------|-------------|
| `impressions` | Total impressions |
| `clicks` | Total clicks |
| `spend` | Total spend (in account currency) |
| `ctr` | Click-through rate |
| `cpc` | Cost per click |
| `cpm` | Cost per 1,000 impressions |
| `conversions` | Total conversions |
| `conversion_rate` | Conversions / Clicks |
| `cpa` | Cost per acquisition |
| `video_views` | Video view count |
| `video_completions` | Videos watched to completion |

---

## Environment Variables

```bash
# .env
REDDIT_ADS_CLIENT_ID=your_client_id
REDDIT_ADS_CLIENT_SECRET=your_client_secret
REDDIT_ADS_ACCOUNT_ID=t2_xxxxx
REDDIT_ADS_ACCESS_TOKEN=your_access_token
REDDIT_ADS_REFRESH_TOKEN=your_refresh_token
REDDIT_ADS_PIXEL_ID=your_pixel_id
```

---

## Best Practices

### Campaign Structure

```
┌─────────────────────────────────────────────────────────────────┐
│  RECOMMENDED STRUCTURE                                          │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  Campaign (by objective/product line)                           │
│  ├── Ad Group: Subreddit Targeting - Tech                      │
│  │   ├── Ad: Headline A + Image 1                              │
│  │   └── Ad: Headline B + Image 1                              │
│  ├── Ad Group: Subreddit Targeting - Business                  │
│  │   ├── Ad: Headline A + Image 1                              │
│  │   └── Ad: Headline B + Image 1                              │
│  └── Ad Group: Interest Targeting - Entrepreneurs              │
│      ├── Ad: Headline A + Image 2                              │
│      └── Ad: Headline B + Image 2                              │
│                                                                 │
│  • Separate ad groups by targeting type                         │
│  • Test 2-3 ad variations per ad group                          │
│  • Use clear naming conventions                                 │
└─────────────────────────────────────────────────────────────────┘
```

### Naming Conventions

```
Campaign:  [Objective] - [Product/Brand] - [Date Range]
           Example: TRAFFIC - ProductX - Q1-2025

Ad Group:  [Targeting Type] - [Audience Description]
           Example: Subreddits - Tech Enthusiasts

Ad:        [Headline Type] - [Creative Version]
           Example: Problem-Solution - Image-A
```

### Rate Limiting

- **1 request per second** limit
- Implement exponential backoff for retries
- Batch operations where possible

```typescript
/**
 * Run `fn` with a fixed one-second pause before every attempt, retrying with
 * exponential backoff when the thrown error has `status === 429`.
 *
 * @param fn      Operation to execute.
 * @param retries Maximum number of attempts (default 3).
 * @returns Whatever `fn` resolves with.
 * @throws The error from `fn` when it is not a 429 or attempts are exhausted;
 *         `Error('Max retries exceeded')` when the loop ends without an attempt
 *         returning or throwing.
 */
async function rateLimitedRequest<T>(
  fn: () => Promise<T>,
  retries = 3
): Promise<T> {
  const pause = (ms: number) =>
    new Promise(resolve => setTimeout(resolve, ms));

  let attempt = 0;
  while (attempt < retries) {
    try {
      await pause(1000); // 1 second delay
      return await fn();
    } catch (error: any) {
      const isLastAttempt = attempt >= retries - 1;
      if (error.status !== 429 || isLastAttempt) {
        throw error;
      }
      // Backoff doubles on each retry: 1s, 2s, 4s, ...
      await pause(Math.pow(2, attempt) * 1000);
    }
    attempt += 1;
  }
  throw new Error('Max retries exceeded');
}
```

---

## Complete Workflow Example

```typescript
// Full campaign creation workflow
/**
 * Creates a campaign, one subreddit-targeted ad group, and one link ad, in
 * that order, each step using the id returned by the previous one.
 *
 * The campaign is created with `is_enabled: false` so it can be reviewed
 * before it starts spending; the ad group and ad are created enabled.
 *
 * @param client Reddit Ads client used for the three create calls.
 * @param config Campaign name, daily budget (in whole currency units, e.g.
 *               dollars), target subreddits, and the ad's copy/URLs.
 * @returns The created `{ campaign, adGroup, ad }` objects as returned by the client.
 * @throws Whatever the client throws; objects created by earlier steps are
 *         not rolled back if a later step fails.
 */
async function createRedditAdCampaign(
  client: RedditAdsClient,
  config: {
    campaignName: string;
    dailyBudget: number;
    targetSubreddits: string[];
    headline: string;
    body: string;
    landingUrl: string;
    imageUrl: string;
  }
) {
  // Micros must be a whole number; floating-point multiplication of a
  // fractional budget (e.g. 10 / 3) would otherwise produce a non-integer.
  const budgetMicros = Math.round(config.dailyBudget * 1_000_000);

  // 1. Create Campaign
  const campaign = await client.createCampaign({
    name: config.campaignName,
    objective: 'TRAFFIC',
    is_enabled: false, // Start paused for review
    budget_type: 'DAILY',
    budget_total_amount_micros: budgetMicros,
    start_time: new Date().toISOString()
  });

  console.log(`Created campaign: ${campaign.id}`);

  // 2. Create Ad Group with targeting
  const adGroup = await client.createAdGroup({
    name: `${config.campaignName} - Subreddit Targeting`,
    campaign_id: campaign.id,
    is_enabled: true,
    bid_strategy: 'LOWEST_COST',
    goal_type: 'CLICKS',
    targeting: {
      communities: config.targetSubreddits,
      geo_locations: { countries: ['US'] },
      devices: ['DESKTOP', 'MOBILE']
    }
  });

  console.log(`Created ad group: ${adGroup.id}`);

  // 3. Create Ad
  const ad = await client.createAd({
    name: `${config.campaignName} - Ad v1`,
    ad_group_id: adGroup.id,
    is_enabled: true,
    type: 'LINK',
    headline: config.headline,
    body: config.body,
    url: config.landingUrl,
    call_to_action: 'LEARN_MORE',
    thumbnail_url: config.imageUrl
  });

  console.log(`Created ad: ${ad.id}`);

  return { campaign, adGroup, ad };
}

// Usage
// `client` is not created in this snippet; it must already be a constructed
// RedditAdsClient. The top-level `await` requires an ES module (or wrap the
// call in an async function).
const result = await createRedditAdCampaign(client, {
  campaignName: 'Product Launch - Jan 2025',
  dailyBudget: 50, // $50/day
  targetSubreddits: ['technology', 'gadgets', 'programming'],
  headline: 'Introducing the Future of Development',
  body: 'Join 50,000+ developers using our tool to ship faster.',
  // UTM parameter lets analytics attribute landing-page traffic to Reddit
  landingUrl: 'https://yoursite.com?utm_source=reddit',
  imageUrl: 'https://yoursite.com/ad-image.jpg'
});
```

---

## Testing

### Test Checklist

- [ ] OAuth flow completes successfully
- [ ] Token refresh works before expiry
- [ ] Campaign creates with correct budget
- [ ] Ad group targeting is applied correctly
- [ ] Ad creative displays properly
- [ ] Conversion events tracked (use test_mode)
- [ ] Reports return expected metrics
- [ ] Rate limiting handled gracefully
- [ ] Error responses handled properly

### Mock API for Development

```typescript
// test/mocks/reddit-ads-mock.ts
import { rest } from 'msw';

// OAuth token endpoint: always hands back a fixed token pair valid for an hour.
const mockTokenHandler = rest.post(
  'https://www.reddit.com/api/v1/access_token',
  (req, res, ctx) =>
    res(
      ctx.json({
        access_token: 'mock_access_token',
        refresh_token: 'mock_refresh_token',
        expires_in: 3600,
        scope: 'adsread adsedit history'
      })
    )
);

// Account lookup: echoes the requested account id with canned details.
const mockAccountHandler = rest.get(
  'https://ads-api.reddit.com/api/v2.0/accounts/:accountId',
  (req, res, ctx) =>
    res(
      ctx.json({
        id: req.params.accountId,
        name: 'Test Account',
        currency: 'USD'
      })
    )
);

// Campaign creation: returns the posted body with a fixed mock id
// (an `id` in the posted body, if any, takes precedence because it is spread last).
const mockCreateCampaignHandler = rest.post(
  'https://ads-api.reddit.com/api/v2.0/accounts/:accountId/campaigns',
  (req, res, ctx) =>
    res(
      ctx.json({
        id: 'campaign_mock_123',
        ...req.body
      })
    )
);

// Request handlers to register with the MSW server/worker in tests.
export const redditAdsMocks = [
  mockTokenHandler,
  mockAccountHandler,
  mockCreateCampaignHandler
];
```

---

## Troubleshooting

| Error | Cause | Fix |
|-------|-------|-----|
| `401 Unauthorized` | Invalid/expired token | Refresh access token |
| `403 Forbidden` | Account not whitelisted | Contact Reddit Ads support |
| `429 Too Many Requests` | Rate limit exceeded | Implement backoff, slow down |
| `400 Bad Request` | Invalid payload | Check required fields, data types |
| `Audience too small` | < 1,000 matched users | Add more users to audience |

---

## Agentic Optimization Service

### Architecture Overview

```
┌─────────────────────────────────────────────────────────────────┐
│  AGENTIC REDDIT ADS OPTIMIZER                                   │
│  ─────────────────────────────────────────────────────────────  │
│                                                                 │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐         │
│  │  Scheduler  │───▶│  Analyzer   │───▶│  Optimizer  │         │
│  │  (Cron)     │    │  (AI/LLM)   │    │  (Actions)  │         │
│  └─────────────┘    └─────────────┘    └─────────────┘         │
│         │                  │                  │                 │
│         ▼                  ▼                  ▼                 │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐         │
│  │  Fetch      │    │  Decide     │    │  Execute    │         │
│  │  Reports    │    │  Strategy   │    │  Changes    │         │
│  └─────────────┘    └─────────────┘    └─────────────┘         │
│                                                                 │
│  Loop: Every 4-6 hours                                          │
│  Actions: Pause losers, scale winners, adjust bids, rotate ads  │
└─────────────────────────────────────────────────────────────────┘
```

### Background Service (Node.js)

```typescript
// services/reddit-ads-optimizer.ts
import Anthropic from '@anthropic-ai/sdk';
import { CronJob } from 'cron';
import RedditAdsClient from '../lib/reddit-ads-client';

/**
 * Settings for RedditAdsOptimizerService: API credentials, decision
 * thresholds, and the schedule interval.
 */
interface OptimizationConfig {
  // Passed to the RedditAdsClient constructor together with accessToken
  accountId: string;
  accessToken: string;
  // Not read anywhere in the service code shown here (no token refresh is performed)
  refreshToken: string;
  // Thresholds
  // These values are interpolated into the LLM prompt; the service itself
  // does not compare metrics against them.
  minCTR: number;           // Pause ads below this CTR (e.g., 0.005 = 0.5%)
  maxCPA: number;           // Pause ads above this CPA
  minImpressions: number;   // Min impressions before decisions (e.g., 1000)
  budgetScaleFactor: number; // Scale winning ad groups by this factor (e.g., 1.5)
  // Optimization settings
  optimizationGoal: 'CLICKS' | 'CONVERSIONS' | 'ROAS';
  // Used as the step in the cron hour field (`0 */N * * *`)
  checkIntervalHours: number;
}

/**
 * One ad-level report row, as mapped by
 * RedditAdsOptimizerService.fetchPerformanceData and serialized into the
 * LLM prompt.
 */
interface PerformanceData {
  campaignId: string;
  adGroupId: string;
  adId: string;
  impressions: number;
  clicks: number;
  spend: number;
  // Defaults to 0 when the report row has no conversions value
  conversions: number;
  ctr: number;
  cpc: number;
  // Defaults to 0 when the report row has no cpa value
  cpa: number;
  // Derived: conversion_value / spend, or 0 when there is no conversion value
  roas: number;
}

/**
 * Background service that, on a cron schedule, fetches ad-level performance
 * data, asks an LLM for per-ad recommendations, and applies them through the
 * Reddit Ads client (pause ads, scale ad-group budgets, adjust bids, flag
 * creatives for rotation).
 */
class RedditAdsOptimizerService {
  private client: RedditAdsClient;
  private anthropic: Anthropic;
  private config: OptimizationConfig;
  private cronJob: CronJob | null = null;

  /**
   * @param config Credentials, thresholds and schedule interval.
   */
  constructor(config: OptimizationConfig) {
    this.config = config;
    this.client = new RedditAdsClient({
      accessToken: config.accessToken,
      accountId: config.accountId
    });
    // NOTE(review): constructed without options; presumably the SDK reads its
    // API key from the environment — confirm.
    this.anthropic = new Anthropic();
  }

  /**
   * Start the background optimization service.
   * Schedules a cycle at minute 0 of every `checkIntervalHours`-th hour.
   */
  start() {
    const cronSchedule = `0 */${this.config.checkIntervalHours} * * *`;

    this.cronJob = new CronJob(cronSchedule, async () => {
      console.log(`[${new Date().toISOString()}] Running optimization cycle...`);
      await this.runOptimizationCycle();
    });

    this.cronJob.start();
    console.log(`Reddit Ads Optimizer started. Running every ${this.config.checkIntervalHours} hours.`);
  }

  /** Stop the scheduled job, if one was started. */
  stop() {
    if (this.cronJob) {
      this.cronJob.stop();
      console.log('Reddit Ads Optimizer stopped.');
    }
  }

  /**
   * Main optimization cycle: fetch -> analyze -> execute -> log.
   * Never rejects: any failure is logged and forwarded to `sendAlert`.
   */
  async runOptimizationCycle() {
    try {
      // 1. Fetch performance data
      const performanceData = await this.fetchPerformanceData();

      // 2. Analyze with AI agent
      const recommendations = await this.analyzeWithAgent(performanceData);

      // 3. Execute optimizations
      await this.executeOptimizations(recommendations);

      // 4. Log results
      await this.logOptimizationResults(recommendations);

    } catch (error) {
      console.error('Optimization cycle failed:', error);
      await this.sendAlert('Optimization cycle failed', error);
    }
  }

  /**
   * Fetch ad-level performance for the last 24 hours.
   * The report is requested by calendar date (yesterday's date through
   * today's date, taken from the UTC ISO timestamp).
   *
   * @returns One PerformanceData entry per report row; empty when the report
   *          has no `data`.
   */
  private async fetchPerformanceData(): Promise<PerformanceData[]> {
    const endDate = new Date();
    const startDate = new Date(endDate.getTime() - 24 * 60 * 60 * 1000);

    const report = await this.client.getReport({
      start_date: startDate.toISOString().split('T')[0],
      end_date: endDate.toISOString().split('T')[0],
      level: 'AD',
      metrics: [
        'impressions', 'clicks', 'spend', 'conversions',
        'ctr', 'cpc', 'cpa', 'conversion_value'
      ]
    });

    return (report.data ?? []).map((row: any) => ({
      campaignId: row.campaign_id,
      adGroupId: row.ad_group_id,
      adId: row.ad_id,
      impressions: row.impressions,
      clicks: row.clicks,
      spend: row.spend,
      conversions: row.conversions || 0,
      ctr: row.ctr,
      cpc: row.cpc,
      cpa: row.cpa || 0,
      // Guard against zero spend: x / 0 is Infinity, which JSON.stringify
      // turns into null in the prompt.
      roas: row.conversion_value && row.spend > 0 ? row.conversion_value / row.spend : 0
    }));
  }

  /**
   * AI-powered analysis and decision making.
   *
   * @param data Performance rows to embed in the prompt.
   * @returns The recommendations parsed from the model's JSON array. The
   *          parsed values are not schema-validated.
   * @throws If the response is not text, contains no JSON array, or the
   *         array is not valid JSON.
   */
  private async analyzeWithAgent(data: PerformanceData[]): Promise<OptimizationRecommendation[]> {
    // Nothing to analyze: skip the model call instead of sending an empty
    // dataset (which may come back without a JSON array and fail the cycle).
    if (data.length === 0) return [];

    const prompt = `You are a Reddit Ads optimization agent. Analyze the following campaign performance data and recommend specific actions.

## Performance Data (Last 24 Hours)
${JSON.stringify(data, null, 2)}

## Optimization Configuration
- Goal: ${this.config.optimizationGoal}
- Min CTR threshold: ${this.config.minCTR * 100}%
- Max CPA threshold: $${this.config.maxCPA}
- Min impressions for decisions: ${this.config.minImpressions}
- Budget scale factor for winners: ${this.config.budgetScaleFactor}x

## Your Task
Analyze each ad/ad group and recommend ONE action per item:
1. PAUSE - Poor performers (low CTR, high CPA, no conversions after sufficient impressions)
2. SCALE - Winners (high CTR, low CPA, good ROAS) - increase budget
3. ADJUST_BID - Moderate performers - suggest bid adjustment
4. KEEP - Insufficient data or acceptable performance
5. ROTATE_CREATIVE - Good targeting but ad fatigue (declining CTR over time)

Return a JSON array of recommendations:
[
  {
    "adId": "string",
    "adGroupId": "string",
    "action": "PAUSE|SCALE|ADJUST_BID|KEEP|ROTATE_CREATIVE",
    "reason": "Brief explanation",
    "newBidMicros": number (optional, for ADJUST_BID),
    "budgetMultiplier": number (optional, for SCALE)
  }
]

Be aggressive with pausing poor performers to protect budget. Be conservative with scaling (only clear winners).`;

    const response = await this.anthropic.messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 4096,
      messages: [{ role: 'user', content: prompt }]
    });

    const content = response.content[0];
    // `content` is undefined when the response has no content blocks
    if (!content || content.type !== 'text') throw new Error('Unexpected response type');

    // Extract JSON from response (greedy: first '[' through last ']')
    const jsonMatch = content.text.match(/\[[\s\S]*\]/);
    if (!jsonMatch) throw new Error('No JSON found in response');

    return JSON.parse(jsonMatch[0]);
  }

  /**
   * Execute the AI recommendations one by one.
   * A failure on one recommendation is logged and does not stop the rest.
   */
  private async executeOptimizations(recommendations: OptimizationRecommendation[]) {
    for (const rec of recommendations) {
      try {
        switch (rec.action) {
          case 'PAUSE':
            await this.client.updateAd(rec.adId, { is_enabled: false });
            console.log(`Paused ad ${rec.adId}: ${rec.reason}`);
            break;

          // Braces scope the `const` declarations to this case only
          case 'SCALE': {
            const adGroup = await this.client.getAdGroup(rec.adGroupId);
            const currentBudget = adGroup.budget_total_amount_micros;
            const newBudget = Math.round(currentBudget * (rec.budgetMultiplier || this.config.budgetScaleFactor));
            // A missing current budget or a bad multiplier yields NaN/<=0;
            // never send that to the API.
            if (!Number.isFinite(newBudget) || newBudget <= 0) {
              throw new Error(`Refusing to set invalid budget ${newBudget} for ad group ${rec.adGroupId}`);
            }
            await this.client.updateAdGroup(rec.adGroupId, {
              budget_total_amount_micros: newBudget
            });
            console.log(`Scaled ad group ${rec.adGroupId} budget to ${newBudget / 1_000_000}: ${rec.reason}`);
            break;
          }

          case 'ADJUST_BID':
            // Silently skipped when the model gave no (or a zero) bid
            if (rec.newBidMicros) {
              await this.client.updateAdGroup(rec.adGroupId, {
                bid_amount_micros: rec.newBidMicros
              });
              console.log(`Adjusted bid for ${rec.adGroupId} to ${rec.newBidMicros / 1_000_000}: ${rec.reason}`);
            }
            break;

          case 'ROTATE_CREATIVE':
            // Flag for creative refresh (implement your creative rotation logic)
            console.log(`Creative rotation needed for ${rec.adId}: ${rec.reason}`);
            await this.flagForCreativeRefresh(rec.adId);
            break;

          case 'KEEP':
            // No action needed
            break;
        }
      } catch (error) {
        console.error(`Failed to execute ${rec.action} for ${rec.adId}:`, error);
      }
    }
  }

  /** Placeholder hook for creative rotation; currently does nothing. */
  private async flagForCreativeRefresh(adId: string) {
    // Implement: Add to queue, notify team, or auto-generate new creative
  }

  /** Logs a per-action count summary of the given recommendations. */
  private async logOptimizationResults(recommendations: OptimizationRecommendation[]) {
    const summary = {
      timestamp: new Date().toISOString(),
      totalRecommendations: recommendations.length,
      actions: {
        paused: recommendations.filter(r => r.action === 'PAUSE').length,
        scaled: recommendations.filter(r => r.action === 'SCALE').length,
        bidAdjusted: recommendations.filter(r => r.action === 'ADJUST_BID').length,
        creativeRotation: recommendations.filter(r => r.action === 'ROTATE_CREATIVE').length,
        kept: recommendations.filter(r => r.action === 'KEEP').length
      }
    };
    console.log('Optimization Summary:', JSON.stringify(summary, null, 2));
    // Store in database for historical analysis
  }

  /** Placeholder hook for failure alerts; currently does nothing. */
  private async sendAlert(subject: string, error: any) {
    // Implement: Send email/Slack notification
  }
}

/**
 * One recommendation as returned by the LLM and consumed by
 * RedditAdsOptimizerService.executeOptimizations.
 */
interface OptimizationRecommendation {
  adId: string;
  adGroupId: string;
  // PAUSE disables the ad; SCALE and ADJUST_BID update the ad group;
  // ROTATE_CREATIVE flags the ad; KEEP is a no-op.
  action: 'PAUSE' | 'SCALE' | 'ADJUST_BID' | 'KEEP' | 'ROTATE_CREATIVE';
  reason: string;
  // Only used for ADJUST_BID; the action is skipped when absent
  newBidMicros?: number;
  // Only used for SCALE; falls back to config.budgetScaleFactor when absent
  budgetMultiplier?: number;
}

export default RedditAdsOptimizerService;
```

### Background Service (Python)

```python
# services/reddit_ads_optimizer.py
import anthropic
import schedule
import time
import json
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum

from lib.reddit_ads_client import RedditAdsClient, RedditAdsConfig

class OptimizationAction(Enum):
    """Actions the optimizer can apply to an ad or ad group.

    The values match the action strings the LLM is asked to return, so a
    recommendation's ``action`` field can be converted with
    ``OptimizationAction(value)``.
    """
    PAUSE = "PAUSE"                      # disable the ad
    SCALE = "SCALE"                      # multiply the ad group's budget
    ADJUST_BID = "ADJUST_BID"            # set a new ad group bid
    KEEP = "KEEP"                        # no change
    ROTATE_CREATIVE = "ROTATE_CREATIVE"  # flag the ad for a creative refresh

@dataclass
class OptimizationConfig:
    """Credentials, decision thresholds and schedule for the optimizer service."""
    # Passed to RedditAdsClient together with access_token
    account_id: str
    access_token: str
    # Not read by the service code in this file (no token refresh is performed)
    refresh_token: str
    # The thresholds below are interpolated into the LLM prompt; the service
    # does not compare metrics against them itself.
    min_ctr: float = 0.005  # 0.5%
    max_cpa: float = 50.0
    min_impressions: int = 1000
    # Default budget multiplier for SCALE when the recommendation gives none
    budget_scale_factor: float = 1.5
    optimization_goal: str = "CONVERSIONS"
    # Hours between scheduled optimization cycles
    check_interval_hours: int = 4

@dataclass
class PerformanceData:
    """One ad-level report row, serialized into the LLM prompt via ``vars()``."""
    campaign_id: str
    ad_group_id: str
    ad_id: str
    impressions: int
    clicks: int
    spend: float
    conversions: int  # 0 when the report row has no 'conversions' key
    ctr: float
    cpc: float
    cpa: float  # 0 when the report row has no 'cpa' key
    roas: float  # conversion_value / spend, or 0 when spend is not positive

@dataclass
class OptimizationRecommendation:
    """A single action recommended by the LLM for one ad / ad group."""
    ad_id: str
    ad_group_id: str
    action: OptimizationAction
    reason: str
    # Only used for ADJUST_BID; the action is skipped when missing
    new_bid_micros: Optional[int] = None
    # Only used for SCALE; falls back to config.budget_scale_factor when missing
    budget_multiplier: Optional[float] = None

class RedditAdsOptimizerService:
    """Periodic optimizer for Reddit Ads.

    Each cycle fetches ad-level performance data, asks an LLM for one
    recommendation per ad, applies the recommendations through the Reddit Ads
    client, and prints a summary.
    """

    def __init__(self, config: OptimizationConfig):
        """Build the Reddit Ads client and the Anthropic client from ``config``."""
        self.config = config
        self.client = RedditAdsClient(RedditAdsConfig(
            access_token=config.access_token,
            account_id=config.account_id
        ))
        # NOTE(review): constructed without arguments; presumably the SDK reads
        # its API key from the environment - confirm.
        self.anthropic = anthropic.Anthropic()
        self._running = False

    def start(self):
        """Start the background optimization service.

        Runs one cycle immediately, then blocks, polling the scheduler once a
        minute until ``stop()`` clears the running flag.
        """
        self._running = True

        # Schedule optimization runs
        schedule.every(self.config.check_interval_hours).hours.do(
            self.run_optimization_cycle
        )

        print(f"Reddit Ads Optimizer started. Running every {self.config.check_interval_hours} hours.")

        # Run immediately on start
        self.run_optimization_cycle()

        # Keep running
        while self._running:
            schedule.run_pending()
            time.sleep(60)

    def stop(self):
        """Stop the optimization service (takes effect on the next poll of the loop in ``start``)."""
        self._running = False
        print("Reddit Ads Optimizer stopped.")

    def run_optimization_cycle(self):
        """Main optimization cycle: fetch -> analyze -> execute -> log.

        Never raises: any exception is printed and forwarded to ``_send_alert``.
        """
        print(f"[{datetime.now().isoformat()}] Running optimization cycle...")

        try:
            # 1. Fetch performance data
            performance_data = self._fetch_performance_data()

            # 2. Analyze with AI agent
            recommendations = self._analyze_with_agent(performance_data)

            # 3. Execute optimizations
            self._execute_optimizations(recommendations)

            # 4. Log results
            self._log_optimization_results(recommendations)

        except Exception as e:
            print(f"Optimization cycle failed: {e}")
            self._send_alert("Optimization cycle failed", str(e))

    def _fetch_performance_data(self) -> List[PerformanceData]:
        """Fetch last 24h performance data.

        The report is requested by calendar date (yesterday through today,
        local time). Returns an empty list when the report has no 'data' key.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=1)

        report = self.client.get_report({
            'start_date': start_date.strftime('%Y-%m-%d'),
            'end_date': end_date.strftime('%Y-%m-%d'),
            'level': 'AD',
            'metrics': [
                'impressions', 'clicks', 'spend', 'conversions',
                'ctr', 'cpc', 'cpa', 'conversion_value'
            ]
        })

        return [
            PerformanceData(
                campaign_id=row['campaign_id'],
                ad_group_id=row['ad_group_id'],
                ad_id=row['ad_id'],
                impressions=row['impressions'],
                clicks=row['clicks'],
                spend=row['spend'],
                conversions=row.get('conversions', 0),
                ctr=row['ctr'],
                cpc=row['cpc'],
                cpa=row.get('cpa', 0),
                # Avoid dividing by zero for ads that have not spent anything
                roas=row.get('conversion_value', 0) / row['spend'] if row['spend'] > 0 else 0
            )
            for row in report.get('data', [])
        ]

    def _analyze_with_agent(self, data: List[PerformanceData]) -> List[OptimizationRecommendation]:
        """AI-powered analysis and decision making.

        Args:
            data: Performance rows to embed in the prompt.

        Returns:
            The well-formed recommendations from the model's JSON array.
            Items with missing keys or an unknown action are skipped (and
            reported on stdout) instead of failing the whole cycle.

        Raises:
            ValueError: If the response contains no JSON array, the array is
                not valid JSON, or the payload is not a list.
        """
        # Nothing to analyze: skip the model call instead of sending an empty
        # dataset (which may come back without a JSON array and fail the cycle).
        if not data:
            return []

        prompt = f"""You are a Reddit Ads optimization agent. Analyze the following campaign performance data and recommend specific actions.

## Performance Data (Last 24 Hours)
{json.dumps([vars(d) for d in data], indent=2)}

## Optimization Configuration
- Goal: {self.config.optimization_goal}
- Min CTR threshold: {self.config.min_ctr * 100}%
- Max CPA threshold: ${self.config.max_cpa}
- Min impressions for decisions: {self.config.min_impressions}
- Budget scale factor for winners: {self.config.budget_scale_factor}x

## Your Task
Analyze each ad/ad group and recommend ONE action per item:
1. PAUSE - Poor performers (low CTR, high CPA, no conversions after sufficient impressions)
2. SCALE - Winners (high CTR, low CPA, good ROAS) - increase budget
3. ADJUST_BID - Moderate performers - suggest bid adjustment
4. KEEP - Insufficient data or acceptable performance
5. ROTATE_CREATIVE - Good targeting but ad fatigue (declining CTR over time)

Return a JSON array of recommendations:
[
  {{
    "ad_id": "string",
    "ad_group_id": "string",
    "action": "PAUSE|SCALE|ADJUST_BID|KEEP|ROTATE_CREATIVE",
    "reason": "Brief explanation",
    "new_bid_micros": number (optional, for ADJUST_BID),
    "budget_multiplier": number (optional, for SCALE)
  }}
]

Be aggressive with pausing poor performers to protect budget. Be conservative with scaling (only clear winners)."""

        response = self.anthropic.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=4096,
            messages=[{"role": "user", "content": prompt}]
        )

        content = response.content[0].text

        # Extract JSON from response (greedy: first '[' through last ']')
        import re
        json_match = re.search(r'\[[\s\S]*\]', content)
        if not json_match:
            raise ValueError("No JSON found in response")

        recommendations_data = json.loads(json_match.group())
        if not isinstance(recommendations_data, list):
            raise ValueError("Expected a JSON array of recommendations")

        recommendations = []
        for r in recommendations_data:
            try:
                recommendations.append(OptimizationRecommendation(
                    ad_id=r['ad_id'],
                    ad_group_id=r['ad_group_id'],
                    action=OptimizationAction(r['action']),
                    reason=r['reason'],
                    new_bid_micros=r.get('new_bid_micros'),
                    budget_multiplier=r.get('budget_multiplier')
                ))
            except (KeyError, TypeError, ValueError, AttributeError) as e:
                # One bad item from the model must not discard the valid ones
                print(f"Skipping malformed recommendation {r!r}: {e}")
        return recommendations

    def _execute_optimizations(self, recommendations: List[OptimizationRecommendation]):
        """Execute the AI recommendations.

        Each recommendation is applied independently; a failure is printed
        and the loop continues. KEEP recommendations need no action.
        """
        for rec in recommendations:
            try:
                if rec.action == OptimizationAction.PAUSE:
                    self.client.update_ad(rec.ad_id, {'is_enabled': False})
                    print(f"Paused ad {rec.ad_id}: {rec.reason}")

                elif rec.action == OptimizationAction.SCALE:
                    ad_group = self.client.get_ad_group(rec.ad_group_id)
                    current_budget = ad_group['budget_total_amount_micros']
                    multiplier = rec.budget_multiplier or self.config.budget_scale_factor
                    new_budget = int(current_budget * multiplier)
                    self.client.update_ad_group(rec.ad_group_id, {
                        'budget_total_amount_micros': new_budget
                    })
                    print(f"Scaled ad group {rec.ad_group_id} budget to ${new_budget / 1_000_000}: {rec.reason}")

                elif rec.action == OptimizationAction.ADJUST_BID:
                    # Silently skipped when the model gave no (or a zero) bid
                    if rec.new_bid_micros:
                        self.client.update_ad_group(rec.ad_group_id, {
                            'bid_amount_micros': rec.new_bid_micros
                        })
                        print(f"Adjusted bid for {rec.ad_group_id}: {rec.reason}")

                elif rec.action == OptimizationAction.ROTATE_CREATIVE:
                    print(f"Creative rotation needed for {rec.ad_id}: {rec.reason}")
                    self._flag_for_creative_refresh(rec.ad_id)

            except Exception as e:
                print(f"Failed to execute {rec.action} for {rec.ad_id}: {e}")

    def _flag_for_creative_refresh(self, ad_id: str):
        """Flag ad for creative refresh (placeholder; currently does nothing)."""
        # Implement: Add to queue, notify team, or auto-generate new creative
        pass

    def _log_optimization_results(self, recommendations: List[OptimizationRecommendation]):
        """Print a per-action count summary of the given recommendations."""
        summary = {
            'timestamp': datetime.now().isoformat(),
            'total_recommendations': len(recommendations),
            'actions': {
                'paused': len([r for r in recommendations if r.action == OptimizationAction.PAUSE]),
                'scaled': len([r for r in recommendations if r.action == OptimizationAction.SCALE]),
                'bid_adjusted': len([r for r in recommendations if r.action == OptimizationAction.ADJUST_BID]),
                'creative_rotation': len([r for r in recommendations if r.action == OptimizationAction.ROTATE_CREATIVE]),
                'kept': len([r for r in recommendations if r.action == OptimizationAction.KEEP]),
            }
        }
        print(f"Optimization Summary: {json.dumps(summary, indent=2)}")

    def _send_alert(self, subject: str, error: str):
        """Send alert notification (placeholder; currently does nothing)."""
        # Implement: Send email/Slack notification
        pass


# Entry point for running as background service
if __name__ == "__main__":
    import os

    # Required credentials; a missing variable raises KeyError before the
    # service is constructed.
    env = os.environ
    account_id = env['REDDIT_ADS_ACCOUNT_ID']
    access_token = env['REDDIT_ADS_ACCESS_TOKEN']
    refresh_token = env['REDDIT_ADS_REFRESH_TOKEN']

    # Tuning values, spelled out explicitly even though they equal the
    # OptimizationConfig defaults.
    tuning = {
        'min_ctr': 0.005,
        'max_cpa': 50.0,
        'min_impressions': 1000,
        'budget_scale_factor': 1.5,
        'optimization_goal': "CONVERSIONS",
        'check_interval_hours': 4,
    }

    config = OptimizationConfig(
        account_id=account_id,
        access_token=access_token,
        refresh_token=refresh_token,
        **tuning
    )

    # start() runs one cycle right away and then blocks in its scheduling loop
    optimizer = RedditAdsOptimizerService(config)
    optimizer.start()
```

### Docker Deployment

```dockerfile
# Dockerfile
# Image for running the Python optimizer as a long-lived background service.
FROM python:3.11-slim

# The optimizer reports progress with print(); without this, stdout is
# block-buffered inside the container and `docker logs` lags or stays empty.
ENV PYTHONUNBUFFERED=1

# Running services/reddit_ads_optimizer.py as a script puts /app/services (not
# /app) on sys.path, so `from lib.reddit_ads_client import ...` needs /app added.
# NOTE(review): assumes lib/ lives at the project root next to services/ - confirm.
ENV PYTHONPATH=/app

WORKDIR /app

# Install dependencies first so this layer is cached across source-only changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["python", "services/reddit_ads_optimizer.py"]
```

```yaml
# docker-compose.yml
# Runs the optimizer image built from the Dockerfile in this directory.
version: '3.8'

services:
  reddit-ads-optimizer:
    build: .
    container_name: reddit-ads-optimizer
    # Restart after crashes and reboots unless the container was stopped explicitly
    restart: unless-stopped
    # Values are substituted by Compose (shell environment or a local .env file).
    # The Python entry point shown in this document reads only
    # REDDIT_ADS_ACCOUNT_ID, REDDIT_ADS_ACCESS_TOKEN and REDDIT_ADS_REFRESH_TOKEN;
    # ANTHROPIC_API_KEY is presumably picked up by the Anthropic SDK - confirm.
    environment:
      - REDDIT_ADS_CLIENT_ID=${REDDIT_ADS_CLIENT_ID}
      - REDDIT_ADS_CLIENT_SECRET=${REDDIT_ADS_CLIENT_SECRET}
      - REDDIT_ADS_ACCOUNT_ID=${REDDIT_ADS_ACCOUNT_ID}
      - REDDIT_ADS_ACCESS_TOKEN=${REDDIT_ADS_ACCESS_TOKEN}
      - REDDIT_ADS_REFRESH_TOKEN=${REDDIT_ADS_REFRESH_TOKEN}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
    # Host ./logs is mounted at /app/logs; the optimizer script shown here only
    # prints to stdout, so nothing is written there unless file logging is added.
    volumes:
      - ./logs:/app/logs
    # Cap container stdout/stderr logs at 3 rotated files of 10 MB each
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
```

### Optimization Strategies

```
┌─────────────────────────────────────────────────────────────────┐
│  AGENTIC OPTIMIZATION STRATEGIES                                │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  1. PERFORMANCE-BASED PAUSING                                   │
│     ─────────────────────────────────────────────────────────  │
│     IF impressions > 1000 AND ctr < 0.3% → PAUSE               │
│     IF impressions > 500 AND conversions = 0 → PAUSE           │
│     IF cpa > 2x target → PAUSE                                  │
│                                                                 │
│  2. WINNER SCALING                                              │
│     ─────────────────────────────────────────────────────────  │
│     IF ctr > 1% AND cpa < target AND conversions > 5           │
│     → SCALE budget by 1.5x                                      │
│     Cap at 3x original budget to manage risk                    │
│                                                                 │
│  3. BID OPTIMIZATION                                            │
│     ─────────────────────────────────────────────────────────  │
│     IF position low AND ctr good → INCREASE bid 10-20%         │
│     IF cpa high but converting → DECREASE bid 10-15%           │
│                                                                 │
│  4. CREATIVE FATIGUE DETECTION                                  │
│     ─────────────────────────────────────────────────────────  │
│     IF ctr declining 3 consecutive days → ROTATE_CREATIVE      │
│     IF frequency > 3 → ROTATE_CREATIVE                          │
│                                                                 │
│  5. BUDGET REALLOCATION                                         │
│     ─────────────────────────────────────────────────────────  │
│     Move budget from paused ads to scaled winners              │
│     Maintain total daily budget cap                             │
└─────────────────────────────────────────────────────────────────┘
```

### Advanced: Multi-Agent Optimization

```typescript
// services/multi-agent-optimizer.ts
import Anthropic from '@anthropic-ai/sdk';

/** One stage of the multi-agent pipeline. */
interface AgentRole {
  // Display name; also lower-cased and underscored to key this agent's output in the shared context
  name: string;
  // Sent as the `system` prompt for this agent's model call
  systemPrompt: string;
}

// Pipeline stages, run in array order by MultiAgentOptimizer.runAgentPipeline.
// Each agent receives the accumulated context, so later agents see the
// output of all earlier ones.
const AGENTS: AgentRole[] = [
  // Stage 1: classify performers and trends
  {
    name: 'Performance Analyst',
    systemPrompt: `You analyze Reddit Ads performance data. Identify:
    - Top performers (high CTR, low CPA, good ROAS)
    - Poor performers (low CTR, high CPA, no conversions)
    - Trends (improving, declining, stable)
    Output structured analysis with confidence scores.`
  },
  // Stage 2: propose budget moves based on the analysis
  {
    name: 'Budget Strategist',
    systemPrompt: `You optimize budget allocation across campaigns.
    Given performance analysis, recommend:
    - Budget increases for winners (max 50% increase)
    - Budget decreases for losers
    - Reallocation between ad groups
    Protect total budget while maximizing ROI.`
  },
  // Stage 3: assess creatives
  {
    name: 'Creative Director',
    systemPrompt: `You evaluate ad creative performance.
    Identify ads with:
    - Creative fatigue (declining engagement)
    - High potential but poor execution
    - A/B test winners
    Recommend creative refreshes and new variations.`
  },
  // Stage 4 (last): review the earlier recommendations for safety
  {
    name: 'Risk Manager',
    systemPrompt: `You ensure optimization safety.
    Review recommendations and flag:
    - Overly aggressive scaling
    - Insufficient data for decisions
    - Budget concentration risk
    - Compliance concerns
    Approve, modify, or reject recommendations.`
  }
];

/**
 * Runs the AGENTS pipeline: each agent is called in order with the
 * accumulated context and its output is added to that context.
 */
class MultiAgentOptimizer {
  private anthropic: Anthropic;

  constructor() {
    this.anthropic = new Anthropic();
  }

  /**
   * @param performanceData Raw performance data handed to the first agent.
   * @returns The final context: `performanceData` plus one entry per agent,
   *          keyed by the agent's snake_cased name (e.g. `risk_manager`) and
   *          holding the first content block of that agent's response.
   */
  async runAgentPipeline(performanceData: any) {
    let context = { performanceData };

    // Run agents in sequence, each building on previous output
    for (const agent of AGENTS) {
      const response = await this.anthropic.messages.create({
        model: 'claude-sonnet-4-20250514',
        max_tokens: 4096,
        system: agent.systemPrompt,
        messages: [{
          role: 'user',
          content: `Previous context:\n${JSON.stringify(context, null, 2)}\n\nProvide your analysis and recommendations.`
        }]
      });

      // A string pattern in replace() only swaps the first match; the global
      // regex keeps keys well-formed for names with more than one space.
      const contextKey = agent.name.toLowerCase().replace(/\s+/g, '_');

      context = {
        ...context,
        [contextKey]: response.content[0]
      };
    }

    return context;
  }
}
```

### Monitoring Dashboard Data

```typescript
// api/optimization-stats.ts
/**
 * Aggregated optimizer statistics for a reporting period, as returned by
 * getOptimizationStats. Units and sign conventions of the change fields are
 * not defined in this snippet.
 */
interface OptimizationStats {
  period: string;
  totalOptimizations: number;
  // Count of executed actions by type
  actionBreakdown: {
    paused: number;
    scaled: number;
    bidAdjusted: number;
    creativeRotated: number;
  };
  // Before/after comparison of key metrics
  performanceImpact: {
    ctrChange: number;
    cpaChange: number;
    roasChange: number;
    spendEfficiency: number;
  };
  budgetSaved: number;
  revenueIncreased: number;
}

/**
 * Returns aggregated optimization statistics for the given date range.
 *
 * TODO: stub only. The body contains no implementation and no return
 * statement, so it must be filled in before this compiles and is usable.
 *
 * @param startDate Start of the reporting period.
 * @param endDate   End of the reporting period.
 */
async function getOptimizationStats(
  startDate: Date,
  endDate: Date
): Promise<OptimizationStats> {
  // Query optimization logs and performance data
  // Calculate before/after metrics
  // Return aggregated stats
}
```

---

## Resources

- [Reddit Ads API Docs](https://ads-api.reddit.com/docs/)
- [Reddit Developer Portal](https://www.reddit.com/prefs/apps/)
- [Reddit Ads Help Center](https://business.reddithelp.com/s/article/Reddit-Ads-API)
- [OAuth2 Documentation](https://www.reddit.com/dev/api/oauth/)


================================================
FILE: skills/reddit-api/SKILL.md
================================================
---
name: reddit-api
description: Reddit API with PRAW (Python) and Snoowrap (Node.js)
when-to-use: When building Reddit integrations or bots
user-invocable: false
effort: medium
---

# Reddit API Skill


For integrating Reddit data into applications - fetching posts, comments, subreddits, and user data.

**Sources:** [Reddit API Docs](https://www.reddit.com/dev/api/) | [OAuth2 Wiki](https://github.com/reddit-archive/reddit/wiki/oauth2) | [PRAW Docs](https://praw.readthedocs.io/)

---

## Setup

### 1. Create Reddit App

1. Go to https://www.reddit.com/prefs/apps
2. Click "Create App" or "Create Another App"
3. Fill in:
   - **Name**: Your app name
   - **App type**:
     - `script` - For personal use / bots you control
     - `web app` - For server-side apps with user auth
     - `installed app` - For mobile/desktop apps
   - **Redirect URI**: `http://localhost:8000/callback` (for dev)
4. Note your `client_id` (under app name) and `client_secret`

### 2. Environment Variables

```bash
# .env
REDDIT_CLIENT_ID=your_client_id
REDDIT_CLIENT_SECRET=your_client_secret
# Quoted because the value contains spaces; unquoted, a shell that sources
# this file would try to run "by" as a command and leave the variable unset.
REDDIT_USER_AGENT="YourApp/1.0 by YourUsername"
REDDIT_USERNAME=your_username        # For script apps only
REDDIT_PASSWORD=your_password        # For script apps only

**User-Agent Format**: `<platform>:<app_id>:<version> (by /u/<username>)`, for example `python:com.example.myapp:v1.0 (by /u/YourUsername)`

---

## Rate Limits

| Tier | Limit | Notes |
|------|-------|-------|
| OAuth authenticated | 100 QPM | Per OAuth client ID |
| Non-authenticated | Blocked | Must use OAuth |

- Limits are averaged over a 10-minute window
- Include `User-Agent` header to avoid blocks
- Respect `X-Ratelimit-*` response headers

---

## Python: PRAW (Recommended)

### Installation

```bash
# Run ONE of the following, depending on your package manager.
pip install praw
# or
uv add praw
```

### Script App (Personal Use / Bots)

```python
import praw
from pydantic_settings import BaseSettings, SettingsConfigDict

class RedditSettings(BaseSettings):
    """Reddit credentials loaded from environment variables or a local .env file.

    Field names map to the upper-case variables in .env
    (e.g. ``reddit_client_id`` <- ``REDDIT_CLIENT_ID``).
    """
    reddit_client_id: str
    reddit_client_secret: str
    reddit_user_agent: str
    reddit_username: str
    reddit_password: str

    # pydantic v2 / pydantic-settings configuration; replaces the deprecated
    # inner `class Config`.
    model_config = SettingsConfigDict(env_file=".env")

# Raises a validation error if any of the five settings is missing
settings = RedditSettings()

# Username and password are supplied, so this instance acts as that user
reddit = praw.Reddit(
    client_id=settings.reddit_client_id,
    client_secret=settings.reddit_client_secret,
    user_agent=settings.reddit_user_agent,
    username=settings.reddit_username,
    password=settings.reddit_password,
)

# Verify authentication
print(f"Logged in as: {reddit.user.me()}")
```

### Read-Only (No User Auth)

```python
import praw

reddit = praw.Reddit(
    client_id="your_client_id",
    client_secret="your_client_secret",
    user_agent="YourApp/1.0 by YourUsername",
)

# Read-only mode - can browse, can't post/vote
reddit.read_only = True
```

### Common Operations

```python
# Get subreddit posts
subreddit = reddit.subreddit("python")

# Hot posts
for post in subreddit.hot(limit=10):
    print(f"{post.title} - {post.score} upvotes")

# New posts
for post in subreddit.new(limit=10):
    print(post.title)

# Search posts
for post in subreddit.search("pydantic", limit=5):
    print(post.title)

# Get specific post
submission = reddit.submission(id="abc123")
print(submission.title)
print(submission.selftext)

# Get comments
# replace_more(limit=0) discards the unresolved MoreComments placeholders
# rather than fetching them, so only already-loaded comments remain.
submission.comments.replace_more(limit=0)  # Remove MoreComments placeholders
# .list() is the call that flattens the nested comment tree.
for comment in submission.comments.list():
    print(f"{comment.author}: {comment.body[:100]}")
```

### Posting & Voting (Requires Auth)

```python
# Submit text post
subreddit = reddit.subreddit("test")
submission = subreddit.submit(
    title="Test Post",
    selftext="This is the body of my post."
)

# Submit link post
submission = subreddit.submit(
    title="Check this out",
    url="https://example.com"
)

# Vote
submission.upvote()
submission.downvote()
submission.clear_vote()

# Comment
submission.reply("Great post!")

# Reply to comment
comment = reddit.comment(id="xyz789")
comment.reply("I agree!")
```

### Streaming (Real-time)

```python
# Stream new posts
for post in reddit.subreddit("python").stream.submissions():
    print(f"New post: {post.title}")
    # Process post...

# Stream new comments
for comment in reddit.subreddit("python").stream.comments():
    print(f"New comment by {comment.author}: {comment.body[:50]}")
```

### User Data

```python
# Get user info
user = reddit.redditor("spez")
print(f"Karma: {user.link_karma + user.comment_karma}")

# User's posts
for post in user.submissions.new(limit=5):
    print(post.title)

# User's comments
for comment in user.comments.new(limit=5):
    print(comment.body[:100])
```

---

## TypeScript / Node.js: Snoowrap

### Installation

```bash
npm install snoowrap
# or
pnpm add snoowrap
```

### Setup

```typescript
import Snoowrap from "snoowrap";

const reddit = new Snoowrap({
  userAgent: "YourApp/1.0 by YourUsername",
  clientId: process.env.REDDIT_CLIENT_ID!,
  clientSecret: process.env.REDDIT_CLIENT_SECRET!,
  username: process.env.REDDIT_USERNAME!,
  password: process.env.REDDIT_PASSWORD!,
});

// Configure rate limiting
reddit.config({
  requestDelay: 1000,  // 1 second between requests
  continueAfterRatelimitError: true,
});
```

### Common Operations

```typescript
// Get hot posts from subreddit
const posts = await reddit.getSubreddit("typescript").getHot({ limit: 10 });
posts.forEach((post) => {
  console.log(`${post.title} - ${post.score} upvotes`);
});

// Search posts
const results = await reddit.getSubreddit("programming").search({
  query: "typescript",
  sort: "relevance",
  time: "month",
  limit: 10,
});

// Get specific post
const submission = await reddit.getSubmission("abc123").fetch();
console.log(submission.title);

// Get comments
const comments = await submission.comments.fetchAll();
comments.forEach((comment) => {
  console.log(`${comment.author.name}: ${comment.body.slice(0, 100)}`);
});
```

### Posting

```typescript
// Submit text post
const post = await reddit.getSubreddit("test").submitSelfpost({
  title: "Test Post",
  text: "This is the body.",
});

// Submit link
const linkPost = await reddit.getSubreddit("test").submitLink({
  title: "Check this out",
  url: "https://example.com",
});

// Vote and comment
await post.upvote();
await post.reply("Great post!");
```

---

## Direct API (No Library)

### Python with httpx

```python
import httpx
import base64
from pydantic import BaseModel

class RedditClient:
    """Minimal async Reddit client using the client-credentials OAuth grant.

    A token is fetched lazily on the first `get_posts` call and cached on the
    instance. Call `close()` when finished to release the shared HTTP client.
    """

    def __init__(self, client_id: str, client_secret: str, user_agent: str) -> None:
        """Store the app credentials and create the shared httpx client.

        No network request is made here; the token starts out as None.
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.user_agent = user_agent
        self.access_token: str | None = None
        self.client = httpx.AsyncClient()

    async def authenticate(self) -> None:
        """Fetch an application-only OAuth token and cache it on the instance.

        Raises:
            httpx.HTTPStatusError: if the token endpoint returns a 4xx/5xx.
            KeyError: if the JSON response has no "access_token" key.
        """
        # HTTP Basic credentials: base64("client_id:client_secret").
        auth = base64.b64encode(
            f"{self.client_id}:{self.client_secret}".encode()
        ).decode()

        response = await self.client.post(
            "https://www.reddit.com/api/v1/access_token",
            headers={
                "Authorization": f"Basic {auth}",
                "User-Agent": self.user_agent,
            },
            data={
                "grant_type": "client_credentials",
            },
        )
        response.raise_for_status()
        self.access_token = response.json()["access_token"]

    async def get_posts(self, subreddit: str, sort: str = "hot", limit: int = 10) -> list[dict]:
        """Return the `data` dict of each post in a subreddit listing.

        Args:
            subreddit: Subreddit name, interpolated into the URL path as-is.
            sort: Listing name used as the final path segment (default "hot").
            limit: Sent as the `limit` query parameter.

        Raises:
            httpx.HTTPStatusError: if the listing request returns a 4xx/5xx.
        """
        # Authenticate only when no token is cached.
        # NOTE(review): the cached token is never refreshed, so a long-lived
        # client will presumably start failing once it expires - confirm.
        if not self.access_token:
            await self.authenticate()

        response = await self.client.get(
            f"https://oauth.reddit.com/r/{subreddit}/{sort}",
            headers={
                "Authorization": f"Bearer {self.access_token}",
                "User-Agent": self.user_agent,
            },
            params={"limit": limit},
        )
        response.raise_for_status()
        # Each child wraps the post payload under its "data" key.
        return [post["data"] for post in response.json()["data"]["children"]]

    async def close(self) -> None:
        """Close the underlying httpx client."""
        await self.client.aclose()


# Usage
async def main():
    client = RedditClient(
        client_id="your_id",
        client_secret="your_secret",
        user_agent="YourApp/1.0",
    )
    try:
        posts = await client.get_posts("python", limit=5)
        for post in posts:
            print(f"{post['title']} - {post['score']} upvotes")
    finally:
        await client.close()
```

### TypeScript with fetch

```typescript
interface RedditPost {
  title: string;
  score: number;
  url: string;
  selftext: string;
  author: string;
  created_utc: number;
}

/**
 * Minimal Reddit client using the application-only (client credentials)
 * OAuth grant. The token is fetched lazily on the first `getPosts` call and
 * cached on the instance.
 */
class RedditClient {
  private accessToken: string | null = null;

  constructor(
    private clientId: string,
    private clientSecret: string,
    private userAgent: string
  ) {}

  /**
   * Requests an application-only token and caches it.
   *
   * @throws Error when the token endpoint answers with a non-2xx status or
   *   the response body carries no `access_token`.
   */
  async authenticate(): Promise<void> {
    const auth = Buffer.from(`${this.clientId}:${this.clientSecret}`).toString("base64");

    const response = await fetch("https://www.reddit.com/api/v1/access_token", {
      method: "POST",
      headers: {
        Authorization: `Basic ${auth}`,
        "User-Agent": this.userAgent,
        "Content-Type": "application/x-www-form-urlencoded",
      },
      body: "grant_type=client_credentials",
    });

    // fetch() only rejects on network failure; HTTP errors must be checked
    // explicitly or a bad credential silently yields `Bearer undefined`.
    if (!response.ok) {
      throw new Error(`Reddit auth failed: ${response.status} ${response.statusText}`);
    }

    const data = await response.json();
    if (typeof data.access_token !== "string") {
      throw new Error(`Reddit auth failed: ${data.error ?? "no access_token in response"}`);
    }
    this.accessToken = data.access_token;
  }

  /**
   * Returns the posts of a subreddit listing.
   *
   * @param subreddit Subreddit name, interpolated into the URL path as-is.
   * @param sort Listing name used as the final path segment.
   * @param limit Value of the `limit` query parameter.
   * @throws Error when authentication fails or the listing request answers
   *   with a non-2xx status (e.g. 403, 404, 429).
   */
  async getPosts(subreddit: string, sort = "hot", limit = 10): Promise<RedditPost[]> {
    if (!this.accessToken) await this.authenticate();

    const response = await fetch(
      `https://oauth.reddit.com/r/${subreddit}/${sort}?limit=${limit}`,
      {
        headers: {
          Authorization: `Bearer ${this.accessToken}`,
          "User-Agent": this.userAgent,
        },
      }
    );

    if (!response.ok) {
      throw new Error(`Reddit request failed: ${response.status} ${response.statusText}`);
    }

    const data = await response.json();
    // Each child wraps the post payload under its `data` key.
    return data.data.children.map((child: any) => child.data);
  }
}
```

---

## OAuth2 Web Flow (User Authorization)

For apps where users log in with their Reddit account:

```python
from fastapi import FastAPI, Request
from fastapi.responses import RedirectResponse
import httpx
import secrets

app = FastAPI()
state_store: dict[str, bool] = {}

REDDIT_CLIENT_ID = "your_client_id"
REDDIT_CLIENT_SECRET = "your_client_secret"
REDIRECT_URI = "http://localhost:8000/callback"

@app.get("/login")
async def login():
    """Start the OAuth2 flow by redirecting the user to Reddit's consent page.

    A random `state` value is recorded in `state_store` so the callback can
    reject responses that did not originate from this login request.
    """
    from urllib.parse import quote, urlencode

    state = secrets.token_urlsafe(16)
    state_store[state] = True

    # Build the query with urlencode instead of raw interpolation: the scope
    # list contains spaces and the redirect URI may contain reserved
    # characters, both of which must be percent-encoded to form a valid URL.
    # quote_via=quote encodes spaces as %20 rather than '+'.
    query = urlencode(
        {
            "client_id": REDDIT_CLIENT_ID,
            "response_type": "code",
            "state": state,
            "redirect_uri": REDIRECT_URI,
            "duration": "permanent",
            "scope": "identity read submit vote",
        },
        quote_via=quote,
    )
    return RedirectResponse(f"https://www.reddit.com/api/v1/authorize?{query}")

@app.get("/callback")
async def callback(code: str, state: str):
    """Handle Reddit's redirect and exchange the authorization code for tokens.

    Args:
        code: One-time authorization code supplied by Reddit.
        state: The value issued by the login route; anything else is rejected.

    Returns:
        A dict with a truncated `access_token` on success, or an `error`
        message when the state is unknown or the token exchange fails.
    """
    # pop() validates and consumes the state in one step so it cannot be reused.
    if state_store.pop(state, None) is None:
        return {"error": "Invalid state"}

    # Exchange code for token
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://www.reddit.com/api/v1/access_token",
            auth=(REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET),
            data={
                "grant_type": "authorization_code",
                "code": code,
                "redirect_uri": REDIRECT_URI,
            },
            headers={"User-Agent": "YourApp/1.0"},
        )

    # Check the status before parsing: an error response may not be JSON.
    if response.status_code != 200:
        return {"error": f"Token exchange failed with HTTP {response.status_code}"}

    tokens = response.json()
    # A failed exchange (e.g. an expired or already-used code) can come back
    # as a JSON body with an "error" field and no access token.
    if "access_token" not in tokens:
        return {"error": tokens.get("error", "Token exchange failed")}

    # Store tokens securely, associate with user session
    return {"access_token": tokens["access_token"][:10] + "..."}
```

---

## Available Scopes

| Scope | Description |
|-------|-------------|
| `identity` | Access username and signup date |
| `read` | Access posts and comments |
| `submit` | Submit links and comments |
| `vote` | Upvote/downvote content |
| `edit` | Edit posts and comments |
| `history` | Access voting history |
| `subscribe` | Manage subreddit subscriptions |
| `mysubreddits` | Access subscribed subreddits |
| `privatemessages` | Access private messages |
| `save` | Save/unsave content |

Full list: https://www.reddit.com/api/v1/scopes

---

## Project Structure

```
project/
├── src/
│   ├── reddit/
│   │   ├── __init__.py
│   │   ├── client.py         # Reddit client wrapper
│   │   ├── models.py         # Pydantic models for posts/comments
│   │   └── scraper.py        # Data collection logic
│   └── main.py
├── .env
└── pyproject.toml
```

---

## Pydantic Models

```python
from pydantic import BaseModel
from datetime import datetime

class RedditPost(BaseModel):
    """Validated snapshot of a Reddit submission."""

    id: str
    title: str
    author: str
    subreddit: str
    score: int
    upvote_ratio: float
    url: str
    selftext: str
    created_utc: datetime  # timezone-aware, always UTC
    num_comments: int
    is_self: bool

    @classmethod
    def from_praw(cls, submission) -> "RedditPost":
        """Build a RedditPost from a PRAW submission object.

        Args:
            submission: Object exposing the PRAW submission attributes read
                below (`id`, `title`, `author`, `subreddit`, ...).

        Returns:
            A RedditPost whose `created_utc` is a UTC-aware datetime.
        """
        from datetime import timezone

        return cls(
            id=submission.id,
            title=submission.title,
            # str() also turns a missing author (None) into the string "None".
            author=str(submission.author),
            subreddit=submission.subreddit.display_name,
            score=submission.score,
            upvote_ratio=submission.upvote_ratio,
            url=submission.url,
            selftext=submission.selftext,
            # The value is a UTC epoch. Without tz= fromtimestamp() returns
            # naive *local* time, which is wrong on any non-UTC host.
            created_utc=datetime.fromtimestamp(submission.created_utc, tz=timezone.utc),
            num_comments=submission.num_comments,
            is_self=submission.is_self,
        )

class RedditComment(BaseModel):
    id: str
    author: str
    body: str
    score: int
    created_utc: datetime
    parent_id: str
    is_submitter: bool
```

---

## Anti-Patterns

- **No User-Agent** - Reddit blocks requests without proper User-Agent
- **Ignoring rate limits** - Respect 100 QPM, check `X-Ratelimit-*` headers
- **Storing credentials in code** - Use environment variables
- **Not handling `MoreComments`** - Use `replace_more()` in PRAW
- **Polling instead of streaming** - Use `.stream` for real-time data
- **No error handling** - Handle 429 (rate limit), 403 (forbidden), 404 (not found)

---

## Quick Reference

```bash
# PRAW installation
pip install praw

# Snoowrap installation
npm install snoowrap

# Test authentication
python -c "import praw; r = praw.Reddit(...); print(r.user.me())"
```

### Endpoints

| Operation | Endpoint |
|-----------|----------|
| Auth token | `POST https://www.reddit.com/api/v1/access_token` |
| API requests | `https://oauth.reddit.com/...` |
| Subreddit posts | `GET /r/{subreddit}/{sort}` |
| Submission | `GET /comments/{id}` |
| User info | `GET /user/{username}/about` |
| Submit post | `POST /api/submit` |
| Vote | `POST /api/vote` |


================================================
FILE: skills/security/SKILL.md
================================================
---
name: security
description: OWASP security patterns, secrets management, security testing
when-to-use: When writing code that handles auth, user input, API keys, or when security review is requested
user-invocable: true
allowed-tools: [Read, Glob, Grep, Bash]
effort: high
---

# Security Skill


Security best practices and automated security testing for all projects.

---

## Core Principle

**Security is not optional.** Every project must pass security checks before merge. Assume all input is malicious, all secrets will leak if committed, and all dependencies have vulnerabilities.

---

## Required Security Setup

### 1. Gitignore (Non-Negotiable)

Every project must have these in `.gitignore`:

```gitignore
# Environment files - NEVER commit
.env
.env.*
!.env.example

# Secrets
*.pem
*.key
*.p12
*.pfx
credentials.json
secrets.json
*-credentials.json
service-account*.json

# IDE and OS
.idea/
.vscode/settings.json
.DS_Store
Thumbs.db

# Dependencies
node_modules/
__pycache__/
*.pyc
.venv/
venv/

# Build outputs
dist/
build/
*.egg-info/

# Logs that might contain sensitive data
*.log
logs/
```

### 2. Environment Variables

**Create `.env.example`** with all required vars (no values):
```bash
# .env.example - Copy to .env and fill in values

# Server-side only (NEVER prefix with VITE_ or NEXT_PUBLIC_)
DATABASE_URL=
ANTHROPIC_API_KEY=
SUPABASE_SERVICE_ROLE_KEY=

# Client-side safe (public, non-sensitive)
VITE_SUPABASE_URL=
VITE_SUPABASE_ANON_KEY=
```

### Frontend Environment Variables (Critical!)

**NEVER put secrets in client-exposed env vars:**

| Framework | Client-Exposed Prefix | Server-Only |
|-----------|----------------------|-------------|
| Vite | `VITE_*` | No prefix |
| Next.js | `NEXT_PUBLIC_*` | No prefix |
| Create React App | `REACT_APP_*` | N/A (no server) |

```typescript
// WRONG - Secret exposed to browser bundle!
const apiKey = import.meta.env.VITE_ANTHROPIC_API_KEY;

// CORRECT - Only public values client-side
const supabaseUrl = import.meta.env.VITE_SUPABASE_URL;

// CORRECT - Secrets stay server-side only
// In API route or server function:
const apiKey = process.env.ANTHROPIC_API_KEY;
```

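**Vercel Environment Variables:**
- Vercel makes every configured variable available to the build; it is the framework's prefix that decides what reaches the browser
- In a Vite project only `VITE_*` vars are bundled into client code (Next.js: `NEXT_PUBLIC_*`); variables without the prefix stay server-only
- Always verify in browser devtools → Sources → your bundle that secrets aren't exposed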

**Validate environment at startup:**
```typescript
// config/env.ts
import { z } from 'zod';

const envSchema = z.object({
  DATABASE_URL: z.string().url(),
  ANTHROPIC_API_KEY: z.string().min(1),
  NODE_ENV: z.enum(['development', 'production', 'test']),
});

export const env = envSchema.parse(process.env);
```

```python
# config/env.py
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    """Application settings validated once at import time.

    Values come from environment variables matching the field names
    (case-insensitively) or from a local `.env` file. Instantiation raises a
    validation error when a required value is missing, so a misconfigured
    deployment fails at startup.
    """

    # `model_config` replaces the nested `class Config`, which is deprecated
    # in Pydantic v2. extra="ignore" is required when .env also holds keys
    # this model does not declare (e.g. client-side VITE_* values); without
    # it those extra keys fail validation.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    database_url: str
    anthropic_api_key: str
    environment: str = "development"

settings = Settings()
```

---

## Security Tests

### Pre-Commit Security Checks

Add to pre-commit hooks:

**For all projects:**
```yaml
# .pre-commit-config.yaml (add to existing)
repos:
  # Detect secrets
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.4.0
    hooks:
      - id: detect-secrets
        args: ['--baseline', '.secrets.baseline']

  # Check for security issues in dependencies
  - repo: local
    hooks:
      - id: security-check
        name: security-check
        entry: ./scripts/security-check.sh
        language: script
        pass_filenames: false
```

**TypeScript/JavaScript:**
```json
// package.json scripts
{
  "scripts": {
    "security:audit": "npm audit --audit-level=high",
    "security:secrets": "npx secretlint '**/*'",
    "security:deps": "npx better-npm-audit audit"
  }
}
```

**Python:**
```bash
# Add to dev dependencies
pip install safety bandit

# Commands
safety check           # Check dependencies for vulnerabilities
bandit -r src/        # Static security analysis
```

### Security Check Script

Create `scripts/security-check.sh`:

```bash
#!/bin/bash
#
# Pre-commit security gate. Must be run from inside a git work tree.
#
# Checks, in order:
#   1. detect-secrets baseline scan (skipped when the tool is not installed)
#   2. no .env file (other than *.example) is staged
#   3. no staged file contains an obvious hard-coded secret assignment
#   4. dependency audits for npm / Python projects (warnings only)
#
# Exits non-zero when check 1, 2 or 3 fails.
set -e

# Characters that may delimit a hard-coded secret. Spelled out literally
# because grep does not understand octal escapes such as \047 inside a
# bracket expression (there the backslash and digits are matched literally).
readonly QUOTE_CHARS="\"'"
# keyword, optional whitespace, ':' or '=', optional whitespace, quoted value.
# [[:space:]] is used instead of \s, which is a GNU-only extension.
readonly SECRET_PATTERN="(password|secret|api_key|apikey|token|private_key)[[:space:]]*[:=][[:space:]]*[${QUOTE_CHARS}][^${QUOTE_CHARS}]+[${QUOTE_CHARS}]"

echo "Running security checks..."

# Fail fast outside a repository; otherwise the staged-file checks below
# would see an empty list and pass without checking anything.
if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
  echo "ERROR: not inside a git repository" >&2
  exit 1
fi

# Check for secrets in staged files
echo "Checking for secrets..."
if command -v detect-secrets &> /dev/null; then
  detect-secrets scan --baseline .secrets.baseline
fi

# Check .env is not staged (at the repository root or in any subdirectory)
if git diff --cached --name-only \
  | grep -E '(^|/)\.env($|\.)' \
  | grep -v '\.example$'; then
  echo "ERROR: .env file is staged for commit!" >&2
  exit 1
fi

# Collect staged (added/copied/modified) paths NUL-delimited so names with
# spaces or newlines stay intact. Keep only regular files, so entries such
# as submodules do not make grep exit with an error status.
staged_files=()
while IFS= read -r -d '' staged_path; do
  if [[ -f "$staged_path" ]]; then
    staged_files+=("$staged_path")
  fi
done < <(git diff --cached --name-only --diff-filter=ACM -z)

# Check for common secret patterns in staged files. The emptiness guard
# keeps grep from running with no file operands.
if (( ${#staged_files[@]} > 0 )) \
  && grep -l -E -- "$SECRET_PATTERN" "${staged_files[@]}" 2>/dev/null; then
  echo "ERROR: Possible secrets found in staged files!" >&2
  exit 1
fi

# Language-specific checks (advisory: findings warn but do not fail the hook)
if [ -f "package.json" ]; then
  echo "Checking npm dependencies..."
  npm audit --audit-level=high || echo "Warning: npm audit found issues"
fi

if [ -f "pyproject.toml" ] || [ -f "requirements.txt" ]; then
  echo "Checking Python dependencies..."
  if command -v safety &> /dev/null; then
    safety check || echo "Warning: safety found issues"
  fi
fi

echo "Security checks passed!"

```bash
chmod +x scripts/security-check.sh
```

---

## GitHub Actions Security Workflow

Create `.github/workflows/security.yml`:

```yaml
name: Security

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    # Run weekly on Monday at 9am UTC
    - cron: '0 9 * * 1'

jobs:
  secrets-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Detect secrets
        uses: trufflesecurity/trufflehog@main
        with:
          path: ./
          base: ${{ github.event.pull_request.base.sha }}
          head: ${{ github.event.pull_request.head.sha }}

  dependency-audit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Node.js projects
      - name: Setup Node
        if: hashFiles('package.json') != ''
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install dependencies
        if: hashFiles('package.json') != ''
        run: npm ci

      - name: NPM Audit
        if: hashFiles('package.json') != ''
        run: npm audit --audit-level=high

      # Python projects
      - name: Setup Python
        if: hashFiles('pyproject.toml') != '' || hashFiles('requirements.txt') != ''
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install safety
        if: hashFiles('pyproject.toml') != '' || hashFiles('requirements.txt') != ''
        run: pip install safety

      - name: Safety check
        if: hashFiles('pyproject.toml') != '' || hashFiles('requirements.txt') != ''
        run: safety check

  codeql:
    runs-on: ubuntu-latest
    permissions:
      security-events: write
    steps:
      - uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ hashFiles('package.json') != '' && 'javascript-typescript' || 'python' }}

      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
```

---

## Input Validation (OWASP Top 10)

### 1. SQL Injection Prevention

**Never use string concatenation:**
```typescript
// BAD - SQL injection vulnerable
const user = await db.query(`SELECT * FROM users WHERE id = ${userId}`);

// GOOD - Parameterized query
const user = await db.query('SELECT * FROM users WHERE id = $1', [userId]);

// GOOD - Using ORM (Kysely, Prisma, Drizzle)
const user = await db.selectFrom('users').where('id', '=', userId).execute();
```

```python
# BAD - SQL injection vulnerable
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")

# GOOD - Parameterized query
cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))

# GOOD - Using ORM (SQLAlchemy)
user = session.query(User).filter(User.id == user_id).first()
```

### 2. XSS Prevention

```typescript
// Always sanitize user input before rendering
import DOMPurify from 'dompurify';

// BAD - XSS vulnerable
element.innerHTML = userInput;

// GOOD - Sanitized
element.innerHTML = DOMPurify.sanitize(userInput);

// BEST - Use framework's built-in escaping (React does this by default)
return <div>{userInput}</div>;  // Safe in React

// DANGER - Bypasses React's protection
return <div dangerouslySetInnerHTML={{ __html: userInput }} />;  // Avoid!
```

### 3. Input Validation at Boundaries

```typescript
// Validate ALL external input with Zod
import { z } from 'zod';

const CreateUserSchema = z.object({
  email: z.string().email().max(255),
  name: z.string().min(1).max(100).regex(/^[a-zA-Z\s]+$/),
  age: z.number().int().min(0).max(150),
});

// In route handler
app.post('/users', async (req, res) => {
  const result = CreateUserSchema.safeParse(req.body);
  if (!result.success) {
    return res.status(400).json({ error: result.error });
  }
  // result.data is now typed and validated
});
```

### 4. Path Traversal Prevention

```typescript
import path from 'path';

// BAD - Path traversal vulnerable
const filePath = `./uploads/${req.params.filename}`;

// GOOD - Validate and sanitize path
const uploadsDir = path.resolve('./uploads');
const filename = path.basename(req.params.filename);  // Strips ../
const filePath = path.join(uploadsDir, filename);

// Verify it's still within allowed directory. Both sides must be absolute
// (a relative 'uploads/x' never starts with the resolved directory), and the
// trailing separator stops a sibling such as 'uploads-old' from passing.
if (!filePath.startsWith(uploadsDir + path.sep)) {
  throw new Error('Invalid path');
}
```

---

## Authentication & Authorization

### JWT Best Practices

```typescript
import jwt from 'jsonwebtoken';

// Token generation
function generateToken(userId: string): string {
  return jwt.sign(
    { sub: userId },
    process.env.JWT_SECRET!,
    {
      expiresIn: '15m',      // Short-lived access tokens
      algorithm: 'HS256',
    }
  );
}

// Token verification
function verifyToken(token: string): { sub: string } {
  return jwt.verify(token, process.env.JWT_SECRET!, {
    algorithms: ['HS256'],   // Explicitly specify allowed algorithms
  }) as { sub: string };
}
```

### Password Hashing

```typescript
import bcrypt from 'bcrypt';

const SALT_ROUNDS = 12;  // Minimum 10, recommended 12+

async function hashPassword(password: string): Promise<string> {
  return bcrypt.hash(password, SALT_ROUNDS);
}

async function verifyPassword(password: string, hash: string): Promise<boolean> {
  return bcrypt.compare(password, hash);
}
```

```python
from passlib.context import CryptContext

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

def hash_password(password: str) -> str:
    return pwd_context.hash(password)

def verify_password(password: str, hashed: str) -> bool:
    return pwd_context.verify(password, hashed)
```

### Rate Limiting

```typescript
import rateLimit from 'express-rate-limit';

// General limiter for every route
const limiter = rateLimit({
  windowMs: 15 * 60 * 1000,  // 15 minutes
  max: 100,                   // 100 requests per window
  standardHeaders: true,
  legacyHeaders: false,
});

// Mount it - a limiter that is created but never passed to app.use() does nothing
app.use(limiter);

// Apply a stricter limit to auth routes (in addition to the general one)
app.use('/api/auth', rateLimit({
  windowMs: 60 * 1000,  // 1 minute
  max: 5,                // 5 attempts per minute
  message: 'Too many login attempts, please try again later',
}));
```

---

## Security Headers

```typescript
import helmet from 'helmet';

app.use(helmet({
  contentSecurityPolicy: {
    directives: {
      defaultSrc: ["'self'"],
      scriptSrc: ["'self'"],
      styleSrc: ["'self'", "'unsafe-inline'"],
      imgSrc: ["'self'", "data:", "https:"],
    },
  },
  hsts: {
    maxAge: 31536000,
    includeSubDomains: true,
  },
}));
```

---

## Security Testing Checklist

Run before every release:

```markdown
## Security Checklist

### Secrets & Environment
- [ ] No secrets in code (run detect-secrets)
- [ ] .env files in .gitignore
- [ ] .env.example exists with all required vars
- [ ] Environment validated at startup

### Dependencies
- [ ] npm audit / safety check passes
- [ ] No known vulnerabilities in dependencies
- [ ] Dependencies up to date (Dependabot enabled)

### Input Validation
- [ ] All API inputs validated with schema (Zod/Pydantic)
- [ ] File uploads restricted by type and size
- [ ] Path traversal prevented

### Authentication
- [ ] Passwords hashed with bcrypt (12+ rounds)
- [ ] JWTs use short expiration
- [ ] Rate limiting on auth endpoints
- [ ] Session tokens rotated on login

### Database
- [ ] Parameterized queries only
- [ ] Least privilege database user
- [ ] Connection strings not logged

### Headers & CORS
- [ ] Security headers enabled (helmet)
- [ ] CORS restricted to known origins
- [ ] HTTPS only in production

### Logging
- [ ] No secrets in logs
- [ ] No PII in logs (or properly masked)
- [ ] Failed auth attempts logged
```

---

## Security Anti-Patterns

- ❌ Secrets in `VITE_*`, `NEXT_PUBLIC_*`, or `REACT_APP_*` env vars (client-exposed!)
- ❌ Secrets in code or config files committed to git
- ❌ .env files without .gitignore entry
- ❌ String concatenation for SQL queries
- ❌ `dangerouslySetInnerHTML` without sanitization
- ❌ `eval()` or `new Function()` with user input
- ❌ Passwords stored as plain text or weak hash (MD5, SHA1)
- ❌ JWTs with no expiration or very long expiration
- ❌ No rate limiting on authentication endpoints
- ❌ Logging sensitive data (passwords, tokens, PII)
- ❌ Using `*` for CORS origins in production
- ❌ Ignoring npm audit / safety check warnings
- ❌ Running as root / admin in production
- ❌ Hardcoded credentials for any environment
- ❌ Disabling SSL/TLS verification


================================================
FILE: skills/session-management/SKILL.md
================================================
---
name: session-management
description: Context preservation, tiered summarization, resumability
when-to-use: At session checkpoints, after completing major tasks, or when resuming work
user-invocable: false
effort: low
---

# Session Management Skill


For maintaining context across long development sessions and enabling seamless resume after breaks.

---

## Core Principle

**Checkpoint at natural breakpoints, resume instantly.**

Long development sessions risk context loss. Proactively document state, decisions, and progress so any session can resume exactly where it left off - whether returning after a break or hitting context limits.

---

## Tiered Summarization Rules

### Tier 1: Quick Update (current-state.md only)
**Trigger**: After completing any small task or todo item
**Action**: Update "Active Task", "Progress", and "Next Steps" sections
**Time**: ~30 seconds

### Tier 2: Full Checkpoint (current-state.md + decisions.md)
**Trigger**:
- After completing a feature or significant change
- After any architectural/library decision
- After ~20 tool calls during active work
- When switching to a different area of the codebase

**Action**:
1. Update full current-state.md
2. Log any decisions to decisions.md
3. Update files being modified table

### Tier 3: Session Archive (archive/ + full checkpoint)
**Trigger**:
- End of work session
- Completing a major feature/milestone
- Before a significant context shift
- When context feels heavy (~50+ tool calls)

**Action**:
1. Create archive entry: `archive/YYYY-MM-DD[-topic].md`
2. Full checkpoint
3. Clear verbose notes from current-state.md
4. Update code-landmarks.md if new patterns introduced

### Decision Heuristic
```
┌─────────────────────────────────────────────────────┐
│ After completing work, ask:                         │
├─────────────────────────────────────────────────────┤
│ Was a decision made?        → Log to decisions.md   │
│ Task took >10 tool calls?   → Full Checkpoint       │
│ Major feature complete?     → Archive               │
│ Ending session?             → Archive + Handoff     │
│ Otherwise                   → Quick Update          │
└─────────────────────────────────────────────────────┘
```

---

## Session State Structure

Create `_project_specs/session/` directory:

```
_project_specs/
└── session/
    ├── current-state.md      # Live session state (update frequently)
    ├── decisions.md          # Key decisions log (append-only)
    ├── code-landmarks.md     # Important code locations
    └── archive/              # Past session summaries
        └── 2025-01-15.md
```

---

## Current State File

**`_project_specs/session/current-state.md`** - Update every 15-20 minutes or after significant progress.

```markdown
# Current Session State

*Last updated: 2025-01-15 14:32*

## Active Task
[One sentence: what are we working on right now]

Example: Implementing user authentication flow with JWT tokens

## Current Status
- **Phase**: [exploring | planning | implementing | testing | debugging | refactoring]
- **Progress**: [X of Y steps complete, or percentage]
- **Blocking Issues**: [None, or describe blockers]

## Context Summary
[2-3 sentences summarizing the current state of work]

Example: Created auth middleware and login endpoint. JWT signing works.
Currently implementing token refresh logic. Need to add refresh token
rotation for security.

## Files Being Modified
| File | Status | Notes |
|------|--------|-------|
| src/auth/middleware.ts | Done | JWT verification |
| src/auth/refresh.ts | In Progress | Token rotation |
| src/auth/types.ts | Done | Token interfaces |

## Next Steps
1. [ ] Complete refresh token rotation in refresh.ts
2. [ ] Add token blacklist for logout
3. [ ] Write integration tests for auth flow

## Key Context to Preserve
- Using RS256 algorithm (not HS256) per security requirements
- Refresh tokens stored in HttpOnly cookies
- Access tokens: 15 min, Refresh tokens: 7 days

## Resume Instructions
To continue this work:
1. Read src/auth/refresh.ts - currently at line 45
2. The rotateRefreshToken() function needs error handling
3. Check decisions.md for why we chose RS256 over HS256
```

---

## Decision Log

**`_project_specs/session/decisions.md`** - Append-only log of architectural and implementation decisions.

```markdown
# Decision Log

Track key decisions for future reference. Never delete entries.

---

## [2025-01-15] JWT Algorithm Choice

**Decision**: Use RS256 instead of HS256 for JWT signing

**Context**: Implementing authentication system

**Options Considered**:
1. HS256 (symmetric) - Simpler, single secret
2. RS256 (asymmetric) - Public/private key pair

**Choice**: RS256

**Reasoning**:
- Allows token verification without exposing signing key
- Better for microservices (services only need public key)
- Industry standard for production systems

**Trade-offs**:
- Slightly more complex key management
- Larger token size

**References**:
- src/auth/keys/ - Key storage
- docs/security.md - Security architecture

---

## [2025-01-14] Database Schema Approach

**Decision**: Use Drizzle ORM with PostgreSQL

**Context**: Setting up data layer

**Options Considered**:
1. Prisma - Popular, good DX
2. Drizzle - Type-safe, SQL-like
3. Raw SQL - Maximum control

**Choice**: Drizzle

**Reasoning**:
- Better TypeScript inference than Prisma
- More transparent SQL generation
- Lighter weight, faster cold starts

**References**:
- src/db/schema.ts - Schema definitions
- src/db/migrations/ - Migration files
```

---

## Code Landmarks

**`_project_specs/session/code-landmarks.md`** - Important code locations for quick reference.

```markdown
# Code Landmarks

Quick reference to important parts of the codebase.

## Entry Points
| Location | Purpose |
|----------|---------|
| src/index.ts | Main application entry |
| src/api/routes.ts | API route definitions |
| src/workers/index.ts | Background job entry |

## Core Business Logic
| Location | Purpose |
|----------|---------|
| src/core/auth/ | Authentication system |
| src/core/billing/ | Payment processing |
| src/core/workflows/ | Main workflow engine |

## Configuration
| Location | Purpose |
|----------|---------|
| src/config/index.ts | Environment config |
| src/config/features.ts | Feature flags |
| drizzle.config.ts | Database config |

## Key Patterns
| Pattern | Example Location | Notes |
|---------|------------------|-------|
| Service Layer | src/services/user.ts | Business logic encapsulation |
| Repository | src/repos/user.ts | Data access abstraction |
| Middleware | src/middleware/auth.ts | Request processing |

## Testing
| Location | Purpose |
|----------|---------|
| tests/unit/ | Unit tests |
| tests/integration/ | API tests |
| tests/e2e/ | End-to-end tests |
| tests/fixtures/ | Test data |

## Gotchas & Non-Obvious Behavior
| Location | Issue | Notes |
|----------|-------|-------|
| src/utils/date.ts | Timezone handling | Always use UTC internally |
| src/api/middleware.ts:45 | Auth bypass | Skip auth for health checks |
| src/db/pool.ts | Connection limit | Max 10 connections in dev |
```

---

## CLAUDE.md Session Rules

Add this section to CLAUDE.md:

```markdown
## Session Management

**IMPORTANT**: Follow session-management.md skill. Update session state at natural breakpoints.

### After Every Task Completion
Ask yourself:
1. Was a decision made? → Log to `decisions.md`
2. Did this take >10 tool calls? → Full checkpoint to `current-state.md`
3. Is a major feature complete? → Create archive entry
4. Otherwise → Quick update to `current-state.md`

### Checkpoint Triggers
**Quick Update** (current-state.md):
- After any todo completion
- After small changes

**Full Checkpoint** (current-state.md + decisions.md):
- After significant changes
- After ~20 tool calls
- After any decision
- When switching focus areas

**Archive** (archive/ + full checkpoint):
- End of session
- Major feature complete
- Context feels heavy

### Session Start Protocol
When beginning work:
1. Read `_project_specs/session/current-state.md`
2. Check `_project_specs/todos/active.md`
3. Review recent `decisions.md` entries if needed
4. Continue from "Next Steps"

### Session End Protocol
Before ending or when context limit approaches:
1. Create archive: `_project_specs/session/archive/YYYY-MM-DD.md`
2. Update current-state.md with handoff format
3. Ensure next steps are specific and actionable
```

---

## Compression Strategies

### When to Compress (Tier 3 Archive)

| Trigger | Action |
|---------|--------|
| ~50+ tool calls | Summarize progress, archive verbose notes |
| Major feature complete | Archive feature details, update landmarks |
| Context shift | Summarize previous context, archive, start fresh |
| End of session | Full session handoff with archive |

### What to Keep vs Archive

**Keep in active context:**
- Current task and immediate next steps
- Active file list with status
- Blocking issues
- Key decisions affecting current work

**Archive/summarize:**
- Exploration paths that didn't work out
- Detailed debugging traces (keep conclusion only)
- Verbose error messages (keep root cause only)
- Research notes (keep recommendations only)

### Compression Template

When compressing, use this format:

```markdown
## Compressed Context - [Topic]

**Summary**: [1-2 sentences]

**Key Findings**:
- [Bullet points of important discoveries]

**Decisions Made**:
- [Reference to decisions.md entries]

**Relevant Code**:
- [File:line references]

**Archived Details**: [Link to archive file if created]
```

---

## Session Archive

After significant work or at session end, create archive:

**`_project_specs/session/archive/YYYY-MM-DD[-topic].md`**

```markdown
# Session Archive: [Date] - [Topic]

## Summary
[Paragraph summarizing what was accomplished]

## Tasks Completed
- [TODO-XXX] Description - Done
- [TODO-YYY] Description - Done

## Key Decisions
- [Reference decisions.md entries made this session]

## Code Changes
| File | Change Type | Description |
|------|-------------|-------------|
| src/auth/login.ts | Created | Login endpoint |
| src/auth/types.ts | Modified | Added RefreshToken type |

## Tests Added
- tests/auth/login.test.ts - Login flow tests
- tests/auth/refresh.test.ts - Token refresh tests

## Open Items Carried Forward
- [Anything not finished, now in active.md]

## Session Stats
- Duration: ~3 hours
- Tool calls: ~120
- Files modified: 8
- Tests added: 12
```

---

## Integration with Todo System

### Link Todos to Sessions

In active todos, reference session context:

```markdown
## [TODO-042] Implement token refresh

**Status:** in-progress
**Session Context:** See current-state.md

### Progress Notes
- 2025-01-15: Started implementation, base structure done
- 2025-01-15: Added rotation logic, need error handling
```

### Auto-Update on Todo Completion

When completing a todo:
1. Mark todo complete in active.md
2. Update current-state.md progress
3. Log any decisions made
4. Update code-landmarks.md if new patterns introduced

---

## Quick Commands

Add to project scripts or aliases:

```bash
# Show current session state
alias session-status="cat _project_specs/session/current-state.md"

# Quick edit session state
# Single quotes defer expansion until the alias runs, so a later change to
# $EDITOR is honored; falls back to vi when $EDITOR is unset or empty.
alias session-edit='${EDITOR:-vi} _project_specs/session/current-state.md'

# View recent decisions
alias decisions="tail -100 _project_specs/session/decisions.md"

# Create session archive
#######################################
# Copy the live session state into the dated archive folder.
# Arguments: $1 - optional topic, appended as "-<topic>" to the file name
# Outputs:   "Archived to <path>" on stdout; diagnostics on stderr
# Returns:   0 on success, non-zero if the state file is missing or the
#            copy fails
#######################################
session-archive() {
  local src="_project_specs/session/current-state.md"
  local archive_dir="_project_specs/session/archive"
  local suffix="${1:+-$1}"
  local dest

  # Compute the date once so the copied file and the message always agree.
  dest="${archive_dir}/$(date +%Y-%m-%d)${suffix}.md"

  if [[ ! -f "$src" ]]; then
    printf 'session-archive: %s not found\n' "$src" >&2
    return 1
  fi

  # The archive folder does not exist until the first archive is created.
  mkdir -p -- "$archive_dir" || return
  cp -- "$src" "$dest" || return
  echo "Archived to $dest"
}
```

---

## Enforcement Mechanisms

### 1. CLAUDE.md as Entry Point
CLAUDE.md must reference session-management.md in the Skills section. Claude reads CLAUDE.md first, which directs it to follow session rules.

### 2. Session File Headers with Reminders
Include enforcement reminders in session file headers:

**current-state.md header:**
```markdown
<!--
CHECKPOINT RULES (from session-management.md):
- Quick update: After any todo completion
- Full checkpoint: After ~20 tool calls or decisions
- Archive: End of session or major feature complete
-->
```

### 3. Self-Check Questions
After completing any task, Claude should ask:
```
□ Did I make a decision? → Log it
□ Did this take >10 tool calls? → Full checkpoint
□ Is a feature complete? → Archive
□ Am I ending/switching context? → Archive + handoff
```

### 4. Session Start Verification
When starting a session, Claude must:
1. Check if `current-state.md` exists and read it
2. Announce what it found: "Resuming from: [last state]"
3. Confirm next steps before proceeding

### 5. Periodic Self-Audit
Every ~20 tool calls, Claude should check:
- Is current-state.md up to date?
- Are there unlogged decisions?
- Is context getting heavy?

### 6. User Prompts
Users can enforce by asking:
- "Update session state" → Triggers checkpoint
- "What's the current state?" → Claude reads and reports
- "End session" → Triggers archive + handoff
- "Resume from last session" → Claude reads state files first

---

## Anti-Patterns

- **No state tracking** - Flying blind, can't resume
- **Overly verbose state** - Keep it scannable, not a novel
- **Stale state files** - Update regularly or they become useless
- **Missing decisions** - Future you won't remember why
- **No code landmarks** - Wastes time re-discovering the codebase
- **Never archiving** - Session files become cluttered
- **Ignoring compression signals** - Context overload degrades performance
- **Skipping checkpoint after decisions** - Key context lost
- **No handoff at session end** - Next session starts blind

---

## Quick Reference

### Checkpoint Decision Tree
```
Task completed?
    │
    ├── Decision made? ──────────────────→ Log to decisions.md
    │
    ├── >10 tool calls OR significant? ──→ Full Checkpoint
    │
    ├── Major feature done? ─────────────→ Archive
    │
    └── Otherwise ───────────────────────→ Quick Update
```

### Files at a Glance
| File | Update Frequency | Purpose |
|------|------------------|---------|
| current-state.md | Every task | Live state, next steps |
| decisions.md | When deciding | Architectural choices |
| code-landmarks.md | When patterns change | Code navigation |
| archive/*.md | End of session/feature | Historical record |


================================================
FILE: skills/shopify-apps/SKILL.md
================================================
---
name: shopify-apps
description: Shopify app development - Remix, Admin API, checkout extensions
when-to-use: When building Shopify apps or extensions
user-invocable: false
effort: medium
---

# Shopify App Development Skill


For building Shopify apps using Remix, the Shopify App framework, and checkout UI extensions.

**Sources:** [Shopify Dev Docs](https://shopify.dev/docs/apps) | [Shopify CLI](https://shopify.dev/docs/apps/tools/cli) | [Admin API](https://shopify.dev/docs/api/admin-graphql)

---

## Prerequisites

### Required Accounts & Tools

```bash
# 1. Shopify Partner Account (free)
# Sign up at: https://partners.shopify.com

# 2. Development Store
# Create in Partner Dashboard → Stores → Add store → Development store

# 3. Shopify CLI
npm install -g @shopify/cli

# 4. Node.js 18.20+ or 20.10+
node --version
```

### Partner Dashboard Setup

1. Create Partner account at partners.shopify.com
2. Create a development store for testing
3. Create an app in Partner Dashboard → Apps → Create app
4. Note your API key and API secret

---

## Quick Start

### Scaffold New App

```bash
# Create new Shopify app with Remix
shopify app init

# Answer prompts:
# - App name
# - Template: Remix (recommended)
# - Language: JavaScript or TypeScript

# Start development
cd your-app-name
shopify app dev
```

### Project Structure

```
shopify-app/
├── app/
│   ├── routes/
│   │   ├── app._index/          # Main app page
│   │   │   └── route.jsx
│   │   ├── app.jsx              # App layout with Polaris
│   │   ├── auth.$.jsx           # Auth catch-all
│   │   ├── auth.login/          # Login page
│   │   │   └── route.jsx
│   │   ├── webhooks.app.uninstalled.jsx
│   │   ├── webhooks.app.scopes_update.jsx
│   │   └── webhooks.gdpr.jsx    # GDPR compliance (REQUIRED)
│   ├── shopify.server.js        # Shopify app config
│   ├── db.server.js             # Prisma client
│   └── entry.server.jsx
├── extensions/                   # Checkout/theme extensions
│   └── my-extension/
│       ├── src/
│       │   └── index.tsx
│       ├── shopify.extension.toml
│       └── package.json
├── prisma/
│   └── schema.prisma            # Session storage
├── shopify.app.toml             # App configuration
├── package.json
└── vite.config.js
```

---

## App Configuration

### shopify.app.toml

```toml
# App configuration - managed by Shopify CLI
client_id = "your-api-key"
name = "Your App Name"
handle = "your-app-handle"
application_url = "https://your-app.onrender.com"
embedded = true

[webhooks]
api_version = "2025-01"

# Required: App lifecycle webhooks
[[webhooks.subscriptions]]
topics = ["app/uninstalled"]
uri = "/webhooks/app/uninstalled"

[[webhooks.subscriptions]]
topics = ["app/scopes_update"]
uri = "/webhooks/app/scopes_update"

# Required: GDPR compliance webhooks
[[webhooks.subscriptions]]
compliance_topics = [
  "customers/data_request",
  "customers/redact",
  "shop/redact",
]
uri = "/webhooks/gdpr"

[access_scopes]
scopes = "read_products,write_products"

[auth]
redirect_urls = [
  "https://your-app.onrender.com/auth/callback",
  "https://your-app.onrender.com/auth/shopify/callback",
]

[pos]
embedded = false

[build]
dev_store_url = "your-dev-store.myshopify.com"
automatically_update_urls_on_dev = true
```

### shopify.server.js

```javascript
import "@shopify/shopify-app-remix/adapters/node";
import {
  ApiVersion,
  AppDistribution,
  shopifyApp,
} from "@shopify/shopify-app-remix/server";
import { PrismaSessionStorage } from "@shopify/shopify-app-session-storage-prisma";
import { prisma } from "./db.server";

const shopify = shopifyApp({
  apiKey: process.env.SHOPIFY_API_KEY,
  apiSecretKey: process.env.SHOPIFY_API_SECRET || "",
  apiVersion: ApiVersion.January25,
  scopes: process.env.SCOPES?.split(","),
  appUrl: process.env.SHOPIFY_APP_URL || "",
  authPathPrefix: "/auth",
  sessionStorage: new PrismaSessionStorage(prisma),
  distribution: AppDistribution.AppStore,
  future: {
    unstable_newEmbeddedAuthStrategy: true,
    removeRest: true,  // Use GraphQL only
  },
});

export default shopify;
export const apiVersion = ApiVersion.January25;
export const addDocumentResponseHeaders = shopify.addDocumentResponseHeaders;
export const authenticate = shopify.authenticate;
export const unauthenticated = shopify.unauthenticated;
export const login = shopify.login;
export const registerWebhooks = shopify.registerWebhooks;
export const sessionStorage = shopify.sessionStorage;
```

---

## Authentication

### Route Protection

```javascript
// app/routes/app._index/route.jsx
import { json } from "@remix-run/node";
import { useLoaderData } from "@remix-run/react";
import { authenticate } from "../../shopify.server";

export const loader = async ({ request }) => {
  // This authenticates the request and redirects to login if needed
  const { admin, session } = await authenticate.admin(request);

  // Now you have access to admin API and session
  const shop = session.shop;

  return json({ shop });
};

export default function Index() {
  const { shop } = useLoaderData();
  return <div>Connected to: {shop}</div>;
}
```

### Webhook Authentication

```javascript
// app/routes/webhooks.app.uninstalled.jsx
import { authenticate } from "../shopify.server";
import { prisma } from "../db.server";

export const action = async ({ request }) => {
  const { shop, topic } = await authenticate.webhook(request);

  console.log(`Received ${topic} webhook for ${shop}`);

  // Clean up shop data on uninstall
  await prisma.session.deleteMany({ where: { shop } });

  return new Response(null, { status: 200 });
};
```

---

## GraphQL Admin API

### Basic Query Pattern

```javascript
// app/shopify/adminApi.server.js
export async function getShopId(admin) {
  const response = await admin.graphql(`
    query getShopId {
      shop {
        id
        name
        email
        myshopifyDomain
      }
    }
  `);

  const data = await response.json();
  return data.data?.shop;
}
```

### Query with Variables

```javascript
export async function getProducts(admin, first = 10) {
  const response = await admin.graphql(`
    query getProducts($first: Int!) {
      products(first: $first) {
        edges {
          node {
            id
            title
            status
            variants(first: 5) {
              edges {
                node {
                  id
                  price
                  inventoryQuantity
                }
              }
            }
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
    }
  `, {
    variables: { first }
  });

  const data = await response.json();
  return data.data?.products?.edges.map(e => e.node);
}
```

### Mutations

```javascript
export async function createProduct(admin, input) {
  const response = await admin.graphql(`
    mutation createProduct($input: ProductInput!) {
      productCreate(input: $input) {
        product {
          id
          title
        }
        userErrors {
          field
          message
        }
      }
    }
  `, {
    variables: {
      input: {
        title: input.title,
        descriptionHtml: input.description,
        status: "DRAFT"
      }
    }
  });

  const data = await response.json();
  const result = data.data?.productCreate;

  if (result?.userErrors?.length > 0) {
    throw new Error(result.userErrors.map(e => e.message).join(", "));
  }

  return result?.product;
}
```

### Metafields (App Settings Storage)

```javascript
// Get metafield
export async function getMetafield(admin, namespace, key) {
  const response = await admin.graphql(`
    query getShopMetafield($namespace: String!, $key: String!) {
      shop {
        id
        metafield(namespace: $namespace, key: $key) {
          id
          value
        }
      }
    }
  `, {
    variables: { namespace, key }
  });

  const data = await response.json();
  const metafield = data.data?.shop?.metafield;

  return {
    shopId: data.data?.shop?.id,
    value: metafield?.value ? JSON.parse(metafield.value) : null,
  };
}

// Set metafield
export async function setMetafield(admin, namespace, key, value, shopId) {
  const response = await admin.graphql(`
    mutation CreateMetafield($metafields: [MetafieldsSetInput!]!) {
      metafieldsSet(metafields: $metafields) {
        metafields {
          id
          namespace
          key
          value
        }
        userErrors {
          field
          message
        }
      }
    }
  `, {
    variables: {
      metafields: [{
        namespace,
        key,
        type: "json",
        value: JSON.stringify(value),
        ownerId: shopId,
      }]
    }
  });

  const data = await response.json();
  const errors = data.data?.metafieldsSet?.userErrors;

  if (errors?.length > 0) {
    throw new Error(errors.map(e => e.message).join(", "));
  }

  return data.data?.metafieldsSet?.metafields?.[0];
}
```

---

## GDPR Compliance (REQUIRED)

**All Shopify apps MUST handle GDPR webhooks.** This is required for App Store approval.

```javascript
// app/routes/webhooks.gdpr.jsx
import { authenticate } from "../shopify.server";
import { prisma } from "../db.server";

// Handles the three mandatory compliance topics. Shopify only needs a 2xx
// acknowledgement; exporting or erasing the data is this app's job.
export const action = async ({ request }) => {
  // authenticate.webhook() verifies the HMAC and parses the body, so take
  // `payload` and `admin` from its result instead of re-reading the request.
  // `admin` is only defined while the shop still has a stored session.
  const { topic, shop, payload, admin } = await authenticate.webhook(request);

  console.log(`Received ${topic} webhook for ${shop}`);

  // Accept both "customers/redact" and "CUSTOMERS_REDACT" spellings
  const normalizedTopic = String(topic).toUpperCase().replace(/[/.]/g, "_");

  switch (normalizedTopic) {
    case "CUSTOMERS_DATA_REQUEST":
      // Provide any data you store for payload.customer to the merchant.
      // If you don't store customer data, acknowledging is enough.
      return new Response(null, { status: 200 });

    case "CUSTOMERS_REDACT":
      // Delete customer data
      // Example: await deleteCustomerData(payload.customer.id);
      return new Response(null, { status: 200 });

    case "SHOP_REDACT":
      // Delete all shop data (48 hours after uninstall)
      if (admin && payload?.shop_id) {
        try {
          await admin.graphql(`
            mutation deleteAppSettings($metafields: [MetafieldIdentifierInput!]!) {
              metafieldsDelete(metafields: $metafields) {
                deletedMetafields {
                  key
                  namespace
                  ownerId
                }
                userErrors {
                  field
                  message
                }
              }
            }
          `, {
            variables: {
              metafields: [{
                ownerId: `gid://shopify/Shop/${payload.shop_id}`,
                namespace: "your_app",
                key: "settings",
              }]
            }
          });
        } catch (error) {
          // Best effort: the access token is usually revoked by this point,
          // and failing here would only make Shopify retry the webhook.
          console.error(`Metafield cleanup failed for ${shop}:`, error);
        }
      }

      // Database records can always be removed, with or without a session
      await prisma.session.deleteMany({ where: { shop } });
      await prisma.appSettings.deleteMany({ where: { shop } });
      return new Response(null, { status: 200 });

    default:
      return new Response("Unhandled topic", { status: 400 });
  }
};
```

---

## UI with Polaris

### App Layout

```javascript
// app/routes/app.jsx
import { Outlet } from "@remix-run/react";
import { AppProvider } from "@shopify/polaris";
import "@shopify/polaris/build/esm/styles.css";
import polarisTranslations from "@shopify/polaris/locales/en.json";

export default function App() {
  return (
    <AppProvider i18n={polarisTranslations}>
      <Outlet />
    </AppProvider>
  );
}
```

### Settings Page Pattern

```javascript
// app/routes/app._index/route.jsx
import { useState } from "react";
import { json } from "@remix-run/node";
import { useActionData, useLoaderData, useSubmit } from "@remix-run/react";
import {
  Page,
  Layout,
  Card,
  FormLayout,
  TextField,
  Select,
  Banner,
  Button,
} from "@shopify/polaris";
import { authenticate } from "../../shopify.server";
import { getMetafield, setMetafield, getShopId } from "../../shopify/adminApi.server";

export const loader = async ({ request }) => {
  const { admin } = await authenticate.admin(request);
  const { shopId, value } = await getMetafield(admin, "your_app", "settings");
  return json({ shopId, settings: value });
};

export const action = async ({ request }) => {
  const { admin } = await authenticate.admin(request);
  const formData = await request.formData();

  const settings = {
    apiKey: formData.get("apiKey"),
    enabled: formData.get("enabled") === "true",
  };

  try {
    const shopId = await getShopId(admin);
    await setMetafield(admin, "your_app", "settings", settings, shopId.id);
    return json({ success: true, message: "Settings saved!" });
  } catch (error) {
    return json({ error: error.message }, { status: 500 });
  }
};

export default function Settings() {
  const { settings } = useLoaderData();
  const actionData = useActionData();
  const submit = useSubmit();

  const [formState, setFormState] = useState({
    apiKey: settings?.apiKey || "",
    enabled: settings?.enabled ?? true,
  });

  const handleSubmit = () => {
    const formData = new FormData();
    formData.append("apiKey", formState.apiKey);
    formData.append("enabled", String(formState.enabled));
    submit(formData, { method: "post" });
  };

  return (
    <Page
      title="App Settings"
      primaryAction={{
        content: "Save",
        onAction: handleSubmit,
      }}
    >
      <Layout>
        {actionData?.message && (
          <Layout.Section>
            <Banner tone="success">{actionData.message}</Banner>
          </Layout.Section>
        )}

        {actionData?.error && (
          <Layout.Section>
            <Banner tone="critical">{actionData.error}</Banner>
          </Layout.Section>
        )}

        <Layout.Section>
          <Card>
            <FormLayout>
              <TextField
                label="API Key"
                value={formState.apiKey}
                onChange={(value) => setFormState({ ...formState, apiKey: value })}
                autoComplete="off"
              />

              <Select
                label="Enable Integration"
                options={[
                  { label: "Enabled", value: "true" },
                  { label: "Disabled", value: "false" },
                ]}
                value={String(formState.enabled)}
                onChange={(value) =>
                  setFormState({ ...formState, enabled: value === "true" })
                }
              />
            </FormLayout>
          </Card>
        </Layout.Section>
      </Layout>
    </Page>
  );
}
```

---

## Checkout UI Extensions

### Extension Configuration

```toml
# extensions/my-extension/shopify.extension.toml
api_version = "2025-01"

[[extensions]]
name = "My Checkout Extension"
handle = "my-checkout-extension"
type = "ui_extension"

[[extensions.targeting]]
module = "./src/index.tsx"
target = "purchase.thank-you.block.render"

[extensions.capabilities]
api_access = true
network_access = true

# Access app metafields in extension
[[extensions.metafields]]
namespace = "your_app"
key = "settings"
```

### Extension Target Locations

| Target | Location |
|--------|----------|
| `purchase.thank-you.block.render` | Thank you page |
| `purchase.checkout.block.render` | Checkout page |
| `customer-account.order-status.block.render` | Order status |
| `customer-account.page.render` | Customer account pages |
| `admin.product-details.block.render` | Admin product page (admin UI extension target, not a checkout target) |

### Extension Component

```tsx
// extensions/my-extension/src/index.tsx
import {
  reactExtension,
  useShop,
  useAppMetafields,
  useApi,
  View,
  BlockStack,
  Heading,
  Text,
  Button,
  Spinner,
} from "@shopify/ui-extensions-react/checkout";

export default reactExtension("purchase.thank-you.block.render", () => (
  <Extension />
));

function Extension() {
  const shop = useShop();
  const { orderConfirmation } = useApi();
  const order = orderConfirmation.current.order;

  // Access app metafields
  const metafields = useAppMetafields({
    namespace: "your_app",
    key: "settings"
  });

  const settings = metafields[0]?.metafield?.value
    ? JSON.parse(metafields[0].metafield.value)
    : null;

  if (!settings?.enabled) {
    return null;
  }

  return (
    <View border="base" padding="base">
      <BlockStack>
        <Heading level={2}>Thank You!</Heading>
        <Text>Order #{order.id} confirmed</Text>
        <Text appearance="subdued">
          Shop: {shop.myshopifyDomain}
        </Text>
      </BlockStack>
    </View>
  );
}
```

### Extension with External API

```tsx
// extensions/my-extension/src/hooks/useExternalApi.ts
import { useState, useEffect } from "react";

/**
 * Fetches a survey from the external API whenever `surveyId` changes.
 *
 * @param surveyId - ID of the survey to load; an empty value skips the request
 * @returns `data` (parsed JSON or null), `loading`, and `error` (or null)
 */
export function useExternalApi(surveyId: string) {
  const [data, setData] = useState<any>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<any>(null);

  useEffect(() => {
    if (!surveyId) {
      setLoading(false);
      return;
    }

    // Set by the cleanup so a response for an old surveyId (or one arriving
    // after unmount) cannot overwrite newer state.
    let cancelled = false;

    // Reset state left over from a previous surveyId
    setLoading(true);
    setError(null);

    fetch(`https://api.example.com/surveys/${encodeURIComponent(surveyId)}`)
      .then((res) => {
        // fetch() only rejects on network failure; surface HTTP errors too
        if (!res.ok) {
          throw new Error(`Request failed with status ${res.status}`);
        }
        return res.json();
      })
      .then((body) => {
        if (cancelled) return;
        setData(body);
        setLoading(false);
      })
      .catch((err) => {
        if (cancelled) return;
        setError(err);
        setLoading(false);
      });

    return () => {
      cancelled = true;
    };
  }, [surveyId]);

  return { data, loading, error };
}
```

---

## Database (Prisma)

### Session Storage Schema

```prisma
// prisma/schema.prisma
generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "postgresql"  // or "sqlite" for dev
  url      = env("DATABASE_URL")
}

// Required for Shopify session storage
model Session {
  id            String    @id
  shop          String
  state         String
  isOnline      Boolean   @default(false)
  scope         String?
  expires       DateTime?
  accessToken   String
  userId        BigInt?
  firstName     String?
  lastName      String?
  email         String?
  accountOwner  Boolean   @default(false)
  locale        String?
  collaborator  Boolean?  @default(false)
  emailVerified Boolean?  @default(false)

  @@index([shop])
}

// Your app's custom models
model AppSettings {
  id        String   @id @default(uuid())
  shop      String   @unique
  settings  Json
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt
}
```

### Database Client

```javascript
// app/db.server.js
import { PrismaClient } from "@prisma/client";

let prisma;

if (process.env.NODE_ENV === "production") {
  prisma = new PrismaClient();
} else {
  // Prevent multiple instances in development
  if (!global.__prisma) {
    global.__prisma = new PrismaClient();
  }
  prisma = global.__prisma;
}

export { prisma };
```

---

## Deployment

### Environment Variables

```bash
# .env (DO NOT COMMIT)
SHOPIFY_API_KEY=your_api_key
SHOPIFY_API_SECRET=your_api_secret
SCOPES=read_products,write_products
SHOPIFY_APP_URL=https://your-app.onrender.com
DATABASE_URL=postgresql://...
```

### Render Deployment

```yaml
# render.yaml
services:
  - type: web
    name: shopify-app
    runtime: node
    plan: starter
    buildCommand: npm install && npm run setup && npm run build
    startCommand: npm run start
    envVars:
      - key: NODE_ENV
        value: production
      - key: DATABASE_URL
        fromDatabase:
          name: shopify-db
          property: connectionString
      - key: SHOPIFY_API_KEY
        sync: false
      - key: SHOPIFY_API_SECRET
        sync: false
      - key: SCOPES
        sync: false
      - key: SHOPIFY_APP_URL
        sync: false

databases:
  - name: shopify-db
    plan: starter
```

### Deploy Commands

```bash
# Deploy app to Shopify
shopify app deploy

# This:
# 1. Builds extensions
# 2. Uploads to Shopify
# 3. Creates new app version
```

---

## Common Scopes

| Scope | Access |
|-------|--------|
| `read_products` | View products |
| `write_products` | Create/edit products |
| `read_orders` | View orders |
| `write_orders` | Create/edit orders |
| `read_customers` | View customers |
| `write_customers` | Create/edit customers |
| `read_checkouts` | View checkout data |
| `write_checkouts` | Modify checkout |
| `read_themes` | View themes |
| `write_themes` | Modify themes |
| `read_content` | View online store content (blogs, articles, pages) |
| `write_content` | Modify online store content (blogs, articles, pages) |

---

## CLI Commands

```bash
# Development
shopify app dev                    # Start dev server with tunnel
shopify app dev --reset            # Reset app config

# Configuration
shopify app config link            # Link to existing app
shopify app config use             # Switch config
shopify app env show               # Show env vars

# Extensions
shopify app generate extension     # Create new extension
shopify app build                  # Build all extensions

# Deployment
shopify app deploy                 # Deploy to Shopify
shopify app versions list          # List app versions

# Store
shopify app open                   # Open app in dev store
```

---

## Testing

### Unit Tests

```javascript
// __tests__/adminApi.test.js
import { describe, it, expect, vi } from 'vitest';
import { getShopId, setMetafield } from '../app/shopify/adminApi.server';

describe('Admin API', () => {
  it('gets shop ID', async () => {
    const mockAdmin = {
      graphql: vi.fn().mockResolvedValue({
        json: () => Promise.resolve({
          data: { shop: { id: 'gid://shopify/Shop/123' } }
        })
      })
    };

    const result = await getShopId(mockAdmin);
    expect(result.id).toBe('gid://shopify/Shop/123');
  });
});
```

### E2E with Playwright

```typescript
// e2e/app.spec.ts
import { test, expect } from '@playwright/test';

test('app settings page loads', async ({ page }) => {
  // Note: Requires authenticated session
  await page.goto('/app');

  await expect(page.getByRole('heading', { name: /settings/i })).toBeVisible();
  await expect(page.getByLabel('API Key')).toBeVisible();
});

test('saves settings successfully', async ({ page }) => {
  // Note: Requires authenticated session
  await page.goto('/app');

  // Locate by label: the Polaris TextField on the settings page sets no
  // `name` attribute, so an attribute selector would match nothing.
  await page.getByLabel('API Key').fill('test-key-123');
  await page.getByRole('button', { name: 'Save' }).click();

  await expect(page.getByText('Settings saved')).toBeVisible();
});
```

---

## Rate Limits

### GraphQL Cost-Based Limits

```javascript
// Check rate limit status in response
const response = await admin.graphql(`
  query {
    shop { name }
  }
`);

const data = await response.json();

// Rate limit info in extensions
const throttleStatus = data.extensions?.cost?.throttleStatus;
// {
//   maximumAvailable: 1000,
//   currentlyAvailable: 950,
//   restoreRate: 50  // points per second
// }
```

### Handling Throttling

```javascript
/**
 * Run an Admin GraphQL operation, retrying with exponential backoff while
 * the response reports a THROTTLED error.
 *
 * @param {{ graphql: Function }} admin - Client whose `graphql(query, { variables })`
 *   resolves to a response object exposing `json()`.
 * @param {string} query - GraphQL document to execute.
 * @param {object} [variables] - Variables forwarded to the operation.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @returns {Promise<object>} Parsed body of the first non-throttled response.
 * @throws {Error} 'Max retries exceeded' when every attempt was throttled.
 */
async function graphqlWithRetry(admin, query, variables, maxRetries = 3) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    const response = await admin.graphql(query, { variables });
    const data = await response.json();

    // Guard with Array.isArray so an unexpected `errors` shape is returned
    // to the caller instead of crashing on `.some`.
    const throttled =
      Array.isArray(data?.errors) &&
      data.errors.some(e => e.extensions?.code === 'THROTTLED');

    if (!throttled) {
      return data;
    }

    // Back off only when another attempt will follow; sleeping after the
    // final attempt would merely delay the error below.
    if (attempt < maxRetries - 1) {
      const waitTime = Math.pow(2, attempt) * 1000; // Exponential backoff: 1s, 2s, 4s, ...
      await new Promise(resolve => setTimeout(resolve, waitTime));
    }
  }
  throw new Error('Max retries exceeded');
}
```

---

## Checklist

### Before Development

- [ ] Partner account created
- [ ] Development store created
- [ ] App created in Partner Dashboard
- [ ] Shopify CLI installed
- [ ] App scaffolded with Remix template

### Before Submission

- [ ] GDPR webhooks implemented (customers/data_request, customers/redact, shop/redact)
- [ ] App uninstall webhook cleans up data
- [ ] No hardcoded API keys
- [ ] Error handling for all API calls
- [ ] Rate limit handling
- [ ] Responsive UI (works on mobile admin)
- [ ] Polaris components used consistently
- [ ] Extension targets correct surfaces
- [ ] Privacy policy URL configured
- [ ] App listing completed

### Security

- [ ] Session tokens validated
- [ ] Webhook HMAC verification (handled by SDK)
- [ ] No sensitive data in client-side code
- [ ] Environment variables for all secrets
- [ ] HTTPS enforced

---

## Anti-Patterns

- **REST API usage** - Use GraphQL Admin API (REST is deprecated)
- **Storing secrets in metafields** - Use environment variables
- **Ignoring rate limits** - Implement exponential backoff
- **Skipping GDPR webhooks** - Required for App Store
- **Large GraphQL queries** - Paginate, query only needed fields
- **Polling for updates** - Use webhooks instead
- **Custom auth flow** - Use Shopify's OAuth flow via SDK


================================================
FILE: skills/site-architecture/SKILL.md
================================================
---
name: site-architecture
description: Technical SEO - robots.txt, sitemap, meta tags, Core Web Vitals
when-to-use: When setting up site architecture, meta tags, or technical SEO
user-invocable: false
paths: ["**/robots.txt", "**/sitemap*", "**/*.html", "public/**"]
effort: medium
---

# Site Architecture Skill


For technical website structure that enables discovery by search engines AND AI crawlers (GPTBot, ClaudeBot, PerplexityBot).

---

## Philosophy

**Content is king. Architecture is the kingdom.**

Great content buried in poor architecture won't be discovered. This skill covers the technical foundation that makes your content findable by:
- Google, Bing (traditional search)
- GPTBot (ChatGPT), ClaudeBot, PerplexityBot (AI assistants)
- Social platforms (Open Graph, Twitter Cards)

---

## robots.txt

### Basic Template

```txt
# robots.txt

# Allow all crawlers by default
# NOTE: do not disallow /_next/ - it serves the JS/CSS bundles that search
# engines must fetch to render the page.
User-agent: *
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /cdn-cgi/

# Sitemap location
Sitemap: https://yoursite.com/sitemap.xml

# Crawl delay (optional - be careful, not all bots respect this)
# Crawl-delay: 1
```

### AI Bot Configuration

```txt
# robots.txt with AI bot rules
#
# NOTE: a crawler obeys only the most specific group that names it and
# ignores every other group (RFC 9309). A bot given its own bare "Allow: /"
# group would therefore skip the Disallow rules of the "*" group, so every
# allowed bot is listed in the same group as "*" below.

# === BLOCK AI TRAINING (Optional - block training, allow chat) ===
# Uncomment these if you want to be cited but not used for training
# (and remove the same bot from the allowed group below)
# User-agent: CCBot
# Disallow: /

# User-agent: GPTBot
# Disallow: /  # Blocks both chat and training

# === BLOCK SCRAPERS ===
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

# === ALLOWED CRAWLERS + DEFAULT (one shared rule set) ===
# Search engines
User-agent: Googlebot
User-agent: Bingbot
# AI assistants (allow for discovery)
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: PerplexityBot
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: Google-Extended
# Everyone else
User-agent: *
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /auth/
Disallow: /private/
Disallow: /*.json$
Disallow: /*?*

Sitemap: https://yoursite.com/sitemap.xml
```

### Next.js robots.txt

```typescript
// app/robots.ts
import { MetadataRoute } from 'next';

/**
 * Generate robots.txt for the site.
 *
 * The base URL comes from NEXT_PUBLIC_URL, falling back to a placeholder.
 */
export default function robots(): MetadataRoute.Robots {
  const baseUrl = process.env.NEXT_PUBLIC_URL || 'https://yoursite.com';

  // Paths no crawler should fetch. `/_next/` is intentionally NOT listed:
  // it serves the JS/CSS bundles search engines need to render pages.
  const disallow = ['/api/', '/admin/', '/private/'];

  return {
    rules: [
      {
        userAgent: '*',
        allow: '/',
        disallow,
      },
      {
        // A crawler obeys only the most specific group that names it, so the
        // AI bots must repeat the disallow list; a bare `allow: '/'` group
        // would exempt them from the '*' rules above.
        userAgent: ['GPTBot', 'ClaudeBot', 'PerplexityBot'],
        allow: '/',
        disallow,
      },
    ],
    sitemap: `${baseUrl}/sitemap.xml`,
  };
}
```

---

## Sitemap

### XML Sitemap Template

```xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
  <url>
    <loc>https://yoursite.com/</loc>
    <lastmod>2025-01-15</lastmod>
    <changefreq>weekly</changefreq>
    <priority>1.0</priority>
  </url>
  <url>
    <loc>https://yoursite.com/pricing</loc>
    <lastmod>2025-01-10</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://yoursite.com/blog/article-slug</loc>
    <lastmod>2025-01-12</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.8</priority>
    <image:image>
      <image:loc>https://yoursite.com/images/article-image.jpg</image:loc>
    </image:image>
  </url>
</urlset>
```

### Next.js Dynamic Sitemap

```typescript
// app/sitemap.ts
import { MetadataRoute } from 'next';

/**
 * Build the sitemap: a fixed list of static routes followed by one entry per
 * blog post returned by `getBlogPosts()`. All URLs are made absolute using
 * NEXT_PUBLIC_URL (or the placeholder origin when it is unset).
 */
export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
  const origin = process.env.NEXT_PUBLIC_URL || 'https://yoursite.com';
  const absolute = (path: string) => `${origin}${path}`;

  // Static pages
  const fixedRoutes = [
    { url: '/', priority: 1.0, changeFrequency: 'weekly' as const },
    { url: '/pricing', priority: 0.9, changeFrequency: 'monthly' as const },
    { url: '/about', priority: 0.8, changeFrequency: 'monthly' as const },
    { url: '/contact', priority: 0.7, changeFrequency: 'yearly' as const },
  ];

  // Dynamic pages (e.g., blog posts)
  const articles = await getBlogPosts(); // Your data fetching function
  const articleEntries = articles.map((article) => ({
    url: absolute(`/blog/${article.slug}`),
    lastModified: new Date(article.updatedAt),
    changeFrequency: 'monthly' as const,
    priority: 0.8,
  }));

  // Static routes are stamped with the generation time.
  const fixedEntries = fixedRoutes.map(({ url, priority, changeFrequency }) => ({
    url: absolute(url),
    lastModified: new Date(),
    changeFrequency,
    priority,
  }));

  return [...fixedEntries, ...articleEntries];
}
```

### Sitemap Index (Large Sites)

```xml
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <sitemap>
    <loc>https://yoursite.com/sitemap-pages.xml</loc>
    <lastmod>2025-01-15</lastmod>
  </sitemap>
  <sitemap>
    <loc>https://yoursite.com/sitemap-blog.xml</loc>
    <lastmod>2025-01-14</lastmod>
  </sitemap>
  <sitemap>
    <loc>https://yoursite.com/sitemap-products.xml</loc>
    <lastmod>2025-01-13</lastmod>
  </sitemap>
</sitemapindex>
```

---

## Meta Tags

### Essential Meta Tags

```html
<head>
  <!-- Basic -->
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Page Title | Brand Name</title>
  <meta name="description" content="Compelling 150-160 character description with keywords and CTA.">

  <!-- Canonical (prevent duplicate content) -->
  <link rel="canonical" href="https://yoursite.com/current-page">

  <!-- Language: set lang="en" on the root <html> element that wraps
       <head>; an <html> tag is not valid inside <head> -->
  <meta name="language" content="English">

  <!-- Robots -->
  <meta name="robots" content="index, follow">
  <meta name="googlebot" content="index, follow">

  <!-- Author -->
  <meta name="author" content="Author Name">

  <!-- Favicon -->
  <link rel="icon" href="/favicon.ico" sizes="any">
  <link rel="icon" href="/icon.svg" type="image/svg+xml">
  <link rel="apple-touch-icon" href="/apple-touch-icon.png">
  <link rel="manifest" href="/manifest.webmanifest">
</head>
```

### Open Graph (Social Sharing)

```html
<!-- Open Graph / Facebook -->
<meta property="og:type" content="website">
<meta property="og:url" content="https://yoursite.com/page">
<meta property="og:title" content="Page Title - Brand">
<meta property="og:description" content="Description for social sharing (can be longer).">
<meta property="og:image" content="https://yoursite.com/og-image.jpg">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="630">
<meta property="og:site_name" content="Brand Name">
<meta property="og:locale" content="en_US">

<!-- Article-specific (for blog posts) -->
<meta property="og:type" content="article">
<meta property="article:published_time" content="2025-01-15T08:00:00Z">
<meta property="article:modified_time" content="2025-01-20T10:00:00Z">
<meta property="article:author" content="https://yoursite.com/team/author">
<meta property="article:section" content="Technology">
<meta property="article:tag" content="AI, SEO, Content">
```

### Twitter Cards

```html
<!-- Twitter -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@yourbrand">
<meta name="twitter:creator" content="@authorhandle">
<meta name="twitter:title" content="Page Title">
<meta name="twitter:description" content="Description for Twitter (max 200 chars).">
<meta name="twitter:image" content="https://yoursite.com/twitter-image.jpg">
```

### Next.js Metadata

```typescript
// app/layout.tsx
import { Metadata } from 'next';

export const metadata: Metadata = {
  metadataBase: new URL('https://yoursite.com'),
  title: {
    default: 'Brand Name',
    template: '%s | Brand Name',
  },
  description: 'Your default site description.',
  keywords: ['keyword1', 'keyword2', 'keyword3'],
  authors: [{ name: 'Brand Name', url: 'https://yoursite.com' }],
  creator: 'Brand Name',
  publisher: 'Brand Name',
  robots: {
    index: true,
    follow: true,
    googleBot: {
      index: true,
      follow: true,
      'max-video-preview': -1,
      'max-image-preview': 'large',
      'max-snippet': -1,
    },
  },
  openGraph: {
    type: 'website',
    locale: 'en_US',
    url: 'https://yoursite.com',
    siteName: 'Brand Name',
    title: 'Brand Name',
    description: 'Your site description.',
    images: [
      {
        url: '/og-image.jpg',
        width: 1200,
        height: 630,
        alt: 'Brand Name',
      },
    ],
  },
  twitter: {
    card: 'summary_large_image',
    site: '@yourbrand',
    creator: '@yourbrand',
  },
  verification: {
    google: 'google-verification-code',
    yandex: 'yandex-verification-code',
  },
};

// app/blog/[slug]/page.tsx
export async function generateMetadata({ params }): Promise<Metadata> {
  const post = await getPost(params.slug);

  return {
    title: post.title,
    description: post.excerpt,
    openGraph: {
      title: post.title,
      description: post.excerpt,
      type: 'article',
      publishedTime: post.publishedAt,
      modifiedTime: post.updatedAt,
      authors: [post.author.name],
      images: [post.coverImage],
    },
  };
}
```

---

## URL Structure

### Best Practices

```markdown
✅ GOOD URLs:
/blog/ai-seo-best-practices
/products/pro-plan
/pricing
/about/team

❌ BAD URLs:
/blog?id=123
/p/12345
/index.php?page=about
/Products/Pro_Plan (inconsistent casing)
```

### URL Guidelines

| Rule | Example |
|------|---------|
| Lowercase only | `/blog/my-post` not `/Blog/My-Post` |
| Hyphens not underscores | `/my-page` not `/my_page` |
| No trailing slashes | `/about` not `/about/` |
| Descriptive slugs | `/pricing` not `/p` |
| No query params for content | `/blog/post-title` not `/blog?id=123` |
| Max 3-4 levels deep | `/blog/category/post` |

### Redirect Configuration

```typescript
// next.config.js
module.exports = {
  async redirects() {
    return [
      // Redirect old URLs to new
      {
        source: '/old-page',
        destination: '/new-page',
        permanent: true, // 301 redirect
      },
      // Redirect with wildcard
      {
        source: '/blog/old/:slug',
        destination: '/articles/:slug',
        permanent: true,
      },
      // Trailing slash redirect
      {
        source: '/:path+/',
        destination: '/:path+',
        permanent: true,
      },
    ];
  },
};
```

---

## Canonical URLs

### Implementation

```html
<!-- Always include canonical, even for primary URL -->
<link rel="canonical" href="https://yoursite.com/current-page">
```

### When to Use

```markdown
✅ USE CANONICAL:
- Every page (even if only version exists)
- Paginated content (give each page its own self-referencing canonical; don't point page 2+ at page 1)
- URL parameters that don't change content (?utm_source=...)
- HTTP vs HTTPS (canonical to HTTPS)
- www vs non-www (pick one, canonical to it)

Example: /products?sort=price should canonical to /products
```

### Next.js Canonical

```typescript
// Automatic in metadata
export const metadata: Metadata = {
  alternates: {
    canonical: '/current-page',
  },
};
```

---

## Security Headers

### Essential Headers

```typescript
// next.config.js
const securityHeaders = [
  {
    key: 'X-DNS-Prefetch-Control',
    value: 'on',
  },
  {
    key: 'Strict-Transport-Security',
    value: 'max-age=63072000; includeSubDomains; preload',
  },
  {
    key: 'X-Frame-Options',
    value: 'SAMEORIGIN',
  },
  {
    key: 'X-Content-Type-Options',
    value: 'nosniff',
  },
  {
    key: 'Referrer-Policy',
    value: 'strict-origin-when-cross-origin',
  },
  {
    key: 'Permissions-Policy',
    value: 'camera=(), microphone=(), geolocation=()',
  },
];

module.exports = {
  async headers() {
    return [
      {
        source: '/:path*',
        headers: securityHeaders,
      },
    ];
  },
};
```

---

## Core Web Vitals

### Target Metrics

| Metric | Good | Needs Improvement | Poor |
|--------|------|-------------------|------|
| LCP (Largest Contentful Paint) | ≤2.5s | ≤4.0s | >4.0s |
| INP (Interaction to Next Paint) | ≤200ms | ≤500ms | >500ms |
| CLS (Cumulative Layout Shift) | ≤0.1 | ≤0.25 | >0.25 |

### Optimization Checklist

```markdown
## LCP (Loading)
- [ ] Optimize largest image (WebP, proper sizing)
- [ ] Preload critical assets
- [ ] Use CDN for static assets
- [ ] Enable compression (gzip/brotli)
- [ ] Minimize render-blocking resources

## INP (Interactivity)
- [ ] Minimize JavaScript execution time
- [ ] Break up long tasks
- [ ] Use web workers for heavy computation
- [ ] Optimize event handlers
- [ ] Lazy load non-critical JS

## CLS (Visual Stability)
- [ ] Set dimensions on images/videos
- [ ] Reserve space for dynamic content
- [ ] Avoid inserting content above existing
- [ ] Use transform for animations
- [ ] Preload fonts
```

### Next.js Performance

```typescript
// Image optimization
import Image from 'next/image';

<Image
  src="/hero.jpg"
  alt="Hero image"
  width={1200}
  height={630}
  priority // Preload for LCP
  placeholder="blur"
  blurDataURL={blurDataUrl}
/>

// Font optimization
import { Inter } from 'next/font/google';

const inter = Inter({
  subsets: ['latin'],
  display: 'swap', // Prevent FOIT
});

// Dynamic imports
import dynamic from 'next/dynamic';

const HeavyComponent = dynamic(() => import('./HeavyComponent'), {
  loading: () => <Skeleton />,
  ssr: false, // Client-only if needed
});
```

---

## Internal Linking

### Structure

```markdown
## Link Architecture

Homepage
├── /pricing (1 click)
├── /features (1 click)
├── /blog (1 click)
│   ├── /blog/category-1 (2 clicks)
│   │   └── /blog/category-1/post (3 clicks)
│   └── /blog/category-2 (2 clicks)
└── /about (1 click)

Rule: Every page within 3 clicks of homepage
```

### Best Practices

```markdown
✅ DO:
- Use descriptive anchor text
- Link contextually within content
- Create hub pages for topics
- Link to related content at end of posts
- Use breadcrumbs for navigation

❌ AVOID:
- "Click here" as anchor text
- Orphan pages (no internal links)
- Too many links per page (>100)
- Broken internal links
- Redirect chains
```

### Breadcrumbs

```typescript
// components/Breadcrumbs.tsx
import Link from 'next/link';

interface BreadcrumbItem {
  name: string;
  href: string;
}

/**
 * Render a breadcrumb trail together with matching schema.org
 * BreadcrumbList JSON-LD.
 *
 * @param items - Ordered trail from the root to the current page; the last
 *   item is marked as the current page.
 */
export function Breadcrumbs({ items }: { items: BreadcrumbItem[] }) {
  const jsonLd = {
    '@context': 'https://schema.org',
    '@type': 'BreadcrumbList',
    itemListElement: items.map((item, index) => ({
      '@type': 'ListItem',
      position: index + 1,
      name: item.name,
      item: `https://yoursite.com${item.href}`,
    })),
  };

  // JSON.stringify does not escape "<", so a name containing "</script>"
  // would terminate the script tag and inject markup. Escaping it as \u003c
  // keeps the JSON valid while making it inert as HTML.
  const serializedJsonLd = JSON.stringify(jsonLd).replace(/</g, '\\u003c');

  const lastIndex = items.length - 1;

  return (
    <>
      <script
        type="application/ld+json"
        dangerouslySetInnerHTML={{ __html: serializedJsonLd }}
      />
      <nav aria-label="Breadcrumb">
        <ol className="flex gap-2">
          {items.map((item, index) => (
            <li key={item.href}>
              {/* Separator is decorative; hide it from screen readers */}
              {index > 0 && <span aria-hidden="true">/</span>}
              <Link
                href={item.href}
                aria-current={index === lastIndex ? 'page' : undefined}
              >
                {item.name}
              </Link>
            </li>
          ))}
        </ol>
      </nav>
    </>
  );
}
```

---

## AI Crawler Handling

### Known AI Crawlers

| Bot | User Agent | Purpose |
|-----|------------|---------|
| GPTBot | `GPTBot` | OpenAI crawler (collects content for model training) |
| ChatGPT-User | `ChatGPT-User` | ChatGPT user browsing |
| ClaudeBot | `ClaudeBot` | Claude web access |
| Claude-Web | `Claude-Web` | Claude web features |
| PerplexityBot | `PerplexityBot` | Perplexity search |
| Google-Extended | `Google-Extended` | Gemini/Bard training |
| Amazonbot | `Amazonbot` | Alexa/Amazon AI |
| CCBot | `CCBot` | Common Crawl (AI training) |

### Allow AI Discovery, Block Training (Optional)

```txt
# robots.txt

# Allow ChatGPT to fetch and cite pages on behalf of users
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Allow: /

# Block GPTBot (OpenAI's crawler that collects model-training data)
User-agent: GPTBot
Disallow: /

# Block CCBot (used for training datasets)
User-agent: CCBot
Disallow: /

# Block Google AI training, allow search
User-agent: Google-Extended
Disallow: /
```

### AI-Specific Meta Tags

```html
<!-- Standard indexing directives: allow indexing, link following and large
     image previews. This tag alone does NOT opt the page out of AI training. -->
<meta name="robots" content="index, follow, max-image-preview:large">

<!-- Opt out of AI training (proposed standard; crawler support is not guaranteed) -->
<meta name="ai-training" content="disallow">
```

---

## Structured Data Placement

### Where to Add Schema

```html
<!-- Option 1: In <head> with JSON-LD (recommended) -->
<head>
  <script type="application/ld+json">
    {
      "@context": "https://schema.org",
      "@type": "Organization",
      "name": "Your Company"
    }
  </script>
</head>

<!-- Option 2: Before closing </body> -->
<body>
  <!-- Page content -->
  <script type="application/ld+json">
    { "@context": "https://schema.org", ... }
  </script>
</body>
```

### Multiple Schema Per Page

```html
<head>
  <!-- Organization (site-wide) -->
  <script type="application/ld+json">
    { "@context": "https://schema.org", "@type": "Organization", ... }
  </script>

  <!-- BreadcrumbList (navigation) -->
  <script type="application/ld+json">
    { "@context": "https://schema.org", "@type": "BreadcrumbList", ... }
  </script>

  <!-- Article (page-specific) -->
  <script type="application/ld+json">
    { "@context": "https://schema.org", "@type": "Article", ... }
  </script>

  <!-- FAQPage (if FAQ section exists) -->
  <script type="application/ld+json">
    { "@context": "https://schema.org", "@type": "FAQPage", ... }
  </script>
</head>
```

---

## Project Structure

```
project/
├── public/
│   ├── robots.txt              # Or generate dynamically
│   ├── sitemap.xml             # Or generate dynamically
│   ├── favicon.ico
│   ├── icon.svg
│   ├── apple-touch-icon.png
│   ├── og-image.jpg            # Default OG image (1200x630)
│   └── manifest.webmanifest
├── app/
│   ├── layout.tsx              # Global metadata
│   ├── robots.ts               # Dynamic robots.txt
│   ├── sitemap.ts              # Dynamic sitemap
│   └── [page]/
│       └── page.tsx            # Page-specific metadata
├── components/
│   ├── SchemaMarkup.tsx
│   ├── Breadcrumbs.tsx
│   └── MetaTags.tsx
└── lib/
    ├── schema.ts               # Schema generators
    └── seo.ts                  # SEO utilities
```

---

## Verification & Submission

### Search Console Setup

```markdown
# Verify ownership methods
1. HTML file upload (google*.html to public/)
2. Meta tag (add to <head>)
3. DNS TXT record
4. Google Analytics (if already installed)
```

### Submit Sitemap

```markdown
1. Google Search Console
   - Sitemaps → Add new sitemap → yoursite.com/sitemap.xml

2. Bing Webmaster Tools
   - Sitemaps → Submit sitemap

3. Yandex Webmaster (if relevant)
   - Indexing → Sitemap files
```

---

## Checklist

```markdown
## Technical SEO Checklist

### robots.txt
- [ ] Allow search engines
- [ ] Allow AI bots (GPTBot, ClaudeBot, PerplexityBot)
- [ ] Block admin/private areas
- [ ] Include sitemap reference
- [ ] Test with Google's robots.txt tester

### Sitemap
- [ ] Include all indexable pages
- [ ] Exclude noindex pages
- [ ] Include lastmod dates
- [ ] Submit to Search Console
- [ ] Auto-update on content changes

### Meta Tags
- [ ] Unique title per page (50-60 chars)
- [ ] Unique description per page (150-160 chars)
- [ ] Canonical URL on every page
- [ ] Open Graph tags
- [ ] Twitter Card tags

### URL Structure
- [ ] Lowercase, hyphenated
- [ ] Descriptive slugs
- [ ] No query params for content
- [ ] 301 redirects for moved content
- [ ] No broken links

### Performance
- [ ] LCP < 2.5s
- [ ] INP < 200ms
- [ ] CLS < 0.1
- [ ] HTTPS enabled
- [ ] Security headers configured

### Structured Data
- [ ] Organization schema (homepage)
- [ ] BreadcrumbList (all pages)
- [ ] Article schema (blog posts)
- [ ] FAQ schema (FAQ sections)
- [ ] Validate with Rich Results Test
```

---

## Quick Reference

### File Checklist

```
public/
├── robots.txt          ✓ Required
├── sitemap.xml         ✓ Required
├── favicon.ico         ✓ Required
├── og-image.jpg        ✓ Required (1200x630)
└── manifest.webmanifest ○ Recommended
```

### Meta Tag Lengths

| Tag | Length |
|-----|--------|
| Title | 50-60 characters |
| Description | 150-160 characters |
| OG Title | 60-90 characters |
| OG Description | 200 characters |
| Twitter Description | 200 characters |

### Image Sizes

| Image | Dimensions |
|-------|------------|
| OG Image | 1200 x 630 |
| Twitter Image | 1200 x 628 |
| Favicon | 32 x 32 |
| Apple Touch Icon | 180 x 180 |


================================================
FILE: skills/supabase/SKILL.md
================================================
---
name: supabase
description: Core Supabase CLI, migrations, RLS, Edge Functions
when-to-use: When working with Supabase - database, auth, storage, or edge functions
user-invocable: false
paths: ["supabase/**", "**/supabase.*", "**/.env*"]
effort: medium
---

# Supabase Core Skill


Core concepts, CLI workflow, and patterns common to all Supabase projects.

**Sources:** [Supabase Docs](https://supabase.com/docs) | [Supabase CLI](https://supabase.com/docs/guides/local-development/cli/getting-started)

---

## Core Principle

**Local-first, migrations in version control, never touch production directly.**

Develop locally with the Supabase CLI, capture all changes as migrations, and deploy through CI/CD.

---

## Supabase Stack

| Service | Purpose |
|---------|---------|
| **Database** | PostgreSQL with extensions |
| **Auth** | User authentication, OAuth providers |
| **Storage** | File storage with RLS |
| **Edge Functions** | Serverless Deno functions |
| **Realtime** | WebSocket subscriptions |
| **Vector** | AI embeddings (pgvector) |

---

## CLI Setup

### Install & Login
```bash
# macOS
brew install supabase/tap/supabase

# npm (alternative)
npm install -g supabase

# Login
supabase login
```

### Initialize Project
```bash
# In your project directory
supabase init

# Creates:
# supabase/
# ├── config.toml      # Local config
# ├── seed.sql         # Seed data
# └── migrations/      # SQL migrations
```

### Link to Remote
```bash
# Get project ref from dashboard URL: https://supabase.com/dashboard/project/<ref>
supabase link --project-ref <project-id>

# Pull existing schema
supabase db pull
```

### Start Local Stack
```bash
supabase start

# Output:
# API URL: http://localhost:54321
# GraphQL URL: http://localhost:54321/graphql/v1
# DB URL: postgresql://postgres:postgres@localhost:54322/postgres
# Studio URL: http://localhost:54323
# Anon key: eyJ...
# Service role key: eyJ...
```

---

## Migration Workflow

### Option 1: Dashboard + Diff (Quick Prototyping)
```bash
# 1. Make changes in local Studio (localhost:54323)
# 2. Generate migration from diff
supabase db diff -f <migration_name>

# 3. Review generated SQL
cat supabase/migrations/*_<migration_name>.sql

# 4. Reset to test
supabase db reset
```

### Option 2: Write Migrations Directly (Recommended)
```bash
# 1. Create empty migration
supabase migration new create_users_table

# 2. Edit the migration file
# supabase/migrations/<timestamp>_create_users_table.sql

# 3. Apply locally
supabase db reset
```

### Option 3: ORM Migrations (Best DX)
Use Drizzle (TypeScript) or SQLAlchemy (Python) - see framework-specific skills.

### Deploy Migrations
```bash
# Push to remote (staging/production)
supabase db push

# Check migration status
supabase migration list
```

---

## Database Patterns

### Enable RLS on All Tables
```sql
-- Always enable RLS
ALTER TABLE public.profiles ENABLE ROW LEVEL SECURITY;

-- Default deny - must create policies
CREATE POLICY "Users can view own profile"
  ON public.profiles
  FOR SELECT
  USING (auth.uid() = id);
```

### Common RLS Policies
```sql
-- Public read
CREATE POLICY "Public read access"
  ON public.posts FOR SELECT
  USING (true);

-- Authenticated users only
-- `TO authenticated` scopes the policy to signed-in users (it replaces the
-- deprecated auth.role() check); WITH CHECK stops a user from inserting
-- rows owned by someone else.
CREATE POLICY "Authenticated users can insert"
  ON public.posts FOR INSERT
  TO authenticated
  WITH CHECK (auth.uid() = user_id);

-- Owner access
-- USING selects the rows that may be updated; WITH CHECK validates the new
-- row so ownership cannot be reassigned to another user.
CREATE POLICY "Users can update own records"
  ON public.posts FOR UPDATE
  TO authenticated
  USING (auth.uid() = user_id)
  WITH CHECK (auth.uid() = user_id);

-- Admin access (using custom claim)
-- The top-level JWT `role` claim holds the Postgres role (anon /
-- authenticated / service_role), so a custom role must be read from
-- app_metadata, which users cannot edit themselves.
CREATE POLICY "Admins have full access"
  ON public.posts FOR ALL
  TO authenticated
  USING ((auth.jwt() -> 'app_metadata' ->> 'role') = 'admin');
```

### Link to auth.users
```sql
-- Profile table linked to auth
CREATE TABLE public.profiles (
  id UUID PRIMARY KEY REFERENCES auth.users(id) ON DELETE CASCADE,
  username TEXT UNIQUE NOT NULL,
  avatar_url TEXT,
  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Auto-create profile on signup
-- Trigger function: inserts a matching public.profiles row for every new
-- auth.users row.
--   * SECURITY DEFINER runs with the function owner's privileges, so the
--     search_path is pinned to '' and every object is schema-qualified;
--     otherwise a caller-controlled search_path could redirect the insert.
--   * NEW.email can be NULL (e.g. phone sign-ups); fall back to the user id
--     so the NOT NULL username constraint cannot abort the sign-up.
CREATE OR REPLACE FUNCTION public.handle_new_user()
RETURNS TRIGGER
LANGUAGE plpgsql
SECURITY DEFINER
SET search_path = ''
AS $$
BEGIN
  INSERT INTO public.profiles (id, username)
  VALUES (NEW.id, COALESCE(NEW.email, NEW.id::text));
  RETURN NEW;
END;
$$;

CREATE TRIGGER on_auth_user_created
  AFTER INSERT ON auth.users
  FOR EACH ROW EXECUTE FUNCTION public.handle_new_user();
```

---

## Seed Data

### supabase/seed.sql
```sql
-- Runs on `supabase db reset`
-- Use ON CONFLICT for idempotency

INSERT INTO public.profiles (id, username, avatar_url)
VALUES
  ('d0e1f2a3-b4c5-6d7e-8f9a-0b1c2d3e4f5a', 'testuser', null),
  ('a1b2c3d4-e5f6-7a8b-9c0d-1e2f3a4b5c6d', 'admin', null)
ON CONFLICT (id) DO NOTHING;
```

---

## Environment Variables

### Required Variables
```bash
# Public (safe for client-side)
SUPABASE_URL=https://xxxxx.supabase.co
SUPABASE_ANON_KEY=eyJ...

# Private (server-side only - NEVER expose)
SUPABASE_SERVICE_ROLE_KEY=eyJ...
DATABASE_URL=postgresql://postgres.[ref]:[password]@aws-0-region.pooler.supabase.com:6543/postgres
```

### Local vs Production
```bash
# .env.local (local development)
SUPABASE_URL=http://localhost:54321
SUPABASE_ANON_KEY=<from supabase start>
DATABASE_URL=postgresql://postgres:postgres@localhost:54322/postgres

# .env.production (remote)
SUPABASE_URL=https://xxxxx.supabase.co
SUPABASE_ANON_KEY=<from dashboard>
DATABASE_URL=<connection pooler URL>
```

### Connection Pooling
```bash
# Transaction mode (recommended for serverless)
# Add ?pgbouncer=true to URL
DATABASE_URL=postgresql://...@pooler.supabase.com:6543/postgres?pgbouncer=true

# Session mode (for migrations, long transactions)
DATABASE_URL=postgresql://...@pooler.supabase.com:5432/postgres
```

---

## Edge Functions

### Create Function
```bash
supabase functions new hello-world
```

### Basic Structure
```typescript
// supabase/functions/hello-world/index.ts
import { serve } from 'https://deno.land/std@0.168.0/http/server.ts';

serve(async (req) => {
  const { name } = await req.json();

  return new Response(
    JSON.stringify({ message: `Hello ${name}!` }),
    { headers: { 'Content-Type': 'application/json' } }
  );
});
```

### With Auth Context
```typescript
import { serve } from 'https://deno.land/std@0.168.0/http/server.ts';
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2';

// Authenticated handler: resolves the calling user from the request's
// Authorization header and returns their id as JSON, or 401 when the
// header is missing or the token does not map to a user.
serve(async (req) => {
  // Reject early when no token was sent; forwarding a missing header would
  // otherwise send the literal string "null" as the Authorization value.
  const authorization = req.headers.get('Authorization');
  if (!authorization) {
    return new Response('Unauthorized', { status: 401 });
  }

  const supabase = createClient(
    Deno.env.get('SUPABASE_URL') ?? '',
    Deno.env.get('SUPABASE_ANON_KEY') ?? '',
    {
      global: {
        // Forward the caller's JWT so queries run under their identity (RLS)
        headers: { Authorization: authorization },
      },
    }
  );

  const { data: { user }, error } = await supabase.auth.getUser();

  if (error || !user) {
    return new Response('Unauthorized', { status: 401 });
  }

  return new Response(
    JSON.stringify({ user_id: user.id }),
    { headers: { 'Content-Type': 'application/json' } }
  );
});
```

### Deploy
```bash
# Serve locally
supabase functions serve

# Deploy single function
supabase functions deploy hello-world

# Deploy all
supabase functions deploy
```

---

## Storage

### Create Bucket (in migration)
```sql
INSERT INTO storage.buckets (id, name, public)
VALUES ('avatars', 'avatars', true);

-- Storage policies
CREATE POLICY "Avatar images are publicly accessible"
  ON storage.objects FOR SELECT
  USING (bucket_id = 'avatars');

CREATE POLICY "Users can upload own avatar"
  ON storage.objects FOR INSERT
  WITH CHECK (
    bucket_id = 'avatars' AND
    auth.uid()::text = (storage.foldername(name))[1]
  );
```

---

## CLI Quick Reference

```bash
# Lifecycle
supabase start                   # Start local stack
supabase stop                    # Stop local stack
supabase status                  # Show status & credentials

# Database
supabase db reset                # Reset + migrations + seed
supabase db push                 # Push to remote
supabase db pull                 # Pull remote schema
supabase db diff -f <name>       # Generate migration from diff
supabase db lint                 # Check for issues

# Migrations
supabase migration new <name>    # Create migration
supabase migration list          # List migrations
supabase migration up            # Apply pending (local by default; --linked for remote)

# Functions
supabase functions new <name>    # Create function
supabase functions serve         # Local dev
supabase functions deploy        # Deploy all

# Types
supabase gen types typescript --local > types/database.ts

# Project
supabase link --project-ref <id> # Link to remote
supabase projects list           # List projects
```

---

## CI/CD Template

```yaml
# .github/workflows/supabase.yml
name: Supabase CI/CD

on:
  push:
    branches: [main]
  pull_request:

env:
  SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }}
  SUPABASE_DB_PASSWORD: ${{ secrets.SUPABASE_DB_PASSWORD }}
  SUPABASE_PROJECT_ID: ${{ secrets.SUPABASE_PROJECT_ID }}

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: supabase/setup-cli@v1

      - name: Start Supabase
        run: supabase start

      - name: Run migrations
        run: supabase db reset

      - name: Lint database
        run: supabase db lint

  deploy:
    needs: test
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: supabase/setup-cli@v1

      - name: Link project
        run: supabase link --project-ref $SUPABASE_PROJECT_ID

      - name: Push migrations
        run: supabase db push

      - name: Deploy functions
        run: supabase functions deploy
```

---

## Anti-Patterns

- **Direct production changes** - Always use migrations
- **Disabled RLS** - Enable on all user-data tables
- **Service key in client** - Never expose service role key
- **No connection pooling** - Use pooler for serverless
- **Committing .env** - Add to .gitignore
- **Skipping migration review** - Always check generated SQL
- **No seed data** - Use seed.sql for consistent local dev


================================================
FILE: skills/supabase-nextjs/SKILL.md
================================================
---
name: supabase-nextjs
description: Next.js with Supabase and Drizzle ORM
when-to-use: When building a Next.js app with Supabase backend
user-invocable: false
paths: ["src/app/**", "src/db/**", "supabase/**"]
effort: medium
---

# Supabase + Next.js Skill


Next.js App Router patterns with Supabase Auth and Drizzle ORM.

**Sources:** [Supabase Next.js Guide](https://supabase.com/docs/guides/auth/server-side/nextjs) | [Drizzle + Supabase](https://supabase.com/docs/guides/database/drizzle)

---

## Core Principle

**Drizzle for queries, Supabase for auth/storage, server components by default.**

Use Drizzle ORM for type-safe database access. Use Supabase client for auth, storage, and realtime. Prefer server components; use client components only when needed.

---

## Project Structure

```
project/
├── src/
│   ├── app/
│   │   ├── (auth)/
│   │   │   ├── login/page.tsx
│   │   │   ├── signup/page.tsx
│   │   │   └── callback/route.ts
│   │   ├── (dashboard)/
│   │   │   └── page.tsx
│   │   ├── api/
│   │   │   └── [...]/route.ts
│   │   ├── layout.tsx
│   │   └── page.tsx
│   ├── components/
│   │   ├── auth/
│   │   └── ui/
│   ├── db/
│   │   ├── index.ts              # Drizzle client
│   │   ├── schema.ts             # Schema definitions
│   │   └── queries/              # Query functions
│   ├── lib/
│   │   ├── supabase/
│   │   │   ├── client.ts         # Browser client
│   │   │   ├── server.ts         # Server client
│   │   │   └── middleware.ts     # Auth middleware helper
│   │   └── auth.ts               # Auth helpers
│   └── middleware.ts             # Next.js middleware
├── supabase/
│   ├── migrations/
│   └── config.toml
├── drizzle.config.ts
└── .env.local
```

---

## Setup

### Install Dependencies
```bash
npm install @supabase/supabase-js @supabase/ssr drizzle-orm postgres
npm install -D drizzle-kit
```

### Environment Variables
```bash
# .env.local
NEXT_PUBLIC_SUPABASE_URL=http://localhost:54321
NEXT_PUBLIC_SUPABASE_ANON_KEY=<from supabase start>

# Server-side only
SUPABASE_SERVICE_ROLE_KEY=<from supabase start>
DATABASE_URL=postgresql://postgres:postgres@localhost:54322/postgres
```

---

## Drizzle Setup

### drizzle.config.ts
```typescript
import { defineConfig } from 'drizzle-kit';

// Drizzle Kit configuration: schema location, migration output directory,
// and the database connection used by the drizzle-kit CLI.
export default defineConfig({
  schema: './src/db/schema.ts',
  // Generated migrations are written next to the Supabase project files.
  out: './supabase/migrations',
  dialect: 'postgresql',
  dbCredentials: {
    // Non-null assertion: DATABASE_URL is expected to be set in the environment.
    url: process.env.DATABASE_URL!,
  },
  // Only the `public` schema is managed here.
  schemaFilter: ['public'],
});
```

### src/db/index.ts
```typescript
import { drizzle } from 'drizzle-orm/postgres-js';
import postgres from 'postgres';
import * as schema from './schema';

// postgres-js connection created once at module load from DATABASE_URL.
const client = postgres(process.env.DATABASE_URL!, {
  prepare: false, // Required for Supabase connection pooling
});

// Drizzle instance bound to the schema module; import `db` for all queries.
export const db = drizzle(client, { schema });
```

### src/db/schema.ts
```typescript
import {
  pgTable,
  uuid,
  text,
  timestamp,
  boolean,
} from 'drizzle-orm/pg-core';

// One row per user. `id` is supplied by the caller (no default is declared).
export const profiles = pgTable('profiles', {
  // Intended to mirror auth.users.id; note that no foreign key is declared here.
  id: uuid('id').primaryKey(), // References auth.users
  email: text('email').notNull(),
  name: text('name'),
  avatarUrl: text('avatar_url'),
  createdAt: timestamp('created_at').defaultNow().notNull(),
  // Defaults to now() on insert only; nothing here bumps it on update.
  updatedAt: timestamp('updated_at').defaultNow().notNull(),
});

// Posts authored by a profile.
export const posts = pgTable('posts', {
  id: uuid('id').primaryKey().defaultRandom(),
  authorId: uuid('author_id').references(() => profiles.id).notNull(),
  title: text('title').notNull(),
  content: text('content'),
  // Nullable with a default of false (no notNull()).
  published: boolean('published').default(false),
  createdAt: timestamp('created_at').defaultNow().notNull(),
});

// Type exports
export type Profile = typeof profiles.$inferSelect;
export type NewProfile = typeof profiles.$inferInsert;
export type Post = typeof posts.$inferSelect;
export type NewPost = typeof posts.$inferInsert;
```

---

## Supabase Clients

### src/lib/supabase/client.ts (Browser)
```typescript
import { createBrowserClient } from '@supabase/ssr';

/**
 * Creates a Supabase client for browser (client component) code, configured
 * from the public URL and anon key environment variables.
 */
export function createClient() {
  const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL!;
  const supabaseAnonKey = process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!;

  return createBrowserClient(supabaseUrl, supabaseAnonKey);
}
```

### src/lib/supabase/server.ts (Server Components/Actions)
```typescript
import { createServerClient } from '@supabase/ssr';
import { cookies } from 'next/headers';

/**
 * Creates a Supabase client for server components, server actions and route
 * handlers, reading and writing auth cookies through the Next.js cookie store.
 */
export async function createClient() {
  const cookieStore = await cookies();
  const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL!;
  const supabaseAnonKey = process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!;

  return createServerClient(supabaseUrl, supabaseAnonKey, {
    cookies: {
      getAll: () => cookieStore.getAll(),
      setAll(cookiesToSet) {
        try {
          for (const { name, value, options } of cookiesToSet) {
            cookieStore.set(name, value, options);
          }
        } catch {
          // Reached when called from a Server Component; the failure is
          // deliberately ignored.
        }
      },
    },
  });
}
```

### src/lib/supabase/middleware.ts (For Middleware)
```typescript
import { createServerClient } from '@supabase/ssr';
import { NextResponse, type NextRequest } from 'next/server';

/**
 * Builds a Supabase server client bound to the incoming request's cookies,
 * looks up the current user, and returns the response that carries any
 * cookies Supabase wrote along the way.
 *
 * @param request - The incoming Next.js middleware request.
 * @returns `supabaseResponse` (the pass-through response with cookies applied)
 *   and `user` (the `user` field returned by `auth.getUser()`).
 */
export async function updateSession(request: NextRequest) {
  // `let` because `setAll` below replaces this response object.
  let supabaseResponse = NextResponse.next({ request });

  const supabase = createServerClient(
    process.env.NEXT_PUBLIC_SUPABASE_URL!,
    process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!,
    {
      cookies: {
        getAll() {
          return request.cookies.getAll();
        },
        setAll(cookiesToSet) {
          // 1) Mirror the new cookies onto the request...
          cookiesToSet.forEach(({ name, value }) =>
            request.cookies.set(name, value)
          );
          // 2) ...rebuild the response from the updated request...
          supabaseResponse = NextResponse.next({ request });
          // 3) ...and set the cookies (with options) on the outgoing response.
          cookiesToSet.forEach(({ name, value, options }) =>
            supabaseResponse.cookies.set(name, value, options)
          );
        },
      },
    }
  );

  // Refresh session
  const { data: { user } } = await supabase.auth.getUser();

  return { supabaseResponse, user };
}
```

---

## Middleware

### src/middleware.ts
```typescript
import { type NextRequest, NextResponse } from 'next/server';
import { updateSession } from '@/lib/supabase/middleware';

// Routes reachable without a session.
const publicRoutes = ['/', '/login', '/signup', '/auth/callback'];

/**
 * Returns true when `pathname` is one of the public routes or nested below
 * one. Matching is done on whole path segments: a plain `startsWith('/')`
 * would match every pathname and silently disable the auth check.
 */
function isPublicPath(pathname: string): boolean {
  return publicRoutes.some((route) =>
    route === '/'
      ? pathname === '/'
      : pathname === route || pathname.startsWith(`${route}/`)
  );
}

/**
 * Builds a redirect response that carries over the cookies already written to
 * `supabaseResponse`, so a session refreshed during this request is not lost.
 */
function redirectWithSession(
  url: Parameters<typeof NextResponse.redirect>[0],
  supabaseResponse: NextResponse
) {
  const response = NextResponse.redirect(url);
  supabaseResponse.cookies
    .getAll()
    .forEach((cookie) => response.cookies.set(cookie));
  return response;
}

/**
 * Refreshes the Supabase session, then:
 * - sends unauthenticated visitors of non-public routes to `/login`
 *   (remembering the original path in `redirectTo`);
 * - sends authenticated users away from `/login` and `/signup`;
 * - otherwise returns the pass-through response from `updateSession`.
 */
export async function middleware(request: NextRequest) {
  const { supabaseResponse, user } = await updateSession(request);
  const { pathname } = request.nextUrl;

  // Redirect unauthenticated users to login
  if (!user && !isPublicPath(pathname)) {
    const url = request.nextUrl.clone();
    url.pathname = '/login';
    url.searchParams.set('redirectTo', pathname);
    return redirectWithSession(url, supabaseResponse);
  }

  // Redirect authenticated users away from auth pages
  if (user && (pathname === '/login' || pathname === '/signup')) {
    return redirectWithSession(
      new URL('/dashboard', request.url),
      supabaseResponse
    );
  }

  return supabaseResponse;
}

export const config = {
  matcher: [
    '/((?!_next/static|_next/image|favicon.ico|.*\\.(?:svg|png|jpg|jpeg|gif|webp)$).*)',
  ],
};
```

---

## Auth Helpers

### src/lib/auth.ts
```typescript
import { redirect } from 'next/navigation';
import { createClient } from '@/lib/supabase/server';

/** Returns the `user` from `supabase.auth.getUser()` for the current request. */
export async function getUser() {
  const supabase = await createClient();
  const { data } = await supabase.auth.getUser();
  return data.user;
}

/** Returns the signed-in user, or redirects to `/login` when there is none. */
export async function requireAuth() {
  const user = await getUser();
  if (user) {
    return user;
  }
  return redirect('/login');
}

/** Redirects to `/dashboard` when a user is already signed in. */
export async function requireGuest() {
  const alreadySignedIn = Boolean(await getUser());
  if (alreadySignedIn) redirect('/dashboard');
}
```

---

## Auth Pages

### src/app/(auth)/login/page.tsx
```typescript
import { requireGuest } from '@/lib/auth';
import { LoginForm } from '@/components/auth/login-form';

/**
 * Login page (server component). Runs `requireGuest()` first, which redirects
 * signed-in users to `/dashboard`, then renders the centered login form.
 */
export default async function LoginPage() {
  await requireGuest();

  return (
    <div className="flex min-h-screen items-center justify-center">
      <LoginForm />
    </div>
  );
}
```

### src/components/auth/login-form.tsx
```typescript
'use client';

import { useState } from 'react';
import { useRouter } from 'next/navigation';
import { createClient } from '@/lib/supabase/client';

/**
 * Picks the path to open after a successful sign-in: the `redirectTo` query
 * parameter when it is a same-origin absolute path, otherwise `/dashboard`.
 * Values such as `//host`, `/\host` or full URLs are rejected so the parameter
 * cannot be used to bounce users to another site.
 */
function resolvePostLoginPath(): string {
  const target = new URLSearchParams(window.location.search).get('redirectTo');
  const isLocalPath =
    !!target &&
    target.startsWith('/') &&
    !target.startsWith('//') &&
    !target.includes('\\');
  return isLocalPath ? target : '/dashboard';
}

/**
 * Email/password login form (client component).
 *
 * Signs in through the browser Supabase client, shows the error message
 * returned by Supabase on failure, and on success navigates to the page the
 * user originally requested (`redirectTo`) or `/dashboard`.
 */
export function LoginForm() {
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [error, setError] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);
  const router = useRouter();

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setLoading(true);
    setError(null);

    const supabase = createClient();
    const { error } = await supabase.auth.signInWithPassword({
      email,
      password,
    });

    if (error) {
      setError(error.message);
      setLoading(false);
      return;
    }

    // `loading` intentionally stays true while the navigation is in flight.
    router.push(resolvePostLoginPath());
    // Re-fetch server components so they render with the new session.
    router.refresh();
  };

  return (
    <form onSubmit={handleSubmit} className="space-y-4 w-full max-w-sm">
      <div>
        <label htmlFor="email">Email</label>
        <input
          id="email"
          type="email"
          value={email}
          onChange={(e) => setEmail(e.target.value)}
          required
        />
      </div>
      <div>
        <label htmlFor="password">Password</label>
        <input
          id="password"
          type="password"
          value={password}
          onChange={(e) => setPassword(e.target.value)}
          required
        />
      </div>
      {error && <p className="text-red-500">{error}</p>}
      <button type="submit" disabled={loading}>
        {loading ? 'Signing in...' : 'Sign In'}
      </button>
    </form>
  );
}
```

### src/app/auth/callback/route.ts
```typescript
import { createClient } from '@/lib/supabase/server';
import { NextResponse } from 'next/server';

const DEFAULT_NEXT_PATH = '/dashboard';

/**
 * Validates the `next` query parameter and returns a same-origin path.
 * Anything that is missing, not an absolute path, or that resolves to a
 * different origin (e.g. `//evil.com`, `/\evil.com`) falls back to
 * `/dashboard`, so the callback cannot be used as an open redirect.
 */
function resolveNextPath(next: string | null, origin: string): string {
  if (!next || !next.startsWith('/')) {
    return DEFAULT_NEXT_PATH;
  }
  try {
    const target = new URL(next, origin);
    if (target.origin !== origin) {
      return DEFAULT_NEXT_PATH;
    }
    return `${target.pathname}${target.search}${target.hash}`;
  } catch {
    return DEFAULT_NEXT_PATH;
  }
}

/**
 * OAuth / email-link callback. Exchanges the `code` query parameter for a
 * session and redirects to the validated `next` path; on a missing code or a
 * failed exchange it redirects to `/login?error=auth_error`.
 */
export async function GET(request: Request) {
  const { searchParams, origin } = new URL(request.url);
  const code = searchParams.get('code');
  const next = resolveNextPath(searchParams.get('next'), origin);

  if (code) {
    const supabase = await createClient();
    const { error } = await supabase.auth.exchangeCodeForSession(code);

    if (!error) {
      return NextResponse.redirect(`${origin}${next}`);
    }
  }

  return NextResponse.redirect(`${origin}/login?error=auth_error`);
}
```

---

## Server Actions

### src/app/actions/posts.ts
```typescript
'use server';

import { revalidatePath } from 'next/cache';
import { redirect } from 'next/navigation';
import { db } from '@/db';
import { posts, NewPost } from '@/db/schema';
import { requireAuth } from '@/lib/auth';
import { and, eq } from 'drizzle-orm';

/**
 * Reads and validates the post fields from a submitted form.
 * `FormData.get` may return `null` or a `File`, so the values are checked
 * instead of being cast to `string`.
 *
 * @throws Error when `title` is missing or blank.
 */
function readPostFields(formData: FormData) {
  const title = formData.get('title');
  const content = formData.get('content');

  if (typeof title !== 'string' || title.trim() === '') {
    throw new Error('Title is required');
  }

  return {
    title,
    content: typeof content === 'string' ? content : null,
  };
}

/**
 * Creates a post owned by the signed-in user, revalidates the dashboard and
 * redirects to the new post's page.
 */
export async function createPost(formData: FormData) {
  const user = await requireAuth();
  const { title, content } = readPostFields(formData);

  const [post] = await db.insert(posts).values({
    authorId: user.id,
    title,
    content,
  }).returning();

  revalidatePath('/dashboard');
  redirect(`/posts/${post.id}`);
}

/**
 * Updates the title/content of a post. The WHERE clause also matches on
 * `authorId`, so a user can only modify their own posts.
 *
 * @throws Error when no post with this id belongs to the signed-in user.
 */
export async function updatePost(id: string, formData: FormData) {
  const user = await requireAuth();
  const { title, content } = readPostFields(formData);

  const [updated] = await db.update(posts)
    .set({ title, content })
    .where(and(eq(posts.id, id), eq(posts.authorId, user.id)))
    .returning({ id: posts.id });

  if (!updated) {
    throw new Error('Post not found');
  }

  revalidatePath(`/posts/${id}`);
}

/**
 * Deletes a post owned by the signed-in user, then revalidates and redirects
 * to the dashboard.
 *
 * @throws Error when no post with this id belongs to the signed-in user.
 */
export async function deletePost(id: string) {
  const user = await requireAuth();

  const [deleted] = await db.delete(posts)
    .where(and(eq(posts.id, id), eq(posts.authorId, user.id)))
    .returning({ id: posts.id });

  if (!deleted) {
    throw new Error('Post not found');
  }

  revalidatePath('/dashboard');
  redirect('/dashboard');
}
```

---

## Data Fetching

### src/db/queries/posts.ts
```typescript
import { db } from '@/db';
import { posts, profiles } from '@/db/schema';
import { eq, desc, and } from 'drizzle-orm';

/**
 * Returns up to `limit` published posts, newest first, each with its
 * author's profile name.
 */
export async function getPublishedPosts(limit = 10) {
  const columns = {
    id: posts.id,
    title: posts.title,
    content: posts.content,
    author: profiles.name,
    createdAt: posts.createdAt,
  };
  const authorMatches = eq(posts.authorId, profiles.id);
  const isPublished = eq(posts.published, true);

  return db
    .select(columns)
    .from(posts)
    .innerJoin(profiles, authorMatches)
    .where(isPublished)
    .orderBy(desc(posts.createdAt))
    .limit(limit);
}

/** Returns every post whose author is `userId`, newest first. */
export async function getUserPosts(userId: string) {
  const ownedByUser = eq(posts.authorId, userId);
  const newestFirst = desc(posts.createdAt);

  return db.select().from(posts).where(ownedByUser).orderBy(newestFirst);
}

/** Returns the post with the given id, or `null` when none exists. */
export async function getPostById(id: string) {
  const rows = await db.select().from(posts).where(eq(posts.id, id)).limit(1);

  return rows[0] ?? null;
}
```

### In Server Components
```typescript
// src/app/dashboard/page.tsx
import { requireAuth } from '@/lib/auth';
import { getUserPosts } from '@/db/queries/posts';

/**
 * Dashboard (server component): requires a signed-in user and lists that
 * user's posts.
 */
export default async function DashboardPage() {
  const { id: userId } = await requireAuth();
  const userPosts = await getUserPosts(userId);

  const articles = userPosts.map(({ id, title, content }) => (
    <article key={id}>
      <h2>{title}</h2>
      <p>{content}</p>
    </article>
  ));

  return (
    <div>
      <h1>Your Posts</h1>
      {articles}
    </div>
  );
}
```

---

## Storage

### Upload Component
```typescript
'use client';

import { useState } from 'react';
import { createClient } from '@/lib/supabase/client';

/**
 * File input that uploads the chosen image to the `avatars` bucket at
 * `<userId>/avatar.<ext>`, overwriting any previous file at that path.
 * The input is disabled while an upload is in progress.
 */
export function AvatarUpload({ userId }: { userId: string }) {
  const [uploading, setUploading] = useState(false);

  const handleUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0];
    if (!file) return;

    setUploading(true);
    try {
      const supabase = createClient();

      // A name without a dot has no extension; `split('.').pop()` would then
      // return the whole file name, so fall back to the MIME subtype.
      const nameParts = file.name.split('.');
      const fileExt =
        (nameParts.length > 1 ? nameParts.pop() : file.type.split('/').pop()) ||
        'png';
      const filePath = `${userId}/avatar.${fileExt}`;

      const { error } = await supabase.storage
        .from('avatars')
        .upload(filePath, file, { upsert: true });

      if (error) {
        console.error('Upload error:', error);
      }
    } finally {
      // Always re-enable the input, even if the upload call throws.
      setUploading(false);
    }
  };

  return (
    <input
      type="file"
      accept="image/*"
      onChange={handleUpload}
      disabled={uploading}
    />
  );
}
```

### Get Public URL
```typescript
import { createClient } from '@/lib/supabase/server';

/**
 * Returns the public URL of a user's avatar in the `avatars` bucket.
 *
 * @param userId - Owner of the avatar; used as the folder name.
 * @param fileExt - Extension the avatar was stored with. Defaults to `png`;
 *   pass the real extension when the upload kept the original file's one
 *   (e.g. `jpg`), otherwise the URL points at a file that does not exist.
 */
export async function getAvatarUrl(userId: string, fileExt = 'png') {
  const supabase = await createClient();

  const { data } = supabase.storage
    .from('avatars')
    .getPublicUrl(`${userId}/avatar.${fileExt}`);

  return data.publicUrl;
}
```

---

## Realtime

### Client Component with Subscription
```typescript
'use client';

import { useEffect, useState } from 'react';
import { createClient } from '@/lib/supabase/client';
import { Post } from '@/db/schema';

// Shape of a `posts` row as delivered in a realtime payload: raw database
// column names (snake_case), with timestamps as strings.
type PostRow = {
  id: string;
  author_id: string;
  title: string;
  content: string | null;
  published: boolean | null;
  created_at: string;
};

/**
 * Converts a raw realtime row into the camelCase `Post` shape used by the
 * Drizzle schema. Casting the payload directly to `Post` would leave
 * `authorId` and `createdAt` undefined.
 */
function toPost(row: PostRow): Post {
  // NOTE(review): `created_at` is a timestamp without time zone; a missing
  // offset is treated as UTC here — confirm this matches how Drizzle reads it.
  const hasOffset = /(?:[zZ]|[+-]\d{2}(?::?\d{2})?)$/.test(row.created_at);
  return {
    id: row.id,
    authorId: row.author_id,
    title: row.title,
    content: row.content,
    published: row.published,
    createdAt: new Date(hasOffset ? row.created_at : `${row.created_at}Z`),
  };
}

/**
 * Renders a list of post titles and keeps it in sync with INSERT, UPDATE and
 * DELETE events on `public.posts` for as long as the component is mounted.
 */
export function RealtimePosts({ initialPosts }: { initialPosts: Post[] }) {
  const [posts, setPosts] = useState(initialPosts);

  useEffect(() => {
    const supabase = createClient();

    const channel = supabase
      .channel('posts')
      .on(
        'postgres_changes',
        { event: '*', schema: 'public', table: 'posts' },
        (payload) => {
          if (payload.eventType === 'INSERT') {
            const inserted = toPost(payload.new as PostRow);
            setPosts((prev) => [inserted, ...prev]);
          } else if (payload.eventType === 'DELETE') {
            setPosts((prev) => prev.filter((p) => p.id !== payload.old.id));
          } else if (payload.eventType === 'UPDATE') {
            const updated = toPost(payload.new as PostRow);
            setPosts((prev) =>
              prev.map((p) => (p.id === updated.id ? updated : p))
            );
          }
        }
      )
      .subscribe();

    // Unsubscribe when the component unmounts.
    return () => {
      supabase.removeChannel(channel);
    };
  }, []);

  return (
    <ul>
      {posts.map((post) => (
        <li key={post.id}>{post.title}</li>
      ))}
    </ul>
  );
}
```

---

## OAuth Providers

### src/components/auth/oauth-buttons.tsx
```typescript
'use client';

import { createClient } from '@/lib/supabase/client';

/**
 * Buttons that start an OAuth sign-in with Google or GitHub, sending the
 * provider back to `/auth/callback` on the current origin.
 */
export function OAuthButtons() {
  const handleOAuth = async (provider: 'google' | 'github') => {
    const supabase = createClient();
    const redirectTo = `${window.location.origin}/auth/callback`;

    await supabase.auth.signInWithOAuth({ provider, options: { redirectTo } });
  };

  const signInWithGoogle = () => handleOAuth('google');
  const signInWithGitHub = () => handleOAuth('github');

  return (
    <div className="space-y-2">
      <button onClick={signInWithGoogle}>
        Continue with Google
      </button>
      <button onClick={signInWithGitHub}>
        Continue with GitHub
      </button>
    </div>
  );
}
```

---

## Sign Out

### Server Action
```typescript
// src/app/actions/auth.ts
'use server';

import { redirect } from 'next/navigation';
import { createClient } from '@/lib/supabase/server';

/** Server action: signs the current user out, then redirects to `/login`. */
export async function signOut() {
  const { auth } = await createClient();
  await auth.signOut();
  redirect('/login');
}
```

### Sign Out Button
```typescript
'use client';

import { signOut } from '@/app/actions/auth';

/**
 * Sign-out button. Submitting the form invokes the `signOut` server action
 * passed as the form's `action`.
 */
export function SignOutButton() {
  return (
    <form action={signOut}>
      <button type="submit">Sign Out</button>
    </form>
  );
}
```

---

## Anti-Patterns

- **Using Supabase client for DB queries** - Use Drizzle for type-safety
- **Fetching in client components** - Prefer server components
- **Not using middleware for auth** - Session refresh is critical
- **Calling `cookies()` synchronously** - Must await in Next.js 15+
- **Service key in client** - Never expose, server-only
- **Missing revalidatePath** - Always revalidate after mutations
- **Not handling auth errors** - Show user-friendly messages


================================================
FILE: skills/supabase-node/SKILL.md
================================================
---
name: supabase-node
description: Express/Hono with Supabase and Drizzle ORM
when-to-use: When building a Node.js backend with Supabase
user-invocable: false
paths: ["src/api/**", "src/routes/**", "supabase/**"]
effort: medium
---

# Supabase + Node.js Skill


Express/Hono patterns with Supabase Auth and Drizzle ORM.

**Sources:** [Supabase JS Client](https://supabase.com/docs/reference/javascript/introduction) | [Drizzle ORM](https://orm.drizzle.team/)

---

## Core Principle

**Drizzle for queries, Supabase for auth/storage, middleware for validation.**

Use Drizzle ORM for type-safe database access. Use Supabase client for auth verification, storage, and realtime. Express or Hono for the API layer.

---

## Project Structure

```
project/
├── src/
│   ├── routes/
│   │   ├── index.ts             # Route aggregator
│   │   ├── auth.ts
│   │   ├── posts.ts
│   │   └── users.ts
│   ├── middleware/
│   │   ├── auth.ts              # JWT validation
│   │   ├── error.ts             # Error handler
│   │   └── validate.ts          # Request validation
│   ├── db/
│   │   ├── index.ts             # Drizzle client
│   │   ├── schema.ts            # Schema definitions
│   │   └── queries/             # Query functions
│   ├── lib/
│   │   ├── supabase.ts          # Supabase client
│   │   └── config.ts            # Environment config
│   ├── types/
│   │   └── express.d.ts         # Express type extensions
│   └── index.ts                 # App entry point
├── supabase/
│   ├── migrations/
│   └── config.toml
├── drizzle.config.ts
├── package.json
├── tsconfig.json
└── .env
```

---

## Setup

### Install Dependencies
```bash
npm install express cors helmet dotenv @supabase/supabase-js drizzle-orm postgres zod
npm install -D typescript @types/express @types/cors @types/node tsx drizzle-kit
```

### package.json Scripts
```json
{
  "scripts": {
    "dev": "tsx watch src/index.ts",
    "build": "tsc",
    "start": "node dist/index.js",
    "db:generate": "drizzle-kit generate",
    "db:push": "drizzle-kit push",
    "db:studio": "drizzle-kit studio"
  }
}
```

### Environment Variables
```bash
# .env
PORT=3000
NODE_ENV=development

# Supabase
SUPABASE_URL=http://localhost:54321
SUPABASE_ANON_KEY=<from supabase start>
SUPABASE_SERVICE_ROLE_KEY=<from supabase start>

# Database
DATABASE_URL=postgresql://postgres:postgres@localhost:54322/postgres
```

---

## Configuration

### src/lib/config.ts
```typescript
import { z } from 'zod';
import dotenv from 'dotenv';

dotenv.config();

// Expected environment variables. PORT and NODE_ENV have defaults; the
// Supabase and database settings are required.
const envSchema = z.object({
  // Kept as a string (not coerced to a number).
  PORT: z.string().default('3000'),
  NODE_ENV: z.enum(['development', 'production', 'test']).default('development'),
  SUPABASE_URL: z.string().url(),
  SUPABASE_ANON_KEY: z.string(),
  SUPABASE_SERVICE_ROLE_KEY: z.string(),
  DATABASE_URL: z.string(),
});

// Parsed once at import time; `parse` throws if validation fails, so a
// misconfigured environment stops the process at startup.
export const config = envSchema.parse(process.env);
```

---

## Database Setup

### drizzle.config.ts
```typescript
import { defineConfig } from 'drizzle-kit';
import { config } from './src/lib/config';

// Drizzle Kit configuration: schema location, migration output directory,
// and the database connection taken from the validated app config.
export default defineConfig({
  schema: './src/db/schema.ts',
  // Generated migrations are written next to the Supabase project files.
  out: './supabase/migrations',
  dialect: 'postgresql',
  dbCredentials: {
    url: config.DATABASE_URL,
  },
  // Only the `public` schema is managed here.
  schemaFilter: ['public'],
});
```

### src/db/index.ts
```typescript
import { drizzle } from 'drizzle-orm/postgres-js';
import postgres from 'postgres';
import * as schema from './schema';
import { config } from '../lib/config';

// postgres-js connection created once at module load from the validated config.
const client = postgres(config.DATABASE_URL, {
  prepare: false, // Required for Supabase pooling
});

// Drizzle instance bound to the schema module; import `db` for all queries.
export const db = drizzle(client, { schema });
```

### src/db/schema.ts
```typescript
import {
  pgTable,
  uuid,
  text,
  timestamp,
  boolean,
} from 'drizzle-orm/pg-core';

// One row per user. `id` is supplied by the caller (no default is declared).
export const profiles = pgTable('profiles', {
  id: uuid('id').primaryKey(),
  email: text('email').notNull(),
  name: text('name'),
  avatarUrl: text('avatar_url'),
  createdAt: timestamp('created_at').defaultNow().notNull(),
  // Defaults to now() on insert only; nothing here bumps it on update.
  updatedAt: timestamp('updated_at').defaultNow().notNull(),
});

// Posts authored by a profile.
export const posts = pgTable('posts', {
  id: uuid('id').primaryKey().defaultRandom(),
  authorId: uuid('author_id').references(() => profiles.id).notNull(),
  title: text('title').notNull(),
  content: text('content'),
  // Nullable with a default of false (no notNull()).
  published: boolean('published').default(false),
  createdAt: timestamp('created_at').defaultNow().notNull(),
});

// Type exports
export type Profile = typeof profiles.$inferSelect;
export type NewProfile = typeof profiles.$inferInsert;
export type Post = typeof posts.$inferSelect;
export type NewPost = typeof posts.$inferInsert;
```

---

## Supabase Client

### src/lib/supabase.ts
```typescript
import { createClient, SupabaseClient, User } from '@supabase/supabase-js';
import { config } from './config';

// Session handling options for server-side clients: this process is not a
// browser, so sessions are neither persisted to storage nor auto-refreshed.
const serverAuthOptions = {
  auth: {
    autoRefreshToken: false,
    persistSession: false,
  },
};

// Client with anon key (respects RLS)
// NOTE(review): this single instance is shared by every request, and the auth
// routes call signInWithPassword/signOut on it — confirm whether those calls
// should use a per-request client instead.
export const supabase = createClient(
  config.SUPABASE_URL,
  config.SUPABASE_ANON_KEY,
  serverAuthOptions
);

// Admin client (bypasses RLS). Uses the service role key: never expose it or
// this client to code that runs outside the server.
export const supabaseAdmin = createClient(
  config.SUPABASE_URL,
  config.SUPABASE_SERVICE_ROLE_KEY,
  serverAuthOptions
);

/**
 * Asks Supabase Auth for the user belonging to `token`.
 *
 * @returns The user, or `null` when Supabase reports an error or no user.
 */
export async function verifyToken(token: string): Promise<User | null> {
  const { data, error } = await supabase.auth.getUser(token);

  return error || !data.user ? null : data.user;
}
```

---

## Type Extensions

### src/types/express.d.ts
```typescript
import { User } from '@supabase/supabase-js';

// Augments Express's Request type with the optional `user` that the auth
// middleware attaches after verifying the bearer token.
declare global {
  namespace Express {
    interface Request {
      user?: User;
    }
  }
}

// Makes this file a module so that `declare global` is permitted.
export {};
```

---

## Middleware

### src/middleware/auth.ts
```typescript
import { Request, Response, NextFunction } from 'express';
import { verifyToken } from '../lib/supabase';

/**
 * Express middleware that requires a valid `Authorization: Bearer <token>`
 * header. On success the verified user is stored on `req.user`.
 *
 * Responds 401 when the header is missing/malformed or the token is rejected.
 * Unexpected failures while verifying are forwarded to the error handler
 * rather than left as an unhandled promise rejection.
 */
export async function requireAuth(
  req: Request,
  res: Response,
  next: NextFunction
) {
  const authHeader = req.headers.authorization;

  if (!authHeader?.startsWith('Bearer ')) {
    return res.status(401).json({ error: 'Missing authorization header' });
  }

  // Everything after the "Bearer " prefix; tolerant of extra whitespace.
  const token = authHeader.slice('Bearer '.length).trim();
  if (!token) {
    return res.status(401).json({ error: 'Missing authorization header' });
  }

  try {
    const user = await verifyToken(token);

    if (!user) {
      return res.status(401).json({ error: 'Invalid token' });
    }

    req.user = user;
    next();
  } catch (error) {
    next(error);
  }
}

/**
 * Optional auth: attaches `req.user` when a valid bearer token is present and
 * continues either way. A missing or rejected token leaves `req.user` unset.
 * Unexpected failures while verifying are forwarded to the error handler
 * rather than left as an unhandled promise rejection.
 */
export async function optionalAuth(
  req: Request,
  res: Response,
  next: NextFunction
) {
  const authHeader = req.headers.authorization;

  try {
    if (authHeader?.startsWith('Bearer ')) {
      // Everything after the "Bearer " prefix; tolerant of extra whitespace.
      const token = authHeader.slice('Bearer '.length).trim();
      if (token) {
        req.user = (await verifyToken(token)) ?? undefined;
      }
    }

    next();
  } catch (error) {
    next(error);
  }
}
```

### src/middleware/error.ts
```typescript
import { Request, Response, NextFunction } from 'express';

/**
 * Error carrying the HTTP status code that `errorHandler` should respond
 * with; its `message` is sent to the client as the `error` field.
 */
export class AppError extends Error {
  constructor(
    // HTTP status to use for the response.
    public statusCode: number,
    message: string
  ) {
    super(message);
    this.name = 'AppError';
  }
}

/**
 * Central Express error handler.
 *
 * - `AppError` → responds with its status code and message.
 * - Anything else → responds 500 with a generic message (details are only
 *   logged, never sent to the client).
 *
 * If the response has already started, the error is delegated to Express's
 * default handler, since writing a second response would throw.
 */
export function errorHandler(
  err: Error,
  req: Request,
  res: Response,
  next: NextFunction
) {
  console.error(err);

  if (res.headersSent) {
    return next(err);
  }

  if (err instanceof AppError) {
    return res.status(err.statusCode).json({ error: err.message });
  }

  return res.status(500).json({ error: 'Internal server error' });
}
```

### src/middleware/validate.ts
```typescript
import { Request, Response, NextFunction } from 'express';
import { z, ZodSchema } from 'zod';

/**
 * Builds an Express middleware that validates `req.body` against `schema`.
 *
 * On success `req.body` is replaced by the parsed value. On a validation
 * failure the middleware responds 400 with the list of Zod issues; any other
 * error is forwarded to the error handler.
 */
export function validate<T extends ZodSchema>(schema: T) {
  return (req: Request, res: Response, next: NextFunction) => {
    try {
      req.body = schema.parse(req.body);
      next();
    } catch (error) {
      if (error instanceof z.ZodError) {
        return res.status(400).json({
          error: 'Validation failed',
          // `issues` exists in both Zod 3 and Zod 4; the `errors` alias was
          // removed in Zod 4 and would serialize as undefined there.
          details: error.issues,
        });
      }
      next(error);
    }
  };
}
```

---

## Routes

### src/routes/auth.ts
```typescript
import { Router } from 'express';
import { z } from 'zod';
import { supabase } from '../lib/supabase';
import { validate } from '../middleware/validate';

const router = Router();

// Sign-up body: a valid email and a password of at least 8 characters.
const signUpSchema = z.object({
  email: z.string().email(),
  password: z.string().min(8),
});

// Sign-in body: no minimum password length is enforced here; the credentials
// are checked by Supabase.
const signInSchema = z.object({
  email: z.string().email(),
  password: z.string(),
});

// POST /signup — registers a user with Supabase Auth.
// 201 with { user, session } on success, 400 with Supabase's message on failure.
router.post('/signup', validate(signUpSchema), async (req, res, next) => {
  try {
    const credentials = { email: req.body.email, password: req.body.password };
    const result = await supabase.auth.signUp(credentials);

    if (result.error) {
      return res.status(400).json({ error: result.error.message });
    }

    const { user, session } = result.data;
    return res.status(201).json({ user, session });
  } catch (error) {
    next(error);
  }
});

// POST /signin — password sign-in through Supabase Auth.
// Responds with { user, session }; any Supabase error becomes a generic 401.
router.post('/signin', validate(signInSchema), async (req, res, next) => {
  try {
    const credentials = { email: req.body.email, password: req.body.password };
    const result = await supabase.auth.signInWithPassword(credentials);

    if (result.error) {
      return res.status(401).json({ error: 'Invalid credentials' });
    }

    const { user, session } = result.data;
    return res.json({ user, session });
  } catch (error) {
    next(error);
  }
});

// POST /signout — calls signOut on the Supabase client.
// Failures are reported instead of being ignored, and thrown errors are
// forwarded to the error handler like in the other routes.
// NOTE(review): this acts on the shared module-level client, not on the
// caller's token — confirm that this revokes the intended session.
router.post('/signout', async (req, res, next) => {
  try {
    const { error } = await supabase.auth.signOut();

    if (error) {
      return res.status(400).json({ error: error.message });
    }

    return res.json({ message: 'Signed out' });
  } catch (error) {
    next(error);
  }
});

// Refresh body: a non-empty refresh token is required.
const refreshSchema = z.object({
  refresh_token: z.string().min(1),
});

// POST /refresh — exchanges a refresh token for a new session.
// The body is validated first, so a missing token yields a 400 instead of
// being passed to Supabase as `undefined`.
router.post('/refresh', validate(refreshSchema), async (req, res, next) => {
  try {
    const { refresh_token } = req.body;

    const { data, error } = await supabase.auth.refreshSession({
      refresh_token,
    });

    if (error) {
      return res.status(401).json({ error: 'Invalid refresh token' });
    }

    return res.json({
      session: data.session,
    });
  } catch (error) {
    next(error);
  }
});

export default router;
```

### src/routes/posts.ts
```typescript
import { Router } from 'express';
import { z } from 'zod';
import { eq, desc } from 'drizzle-orm';
import { db } from '../db';
import { posts, Post } from '../db/schema';
import { requireAuth, optionalAuth } from '../middleware/auth';
import { validate } from '../middleware/validate';
import { AppError } from '../middleware/error';

const router = Router();

// Fields shared by the create and update bodies.
const postFields = {
  title: z.string().min(1).max(200),
  content: z.string().optional(),
};

// Create body: `published` defaults to false when omitted.
const createPostSchema = z.object({
  ...postFields,
  published: z.boolean().default(false),
});

// Update body: every field optional and, deliberately, no default on
// `published` — a default inside an optional field may be applied when the
// key is absent, which would unpublish a post on an unrelated edit.
const updatePostSchema = z
  .object({
    ...postFields,
    published: z.boolean(),
  })
  .partial();

// GET / — every published post, newest first.
router.get('/', optionalAuth, async (req, res, next) => {
  try {
    const isPublished = eq(posts.published, true);
    const publishedPosts = await db
      .select()
      .from(posts)
      .where(isPublished)
      .orderBy(desc(posts.createdAt));

    return res.json(publishedPosts);
  } catch (error) {
    next(error);
  }
});

// GET /me — all posts written by the authenticated user, newest first.
router.get('/me', requireAuth, async (req, res, next) => {
  try {
    const ownedByCaller = eq(posts.authorId, req.user!.id);
    const ownPosts = await db
      .select()
      .from(posts)
      .where(ownedByCaller)
      .orderBy(desc(posts.createdAt));

    return res.json(ownPosts);
  } catch (error) {
    next(error);
  }
});

// GET /:id — a single post.
// Published posts are visible to everyone; an unpublished post is only
// returned to its author. Everyone else gets the same 404 as for a missing
// post, so drafts are not revealed.
router.get('/:id', optionalAuth, async (req, res, next) => {
  try {
    const [post] = await db
      .select()
      .from(posts)
      .where(eq(posts.id, req.params.id))
      .limit(1);

    const isVisible =
      !!post && (post.published === true || post.authorId === req.user?.id);

    if (!isVisible) {
      throw new AppError(404, 'Post not found');
    }

    return res.json(post);
  } catch (error) {
    next(error);
  }
});

// POST / — creates a post owned by the authenticated user and returns it (201).
router.post('/', requireAuth, validate(createPostSchema), async (req, res, next) => {
  try {
    // `authorId` is written last so it always comes from the verified user.
    const values = { ...req.body, authorId: req.user!.id };
    const [created] = await db.insert(posts).values(values).returning();

    return res.status(201).json(created);
  } catch (error) {
    next(error);
  }
});

// PATCH /:id — updates a post. Only the author may update it.
// 400 when the body contains no fields, 404 when the post does not exist,
// 403 when it belongs to someone else.
router.patch('/:id', requireAuth, validate(updatePostSchema), async (req, res, next) => {
  try {
    if (Object.keys(req.body).length === 0) {
      throw new AppError(400, 'No fields to update');
    }

    // Look up the owner first so another user's post is never modified.
    const [existing] = await db
      .select({ authorId: posts.authorId })
      .from(posts)
      .where(eq(posts.id, req.params.id))
      .limit(1);

    if (!existing) {
      throw new AppError(404, 'Post not found');
    }
    if (existing.authorId !== req.user!.id) {
      throw new AppError(403, 'Forbidden');
    }

    const [post] = await db
      .update(posts)
      .set(req.body)
      .where(eq(posts.id, req.params.id))
      .returning();

    // The post may have been deleted between the lookup and the update.
    if (!post) {
      throw new AppError(404, 'Post not found');
    }

    return res.json(post);
  } catch (error) {
    next(error);
  }
});

// DELETE /:id — deletes a post. Only the author may delete it.
// 404 when the post does not exist, 403 when it belongs to someone else,
// 204 on success.
router.delete('/:id', requireAuth, async (req, res, next) => {
  try {
    // Look up the owner first so another user's post is never deleted.
    const [existing] = await db
      .select({ authorId: posts.authorId })
      .from(posts)
      .where(eq(posts.id, req.params.id))
      .limit(1);

    if (!existing) {
      throw new AppError(404, 'Post not found');
    }
    if (existing.authorId !== req.user!.id) {
      throw new AppError(403, 'Forbidden');
    }

    await db.delete(posts).where(eq(posts.id, req.params.id));

    return res.status(204).send();
  } catch (error) {
    next(error);
  }
});

export default router;
```

### src/routes/index.ts
```typescript
import { Router } from 'express';
import authRoutes from './auth';
import postRoutes from './posts';

// Route aggregator: mounts each feature router under its own prefix.
const router = Router();

router.use('/auth', authRoutes);
router.use('/posts', postRoutes);

export default router;
```

---

## Main Application

### src/index.ts
```typescript
import express from 'express';
import cors from 'cors';
import helmet from 'helmet';
import routes from './routes';
import { errorHandler } from './middleware/error';
import { config } from './lib/config';

const app = express();

// Security middleware
app.use(helmet());
app.use(cors());
app.use(express.json());

// Health check
app.get('/health', (req, res) => {
  res.json({ status: 'healthy' });
});

// API routes
app.use('/api', routes);

// Error handler (must be last)
app.use(errorHandler);

// Do not bind a port when NODE_ENV is 'test': the test suite imports `app`
// and drives it directly, so listening here would open a real socket on
// every test run.
if (config.NODE_ENV !== 'test') {
  app.listen(config.PORT, () => {
    console.log(`Server running on port ${config.PORT}`);
  });
}

export default app;
```

---

## Query Functions

### src/db/queries/posts.ts
```typescript
import { db } from '../index';
import { posts, profiles } from '../schema';
import { eq, desc, and } from 'drizzle-orm';

/**
 * Returns up to `limit` published posts, newest first, each with its
 * author's profile name.
 */
export async function getPublishedPosts(limit = 10) {
  const selection = {
    id: posts.id,
    title: posts.title,
    content: posts.content,
    author: profiles.name,
    createdAt: posts.createdAt,
  };
  const joinOnAuthor = eq(posts.authorId, profiles.id);
  const onlyPublished = eq(posts.published, true);

  return db
    .select(selection)
    .from(posts)
    .innerJoin(profiles, joinOnAuthor)
    .where(onlyPublished)
    .orderBy(desc(posts.createdAt))
    .limit(limit);
}

/** Returns every post whose author is `userId`, newest first. */
export async function getUserPosts(userId: string) {
  const byAuthor = eq(posts.authorId, userId);

  return db.select().from(posts).where(byAuthor).orderBy(desc(posts.createdAt));
}

/** Returns the post with the given id, or `null` when none exists. */
export async function getPostById(id: string) {
  const matches = await db
    .select()
    .from(posts)
    .where(eq(posts.id, id))
    .limit(1);

  return matches[0] ?? null;
}

/** Inserts a post and returns the stored row. */
export async function createPost(data: {
  title: string;
  content?: string;
  authorId: string;
  published?: boolean;
}) {
  const inserted = await db.insert(posts).values(data).returning();
  return inserted[0];
}
```

---

## Storage

### Upload Endpoint
```typescript
import multer from 'multer';
import { supabase } from '../lib/supabase';

// Uploads are buffered in memory, so cap their size and count.
const MAX_AVATAR_BYTES = 5 * 1024 * 1024;

// Accepted image types and the extension each is stored under. The extension
// comes from this table rather than from the client-supplied file name.
const AVATAR_EXTENSIONS: Record<string, string> = {
  'image/png': 'png',
  'image/jpeg': 'jpg',
  'image/webp': 'webp',
  'image/gif': 'gif',
};

const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_AVATAR_BYTES, files: 1 },
});

// POST /avatar — stores the uploaded image at `<userId>/avatar.<ext>` in the
// `avatars` bucket (overwriting any existing file) and returns its public URL.
// 400 when no file is sent or its type is not in the allow-list.
router.post(
  '/avatar',
  requireAuth,
  upload.single('file'),
  async (req, res, next) => {
    try {
      if (!req.file) {
        throw new AppError(400, 'No file uploaded');
      }

      const fileExt = AVATAR_EXTENSIONS[req.file.mimetype];
      if (!fileExt) {
        throw new AppError(400, 'Unsupported file type');
      }

      const filePath = `${req.user!.id}/avatar.${fileExt}`;

      const { error } = await supabase.storage
        .from('avatars')
        .upload(filePath, req.file.buffer, {
          contentType: req.file.mimetype,
          upsert: true,
        });

      if (error) {
        throw new AppError(500, 'Upload failed');
      }

      const { data } = supabase.storage
        .from('avatars')
        .getPublicUrl(filePath);

      return res.json({ url: data.publicUrl });
    } catch (error) {
      next(error);
    }
  }
);
```

---

## Hono Alternative

For edge deployments, or when you want a lighter-weight framework than Express (requires `npm install hono`):

### src/index.ts (Hono)
```typescript
import { Hono } from 'hono';
import { cors } from 'hono/cors';
import { jwt } from 'hono/jwt';
import type { User } from '@supabase/supabase-js';
import { z } from 'zod';
import { db } from './db';
import { posts } from './db/schema';
import { eq, desc } from 'drizzle-orm';
import { config } from './lib/config';
import { verifyToken } from './lib/supabase';

// `user` is set by the auth middleware below for every /api/* request.
const app = new Hono<{ Variables: { user: User } }>();

// Body accepted by POST /api/posts. `authorId` is never read from the body.
const createPostSchema = z.object({
  title: z.string().min(1).max(200),
  content: z.string().optional(),
  published: z.boolean().default(false),
});

app.use('/*', cors());

// Public routes
app.get('/posts', async (c) => {
  const result = await db
    .select()
    .from(posts)
    .where(eq(posts.published, true))
    .orderBy(desc(posts.createdAt));

  return c.json(result);
});

// Protected routes: require a bearer token that Supabase accepts, and expose
// the verified user to the handlers.
app.use('/api/*', async (c, next) => {
  const auth = c.req.header('Authorization');
  if (!auth?.startsWith('Bearer ')) {
    return c.json({ error: 'Unauthorized' }, 401);
  }

  const token = auth.slice('Bearer '.length).trim();
  const user = token ? await verifyToken(token) : null;
  if (!user) {
    return c.json({ error: 'Unauthorized' }, 401);
  }

  c.set('user', user);
  await next();
});

// Creates a post owned by the authenticated user. The body is validated and
// the author always comes from the verified token, never from the request.
app.post('/api/posts', async (c) => {
  // A body that is not valid JSON is treated like any other invalid input.
  const body = await c.req.json().catch(() => null);
  const parsed = createPostSchema.safeParse(body);
  if (!parsed.success) {
    return c.json(
      { error: 'Validation failed', details: parsed.error.issues },
      400
    );
  }

  const [post] = await db
    .insert(posts)
    .values({ ...parsed.data, authorId: c.get('user').id })
    .returning();

  return c.json(post, 201);
});

export default app;
```

---

## Testing

### tests/setup.ts
```typescript
import { beforeAll, afterAll, beforeEach } from 'vitest';
import { db } from '../src/db';
import { posts, profiles } from '../src/db/schema';

// Runs once before the suite; placeholder for preparing the test database.
beforeAll(async () => {
  // Setup test database
});

// Runs before every test so each one starts from empty tables.
beforeEach(async () => {
  // Clean tables
  // posts first: posts.author_id references profiles.id.
  await db.delete(posts);
  await db.delete(profiles);
});

// Runs once after the suite; placeholder for teardown.
afterAll(async () => {
  // Cleanup
});
```

### tests/posts.test.ts
```typescript
import { describe, it, expect } from 'vitest';
import request from 'supertest';
import app from '../src/index';

// Smoke tests for the posts endpoints, driven through supertest against the
// app exported from src/index.
describe('Posts API', () => {
  // GET /api/posts must answer 200 with a JSON array body.
  it('should list published posts', async () => {
    const res = await request(app)
      .get('/api/posts')
      .expect(200);

    expect(Array.isArray(res.body)).toBe(true);
  });

  // POST /api/posts without an Authorization header must be rejected with 401.
  it('should require auth to create post', async () => {
    await request(app)
      .post('/api/posts')
      .send({ title: 'Test' })
      .expect(401);
  });
});
```

---

## Anti-Patterns

- **Using Supabase client for DB queries** - Use Drizzle
- **Sync JWT validation** - Keep it async
- **No input validation** - Use Zod middleware
- **Missing error handling** - Use centralized error handler
- **Hardcoded secrets** - Use environment variables
- **No request logging** - Add morgan or pino
- **Blocking the event loop** - Use async throughout
- **Service key in responses** - Never expose


================================================
FILE: skills/supabase-python/SKILL.md
================================================
---
name: supabase-python
description: FastAPI with Supabase and SQLAlchemy/SQLModel
when-to-use: When building a Python/FastAPI app with Supabase backend
user-invocable: false
paths: ["**/*.py", "supabase/**"]
effort: medium
---

# Supabase + Python Skill


FastAPI patterns with Supabase Auth and SQLAlchemy/SQLModel for database access.

**Sources:** [Supabase Python Client](https://supabase.com/docs/reference/python/introduction) | [SQLModel](https://sqlmodel.tiangolo.com/)

---

## Core Principle

**SQLAlchemy/SQLModel for queries, Supabase for auth/storage.**

Use SQLAlchemy or SQLModel for type-safe database access. Use supabase-py for auth, storage, and realtime. FastAPI for the API layer.

---

## Project Structure

```
project/
├── src/
│   ├── api/
│   │   ├── __init__.py
│   │   ├── routes/
│   │   │   ├── __init__.py
│   │   │   ├── auth.py
│   │   │   ├── posts.py
│   │   │   └── users.py
│   │   └── deps.py              # Dependencies (auth, db)
│   ├── core/
│   │   ├── __init__.py
│   │   ├── config.py            # Settings
│   │   └── security.py          # Auth helpers
│   ├── db/
│   │   ├── __init__.py
│   │   ├── session.py           # Database session
│   │   └── models.py            # SQLModel models
│   ├── services/
│   │   ├── __init__.py
│   │   └── supabase.py          # Supabase client
│   └── main.py                  # FastAPI app
├── supabase/
│   ├── migrations/
│   └── config.toml
├── alembic/                     # Alembic migrations (alternative)
├── alembic.ini
├── pyproject.toml
└── .env
```

---

## Setup

### Install Dependencies
```bash
pip install fastapi uvicorn supabase python-dotenv sqlmodel asyncpg alembic pydantic-settings email-validator
```

### pyproject.toml
```toml
[project]
name = "my-app"
version = "0.1.0"
dependencies = [
    "fastapi>=0.109.0",
    "uvicorn[standard]>=0.27.0",
    "supabase>=2.0.0",
    "python-dotenv>=1.0.0",
    "sqlmodel>=0.0.14",
    "asyncpg>=0.29.0",
    "alembic>=1.13.0",
    "pydantic-settings>=2.0.0",
    # Required by pydantic's EmailStr, which src/api/routes/auth.py uses.
    "email-validator>=2.0.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.23.0",
    "httpx>=0.26.0",
]

[tool.pytest.ini_options]
# tests/conftest.py declares async fixtures with plain @pytest.fixture;
# pytest-asyncio only drives those in auto mode (its default is strict).
asyncio_mode = "auto"
```

### Environment Variables
```bash
# .env
SUPABASE_URL=http://localhost:54321
SUPABASE_ANON_KEY=<from supabase start>
SUPABASE_SERVICE_ROLE_KEY=<from supabase start>
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:54322/postgres
```

---

## Configuration

### src/core/config.py
```python
from pydantic_settings import BaseSettings, SettingsConfigDict
from functools import lru_cache


class Settings(BaseSettings):
    """Application settings read from the environment and a local `.env` file.

    Fields without a default (the Supabase values and `database_url`) are
    required; instantiation fails when one of them is missing.
    """

    # pydantic v2 style configuration; the nested `class Config` form is
    # deprecated. `extra="ignore"` keeps unrelated keys in `.env` from
    # failing validation.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Supabase
    supabase_url: str
    supabase_anon_key: str
    supabase_service_role_key: str

    # Database
    database_url: str

    # App
    debug: bool = False


@lru_cache
def get_settings() -> Settings:
    """Return the Settings instance, built once and cached by `lru_cache`."""
    return Settings()
```

---

## Database Setup

### src/db/session.py
```python
from collections.abc import AsyncGenerator

from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from src.core.config import get_settings

settings = get_settings()

# One engine (and therefore one connection pool) per process.
engine = create_async_engine(
    settings.database_url,
    echo=settings.debug,
    pool_pre_ping=True,
)

# async_sessionmaker is the asyncio-aware factory; it produces AsyncSession
# objects without the `class_=AsyncSession` override that the synchronous
# sessionmaker needs.
AsyncSessionLocal = async_sessionmaker(
    engine,
    expire_on_commit=False,
)


async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency that yields one AsyncSession per request.

    The `async with` block closes the session when the request finishes,
    whether the handler returned normally or raised, so no explicit
    `close()` call is needed.

    Yields:
        An open AsyncSession bound to the module-level engine.
    """
    async with AsyncSessionLocal() as session:
        yield session

### src/db/models.py
```python
from datetime import datetime
from typing import Optional
from uuid import UUID, uuid4
from sqlmodel import SQLModel, Field


# Fields shared by every profile schema (table model, create and read payloads).
class ProfileBase(SQLModel):
    email: str
    name: Optional[str] = None
    avatar_url: Optional[str] = None


# Table model mapped to the "profiles" table.
class Profile(ProfileBase, table=True):
    __tablename__ = "profiles"

    id: UUID = Field(primary_key=True)  # References auth.users
    # Both timestamps are filled in Python when the object is instantiated,
    # using a naive UTC datetime. Nothing in this model refreshes updated_at
    # on later writes.
    # NOTE(review): datetime.utcnow is deprecated as of Python 3.12.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)


# Payload for creating a profile: the base fields plus a caller-supplied id.
class ProfileCreate(ProfileBase):
    id: UUID


# Shape returned to API clients: the base fields plus id and created_at.
class ProfileRead(ProfileBase):
    id: UUID
    created_at: datetime


# Fields shared by every post schema; posts are unpublished unless stated.
class PostBase(SQLModel):
    title: str
    content: Optional[str] = None
    published: bool = False


# Table model mapped to the "posts" table.
class Post(PostBase, table=True):
    __tablename__ = "posts"

    # A random UUID is generated in Python when no id is supplied.
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Foreign key to profiles.id.
    author_id: UUID = Field(foreign_key="profiles.id")
    # Naive UTC timestamp taken at instantiation.
    # NOTE(review): datetime.utcnow is deprecated as of Python 3.12.
    created_at: datetime = Field(default_factory=datetime.utcnow)


# Request body for creating a post; adds nothing to the base fields, so
# author_id cannot be supplied by the client through this schema.
class PostCreate(PostBase):
    pass


# Shape returned to API clients.
class PostRead(PostBase):
    id: UUID
    author_id: UUID
    created_at: datetime
```

---

## Supabase Client

### src/services/supabase.py
```python
from supabase import create_client, Client
from src.core.config import get_settings

settings = get_settings()


def get_supabase_client() -> Client:
    """Build a Supabase client that uses the anon key (respects RLS).

    `create_client` is invoked on every call; nothing is cached here.
    """
    url = settings.supabase_url
    anon_key = settings.supabase_anon_key
    return create_client(url, anon_key)


def get_supabase_admin() -> Client:
    """Build a Supabase client that uses the service role key (bypasses RLS).

    `create_client` is invoked on every call; nothing is cached here.
    """
    url = settings.supabase_url
    service_role_key = settings.supabase_service_role_key
    return create_client(url, service_role_key)
```

---

## Auth Dependencies

### src/api/deps.py
```python
from typing import Annotated, Any
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from sqlalchemy.ext.asyncio import AsyncSession
from supabase import Client

from src.db.session import get_db
from src.services.supabase import get_supabase_client

security = HTTPBearer()


async def get_current_user(
    credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)],
) -> Any:
    """Validate the bearer token with Supabase and return the user object.

    Args:
        credentials: Bearer credentials extracted by the `HTTPBearer` scheme.

    Returns:
        The `user` attribute of Supabase's `get_user` response. It is an
        object read by attribute (route handlers use `user.id`), not a dict,
        which is why the annotation is `Any` rather than `dict`.

    Raises:
        HTTPException: 401 when Supabase rejects the token, the call fails,
            or the response carries no user.
    """
    unauthorized = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid token",
        headers={"WWW-Authenticate": "Bearer"},
    )
    supabase = get_supabase_client()

    try:
        # Verify token with Supabase
        # NOTE(review): this is a synchronous call inside an async dependency.
        user = supabase.auth.get_user(credentials.credentials)
    except Exception as exc:
        # Any failure while verifying is reported as an invalid token; the
        # original error stays attached as the cause for server-side logs.
        raise unauthorized from exc

    # Checked outside the try block so this 401 is raised directly instead of
    # being caught and re-raised by the handler above.
    if not user or not user.user:
        raise unauthorized
    return user.user


# Type alias for dependency injection
CurrentUser = Annotated[Any, Depends(get_current_user)]
DbSession = Annotated[AsyncSession, Depends(get_db)]
```

---

## API Routes

### src/api/routes/auth.py
```python
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, EmailStr

from src.services.supabase import get_supabase_client

router = APIRouter(prefix="/auth", tags=["auth"])


# Request body for POST /auth/signup.
class SignUpRequest(BaseModel):
    email: EmailStr
    password: str


# Request body for POST /auth/signin.
class SignInRequest(BaseModel):
    email: EmailStr
    password: str


# Response body shared by signup and signin: the Supabase session tokens plus
# the user's id as a string.
class AuthResponse(BaseModel):
    access_token: str
    refresh_token: str
    user_id: str


@router.post("/signup", response_model=AuthResponse)
async def sign_up(request: SignUpRequest):
    # Register a new user with Supabase Auth and return the session tokens.
    #
    # Responds with 400 when Supabase rejects the signup, returns no user, or
    # returns a user without a session.
    supabase = get_supabase_client()

    try:
        response = supabase.auth.sign_up({
            "email": request.email,
            "password": request.password,
        })
    except Exception as e:
        # Pass Supabase's own error message back to the caller.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e

    # The checks below sit outside the try block so that their HTTPExceptions
    # are raised as written instead of being caught and re-wrapped above.
    if response.user is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Signup failed",
        )

    # Supabase returns a user but no session when the project requires email
    # confirmation; report that explicitly instead of failing on None.
    if response.session is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Signup succeeded; confirm your email before signing in",
        )

    return AuthResponse(
        access_token=response.session.access_token,
        refresh_token=response.session.refresh_token,
        user_id=str(response.user.id),
    )


@router.post("/signin", response_model=AuthResponse)
async def sign_in(request: SignInRequest):
    # Exchange email + password for Supabase session tokens. Every failure is
    # reported to the caller as the same generic 401.
    supabase = get_supabase_client()
    login_payload = {
        "email": request.email,
        "password": request.password,
    }

    try:
        auth_result = supabase.auth.sign_in_with_password(login_payload)
        session = auth_result.session
        return AuthResponse(
            access_token=session.access_token,
            refresh_token=session.refresh_token,
            user_id=str(auth_result.user.id),
        )
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid credentials",
        )


@router.post("/signout")
async def sign_out():
    # Calls sign_out() on a client freshly built by get_supabase_client().
    # NOTE(review): this handler never receives the caller's access token, so
    # the new client presumably holds no session to end — confirm whether this
    # invalidates anything server-side.
    supabase = get_supabase_client()
    supabase.auth.sign_out()
    return {"message": "Signed out"}
```

### src/api/routes/posts.py
```python
from uuid import UUID
from fastapi import APIRouter, HTTPException, status
from sqlmodel import select

from src.api.deps import CurrentUser, DbSession
from src.db.models import Post, PostCreate, PostRead

router = APIRouter(prefix="/posts", tags=["posts"])


@router.get("/", response_model=list[PostRead])
async def list_posts(
    db: DbSession,
    published_only: bool = True,
):
    # Newest-first listing. Unpublished posts are filtered out unless the
    # caller passes published_only=false; no authentication is involved.
    statement = select(Post)
    if published_only:
        statement = statement.where(Post.published == True)
    statement = statement.order_by(Post.created_at.desc())

    rows = await db.execute(statement)
    return rows.scalars().all()


@router.get("/me", response_model=list[PostRead])
async def list_my_posts(
    db: DbSession,
    user: CurrentUser,
):
    # All posts authored by the authenticated user, published or not.
    author_id = UUID(user.id)
    statement = select(Post).where(Post.author_id == author_id)
    rows = await db.execute(statement)
    return rows.scalars().all()


@router.post("/", response_model=PostRead, status_code=status.HTTP_201_CREATED)
async def create_post(
    db: DbSession,
    user: CurrentUser,
    post_in: PostCreate,
):
    # Persist a new post owned by the authenticated user and return the row
    # as stored (refreshed after commit).
    fields = post_in.model_dump()
    new_post = Post(**fields, author_id=UUID(user.id))

    db.add(new_post)
    await db.commit()
    await db.refresh(new_post)
    return new_post


@router.get("/{post_id}", response_model=PostRead)
async def get_post(
    db: DbSession,
    post_id: UUID,
):
    # Fetch a single post by id; responds 404 when no row matches. The lookup
    # does not filter on the published flag.
    statement = select(Post).where(Post.id == post_id)
    rows = await db.execute(statement)
    found = rows.scalar_one_or_none()

    if found:
        return found

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Post not found",
    )


@router.delete("/{post_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_post(
    db: DbSession,
    user: CurrentUser,
    post_id: UUID,
):
    # Delete a post, but only when it belongs to the authenticated user. A
    # post owned by someone else is reported as 404, same as a missing one.
    owned_post_query = select(Post).where(
        Post.id == post_id,
        Post.author_id == UUID(user.id),
    )
    rows = await db.execute(owned_post_query)
    target = rows.scalar_one_or_none()

    if not target:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Post not found",
        )

    await db.delete(target)
    await db.commit()
```

---

## Main Application

### src/main.py
```python
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from src.api.routes import auth, posts

app = FastAPI(title="My API")

# CORS
# The API authenticates with "Authorization: Bearer" headers rather than
# cookies, so credentialed cross-origin requests are not needed.
# allow_credentials=True must not be combined with wildcard origins: list the
# allowed origins explicitly before turning credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure for production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Routes
app.include_router(auth.router, prefix="/api")
app.include_router(posts.router, prefix="/api")


@app.get("/health")
async def health_check():
    # Liveness probe: always reports a healthy status.
    payload = {"status": "healthy"}
    return payload
```

---

## Alembic Migrations

### Initialize Alembic
```bash
alembic init alembic
```

### alembic/env.py (key changes)
```python
from src.db.models import SQLModel
from src.core.config import get_settings

settings = get_settings()

# Use async engine
config.set_main_option("sqlalchemy.url", settings.database_url)

target_metadata = SQLModel.metadata


def run_migrations_online():
    """Run Alembic migrations against the database through an async engine.

    Alembic's migration context is synchronous, so the async connection hands
    a sync-style connection to `do_run_migrations_sync` via `run_sync`.
    """
    # For async
    import asyncio
    from sqlalchemy.ext.asyncio import create_async_engine

    connectable = create_async_engine(settings.database_url)

    def do_run_migrations_sync(connection):
        # Configure Alembic with the live connection and run inside one
        # transaction.
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
        )
        with context.begin_transaction():
            context.run_migrations()

    async def do_run_migrations():
        try:
            async with connectable.connect() as connection:
                await connection.run_sync(do_run_migrations_sync)
        finally:
            # Release pooled connections while the event loop is still
            # running; asyncio.run() closes the loop right after this returns.
            await connectable.dispose()

    asyncio.run(do_run_migrations())
```

### Migration Commands
```bash
# Create migration
alembic revision --autogenerate -m "create posts table"

# Apply migrations
alembic upgrade head

# Rollback
alembic downgrade -1
```

---

## Storage

### Upload File
```python
from fastapi import UploadFile
from src.services.supabase import get_supabase_client


async def upload_avatar(user_id: str, file: UploadFile) -> str:
    """Upload a user's avatar to the "avatars" bucket and return its public URL.

    The object is stored at `<user_id>/avatar.<ext>` and overwrites any
    existing object at that path (upsert).

    Args:
        user_id: Used as the folder name inside the bucket.
        file: The uploaded file; its whole content is read into memory.

    Returns:
        The public URL of the stored object.
    """
    import mimetypes
    from pathlib import PurePosixPath

    supabase = get_supabase_client()

    file_content = await file.read()

    # The filename is client-supplied and may be missing, have no suffix, or
    # contain path characters. Accept only a plain alphanumeric suffix
    # (lower-cased so lookups are predictable); otherwise derive one from the
    # content type, falling back to "bin".
    extension = PurePosixPath(file.filename or "").suffix.lstrip(".").lower()
    if not extension.isalnum():
        guessed = mimetypes.guess_extension(file.content_type or "") or ".bin"
        extension = guessed.lstrip(".")
    file_path = f"{user_id}/avatar.{extension}"

    supabase.storage.from_("avatars").upload(
        file_path,
        file_content,
        {"content-type": file.content_type, "upsert": "true"},
    )

    # Get public URL
    return supabase.storage.from_("avatars").get_public_url(file_path)
```

### Download File
```python
def get_avatar_url(user_id: str, extension: str = "png") -> str:
    """Return the public URL of a user's avatar in the "avatars" bucket.

    Args:
        user_id: Folder name the avatar was stored under.
        extension: File extension of the stored avatar, with or without a
            leading dot. Defaults to "png"; pass the real extension when the
            avatar was uploaded in another format.

    Returns:
        The public URL for `<user_id>/avatar.<extension>`. No check is made
        that the object exists.
    """
    supabase = get_supabase_client()
    suffix = extension.lstrip(".").lower()
    return supabase.storage.from_("avatars").get_public_url(f"{user_id}/avatar.{suffix}")
```

---

## Realtime (Async)

```python
import asyncio
from supabase import create_client


async def listen_to_posts():
    """Subscribe to every change on public.posts and print each payload.

    Never returns: after subscribing it sleeps in one-second steps forever.
    """
    # NOTE(review): `settings` is neither defined nor imported in this snippet
    # — presumably it comes from get_settings() in src.core.config.
    # NOTE(review): this uses the synchronous create_client inside an async
    # function; confirm that realtime subscriptions are supported on that
    # client for the supabase-py version in use.
    supabase = create_client(
        settings.supabase_url,
        settings.supabase_anon_key
    )

    def handle_change(payload):
        # Callback invoked with the change payload; here it only prints it.
        print(f"Change received: {payload}")

    channel = supabase.channel("posts")
    # event="*" subscribes to all change events on the table.
    channel.on_postgres_changes(
        event="*",
        schema="public",
        table="posts",
        callback=handle_change,
    ).subscribe()

    # Keep listening
    while True:
        await asyncio.sleep(1)
```

---

## Testing

### tests/conftest.py
```python
import pytest
from httpx import AsyncClient, ASGITransport
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker

from src.main import app
from src.db.session import get_db
from src.db.models import SQLModel

TEST_DATABASE_URL = "postgresql+asyncpg://postgres:postgres@localhost:54322/postgres_test"

engine = create_async_engine(TEST_DATABASE_URL)
TestingSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)


@pytest.fixture(scope="function")
async def db_session():
    """Yield an AsyncSession against a schema created just for this test.

    Every table registered on SQLModel.metadata is created before the test
    and dropped again afterwards.

    NOTE(review): this is an async generator under plain `@pytest.fixture`.
    pytest-asyncio only runs such fixtures in auto mode (`asyncio_mode =
    "auto"`); in its default strict mode it must be declared with
    `@pytest_asyncio.fixture` — confirm the project's configuration.
    """
    # Create all tables inside one transaction.
    async with engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)

    async with TestingSessionLocal() as session:
        yield session

    # Teardown: drop everything so the next test starts from an empty schema.
    async with engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.drop_all)


@pytest.fixture
async def client(db_session):
    """Yield an httpx AsyncClient wired directly to the FastAPI app.

    The app's `get_db` dependency is overridden so every request handled
    during the test uses the `db_session` fixture's session. All dependency
    overrides are cleared on teardown.
    """
    async def override_get_db():
        # Hand the test's session to the app instead of opening a new one.
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    # ASGITransport calls the app in-process; no server or socket is involved.
    async with AsyncClient(
        transport=ASGITransport(app=app),
        base_url="http://test",
    ) as ac:
        yield ac

    app.dependency_overrides.clear()
```

### tests/test_posts.py
```python
import pytest
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_list_posts(client: AsyncClient):
    """The posts listing endpoint answers 200 with a JSON list."""
    resp = await client.get("/api/posts/")
    assert resp.status_code == 200

    body = resp.json()
    assert isinstance(body, list)
```

---

## Running the App

```bash
# Development
uvicorn src.main:app --reload --port 8000

# Production
uvicorn src.main:app --host 0.0.0.0 --port 8000 --workers 4
```

---

## Anti-Patterns

- **Using Supabase client for DB queries** - Use SQLAlchemy/SQLModel
- **Sync database calls** - Use async with asyncpg
- **Hardcoded credentials** - Use environment variables
- **No connection pooling** - Reuse the single SQLAlchemy async engine; it maintains the connection pool
- **Missing auth dependency** - Always validate JWT
- **Not closing sessions** - Use context managers
- **Blocking I/O in async** - Use async libraries


================================================
FILE: skills/team-coordination/SKILL.md
================================================
---
name: team-coordination
description: Multi-person projects - shared state, todo claiming, handoffs
when-to-use: When multiple developers are working on the same repo
user-invocable: false
effort: low
---

# Team Coordination Skill


**Purpose:** Enable multiple Claude Code sessions across a team to coordinate and work together without conflicts. Manages shared state, todo claiming, decision syncing, and session awareness.

---

## Core Philosophy

```
┌─────────────────────────────────────────────────────────────────┐
│  TEAM CLAUDE CODE                                               │
│  ─────────────────────────────────────────────────────────────  │
│  Multiple devs, multiple Claude sessions, one codebase.         │
│  Coordination > Speed. Communication > Assumptions.             │
│                                                                 │
│  Before you start: Check who's working on what.                 │
│  Before you claim: Make sure nobody else has it.                │
│  Before you decide: Check if it's already decided.              │
│  Before you push: Pull and sync state.                          │
└─────────────────────────────────────────────────────────────────┘
```

---

## Team State Structure

When a project becomes multi-person, create this structure:

```
_project_specs/
├── team/
│   ├── state.md              # Who's working on what right now
│   ├── contributors.md       # Team members and their focus areas
│   └── handoffs/             # Notes when passing work to others
│       └── [feature]-handoff.md
├── session/
│   ├── current-state.md      # YOUR session state (personal)
│   ├── decisions.md          # SHARED - architectural decisions
│   └── code-landmarks.md     # SHARED - important code locations
└── todos/
    ├── active.md             # SHARED - with claim annotations
    ├── backlog.md            # SHARED
    └── completed.md          # SHARED
```

---

## Team State File

**`_project_specs/team/state.md`:**

```markdown
# Team State

*Last synced: [timestamp]*

## Active Sessions

| Contributor | Working On | Started | Files Touched | Status |
|-------------|------------|---------|---------------|--------|
| @alice | TODO-042: Add auth | 2024-01-15 10:30 | src/auth/* | 🟢 Active |
| @bob | TODO-038: Fix checkout | 2024-01-15 09:00 | src/cart/* | 🟡 Paused |
| - | - | - | - | - |

## Claimed Todos

| Todo | Claimed By | Since | ETA |
|------|------------|-------|-----|
| TODO-042 | @alice | 2024-01-15 | Today |
| TODO-038 | @bob | 2024-01-14 | Tomorrow |

## Recently Completed (Last 48h)

| Todo | Completed By | When | PR |
|------|--------------|------|-----|
| TODO-037 | @alice | 2024-01-14 | #123 |

## Conflicts to Watch

| Area | Contributors | Notes |
|------|--------------|-------|
| src/auth/* | @alice, @carol | Carol needs auth for TODO-045, coordinate |

## Announcements

- [2024-01-15] @alice: Refactoring auth module, avoid touching until EOD
- [2024-01-14] @bob: New env var required: STRIPE_WEBHOOK_SECRET
```

---

## Contributors File

**`_project_specs/team/contributors.md`:**

```markdown
# Contributors

## Team Members

| Handle | Name | Focus Areas | Timezone | Status |
|--------|------|-------------|----------|--------|
| @alice | Alice Smith | Backend, Auth | EST | Active |
| @bob | Bob Jones | Frontend, Payments | PST | Active |
| @carol | Carol White | DevOps, Infra | GMT | Part-time |

## Ownership

| Area | Primary | Backup | Notes |
|------|---------|--------|-------|
| Authentication | @alice | @bob | All auth changes need @alice review |
| Payments | @bob | @alice | Stripe integration |
| Infrastructure | @carol | @alice | Deploy scripts, CI/CD |
| Database | @alice | @carol | Migrations need sign-off |

## Communication

- Slack: #project-name
- PRs: Always tag area owner for review
- Urgent: DM on Slack

## Working Hours Overlap

~~~
EST:  |████████████████████|
PST:  |   ████████████████████|
GMT:  |████████████|
      6am        12pm       6pm       12am EST

Best overlap: 9am-12pm EST (all three)
~~~
```

---

## Workflow

### Starting a Session

```
┌─────────────────────────────────────────────────────────────────┐
│  START SESSION CHECKLIST                                        │
│  ─────────────────────────────────────────────────────────────  │
│  1. git pull origin main                                        │
│  2. Read _project_specs/team/state.md                           │
│  3. Check claimed todos - don't take what's claimed             │
│  4. Claim your todo in active.md                                │
│  5. Update state.md with your session                           │
│  6. Push state changes before starting work                     │
│  7. Start working                                               │
└─────────────────────────────────────────────────────────────────┘
```

### Claiming a Todo

In `active.md`, add claim annotation:

```markdown
## [TODO-042] Add email validation

**Status:** in-progress
**Claimed:** @alice (2024-01-15 10:30 EST)
**ETA:** Today

...
```

### During Work

- Update `state.md` if you touch new files
- Check `decisions.md` before making architectural choices
- If you make a decision, add it to `decisions.md` immediately
- Push state updates every 1-2 hours (keeps team in sync)

### Ending a Session

```
┌─────────────────────────────────────────────────────────────────┐
│  END SESSION CHECKLIST                                          │
│  ─────────────────────────────────────────────────────────────  │
│  1. Commit your work (even if WIP)                              │
│  2. Update your current-state.md                                │
│  3. Update team state.md (status → Paused or Done)              │
│  4. If passing to someone: create handoff note                  │
│  5. Unclaim todo if abandoning                                  │
│  6. Push everything                                             │
└─────────────────────────────────────────────────────────────────┘
```

### Creating a Handoff

When passing work to another team member, create:

**`_project_specs/team/handoffs/auth-feature-handoff.md`:**

```markdown
# Handoff: Auth Feature (TODO-042)

**From:** @alice
**To:** @bob
**Date:** 2024-01-15

## Status

70% complete. Core auth flow works, need to add:
- [ ] Password reset flow
- [ ] Email verification

## What's Done

- Login/logout working
- JWT tokens implemented
- Session management done

## What's Left

1. Password reset - see src/auth/reset.ts (skeleton exists)
2. Email verification - need to integrate SendGrid

## Key Decisions Made

- Using JWT not sessions (see decisions.md)
- Tokens expire in 7 days
- Refresh tokens stored in httpOnly cookies

## Watch Out For

- The `validateToken` function has a weird edge case with expired tokens
- Don't touch `authMiddleware.ts` - it's fragile rn

## Files to Start With

1. src/auth/reset.ts - password reset
2. src/email/verification.ts - email flow
3. tests/auth.test.ts - add tests here

## Questions?

Slack me @alice if stuck
```

---

## Conflict Prevention

### File-Level Awareness

Before modifying a file, check state.md for who's touching what:

```markdown
## Active Sessions

| Contributor | Working On | Started | Files Touched | Status |
|-------------|------------|---------|---------------|--------|
| @alice | TODO-042 | ... | src/auth/*, src/middleware/* | 🟢 Active |
```

If you need to touch `src/auth/*` and Alice is working there:
1. Check if it's truly conflicting (same file? same functions?)
2. Coordinate via Slack before proceeding
3. Add a note to "Conflicts to Watch" section

### Pre-Push Check

Before pushing, always:

```bash
git pull origin main
# Resolve any conflicts
git push
```

### PR Tagging

Always tag area owners in PRs:

```markdown
## PR: Add password reset flow

Implements TODO-042

cc: @alice (auth owner), @bob (reviewer)

### Changes
- Added password reset endpoint
- Added email templates

### Testing
- [ ] Unit tests pass
- [ ] Manual testing done
```

---

## Decision Syncing

### Before Making a Decision

1. Pull latest `decisions.md`
2. Check if decision already exists
3. If similar decision exists, follow it (consistency > preference)
4. If new decision needed, add it and push immediately

### Decision Format

```markdown
## [2024-01-15] JWT vs Sessions for Auth (@alice)

**Decision:** Use JWT tokens
**Context:** Need auth for API and mobile app
**Options:**
1. Sessions - simpler, server-side state
2. JWT - stateless, works for mobile
**Choice:** JWT
**Reasoning:** Mobile app needs stateless auth, JWT works across platforms
**Trade-offs:** Token revocation is harder, need refresh token strategy
**Approved by:** @bob, @carol
```

---

## Commands

### Check Team State

```bash
# See who's working on what
cat _project_specs/team/state.md

# Quick active sessions check
grep "🟢 Active" _project_specs/team/state.md
```

### Claim a Todo

1. Edit `_project_specs/todos/active.md`
2. Add claim annotation to todo
3. Update `_project_specs/team/state.md`
4. Commit and push

### Release a Claim

1. Remove claim annotation from todo
2. Update state.md (remove from Claimed Todos)
3. Commit and push

---

## Git Hooks for Teams

### Pre-Push Hook Addition

Add team state sync check to pre-push:

```bash
# In .git/hooks/pre-push (add to existing)

# Check if team state is current.
#
# Warns (never blocks) when _project_specs/team/state.md on origin/main
# differs from the copy committed at HEAD.
#
# The function body is a subshell, so its variables cannot leak into the rest
# of the hook, and it sticks to POSIX sh constructs.
# Outputs: progress and warning text on stdout; fetch problems on stderr.
# Returns: always 0 - this check is advisory only.
check_team_state() (
    state_file="_project_specs/team/state.md"

    echo "🔄 Checking team state..."
    if ! git fetch origin main --quiet; then
        echo "⚠️  Could not fetch origin/main; comparing against the last known remote state" >&2
    fi

    # Compare git blob IDs instead of piping file contents through `md5`:
    # `md5` exists only on macOS/BSD, so on Linux both hashes came out empty
    # and the warning could never fire. --verify prints nothing on failure,
    # so a missing file simply yields an empty ID.
    local_state=$(git rev-parse --quiet --verify "HEAD:${state_file}" 2>/dev/null) || local_state=""
    remote_state=$(git rev-parse --quiet --verify "origin/main:${state_file}" 2>/dev/null) || remote_state=""

    if [ "$local_state" != "$remote_state" ]; then
        echo "⚠️  Team state has changed on remote!"
        echo "   Run: git pull origin main"
        echo "   Then check _project_specs/team/state.md for updates"
        # Warning only, don't block
    fi
    exit 0
)

check_team_state
```

---

## Claude Instructions

### At Session Start

When user starts a session in a team project:

1. Check for `_project_specs/team/state.md`
2. If exists, read it and report:
   - Who's currently active
   - What todos are claimed
   - Any conflicts to watch
   - Recent announcements

3. Ask what they want to work on
4. Check if it's already claimed
5. Help them claim and update state

### During Session

- Before touching files, check if someone else is working there
- Before making decisions, check decisions.md
- Remind user to update state periodically

### At Session End

- Prompt user to update state.md
- Ask if they need to create a handoff
- Remind them to push state changes

---

## Single → Multi-Person Conversion

When a project needs team coordination:

1. Run `/check-contributors`
2. Create `_project_specs/team/` structure
3. Initialize `state.md` and `contributors.md`
4. Add claim annotations to active todos
5. Update CLAUDE.md to reference team-coordination.md skill

---

## Quick Reference

### Status Icons

```
🟢 Active - Currently working
🟡 Paused - Stepped away, will return
🔴 Blocked - Needs help/waiting on something
⚪ Offline - Not working today
```

### Claim Format

```markdown
**Claimed:** @handle (YYYY-MM-DD HH:MM TZ)
```

### Daily Standup Template

```markdown
## Standup [DATE]

### @alice
- Yesterday: Finished TODO-042 auth flow
- Today: Starting TODO-045 password reset
- Blockers: None

### @bob
- Yesterday: Fixed checkout bug
- Today: Payment webhook integration
- Blockers: Need STRIPE_WEBHOOK_SECRET from @carol
```

---

## Checklist

### Starting Work
- [ ] `git pull origin main`
- [ ] Read `team/state.md`
- [ ] Check todo not claimed
- [ ] Claim todo in `active.md`
- [ ] Update `state.md`
- [ ] Push state changes

### Ending Work
- [ ] Commit all changes
- [ ] Update `current-state.md`
- [ ] Update `team/state.md`
- [ ] Create handoff if needed
- [ ] Push everything


================================================
FILE: skills/ticket-craft/SKILL.md
================================================
---
name: ticket-craft
description: Create Jira/Asana/Linear tickets optimized for Claude Code execution - AI-native ticket writing
when-to-use: When creating tickets, breaking down epics, or writing specs for AI agent execution
user-invocable: true
effort: medium
---

# Ticket Craft Skill

*Write software tickets that AI agents can execute autonomously.*

**Purpose:** Define a ticket format that combines software engineering best practices (INVEST, Given-When-Then, Definition of Ready) with Claude Code-specific context requirements. Every ticket created with this skill is "Claude Code Ready" - meaning an agent can pick it up and execute it without asking clarifying questions.

**Works with:** Jira, Asana, Linear, GitHub Issues, or any ticket system.

---

## Core Principle

```
┌─────────────────────────────────────────────────────────────────┐
│  A TICKET IS A PROMPT                                            │
│  ──────────────────────────────────────────────────────────────  │
│                                                                  │
│  Traditional tickets are written for humans who can:             │
│  - Ask clarifying questions in Slack                             │
│  - Draw on institutional knowledge                               │
│  - Infer intent from vague descriptions                          │
│                                                                  │
│  AI agents cannot do any of this.                                │
│                                                                  │
│  Every ticket must be SELF-CONTAINED:                            │
│  - Explicit file references (not "the auth module")              │
│  - Pattern references (not "follow our conventions")             │
│  - Verification criteria (not "make sure it works")              │
│  - Constraints (not just what to do, but what NOT to do)         │
│  - Test commands (not "run the tests")                           │
│                                                                  │
│  If Claude Code can execute it without asking a question,        │
│  the ticket is ready. If it can't, it's not.                     │
└─────────────────────────────────────────────────────────────────┘
```

---

## The INVEST+C Criteria

Standard INVEST plus **C for Claude-Ready**:

| Criterion | Question | Fails If... |
|-----------|----------|-------------|
| **I** - Independent | Can this be completed without waiting on another ticket? | Blocked by undocumented dependencies |
| **N** - Negotiable | Is there room to adjust implementation approach? | Over-specifies implementation details |
| **V** - Valuable | Can you articulate who benefits and how? | No clear user or business value |
| **E** - Estimable | Does the team understand enough to size it? | Too vague or too large to estimate |
| **S** - Small | Can one person finish this in 1-3 days? | More than 5 acceptance criteria |
| **T** - Testable | Can you write a pass/fail test for it? | Uses vague language like "fast" or "good UX" |
| **C** - Claude-Ready | Can an AI agent execute this without clarifying questions? | Missing file refs, patterns, verification, or constraints |

---

## Ticket Types

### 1. Feature Ticket

```markdown
## [PROJ-XXX] {Verb} {Feature} for {User}

**Type:** Feature
**Priority:** {Critical | High | Medium | Low}
**Points:** {1 | 2 | 3 | 5 | 8}
**Labels:** {frontend, backend, api, database, etc.}
**Epic:** {Parent epic}

---

### User Story
As a {specific persona},
I want to {specific action},
so that {measurable benefit}.

### Background
{1-2 paragraphs on why this matters. Link to product brief, user research,
or business justification. Include any relevant metrics or user feedback.}

### Acceptance Criteria

**AC1: {Happy path scenario}**
Given {precondition},
when {action},
then {expected result}.

**AC2: {Edge case / error scenario}**
Given {precondition},
when {action},
then {expected result}.

**AC3: {Boundary condition}**
Given {precondition},
when {action},
then {expected result}.

### Out of Scope
- {Explicitly state what this ticket does NOT include}
- {Prevents scope creep and keeps ticket small}

---

### Claude Code Context

#### Relevant Files (read these first)
- `src/services/example.ts` - Existing service to extend
- `src/models/example.ts` - Data model definition
- `src/api/routes/example.ts` - Existing endpoint patterns to follow

#### Pattern Reference
Follow the pattern in `src/services/user.ts` for service layer implementation.
Follow the pattern in `src/api/routes/users.ts` for route definition.
Follow the pattern in `tests/services/user.test.ts` for test structure.

#### Database Changes
- {Table to create/modify, columns, types}
- {Migration file location: `supabase/migrations/` or `prisma/migrations/`}
- {RLS policies if using Supabase}

#### API Contract
```
POST /api/{resource}
Request: { field1: string, field2: number }
Response: { id: string, field1: string, created_at: string }
Error: { error: string, code: number }
```

#### Constraints
- Do NOT modify {specific files or modules}
- Do NOT add new dependencies without approval
- Follow existing error handling in `src/core/exceptions.ts`
- {Any performance budgets: response time < 200ms, bundle size < 50KB}

#### Verification
```bash
# Run specific tests
npm test -- --grep "{feature name}"

# Lint check
npm run lint

# Type check
npm run typecheck

# Full validation
npm test -- --coverage
```

#### Environment Variables
- Existing: {list vars already in .env that are relevant}
- New required: {list any new vars needed}

---

### Dependencies
- Blocked by: {PROJ-XXX} ({brief description})
- Blocks: {PROJ-YYY} ({brief description})

### Design
- Mockup: {link to Figma/design if applicable}
```

---

### 2. Bug Ticket

```markdown
## [BUG-XXX] Fix: {Component} - {Symptom}

**Type:** Bug
**Priority:** {Critical | High | Medium | Low}
**Points:** {1 | 2 | 3 | 5}
**Labels:** {regression, ux-bug, data-bug, security-bug}
**Severity:** {Blocks users | Degrades experience | Cosmetic}

---

### Bug Summary
{One sentence: what is broken and who is affected.}

### Environment
- Browser/OS: {e.g., Chrome 120 / macOS 14.2}
- Environment: {Production | Staging | Local}
- User type: {Anonymous | Authenticated | Admin}
- First observed: {date}

### Steps to Reproduce
1. {Navigate to / perform action}
2. {Perform next action}
3. {Perform next action}
4. **Observe:** {incorrect behavior}

### Expected Behavior
{What should happen instead.}

### Actual Behavior
{What actually happens. Include error messages, console output, screenshots.}

### Impact
- Users affected: {percentage or count}
- Frequency: {every time | intermittent | specific conditions}
- Workaround: {exists / none}

---

### Claude Code Context

#### Suspected Root Cause
{Where the bug likely lives, if known.}
- File: `src/components/LoginForm.tsx:87`
- Issue: `isSubmitting` state set to `true` on validation error but never reset

#### Relevant Files
- `src/components/LoginForm.tsx` - Form component with the bug
- `tests/components/LoginForm.test.tsx` - Existing tests (gap here)
- `src/hooks/useAuth.ts` - Auth hook used by the form

#### Test Gap Analysis
- Existing tests cover: {what's currently tested}
- Missing test: {what test would have caught this bug}

#### Bug Fix Workflow (TDD)
1. Write a failing test that reproduces the bug
2. Verify the test fails (confirms the bug exists)
3. Fix the bug with minimum code change
4. Verify the test passes
5. Run full test suite to check for regressions

#### Verification
```bash
# Run the specific test
npm test -- --grep "LoginForm submit"

# Run related tests
npm test -- tests/components/LoginForm.test.tsx

# Full regression check
npm test
```

#### Constraints
- Fix the bug only - do NOT refactor surrounding code
- Do NOT change the component's public API
- Ensure all existing tests continue to pass
```

---

### 3. Tech Debt Ticket

```markdown
## [TECH-XXX] Refactor: {Area} - {Improvement}

**Type:** Tech Debt
**Priority:** {High | Medium | Low}
**Points:** {3 | 5 | 8}
**Labels:** {refactor, performance, maintainability, testing}

---

### Problem Statement
{What is wrong with the current implementation and why it matters.
Include concrete pain points: slow CI, frequent bugs, developer confusion.}

### Current State
- File: `{path}` ({N} lines)
- Test coverage: {X}%
- Cyclomatic complexity: {N}
- Related bugs: {PROJ-XXX, PROJ-YYY}
- Pain frequency: {how often this causes issues}

### Proposed Change
{What specifically should change and why this approach.}

### Acceptance Criteria
- [ ] {Specific structural change completed}
- [ ] All existing tests pass without modifying test assertions
- [ ] No public API changes (existing consumers unaffected)
- [ ] Test coverage >= {X}%
- [ ] {Measurable improvement metric}

### Risk Assessment
- Risk level: {Low | Medium | High}
- Mitigation: {run full regression, deploy behind flag, etc.}

### Business Justification
{Why this is worth doing now. E.g., "Reduces average bug fix time from 4h to 1h"
or "Enables upcoming feature PROJ-XXX which requires clean separation."}

---

### Claude Code Context

#### Relevant Files
- `{file}` - Current implementation to refactor
- `{test file}` - Existing tests (must not break)
- `{dependent file}` - Consumer of the API being refactored

#### Pattern Reference
Follow the pattern established in `{good example file}` for the new structure.

#### Constraints
- Do NOT change public APIs or exports
- Do NOT modify test assertions (tests should pass as-is)
- Do NOT introduce new dependencies
- Keep backwards compatibility

#### Verification
```bash
# Existing tests must pass unchanged
npm test

# No type errors
npm run typecheck

# Lint clean
npm run lint

# Coverage target
npm test -- --coverage
```
```

---

### 4. Epic Breakdown Ticket

```markdown
## [EPIC-XXX] {Epic Name}

**Type:** Epic
**Priority:** {Critical | High | Medium}
**Target:** {Sprint/milestone}

---

### Objective
{One paragraph: what this epic achieves and why it matters.}

### Success Metrics
- {Measurable outcome 1}
- {Measurable outcome 2}

### User Workflows
{The user journey this epic covers, broken into steps.}
1. {Step 1: Discovery/Entry}
2. {Step 2: Core Action}
3. {Step 3: Completion/Result}

### Ticket Breakdown

| # | Ticket | Type | Points | Dependencies |
|---|--------|------|--------|-------------|
| 1 | {title} | Feature | 3 | None |
| 2 | {title} | Feature | 5 | #1 |
| 3 | {title} | Feature | 3 | None |
| 4 | {title} | Feature | 2 | #2, #3 |
| 5 | {title} | Tech Debt | 3 | None |

### Slicing Strategy
{How the epic was broken down. Reference the technique used.}

### Agent Team Mapping
{If using agent teams, how features map to agents.}
- Feature Agent 1: Tickets #1, #2
- Feature Agent 2: Tickets #3, #4
- Parallel execution: #1 and #3 can run simultaneously
- Sequential: #2 depends on #1, #4 depends on #2 and #3
```

---

## Epic Slicing Techniques

When breaking an epic into tickets, use one of these strategies:

| Technique | When to Use | Example |
|-----------|-------------|---------|
| **By workflow step** | Clear user journey | Browse > Play > Save > Share |
| **By data variation** | Multiple data types | Text posts, images, videos |
| **By user role** | Different permissions | Anonymous, authenticated, admin |
| **By CRUD** | Data operations | Create, Read, Update, Delete |
| **Happy path first** | Incremental delivery | Success flow first, then errors |
| **By boundary** | System integration | Frontend, API, database separately |

### Rules of Thumb
- Each ticket: **1-3 days** of work for one developer/agent
- More than **5 acceptance criteria** = split the ticket
- More than **8 story points** = definitely split
- Every ticket should be **independently deployable** (even behind a flag)
- Order tickets: **simplest, most foundational first**

---

## The Claude Code Ready Checklist

Before a ticket is ready for an AI agent to execute, verify:

```
┌─────────────────────────────────────────────────────────────────┐
│  CLAUDE CODE READY CHECKLIST                                     │
│  ──────────────────────────────────────────────────────────────  │
│                                                                  │
│  CONTEXT                                                         │
│  ☐ Relevant files listed with full paths                         │
│  ☐ Pattern reference points to a real file to follow             │
│  ☐ API contract defined (request/response shapes)                │
│  ☐ Database changes specified (tables, columns, migrations)      │
│  ☐ Environment variables listed (existing + new)                 │
│                                                                  │
│  SCOPE                                                           │
│  ☐ Out of Scope section explicitly states what NOT to do         │
│  ☐ Constraints section lists files/modules NOT to modify         │
│  ☐ Ticket covers one logical change (atomic)                     │
│  ☐ Estimable at ≤ 5 story points                                │
│                                                                  │
│  VERIFICATION                                                    │
│  ☐ Test command provided (exact command, not "run tests")        │
│  ☐ Lint command provided                                         │
│  ☐ Typecheck command provided                                    │
│  ☐ Acceptance criteria are Given-When-Then or checkboxed         │
│  ☐ Each criterion is independently pass/fail testable            │
│                                                                  │
│  QUALITY                                                         │
│  ☐ Title is imperative verb + object + context                   │
│  ☐ Title under 80 characters                                     │
│  ☐ Description explains WHY, not just WHAT                       │
│  ☐ 2-5 acceptance criteria (not more)                            │
│  ☐ No vague language ("fast", "good UX", "clean")               │
│                                                                  │
│  If any box is unchecked, the ticket is NOT ready.               │
└─────────────────────────────────────────────────────────────────┘
```

---

## Anti-Patterns (Never Do These)

### 1. The Title-Only Ticket
```
Title: Fix login
Description: (empty)
```
**Why it fails:** No context, no acceptance criteria, no file references. Claude Code will guess and likely guess wrong.

### 2. The Novel
```
Title: Implement new onboarding
Description: (3 pages mixing UI, backend, analytics, email, and future ideas)
```
**Why it fails:** Not small, not independent. Agent teams can't parallelize this. Split into 5+ tickets.

### 3. The Vague Requirement
```
Acceptance Criteria:
- Should be fast
- UX should be good
- Should work on mobile
```
**Why it fails:** Unmeasurable, untestable. Replace with: "Response time < 200ms", "Passes WCAG 2.1 AA", "No horizontal scroll at 320px viewport."

### 4. The Over-Specified Solution
```
Title: Use Redis to cache user sessions
Description: Install Redis, configure connection pooling, set TTL to 3600...
```
**Why it fails:** Prescribes the solution instead of the problem. Should describe "Session lookups take 500ms, need < 50ms" and let the agent choose the approach.

### 5. The Missing Files Ticket
```
Description: Update the auth module to support OAuth.
```
**Why it fails for AI:** "The auth module" could be 20 files. Claude Code needs: `src/services/auth.ts`, `src/middleware/auth.ts`, `src/routes/auth.ts` - specific paths.

### 6. The No-Verification Ticket
```
Acceptance Criteria:
- OAuth login works
- Users can sign in with Google
```
**Why it fails:** No test command, no verification steps. Claude Code performs dramatically better when it can verify its own work.

---

## Good vs Bad Examples

### Bad: Vague Feature Ticket
```
Title: Add rate limiting to the API
Description: We need rate limiting on our endpoints.
```

### Good: Claude Code Ready Feature Ticket
```
Title: Add sliding window rate limiter to /api/generate endpoint

User Story:
As an API consumer, I want requests to be rate-limited
so that the service remains available under heavy load.

Acceptance Criteria:
AC1: Given an authenticated user making requests,
     when they exceed 10 requests per minute,
     then return 429 with Retry-After header.

AC2: Given a rate-limited user,
     when the window expires,
     then requests succeed again.

AC3: Given an unauthenticated request,
     when it hits /api/generate,
     then return 401 (rate limiting only applies to authed users).

Claude Code Context:
- Pattern: Follow `src/middleware/throttle.ts` for middleware structure
- File: Create `src/middleware/rateLimit.ts`
- Test: Create `tests/middleware/rateLimit.test.ts`
- Route: Modify `src/api/routes/generate.ts` to add middleware
- Constraint: Do NOT modify existing middleware or other endpoints

Verification:
  npm test -- --grep "rate-limit"
  npm run lint
  npm run typecheck
```

---

## Mapping Tickets to Agent Teams

When using the agent-teams workflow, tickets map directly to the 10-task pipeline:

| Ticket Section | Maps To | Agent |
|---------------|---------|-------|
| Title + Description | Task 1: `{name}-spec` | Feature Agent |
| Acceptance Criteria | Task 3: `{name}-tests` | Feature Agent (writes tests from AC) |
| Pattern Reference | Task 5: `{name}-implement` | Feature Agent (follows pattern) |
| Verification section | Task 6-7: verify + validate | Quality Agent + Feature Agent |
| Constraints | Enforced throughout | All agents |
| Claude Code Context | Loaded at start | Feature Agent reads first |

### Ticket → Agent Team Flow
```
1. Create ticket using templates above
2. Ticket becomes the feature spec in _project_specs/features/
3. Team Lead reads spec, creates 10-task dependency chain
4. Feature Agent uses ticket's Claude Code Context to start
5. Quality Agent uses ticket's Acceptance Criteria to verify
6. Review Agent reviews against ticket's Constraints
7. Security Agent scans based on ticket's scope
8. Merger Agent creates PR referencing the ticket ID
```

---

## Ticket Title Conventions

| Type | Format | Example |
|------|--------|---------|
| Feature | `Add {feature} for {user}` | Add episode bookmarking for listeners |
| Enhancement | `Improve {what} in {where}` | Improve search performance in episode feed |
| Bug | `Fix: {Component} - {Symptom}` | Fix: PlayerBar - audio stops on tab switch |
| Tech Debt | `Refactor: {Area} - {Goal}` | Refactor: AuthService - extract token management |
| Security | `Security: {What} in {Where}` | Security: add input sanitization to comment API |
| Chore | `Chore: {What}` | Chore: upgrade React from 18 to 19 |

**Rules:**
- Start with an imperative verb (Add, Fix, Improve, Refactor, Remove); when a type prefix is used (`Security:`, `Chore:`), follow it with an imperative verb
- Under 80 characters
- Include the component/area affected
- Be specific enough to distinguish from other tickets

---

## Story Points for AI Agents

AI agents estimate differently than humans. Use this calibration:

| Points | Scope | Agent Time | Example |
|--------|-------|-----------|---------|
| **1** | Single file, < 20 lines changed | ~5 min | Fix a typo, update a config value |
| **2** | 1-2 files, straightforward | ~15 min | Add a field to a form, update an API response |
| **3** | 2-4 files, clear path | ~30 min | New API endpoint following existing pattern |
| **5** | 4-8 files, some decisions | ~1 hour | New feature with tests, models, and routes |
| **8** | 8+ files, complex | ~2 hours | Integration with external service, new data model |
| **13** | Too large, split required | - | Full authentication system, major refactor |

**Rule:** If > 5 points, consider splitting. If 13, always split.

---

## Integration with Ticket Systems

### Jira
- Use custom field "Claude Code Context" for the AI-specific section
- Use labels: `claude-ready`, `needs-context`, `ai-blocked`
- Link tickets with "blocks/blocked by" for dependency chains

### Asana
- Use custom fields for Priority, Points, Type
- Use subtasks for the 10-task pipeline steps
- Use tags: `claude-ready`, `needs-refinement`

### Linear
- Use issue templates with the Claude Code Context section built-in
- Use labels for ticket type and claude-readiness
- Use projects to group tickets into epics

### GitHub Issues
- Use issue templates (`.github/ISSUE_TEMPLATE/`)
- Use labels: `feature`, `bug`, `tech-debt`, `claude-ready`
- Use milestones for epics

---

## Command: /create-ticket

When the user asks to create a ticket, follow this workflow:

### Step 1: Gather Context
Ask the user:
1. What type? (Feature / Bug / Tech Debt)
2. Brief description of what needs to be done
3. Which part of the codebase is involved?

### Step 2: Auto-Detect Context
- Read the relevant files to understand current implementation
- Identify the pattern to follow from existing code
- Find existing tests to understand test conventions
- Check for related files that might be affected

### Step 3: Generate Ticket
Use the appropriate template above, filling in:
- All Claude Code Context fields (auto-detected)
- Acceptance criteria (derived from description)
- Verification commands (from project's CLAUDE.md or package.json)
- Constraints (based on codebase analysis)

### Step 4: Validate with Checklist
Run the Claude Code Ready Checklist against the generated ticket.
Flag any unchecked items for the user to address.

### Step 5: Output
Present the ticket in the template format, ready to paste into Jira/Asana/Linear/GitHub Issues.

---

## Definition of Ready (for Sprint)

A ticket can enter a sprint when:

- [ ] Passes INVEST+C criteria
- [ ] Claude Code Ready Checklist is complete
- [ ] Dependencies are identified and unblocked
- [ ] Story points assigned
- [ ] Design/mockups attached (if applicable)
- [ ] Acceptance criteria reviewed by team

## Definition of Done

A ticket is done when:

- [ ] All acceptance criteria verified (pass/fail)
- [ ] Tests written and passing
- [ ] Code reviewed (no Critical/High issues)
- [ ] Security scan passed
- [ ] Lint and typecheck clean
- [ ] Coverage >= 80% for new code
- [ ] PR created with full pipeline results
- [ ] Documentation updated (if applicable)


================================================
FILE: skills/typescript/SKILL.md
================================================
---
name: typescript
description: TypeScript strict mode with eslint and jest
when-to-use: When working on TypeScript files
user-invocable: false
paths: ["**/*.ts", "**/*.tsx", "tsconfig*.json"]
effort: medium
---

# TypeScript Skill


---

## Strict Mode (Non-Negotiable)

```json
// tsconfig.json
{
  "compilerOptions": {
    "strict": true,
    "noImplicitAny": true,
    "strictNullChecks": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noImplicitReturns": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true
  }
}
```

---

## Project Structure

```
project/
├── src/
│   ├── core/               # Pure business logic
│   │   ├── types.ts        # Domain types/interfaces
│   │   ├── services/       # Pure functions
│   │   └── index.ts        # Public API
│   ├── infra/              # Side effects
│   │   ├── api/            # HTTP handlers
│   │   ├── db/             # Database operations
│   │   └── external/       # Third-party integrations
│   └── utils/              # Shared utilities
├── tests/
│   ├── unit/
│   └── integration/
├── package.json
├── tsconfig.json
└── CLAUDE.md
```

---

## Tooling (Required)

```json
// package.json scripts
{
  "scripts": {
    "lint": "eslint src/",
    "typecheck": "tsc --noEmit",
    "test": "jest",
    "test:coverage": "jest --coverage",
    "format": "prettier --write 'src/**/*.ts'"
  }
}
```

```javascript
// eslint.config.js
import eslint from '@eslint/js';
import tseslint from 'typescript-eslint';

export default tseslint.config(
  eslint.configs.recommended,
  ...tseslint.configs.strictTypeChecked,
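  {
    languageOptions: {
      parserOptions: {
        // strictTypeChecked enables type-aware rules, which need type information
        projectService: true,
        tsconfigRootDir: import.meta.dirname,
      },
    },
    rules: {
      '@typescript-eslint/no-explicit-any': 'error',
      '@typescript-eslint/explicit-function-return-type': 'error',
      'max-lines-per-function': ['error', 20],
      'max-depth': ['error', 2],
      'max-params': ['error', 3],
    }
  }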
);
```

---

## Testing with Jest

```typescript
// tests/unit/services/pricing.test.ts
import { calculateTotal } from '../../../src/core/services/pricing';

describe('calculateTotal', () => {
  it('returns sum of item prices', () => {
    // Arrange
    const items = [{ price: 10 }, { price: 20 }];

    // Act
    const result = calculateTotal(items);

    // Assert
    expect(result).toBe(30);
  });

  it('returns zero for empty array', () => {
    expect(calculateTotal([])).toBe(0);
  });

  it('throws on invalid item', () => {
    expect(() => calculateTotal([{ invalid: 'item' }])).toThrow();
  });
});
```

---

## GitHub Actions

```yaml
name: TypeScript Quality Gate

on: [push, pull_request]

jobs:
  quality:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      
      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          
      - name: Install dependencies
        run: npm ci
        
      - name: Lint
        run: npm run lint
        
      - name: Type Check
        run: npm run typecheck
        
      - name: Test with Coverage
        run: npm run test:coverage
        
      - name: Coverage Threshold (80%)
        run: npm run test:coverage -- --coverageThreshold='{"global":{"branches":80,"functions":80,"lines":80,"statements":80}}'
```

---

## Pre-Commit Hooks

Using Husky + lint-staged:

```bash
npm install -D husky lint-staged
npx husky init
```

```json
// package.json
{
  "lint-staged": {
    "*.{ts,tsx}": [
      "eslint --fix",
      "prettier --write"
    ]
  }
}
```

```bash
# .husky/pre-commit
npx lint-staged
npx tsc --noEmit
npm run test -- --onlyChanged --passWithNoTests
```

This runs on every commit:
1. ESLint + Prettier on staged files
2. Type check entire project
3. Tests for changed files only

---

## Type Patterns

### Discriminated Unions for Results
```typescript
type Result<T> =
  | { ok: true; value: T }
  | { ok: false; error: string };

function parseUser(data: unknown): Result<User> {
  // Type-safe error handling without exceptions
}
```

### Branded Types for IDs
```typescript
type UserId = string & { readonly brand: unique symbol };
type OrderId = string & { readonly brand: unique symbol };

// Can't accidentally pass UserId where OrderId expected
function getOrder(orderId: OrderId): Order { ... }
```

### Const Assertions for Literals
```typescript
const STATUSES = ['pending', 'active', 'closed'] as const;
type Status = typeof STATUSES[number]; // 'pending' | 'active' | 'closed'
```

### Zod for Runtime Validation
```typescript
import { z } from 'zod';

const UserSchema = z.object({
  email: z.string().email(),
  name: z.string().min(1).max(100),
});

type User = z.infer<typeof UserSchema>;
```

---

## TypeScript Anti-Patterns

- ❌ `any` type - use `unknown` and narrow
- ❌ Type assertions (`as`) - use type guards
- ❌ Non-null assertions (`!`) - handle null explicitly
- ❌ `@ts-ignore` without explanation
- ❌ Enums - use const objects or union types
- ❌ Classes for data - use interfaces/types
- ❌ Default exports - use named exports


================================================
FILE: skills/ui-mobile/SKILL.md
================================================
---
name: ui-mobile
description: Mobile UI patterns - React Native, iOS/Android, touch targets
when-to-use: When building mobile UI components
user-invocable: false
paths: ["**/*.tsx", "**/*.jsx", "ios/**", "android/**", "**/*.dart"]
effort: medium
---

# Mobile UI Design Skill (React Native)


---

## MANDATORY: Mobile Accessibility Standards

**These rules are NON-NEGOTIABLE. Every UI element must pass these checks.**

### 1. Touch Targets (CRITICAL)
```typescript
// MINIMUM 44x44 points for ALL interactive elements
const MINIMUM_TOUCH_SIZE = 44;

// EVERY button, link, icon button must meet this
const styles = StyleSheet.create({
  button: {
    minHeight: MINIMUM_TOUCH_SIZE,
    minWidth: MINIMUM_TOUCH_SIZE,
    paddingVertical: 12,
    paddingHorizontal: 16,
  },
  iconButton: {
    width: MINIMUM_TOUCH_SIZE,
    height: MINIMUM_TOUCH_SIZE,
    justifyContent: 'center',
    alignItems: 'center',
  },
});

// NEVER DO THIS:
style={{ height: 30 }}  // ✗ TOO SMALL
style={{ padding: 4 }}  // ✗ RESULTS IN TINY TARGET
```

### 2. Color Contrast (CRITICAL)
```typescript
// WCAG 2.1 AA: 4.5:1 for text, 3:1 for large text/UI

// SAFE COMBINATIONS:
const colors = {
  // Light mode
  textPrimary: '#000000',     // on white = 21:1 ✓
  textSecondary: '#374151',   // gray-700 on white = 9.2:1 ✓

  // Dark mode
  textPrimaryDark: '#FFFFFF', // on gray-900 = 16:1 ✓
  textSecondaryDark: '#E5E7EB', // gray-200 on gray-900 = 11:1 ✓
};

// FORBIDDEN - FAILS CONTRAST:
// ✗ '#9CA3AF' (gray-400) on white = 2.6:1
// ✗ '#6B7280' (gray-500) on '#111827' = 4.0:1
// ✗ Any text below 4.5:1 ratio
```

### 3. Visibility Rules
```typescript
// ALL BUTTONS MUST HAVE visible boundaries

// PRIMARY: Solid background with contrasting text
<Pressable style={styles.primaryButton}>
  <Text style={{ color: '#FFFFFF' }}>Submit</Text>
</Pressable>

const styles = StyleSheet.create({
  primaryButton: {
    backgroundColor: '#1F2937', // gray-800
    paddingVertical: 16,
    paddingHorizontal: 24,
    borderRadius: 12,
    minHeight: 44,
  },
});

// SECONDARY: Visible background
<Pressable style={styles.secondaryButton}>
  <Text style={{ color: '#1F2937' }}>Cancel</Text>
</Pressable>

const styles = StyleSheet.create({
  secondaryButton: {
    backgroundColor: '#F3F4F6', // gray-100
    minHeight: 44,
  },
});

// GHOST: MUST have visible border
<Pressable style={styles.ghostButton}>
  <Text style={{ color: '#374151' }}>Skip</Text>
</Pressable>

const styles = StyleSheet.create({
  ghostButton: {
    borderWidth: 1,
    borderColor: '#D1D5DB', // gray-300
    minHeight: 44,
  },
});

// NEVER CREATE invisible buttons:
// ✗ backgroundColor: 'transparent' without border
// ✗ Text color matching background
```

### 4. Accessibility Labels (REQUIRED)
```tsx
// EVERY interactive element needs accessibility props

// Buttons
<Pressable
  accessible={true}
  accessibilityRole="button"
  accessibilityLabel="Submit form"
  accessibilityHint="Double tap to submit your information"
>
  <Text>Submit</Text>
</Pressable>

// Icon buttons (NO visible text = MUST have label)
<Pressable
  accessible={true}
  accessibilityRole="button"
  accessibilityLabel="Close menu"
>
  <CloseIcon />
</Pressable>

// Images
<Image
  accessible={true}
  accessibilityRole="image"
  accessibilityLabel="User profile photo"
  source={...}
/>
```

### 5. Focus/Selection States
```tsx
// EVERY Pressable needs visible pressed state
<Pressable
  style={({ pressed }) => [
    styles.button,
    pressed && styles.buttonPressed,
  ]}
>
  {children}
</Pressable>

const styles = StyleSheet.create({
  button: {
    backgroundColor: '#1F2937',
  },
  buttonPressed: {
    opacity: 0.7,
    // OR
    backgroundColor: '#374151',
  },
});
```

---

## Core Philosophy

**Mobile UI is about touch, speed, and focus.** No hover states, smaller screens, thumb-friendly targets. Design for one-handed use and interruption recovery.

## Platform Differences

### iOS vs Android
```typescript
import { Platform } from 'react-native';

// Platform-specific values
const styles = StyleSheet.create({
  shadow: Platform.select({
    ios: {
      shadowColor: '#000',
      shadowOffset: { width: 0, height: 2 },
      shadowOpacity: 0.1,
      shadowRadius: 8,
    },
    android: {
      elevation: 4,
    },
  }),

  // iOS uses SF Pro, Android uses Roboto
  text: {
    fontFamily: Platform.OS === 'ios' ? 'System' : 'Roboto',
  },
});
```

### Design Language
```
iOS (Human Interface Guidelines)
─────────────────────────────────
- Flat design with subtle depth
- SF Symbols for icons
- Large titles (34pt)
- Rounded corners (10-14pt)
- Blue as default tint

Android (Material Design 3)
─────────────────────────────────
- Material You dynamic color
- Outlined/filled icons
- Medium titles (22pt)
- Rounded corners (12-28pt)
- Primary color from theme
```

## Spacing System

### 4px Base Grid
```typescript
// React Native spacing - consistent scale
const spacing = {
  xs: 4,
  sm: 8,
  md: 16,
  lg: 24,
  xl: 32,
  '2xl': 48,
} as const;

// Usage
const styles = StyleSheet.create({
  container: {
    padding: spacing.md,
    gap: spacing.sm,
  },
});
```

### Safe Areas
```tsx
import { useSafeAreaInsets } from 'react-native-safe-area-context';

const Screen = ({ children }) => {
  const insets = useSafeAreaInsets();

  return (
    <View style={{
      flex: 1,
      paddingTop: insets.top,
      paddingBottom: insets.bottom,
      paddingLeft: Math.max(insets.left, 16),
      paddingRight: Math.max(insets.right, 16),
    }}>
      {children}
    </View>
  );
};
```

## Typography

### Type Scale
```typescript
const typography = {
  // Large titles (iOS style)
  largeTitle: {
    fontSize: 34,
    fontWeight: '700' as const,
    letterSpacing: 0.37,
  },

  // Section headers
  title: {
    fontSize: 22,
    fontWeight: '700' as const,
    letterSpacing: 0.35,
  },

  // Card titles
  headline: {
    fontSize: 17,
    fontWeight: '600' as const,
    letterSpacing: -0.41,
  },

  // Body text
  body: {
    fontSize: 17,
    fontWeight: '400' as const,
    letterSpacing: -0.41,
    lineHeight: 22,
  },

  // Secondary text
  callout: {
    fontSize: 16,
    fontWeight: '400' as const,
    letterSpacing: -0.32,
  },

  // Small labels
  caption: {
    fontSize: 12,
    fontWeight: '400' as const,
    letterSpacing: 0,
  },
};
```

## Color System

### Semantic Colors
```typescript
// Use semantic names, not literal colors
const colors = {
  // Backgrounds
  background: '#FFFFFF',
  backgroundSecondary: '#F2F2F7',
  backgroundTertiary: '#FFFFFF',

  // Surfaces
  surface: '#FFFFFF',
  surfaceElevated: '#FFFFFF',

  // Text
  label: '#000000',
  labelSecondary: '#3C3C4399', // #3C3C43 at 60% opacity
  labelTertiary: '#3C3C434D',  // #3C3C43 at 30% opacity

  // Actions
  primary: '#007AFF',
  destructive: '#FF3B30',
  success: '#34C759',
  warning: '#FF9500',

  // Separators
  separator: '#3C3C434A', // #3C3C43 at 29% opacity
  opaqueSeparator: '#C6C6C8',
};

// Dark mode variants
const darkColors = {
  background: '#000000',
  backgroundSecondary: '#1C1C1E',
  label: '#FFFFFF',
  labelSecondary: '#EBEBF599', // #EBEBF5 at 60% opacity
  separator: '#545458',
};
```

### Dynamic Colors (React Native)
```tsx
import { useColorScheme } from 'react-native';

const useColors = () => {
  const scheme = useColorScheme();
  return scheme === 'dark' ? darkColors : colors;
};

// Usage
const MyComponent = () => {
  const colors = useColors();
  return (
    <View style={{ backgroundColor: colors.background }}>
      <Text style={{ color: colors.label }}>Hello</Text>
    </View>
  );
};
```

## Touch Targets

### Minimum Sizes
```typescript
// CRITICAL: Minimum 44pt touch targets
const touchable = {
  minHeight: 44,
  minWidth: 44,
};

// Button with proper sizing
const styles = StyleSheet.create({
  button: {
    minHeight: 44,
    paddingHorizontal: 16,
    paddingVertical: 12,
    justifyContent: 'center',
    alignItems: 'center',
  },

  // Icon button (square)
  iconButton: {
    width: 44,
    height: 44,
    justifyContent: 'center',
    alignItems: 'center',
  },

  // List row
  listRow: {
    minHeight: 44,
    paddingVertical: 12,
    paddingHorizontal: 16,
  },
});
```

### Touch Feedback
```tsx
import { Pressable } from 'react-native';

// iOS-style opacity feedback
const Button = ({ children, onPress }) => (
  <Pressable
    onPress={onPress}
    style={({ pressed }) => [
      styles.button,
      pressed && { opacity: 0.7 },
    ]}
  >
    {children}
  </Pressable>
);

// Android-style ripple
const AndroidButton = ({ children, onPress }) => (
  <Pressable
    onPress={onPress}
    android_ripple={{
      color: 'rgba(0, 0, 0, 0.1)',
      borderless: false,
    }}
    style={styles.button}
  >
    {children}
  </Pressable>
);
```

## Component Patterns

### Cards
```tsx
const Card = ({ children, style }) => (
  <View style={[styles.card, style]}>
    {children}
  </View>
);

const styles = StyleSheet.create({
  card: {
    backgroundColor: '#FFFFFF',
    borderRadius: 12,
    padding: 16,
    ...Platform.select({
      ios: {
        shadowColor: '#000',
        shadowOffset: { width: 0, height: 2 },
        shadowOpacity: 0.08,
        shadowRadius: 8,
      },
      android: {
        elevation: 2,
      },
    }),
  },
});
```

### Buttons
```tsx
// Primary button
const PrimaryButton = ({ title, onPress, disabled }) => (
  <Pressable
    onPress={onPress}
    disabled={disabled}
    style={({ pressed }) => [
      styles.primaryButton,
      pressed && styles.primaryButtonPressed,
      disabled && styles.buttonDisabled,
    ]}
  >
    <Text style={styles.primaryButtonText}>{title}</Text>
  </Pressable>
);

const styles = StyleSheet.create({
  primaryButton: {
    backgroundColor: '#007AFF',
    borderRadius: 12,
    paddingVertical: 16,
    paddingHorizontal: 24,
    alignItems: 'center',
  },
  primaryButtonPressed: {
    backgroundColor: '#0056B3',
  },
  primaryButtonText: {
    color: '#FFFFFF',
    fontSize: 17,
    fontWeight: '600',
  },
  buttonDisabled: {
    opacity: 0.5,
  },
});

// Secondary button
const SecondaryButton = ({ title, onPress }) => (
  <Pressable
    onPress={onPress}
    style={({ pressed }) => [
      styles.secondaryButton,
      pressed && { opacity: 0.7 },
    ]}
  >
    <Text style={styles.secondaryButtonText}>{title}</Text>
  </Pressable>
);
```

### Input Fields
```tsx
const TextField = ({ label, value, onChangeText, error }) => {
  const [focused, setFocused] = useState(false);

  return (
    <View style={styles.textFieldContainer}>
      {label && (
        <Text style={styles.textFieldLabel}>{label}</Text>
      )}
      <TextInput
        value={value}
        onChangeText={onChangeText}
        onFocus={() => setFocused(true)}
        onBlur={() => setFocused(false)}
        style={[
          styles.textField,
          focused && styles.textFieldFocused,
          error && styles.textFieldError,
        ]}
        placeholderTextColor="#8E8E93"
      />
      {error && (
        <Text style={styles.errorText}>{error}</Text>
      )}
    </View>
  );
};

const styles = StyleSheet.create({
  textFieldContainer: {
    gap: 8,
  },
  textFieldLabel: {
    fontSize: 15,
    fontWeight: '500',
    color: '#3C3C43',
  },
  textField: {
    backgroundColor: '#F2F2F7',
    borderRadius: 10,
    paddingHorizontal: 16,
    paddingVertical: 14,
    fontSize: 17,
    color: '#000000',
    borderWidth: 2,
    borderColor: 'transparent',
  },
  textFieldFocused: {
    borderColor: '#007AFF',
    backgroundColor: '#FFFFFF',
  },
  textFieldError: {
    borderColor: '#FF3B30',
  },
  errorText: {
    fontSize: 13,
    color: '#FF3B30',
  },
});
```

### Lists
```tsx
// Grouped list (iOS Settings style)
const GroupedList = ({ sections }) => (
  <ScrollView style={styles.groupedList}>
    {sections.map((section, i) => (
      <View key={i} style={styles.section}>
        {section.title && (
          <Text style={styles.sectionHeader}>{section.title}</Text>
        )}
        <View style={styles.sectionContent}>
          {section.items.map((item, j) => (
            <React.Fragment key={j}>
              {j > 0 && <View style={styles.separator} />}
              <Pressable
                style={({ pressed }) => [
                  styles.listRow,
                  pressed && { backgroundColor: '#E5E5EA' },
                ]}
                onPress={item.onPress}
              >
                <Text style={styles.listRowText}>{item.title}</Text>
                <ChevronRight color="#C7C7CC" />
              </Pressable>
            </React.Fragment>
          ))}
        </View>
      </View>
    ))}
  </ScrollView>
);

const styles = StyleSheet.create({
  groupedList: {
    flex: 1,
    backgroundColor: '#F2F2F7',
  },
  section: {
    marginTop: 35,
  },
  sectionHeader: {
    fontSize: 13,
    fontWeight: '400',
    color: '#6D6D72',
    textTransform: 'uppercase',
    marginLeft: 16,
    marginBottom: 8,
  },
  sectionContent: {
    backgroundColor: '#FFFFFF',
    borderRadius: 10,
    marginHorizontal: 16,
    overflow: 'hidden',
  },
  listRow: {
    flexDirection: 'row',
    alignItems: 'center',
    justifyContent: 'space-between',
    paddingVertical: 12,
    paddingHorizontal: 16,
    minHeight: 44,
  },
  separator: {
    height: StyleSheet.hairlineWidth,
    backgroundColor: '#C6C6C8',
    marginLeft: 16,
  },
});
```

## Navigation Patterns

### Bottom Tab Bar
```tsx
// Proper bottom tab sizing
const tabBarStyle = {
  height: Platform.OS === 'ios' ? 83 : 65, // Account for home indicator
  paddingBottom: Platform.OS === 'ios' ? 34 : 10,
  paddingTop: 10,
  backgroundColor: '#F8F8F8',
  borderTopWidth: StyleSheet.hairlineWidth,
  borderTopColor: '#C6C6C8',
};

// Tab item
const TabItem = ({ icon, label, active }) => (
  <View style={styles.tabItem}>
    <Icon name={icon} color={active ? '#007AFF' : '#8E8E93'} size={24} />
    <Text style={[
      styles.tabLabel,
      { color: active ? '#007AFF' : '#8E8E93' }
    ]}>
      {label}
    </Text>
  </View>
);
```

### Header
```tsx
// Large title header (iOS)
const LargeTitleHeader = ({ title, rightAction }) => {
  const insets = useSafeAreaInsets();

  return (
    <View style={[styles.header, { paddingTop: insets.top }]}>
      <View style={styles.headerContent}>
        <Text style={styles.largeTitle}>{title}</Text>
        {rightAction}
      </View>
    </View>
  );
};

const styles = StyleSheet.create({
  header: {
    backgroundColor: '#F8F8F8',
    borderBottomWidth: StyleSheet.hairlineWidth,
    borderBottomColor: '#C6C6C8',
  },
  headerContent: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    paddingHorizontal: 16,
    paddingBottom: 8,
  },
  largeTitle: {
    fontSize: 34,
    fontWeight: '700',
    letterSpacing: 0.37,
  },
});
```

## Animations

### Native Driver Animations
```tsx
import { Animated } from 'react-native';

// Always use native driver when possible
const fadeIn = (value: Animated.Value) => {
  Animated.timing(value, {
    toValue: 1,
    duration: 200,
    useNativeDriver: true, // CRITICAL for performance
  }).start();
};

// Spring for natural feel
const bounce = (value: Animated.Value) => {
  Animated.spring(value, {
    toValue: 1,
    damping: 15,
    stiffness: 150,
    useNativeDriver: true,
  }).start();
};
```

### Reanimated for Complex Animations
```tsx
import Animated, {
  useSharedValue,
  useAnimatedStyle,
  withSpring,
} from 'react-native-reanimated';

const AnimatedCard = ({ children }) => {
  const scale = useSharedValue(1);

  const animatedStyle = useAnimatedStyle(() => ({
    transform: [{ scale: scale.value }],
  }));

  const onPressIn = () => {
    scale.value = withSpring(0.95);
  };

  const onPressOut = () => {
    scale.value = withSpring(1);
  };

  return (
    <Pressable onPressIn={onPressIn} onPressOut={onPressOut}>
      <Animated.View style={[styles.card, animatedStyle]}>
        {children}
      </Animated.View>
    </Pressable>
  );
};
```

## Loading States

### Skeleton Loader
```tsx
const SkeletonLoader = ({ width, height, borderRadius = 4 }) => {
  const opacity = useSharedValue(0.3);

  useEffect(() => {
    opacity.value = withRepeat(
      withSequence(
        withTiming(1, { duration: 500 }),
        withTiming(0.3, { duration: 500 })
      ),
      -1,
      false
    );
  }, []);

  const animatedStyle = useAnimatedStyle(() => ({
    opacity: opacity.value,
  }));

  return (
    <Animated.View
      style={[
        { width, height, borderRadius, backgroundColor: '#E5E5EA' },
        animatedStyle,
      ]}
    />
  );
};
```

### Activity Indicator
```tsx
import { ActivityIndicator } from 'react-native';

// Use platform-native indicator
<ActivityIndicator size="large" color="#007AFF" />

// Button with loading state
const LoadingButton = ({ loading, title, onPress }) => (
  <Pressable
    onPress={onPress}
    disabled={loading}
    style={styles.button}
  >
    {loading ? (
      <ActivityIndicator color="#FFFFFF" />
    ) : (
      <Text style={styles.buttonText}>{title}</Text>
    )}
  </Pressable>
);
```

## Accessibility

### VoiceOver / TalkBack
```tsx
// Accessible button
<Pressable
  onPress={onPress}
  accessible={true}
  accessibilityRole="button"
  accessibilityLabel="Submit form"
  accessibilityHint="Double tap to submit your information"
>
  <Text>Submit</Text>
</Pressable>

// Accessible image
<Image
  source={icon}
  accessible={true}
  accessibilityRole="image"
  accessibilityLabel="User profile picture"
/>

// Group related elements
<View
  accessible={true}
  accessibilityRole="summary"
  accessibilityLabel={`${name}, ${role}, ${status}`}
>
  <Text>{name}</Text>
  <Text>{role}</Text>
  <Text>{status}</Text>
</View>
```

### Dynamic Type (iOS)
```tsx
import { PixelRatio } from 'react-native';

// Scale fonts with system settings
const fontScale = PixelRatio.getFontScale();
const scaledFontSize = (size: number) => size * fontScale;

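// Or rely on allowFontScaling (enabled by default on <Text>).
// Do not also multiply fontSize by fontScale, or the text is scaled twice.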
<Text allowFontScaling={true} style={{ fontSize: 17 }}>
  This text scales with system settings
</Text>
```

## Anti-Patterns

### Never Do
```
✗ Touch targets smaller than 44pt
✗ Text smaller than 12pt
✗ Hover states (no hover on mobile)
✗ Fixed heights that break with large text
✗ Ignoring safe areas
✗ Heavy shadows on Android (use elevation)
✗ White text on light backgrounds without checking contrast
✗ Non-native animations (JS-driven transforms)
✗ Ignoring platform conventions (iOS vs Android)
✗ Inline styles everywhere (use StyleSheet.create)
```

### Common Mistakes
```tsx
// ✗ Hardcoded dimensions that break accessibility
style={{ height: 40 }}  // Text might be larger

// ✓ Minimum height with padding
style={{ minHeight: 44, paddingVertical: 12 }}

// ✗ Shadow on Android
shadowColor: '#000'  // Won't work

// ✓ Platform-specific
...Platform.select({
  ios: { shadowColor: '#000', ... },
  android: { elevation: 4 },
})

// ✗ Fixed status bar height
paddingTop: 44

// ✓ Use safe area
paddingTop: insets.top
```

## Quick Reference

### Mobile Defaults
```
Touch targets: 44pt minimum
Font sizes: 12pt min, 17pt body, 34pt large title
Border radius: 10-14pt (iOS), 12-28pt (Android)
Spacing: 4/8/16/24/32 grid
Animations: 200-300ms, native driver
Shadow: iOS shadowOpacity 0.08-0.15, Android elevation 2-8
```

### Premium Feel Checklist
```
□ All touch targets 44pt+
□ Consistent spacing (4pt grid)
□ Platform-appropriate styling
□ Safe area handling
□ Native animations (60fps)
□ Proper loading states
□ Dark mode support
□ Accessibility labels
□ Haptic feedback on actions
□ Pull-to-refresh where appropriate
```


================================================
FILE: skills/ui-testing/SKILL.md
================================================
---
name: ui-testing
description: Visual testing - catch invisible buttons, broken layouts, contrast
when-to-use: When writing visual or accessibility tests for UI components
user-invocable: false
paths: ["**/*.test.tsx", "**/*.spec.tsx", "**/*.stories.*"]
effort: medium
---

# UI Verification Skill

*Load with: ui-web.md or ui-mobile.md*

## Purpose

Quick verification that generated UI meets accessibility standards. Run these checks after creating any new UI components.

---

## Pre-Flight Checklist

### Before Shipping ANY UI:

```markdown
## Visibility Check
- [ ] All buttons have visible background OR border
- [ ] No text is same color as its background
- [ ] All text meets 4.5:1 contrast ratio
- [ ] Ghost/text buttons have visible borders

## Touch/Click Targets
- [ ] All buttons are minimum 44px height
- [ ] Icon buttons are minimum 44x44px
- [ ] Adequate spacing between clickable elements

## States
- [ ] Hover states visible (web)
- [ ] Pressed states visible (mobile)
- [ ] Focus rings on keyboard navigation
- [ ] Disabled states visually distinct (opacity 0.5)
- [ ] Loading states show indicators

## Dark Mode (if applicable)
- [ ] Text readable on dark backgrounds
- [ ] Borders visible in dark mode
- [ ] No gray-400 text on dark backgrounds

## Responsive (web)
- [ ] No horizontal scroll on mobile (320px)
- [ ] Content readable at all breakpoints
- [ ] Touch targets adequate on mobile
```

---

## Quick Contrast Check

### Use Browser DevTools
```
1. Right-click element → Inspect
2. In Styles panel, click on color value
3. Look for contrast ratio display
4. Must show ✓ for AA compliance (4.5:1 for text)
```

### Online Tools
- https://webaim.org/resources/contrastchecker/
- https://coolors.co/contrast-checker

### Tailwind Safe Combinations

```
LIGHT MODE (on white bg):
✓ text-gray-900  (#111827) = 16:1
✓ text-gray-800  (#1F2937) = 12:1
✓ text-gray-700  (#374151) = 9:1
✓ text-gray-600  (#4B5563) = 6:1
⚠ text-gray-500  (#6B7280) = 4.8:1 (passes AA, but barely - prefer gray-600)
✗ text-gray-400  (#9CA3AF) = 2.6:1 (FAILS)

DARK MODE (on gray-900 bg):
✓ text-white     (#FFFFFF) = 16:1
✓ text-gray-100  (#F3F4F6) = 13:1
✓ text-gray-200  (#E5E7EB) = 11:1
✓ text-gray-300  (#D1D5DB) = 8:1
⚠ text-gray-400  (#9CA3AF) = 7:1 (passes AA here, but only ~5.8:1 on gray-800 - prefer gray-300)
✗ text-gray-500  (#6B7280) = 3:1 (FAILS)
```

---

## Common Fixes

### Invisible Button
```tsx
// PROBLEM: No visible boundary
<button className="text-gray-500">Click</button>

// FIX: Add background OR border
<button className="bg-gray-100 text-gray-900 px-4 py-3 rounded-lg">
  Click
</button>
// OR
<button className="border border-gray-300 text-gray-700 px-4 py-3 rounded-lg">
  Click
</button>
```

### Low Contrast Text
```tsx
// PROBLEM: Light gray on white
<p className="text-gray-400">Secondary text</p>

// FIX: Use darker gray
<p className="text-gray-600">Secondary text</p>
```

### Missing Focus State
```tsx
// PROBLEM: Focus removed without replacement
<button className="outline-none">Submit</button>

// FIX: Add visible focus ring
<button className="outline-none focus-visible:ring-2 focus-visible:ring-blue-500 focus-visible:ring-offset-2">
  Submit
</button>
```

### Small Touch Target
```tsx
// PROBLEM: Too small for fingers
<button className="p-1 text-sm">×</button>

// FIX: Minimum 44px
<button className="w-11 h-11 flex items-center justify-center">×</button>
```

### Dark Mode Broken
```tsx
// PROBLEM: Same colors in both modes
<p className="text-gray-400">Text</p>

// FIX: Adjust for dark mode
<p className="text-gray-600 dark:text-gray-300">Text</p>
```

---

## Automated Checks (Optional)

### ESLint Plugin
```bash
npm install -D eslint-plugin-jsx-a11y
```

```json
// .eslintrc
{
  "extends": ["plugin:jsx-a11y/recommended"]
}
```

### Playwright Quick Test
```typescript
// e2e/accessibility.spec.ts
import { test, expect } from '@playwright/test';
import AxeBuilder from '@axe-core/playwright';

test('no accessibility violations', async ({ page }) => {
  await page.goto('/');
  const results = await new AxeBuilder({ page }).analyze();
  expect(results.violations).toEqual([]);
});
```

---

## When to Use Full Testing

Add comprehensive visual testing (Playwright screenshots, Storybook) when:
- Building a component library
- Multiple developers on UI
- Frequent UI changes
- Design system enforcement needed

For solo projects or MVPs, the checklist above is sufficient.


================================================
FILE: skills/ui-web/SKILL.md
================================================
---
name: ui-web
description: Web UI - glassmorphism, Tailwind, dark mode, accessibility
when-to-use: When building or styling web UI components
user-invocable: false
paths: ["**/*.tsx", "**/*.jsx", "**/*.css", "**/*.scss", "tailwind.config.*"]
effort: medium
---

# UI Design Skill (Web)


---

## MANDATORY: WCAG 2.1 AA Compliance

**These rules are NON-NEGOTIABLE. Every UI element must pass these checks.**

### 1. Color Contrast (CRITICAL)
```
Text Contrast Requirements:
├── Normal text (below the large-text threshold): 4.5:1 minimum
├── Large text (≥18.66px/14pt bold or ≥24px/18pt): 3:1 minimum
├── UI components (buttons, inputs): 3:1 minimum
└── Focus indicators: 3:1 minimum

FORBIDDEN COLOR COMBINATIONS:
✗ gray-400 on white (#9CA3AF on #FFFFFF = 2.6:1) - FAILS
⚠ gray-500 on white (#6B7280 on #FFFFFF = 4.8:1) - BARELY PASSES AA, no safety margin; prefer gray-600
✗ white on yellow - FAILS
✗ light blue on white - USUALLY FAILS

SAFE COLOR COMBINATIONS:
✓ gray-700 on white (#374151 on #FFFFFF = 9.2:1)
✓ gray-600 on white (#4B5563 on #FFFFFF = 6.4:1)
✓ gray-900 on white (#111827 on #FFFFFF = 16:1)
✓ white on gray-900, blue-600, green-700
```

### 2. Visibility Rules (CRITICAL)
```
ALL BUTTONS MUST HAVE:
✓ Visible background color OR visible border (min 1px)
✓ Text color that contrasts with background
✓ Minimum height: 44px (touch target)
✓ Padding: at least px-4 py-2

NEVER CREATE:
✗ Buttons with transparent background AND no border
✗ Text same color as background
✗ Ghost buttons without visible borders
✗ White text on light backgrounds
✗ Dark text on dark backgrounds
```

### 3. Required Element Styles
```tsx
// EVERY button needs visible boundaries
// PRIMARY: solid background
<button className="bg-gray-900 text-white px-4 py-3 rounded-lg">
  Primary
</button>

// SECONDARY: visible background
<button className="bg-gray-100 text-gray-900 px-4 py-3 rounded-lg">
  Secondary
</button>

// GHOST: MUST have visible border
<button className="border border-gray-300 text-gray-700 px-4 py-3 rounded-lg">
  Ghost
</button>

// NEVER DO THIS:
<button className="text-gray-500">Invisible Button</button> // ✗ NO BOUNDARY
<button className="bg-white text-white">Hidden</button>     // ✗ NO CONTRAST
```

### 4. Focus States (REQUIRED)
```tsx
// EVERY interactive element needs visible focus
className="focus:outline-none focus-visible:ring-2 focus-visible:ring-blue-500 focus-visible:ring-offset-2"

// NEVER remove focus without replacement
className="outline-none" // ✗ FORBIDDEN without ring replacement
```

### 5. Dark Mode Contrast
```
When implementing dark mode:
├── Text must be light (gray-100 to white) on dark backgrounds
├── Borders must be visible (gray-700 or lighter)
├── Never use gray-500 or darker text on gray-900 bg (fails 4.5:1 contrast)
└── Test BOTH modes before shipping

SAFE DARK MODE TEXT:
✓ text-white on bg-gray-900
✓ text-gray-100 on bg-gray-800
✓ text-gray-200 on bg-gray-900

UNSAFE (FAILS CONTRAST):
✗ text-gray-500 on bg-gray-900 (3.7:1)
✗ text-gray-500 on bg-gray-800 (3.0:1)
```

---

## Core Philosophy

**Beautiful UI is not decoration - it's communication.** Every visual choice should serve clarity, hierarchy, and user confidence. Default to elegance and restraint.

## Design Principles

### 1. Visual Hierarchy
```
Primary Action    → Bold, high contrast, prominent
Secondary Action  → Subtle, lower contrast
Tertiary/Links    → Minimal, text-style
```

### 2. Spacing System (8px Grid)
```typescript
// Tailwind spacing scale - USE CONSISTENTLY
const spacing = {
  xs: 'p-1',      // 4px  - tight internal
  sm: 'p-2',      // 8px  - compact
  md: 'p-4',      // 16px - default
  lg: 'p-6',      // 24px - comfortable
  xl: 'p-8',      // 32px - spacious
  '2xl': 'p-12',  // 48px - section gaps
};

// Rule: More whitespace = more premium feel
// Rule: Consistent spacing > perfect spacing
```

### 3. Typography Scale
```typescript
// Limit to 3-4 font sizes per page
const typography = {
  hero: 'text-4xl md:text-5xl font-bold tracking-tight',
  heading: 'text-2xl md:text-3xl font-semibold',
  subheading: 'text-lg md:text-xl font-medium',
  body: 'text-base leading-relaxed',
  caption: 'text-sm text-gray-500',
};

// Rule: Never use more than 2 font families
// Rule: Line height 1.5-1.7 for body text
```

## Glassmorphism (Web)

### Base Glass Card
```tsx
// Modern glass effect - use sparingly for emphasis
const GlassCard = ({ children, className = '' }) => (
  <div className={`
    backdrop-blur-xl
    bg-white/10
    border border-white/20
    rounded-2xl
    shadow-xl
    shadow-black/5
    ${className}
  `}>
    {children}
  </div>
);
```

### Glass Variants
```tsx
// Light mode glass
const lightGlass = `
  backdrop-blur-xl
  bg-white/70
  border border-white/50
  shadow-lg shadow-gray-200/50
`;

// Dark mode glass
const darkGlass = `
  backdrop-blur-xl
  bg-gray-900/70
  border border-white/10
  shadow-xl shadow-black/20
`;

// Frosted sidebar
const frostedSidebar = `
  backdrop-blur-2xl
  bg-gradient-to-b from-white/80 to-white/60
  border-r border-white/30
`;

// Floating action glass
const floatingGlass = `
  backdrop-blur-md
  bg-white/90
  rounded-full
  shadow-lg shadow-black/10
  border border-white/50
`;
```

### When to Use Glassmorphism
```
✓ Hero sections with image backgrounds
✓ Floating cards over gradients
✓ Modal overlays
✓ Navigation bars (subtle)
✓ Feature highlights

✗ Every card (overuse kills the effect)
✗ Text-heavy content areas
✗ Forms (reduces contrast)
✗ Data tables
```

## Color System

### Semantic Colors
```typescript
const colors = {
  // Actions
  primary: 'bg-blue-600 hover:bg-blue-700',
  secondary: 'bg-gray-100 hover:bg-gray-200 text-gray-900',
  danger: 'bg-red-600 hover:bg-red-700',
  success: 'bg-green-600 hover:bg-green-700',

  // Surfaces
  background: 'bg-gray-50 dark:bg-gray-950',
  surface: 'bg-white dark:bg-gray-900',
  elevated: 'bg-white dark:bg-gray-800 shadow-lg',

  // Text
  textPrimary: 'text-gray-900 dark:text-white',
  textSecondary: 'text-gray-600 dark:text-gray-400',
  textMuted: 'text-gray-400 dark:text-gray-500',
};
```

### Gradient Backgrounds
```tsx
// Subtle mesh gradient (modern, premium)
const meshGradient = `
  bg-gradient-to-br
  from-blue-50 via-white to-purple-50
  dark:from-gray-950 dark:via-gray-900 dark:to-gray-950
`;

// Vibrant hero gradient
const heroGradient = `
  bg-gradient-to-r
  from-blue-600 via-purple-600 to-pink-600
`;

// Subtle radial glow
const radialGlow = `
  bg-[radial-gradient(ellipse_at_top,_var(--tw-gradient-stops))]
  from-blue-200/40 via-transparent to-transparent
`;
```

## Component Patterns

### Buttons
```tsx
// Primary button - bold, confident
const PrimaryButton = ({ children, ...props }) => (
  <button
    className="
      px-6 py-3
      bg-gray-900 dark:bg-white
      text-white dark:text-gray-900
      font-medium
      rounded-xl
      transition-all duration-200
      hover:bg-gray-800 dark:hover:bg-gray-100
      hover:shadow-lg hover:shadow-gray-900/20
      active:scale-[0.98]
      disabled:opacity-50 disabled:cursor-not-allowed
    "
    {...props}
  >
    {children}
  </button>
);

// Secondary button - subtle
const SecondaryButton = ({ children, ...props }) => (
  <button
    className="
      px-6 py-3
      bg-gray-100 dark:bg-gray-800
      text-gray-900 dark:text-white
      font-medium
      rounded-xl
      transition-all duration-200
      hover:bg-gray-200 dark:hover:bg-gray-700
      active:scale-[0.98]
    "
    {...props}
  >
    {children}
  </button>
);

// Ghost button - minimal
const GhostButton = ({ children, ...props }) => (
  <button
    className="
      px-4 py-2
      text-gray-600 dark:text-gray-400
      font-medium
      rounded-lg
      transition-colors duration-200
      hover:text-gray-900 dark:hover:text-white
      hover:bg-gray-100 dark:hover:bg-gray-800
    "
    {...props}
  >
    {children}
  </button>
);
```

### Cards
```tsx
// Clean card with subtle elevation
const Card = ({ children, className = '' }) => (
  <div className={`
    bg-white dark:bg-gray-900
    rounded-2xl
    border border-gray-200 dark:border-gray-800
    shadow-sm
    hover:shadow-md
    transition-shadow duration-300
    ${className}
  `}>
    {children}
  </div>
);

// Interactive card
const InteractiveCard = ({ children, onClick }) => (
  <button
    onClick={onClick}
    className="
      w-full text-left
      bg-white dark:bg-gray-900
      rounded-2xl
      border border-gray-200 dark:border-gray-800
      p-6
      transition-all duration-300
      hover:border-gray-300 dark:hover:border-gray-700
      hover:shadow-lg
      hover:-translate-y-1
      active:scale-[0.99]
    "
  >
    {children}
  </button>
);
```

### Input Fields
```tsx
const Input = ({ label, error, ...props }) => (
  <div className="space-y-2">
    {label && (
      <label className="block text-sm font-medium text-gray-700 dark:text-gray-300">
        {label}
      </label>
    )}
    <input
      className={`
        w-full px-4 py-3
        bg-gray-50 dark:bg-gray-800
        border-2 rounded-xl
        text-gray-900 dark:text-white
        placeholder-gray-400 dark:placeholder-gray-500
        transition-all duration-200
        focus:outline-none focus:ring-0
        ${error
          ? 'border-red-500 focus:border-red-500'
          : 'border-transparent focus:border-blue-500 focus:bg-white dark:focus:bg-gray-900'
        }
      `}
      {...props}
    />
    {error && (
      <p className="text-sm text-red-500">{error}</p>
    )}
  </div>
);
```

## Micro-Interactions

### Transitions
```typescript
// Standard transitions - ALWAYS use
const transitions = {
  fast: 'transition-all duration-150',      // Hover states
  normal: 'transition-all duration-200',    // Most interactions
  slow: 'transition-all duration-300',      // Card hovers, modals
  spring: 'transition-all duration-500 ease-out', // Page transitions
};

// Rule: Everything interactive should transition
// Rule: 150-300ms feels responsive, >500ms feels slow
```

### Hover Effects
```tsx
// Scale on hover (buttons, cards)
className="hover:scale-105 active:scale-95 transition-transform"

// Lift on hover (cards)
className="hover:-translate-y-1 hover:shadow-xl transition-all"

// Glow on hover (CTAs)
className="hover:shadow-lg hover:shadow-blue-500/25 transition-shadow"

// Border highlight (inputs, cards)
className="hover:border-gray-300 transition-colors"
```

### Loading States
```tsx
// Skeleton loader
const Skeleton = ({ className = '' }) => (
  <div className={`
    animate-pulse
    bg-gray-200 dark:bg-gray-800
    rounded-lg
    ${className}
  `} />
);

// Spinner
const Spinner = ({ size = 'md' }) => (
  <div className={`
    animate-spin rounded-full
    border-2 border-gray-200 dark:border-gray-700
    border-t-blue-600
    ${size === 'sm' ? 'w-4 h-4' : size === 'lg' ? 'w-8 h-8' : 'w-6 h-6'}
  `} />
);

// Button loading state
<button disabled className="relative">
  <span className="opacity-0">Submit</span>
  <Spinner className="absolute inset-0 m-auto" />
</button>
```

## Layout Patterns

### Container
```tsx
// Consistent max-width and padding
const Container = ({ children, className = '' }) => (
  <div className={`
    max-w-7xl mx-auto
    px-4 sm:px-6 lg:px-8
    ${className}
  `}>
    {children}
  </div>
);
```

### Section Spacing
```tsx
// Consistent vertical rhythm
const Section = ({ children }) => (
  <section className="py-16 md:py-24">
    <Container>{children}</Container>
  </section>
);
```

### Grid Systems
```tsx
// Feature grid
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
  {features.map(f => <FeatureCard key={f.id} {...f} />)}
</div>

// Bento grid (modern asymmetric)
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
  <div className="col-span-2 row-span-2">Large</div>
  <div className="col-span-1">Small</div>
  <div className="col-span-1">Small</div>
  <div className="col-span-2">Medium</div>
</div>
```

## Dark Mode

### Implementation
```tsx
// Always design for both modes
// Use CSS variables or Tailwind dark: prefix

// Theme toggle
const ThemeToggle = () => {
  const [dark, setDark] = useState(false);

  useEffect(() => {
    document.documentElement.classList.toggle('dark', dark);
  }, [dark]);

  return (
    <button onClick={() => setDark(!dark)}>
      {dark ? <SunIcon /> : <MoonIcon />}
    </button>
  );
};
```

### Color Pairing
```
Light Mode          Dark Mode
─────────────────────────────────
white               gray-950
gray-50             gray-900
gray-100            gray-800
gray-200            gray-700
gray-900 (text)     white (text)
gray-600 (secondary) gray-400
blue-600            blue-500
```

## Accessibility

### Contrast Requirements
```
WCAG AA: 4.5:1 for normal text, 3:1 for large text
WCAG AAA: 7:1 for normal text, 4.5:1 for large text

// Test: Use browser devtools or contrast checker
// Rule: Never use gray-400 on white for body text
```

### Focus States
```tsx
// Always visible focus rings
className="
  focus:outline-none
  focus-visible:ring-2
  focus-visible:ring-blue-500
  focus-visible:ring-offset-2
"

// Never remove focus styles without replacement
// ✗ outline-none (alone)
// ✓ outline-none + focus-visible:ring
```

### Screen Readers
```tsx
// Visually hidden but accessible
const srOnly = "absolute w-px h-px p-0 -m-px overflow-hidden whitespace-nowrap border-0";

// Icon buttons need labels
<button aria-label="Close menu">
  <XIcon className="w-6 h-6" />
</button>

// Announce dynamic content
<div role="status" aria-live="polite">
  {message}
</div>
```

## Anti-Patterns

### Never Do
```
✗ More than 3 font sizes on a page
✗ Random spacing values (use 8px grid)
✗ Pure black (#000) on pure white (#fff)
✗ Colored text on colored backgrounds without checking contrast
✗ Animations longer than 500ms for UI elements
✗ Glassmorphism everywhere
✗ Drop shadows on everything
✗ Gradients on text (hard to read)
✗ Auto-playing animations that can't be stopped
✗ Removing focus indicators
✗ Gray text below 4.5:1 contrast
✗ Tiny click targets (< 44px)
```

### Common Mistakes
```tsx
// ✗ Too many shadows
className="shadow-sm shadow-md shadow-lg" // Pick ONE

// ✗ Inconsistent rounding
className="rounded-sm rounded-lg rounded-2xl" // System: sm, lg, xl, 2xl

// ✗ Competing focal points
// One primary CTA per viewport

// ✗ Over-decorated
// If it doesn't serve function, remove it
```

## Quick Reference

### Modern Defaults
```tsx
// Border radius: 12-16px (rounded-xl to rounded-2xl)
// Shadow: subtle (shadow-sm to shadow-md)
// Font: Inter, SF Pro, system-ui
// Primary: Near-black or brand color
// Transitions: 200ms ease-out
// Spacing: 8px grid (Tailwind default)
```

### Premium Feel Checklist
```
□ Generous whitespace
□ Subtle shadows (not harsh)
□ Smooth transitions on all interactions
□ Consistent border radius
□ Limited color palette (2-3 colors max)
□ Typography hierarchy (3 sizes max)
□ High-quality imagery
□ Micro-interactions on hover/focus
□ Dark mode support
```


================================================
FILE: skills/user-journeys/SKILL.md
================================================
---
name: user-journeys
description: User experience flows - journey mapping, UX validation, error recovery
when-to-use: When mapping user flows, validating UX, or designing error recovery
user-invocable: false
effort: medium
---

# User Journeys Skill


For defining and testing real user experiences - not just specs, but actual flows humans take through your application.

---

## Philosophy

**Specs test features. Journeys test experiences.**

A feature can pass all specs but still deliver a terrible experience. User journeys capture:
- How users actually navigate (not how we think they should)
- Emotional states at each step (frustrated, confused, delighted)
- Recovery from mistakes (users will make them)
- Real-world conditions (slow networks, interruptions, distractions)

---

## Journey Documentation Structure

```
_project_specs/
├── journeys/
│   ├── _template.md              # Journey template
│   ├── critical/                 # Must-work journeys (revenue, core value)
│   │   ├── signup-to-first-value.md
│   │   ├── checkout-purchase.md
│   │   └── login-to-dashboard.md
│   ├── common/                   # Frequent user paths
│   │   ├── browse-and-search.md
│   │   ├── update-profile.md
│   │   └── invite-team-member.md
│   └── edge-cases/               # Error recovery, unusual paths
│       ├── payment-failure-retry.md
│       ├── session-timeout-recovery.md
│       └── offline-reconnection.md
```

---

## Journey Template

```markdown
# Journey: [Name]

## Overview
| Attribute | Value |
|-----------|-------|
| **Priority** | Critical / High / Medium |
| **User Type** | New / Returning / Admin |
| **Frequency** | Daily / Weekly / One-time |
| **Success Metric** | Conversion rate, time to complete, drop-off rate |

## User Goal
What is the user trying to accomplish? Write from their perspective.

> "I want to [goal] so that I can [benefit]."

## Preconditions
- User state (logged in, has subscription, first visit)
- Data state (has items in cart, has team members)
- Environment (mobile, desktop, slow connection)

## Journey Steps

### Step 1: [Entry Point]
**User Action:** What the user does
**System Response:** What they should see/experience
**Success Criteria:**
- [ ] Page loads in < 2 seconds
- [ ] Primary CTA is immediately visible
- [ ] User understands what to do next

**Potential Friction:**
- Slow load time → Show skeleton/loader
- Unclear CTA → A/B test copy variations

---

### Step 2: [Next Action]
**User Action:** ...
**System Response:** ...
**Success Criteria:**
- [ ] ...

**Potential Friction:**
- ...

---

## Error Scenarios

### E1: [Error Name]
**Trigger:** What causes this error
**User Sees:** Error message/state
**Recovery Path:** How user gets back on track
**Test:** How to verify recovery works

## Metrics to Track
- Time to complete journey
- Drop-off rate at each step
- Error rate and recovery rate
- User satisfaction (if surveyed)

## E2E Test Reference
Link to Playwright test: `e2e/tests/journeys/[name].spec.ts`
```

---

## Critical Journey Examples

### Signup to First Value

```markdown
# Journey: Signup to First Value

## Overview
| Attribute | Value |
|-----------|-------|
| **Priority** | Critical |
| **User Type** | New |
| **Frequency** | One-time |
| **Success Metric** | % reaching "aha moment" within 5 min |

## User Goal
> "I want to try this product quickly to see if it solves my problem."

## Preconditions
- First visit to site
- No account
- Came from landing page or ad

## Journey Steps

### Step 1: Landing Page
**User Action:** Clicks "Get Started Free" or "Try Now"
**System Response:** Signup form appears (modal or new page)
**Success Criteria:**
- [ ] CTA visible above fold
- [ ] No distracting elements
- [ ] Clear value proposition visible

**Potential Friction:**
- Too many form fields → Reduce to email + password only
- Social login missing → Add Google/GitHub options

### Step 2: Account Creation
**User Action:** Enters email and password (or uses social login)
**System Response:**
- Creates account
- Sends verification email (don't block on it)
- Redirects to onboarding

**Success Criteria:**
- [ ] Account created in < 3 seconds
- [ ] No email verification wall (verify later)
- [ ] Clear next step shown

**Potential Friction:**
- Email already exists → Offer login link
- Weak password → Show requirements inline, not after submit

### Step 3: Onboarding (Quick Win)
**User Action:** Completes 1-2 setup questions
**System Response:**
- Personalizes experience
- Shows progress indicator
- Leads to first action

**Success Criteria:**
- [ ] Max 3 questions
- [ ] Skip option available
- [ ] < 60 seconds total

**Potential Friction:**
- Too many questions → User abandons
- No skip option → User feels trapped

### Step 4: First Value (Aha Moment)
**User Action:** Completes core action (creates first X, sees first result)
**System Response:**
- Celebrates success
- Shows value delivered
- Suggests next step

**Success Criteria:**
- [ ] User experiences core value
- [ ] Completion feels rewarding
- [ ] Clear path to continue

## Error Scenarios

### E1: Email Already Registered
**Trigger:** User tries existing email
**User Sees:** "Already have an account? Log in or reset password"
**Recovery Path:** Click to login or reset
**Test:** `signup-existing-email.spec.ts`

### E2: Social Login Fails
**Trigger:** OAuth provider error
**User Sees:** "Couldn't connect. Try email signup or try again."
**Recovery Path:** Email signup form shown as fallback
**Test:** `social-login-failure.spec.ts`

## Metrics to Track
- Signup → First Value: Target < 5 min
- Drop-off at each step
- Social vs email signup ratio
- Skip rate on onboarding
```

---

### Checkout Purchase

```markdown
# Journey: Checkout Purchase

## Overview
| Attribute | Value |
|-----------|-------|
| **Priority** | Critical (Revenue) |
| **User Type** | Any |
| **Frequency** | Variable |
| **Success Metric** | Checkout completion rate |

## User Goal
> "I want to pay quickly and securely without surprises."

## Journey Steps

### Step 1: Cart Review
**User Action:** Views cart before checkout
**System Response:**
- Shows all items with images, prices
- Shows subtotal, taxes, shipping
- Clear "Checkout" CTA

**Success Criteria:**
- [ ] No hidden fees revealed later
- [ ] Easy to modify quantities
- [ ] Saved items visible

### Step 2: Checkout Start
**User Action:** Clicks "Checkout"
**System Response:**
- Shows checkout form or redirect to payment
- Progress indicator (Step 1 of 3)
- Order summary sidebar

**Success Criteria:**
- [ ] Guest checkout option
- [ ] Express checkout (Apple/Google Pay) prominent
- [ ] Form fields pre-filled if logged in

### Step 3: Payment
**User Action:** Enters payment info
**System Response:**
- Secure input fields (Stripe/payment provider)
- Real-time validation
- Clear "Pay $XX" button

**Success Criteria:**
- [ ] Card validation inline, not after submit
- [ ] Multiple payment options
- [ ] Security indicators visible

### Step 4: Confirmation
**User Action:** Submits payment
**System Response:**
- Processing indicator
- Success page with order details
- Email confirmation sent

**Success Criteria:**
- [ ] Confirmation within 5 seconds
- [ ] Order number clearly visible
- [ ] Next steps clear (shipping, access, etc.)

## Error Scenarios

### E1: Payment Declined
**Trigger:** Card declined by processor
**User Sees:** "Payment declined. Please try another card."
**Recovery Path:**
- Stay on payment step
- Pre-fill other fields
- Offer alternative payment methods
**Test:** `payment-declined-recovery.spec.ts`

### E2: Session Timeout During Checkout
**Trigger:** User away too long
**User Sees:** Cart preserved, re-auth required
**Recovery Path:**
- Quick login
- Return to same checkout step
- Cart contents intact
**Test:** `checkout-session-timeout.spec.ts`
```

---

## Journey Testing with Playwright

### Journey Test Structure

```typescript
// e2e/tests/journeys/signup-to-value.spec.ts
import { test, expect } from '@playwright/test';

// End-to-end walk through the "Signup to First Value" journey, one test per
// journey step. Steps 3 and 4 contain placeholders ("...") that must be filled
// in with project-specific login / journey actions before they are meaningful.
test.describe('Journey: Signup to First Value', () => {
  test.describe.configure({ mode: 'serial' }); // Run in order

  test('Step 1: Landing page has clear CTA', async ({ page }) => {
    await page.goto('/');

    // CTA visible above fold without scrolling
    const cta = page.getByRole('button', { name: /get started|try free/i });
    await expect(cta).toBeVisible();
    await expect(cta).toBeInViewport();
  });

  test('Step 2: Can create account quickly', async ({ page }) => {
    await page.goto('/');
    await page.getByRole('button', { name: /get started/i }).click();

    // Minimal fields
    await expect(page.getByLabel('Email')).toBeVisible();
    await expect(page.getByLabel('Password')).toBeVisible();

    // Complete signup
    // The timer starts before the fields are filled, so the measured time
    // covers typing, submitting, and the redirect to onboarding.
    const startTime = Date.now();
    await page.getByLabel('Email').fill('newuser@example.com');
    await page.getByLabel('Password').fill('SecurePass123!');
    await page.getByRole('button', { name: /sign up|create/i }).click();

    // Should reach onboarding quickly
    await expect(page).toHaveURL(/onboarding|welcome|setup/);
    // NOTE(review): the journey document targets "< 3 seconds" for account
    // creation; this budget is 5 s and includes form filling — confirm intent.
    expect(Date.now() - startTime).toBeLessThan(5000); // < 5 seconds
  });

  test('Step 3: Onboarding is skippable', async ({ page }) => {
    // ... login as new user ...
    await page.goto('/onboarding');

    // Skip option exists
    // Only the presence of the skip button is asserted; clicking it is not
    // exercised here.
    const skipButton = page.getByRole('button', { name: /skip/i });
    await expect(skipButton).toBeVisible();
  });

  test('Step 4: Can reach first value in < 5 min', async ({ page }) => {
    // Full journey timing
    // journeyStart is captured inside this test, so the elapsed time covers
    // only the actions performed in this test body, not Steps 1-3 above.
    const journeyStart = Date.now();

    // ... complete full journey ...

    // Verify first value delivered
    await expect(page.getByText(/success|created|done/i)).toBeVisible();

    // Total time check
    const totalTime = (Date.now() - journeyStart) / 1000 / 60; // minutes
    expect(totalTime).toBeLessThan(5);
  });
});
```

### Error Recovery Tests

```typescript
// e2e/tests/journeys/checkout-recovery.spec.ts
import { test, expect } from '@playwright/test';

// Error-recovery scenarios for the checkout journey: a declined card followed
// by a successful retry, and cart persistence after the session is cleared.
test.describe('Journey: Checkout Error Recovery', () => {
  test('recovers from payment decline gracefully', async ({ page }) => {
    // Setup: Add item to cart, go to checkout
    await page.goto('/products');
    await page.getByTestId('add-to-cart').first().click();
    await page.getByRole('link', { name: 'Checkout' }).click();

    // Use Stripe test card that declines
    // The same frame locator is reused for the retry further down.
    const stripeFrame = page.frameLocator('iframe[name*="stripe"]');
    await stripeFrame.getByPlaceholder('Card number').fill('4000000000000002');
    await stripeFrame.getByPlaceholder('MM / YY').fill('12/30');
    await stripeFrame.getByPlaceholder('CVC').fill('123');

    await page.getByRole('button', { name: /pay/i }).click();

    // Verify friendly error
    await expect(page.getByText(/declined|try another/i)).toBeVisible();

    // Verify still on checkout (not kicked out)
    await expect(page).toHaveURL(/checkout/);

    // Verify can try again with different card
    // Only the card number is re-entered; expiry and CVC are assumed to still
    // be filled in after the decline — TODO confirm for your payment form.
    await stripeFrame.getByPlaceholder('Card number').fill('4242424242424242');
    await page.getByRole('button', { name: /pay/i }).click();

    // Should succeed now
    await expect(page).toHaveURL(/success|confirmation/);
  });

  test('preserves cart after session timeout', async ({ page, context }) => {
    // Add items to cart
    await page.goto('/products');
    await page.getByTestId('add-to-cart').first().click();

    // Clear session (simulate timeout)
    // NOTE(review): this simulates a timeout only if the session lives in
    // cookies — confirm against your auth/session implementation.
    await context.clearCookies();

    // Return to site
    await page.goto('/cart');

    // Cart should be preserved (local storage or recovered)
    await expect(page.getByTestId('cart-item')).toHaveCount(1);
  });
});
```

---

## User Experience Validation

### UX Checklist per Journey Step

```markdown
## UX Validation Checklist

### Clarity
- [ ] User knows where they are (breadcrumbs, progress)
- [ ] User knows what to do next (clear CTA)
- [ ] User knows what just happened (feedback)

### Speed
- [ ] Page loads < 2 seconds
- [ ] Actions complete < 3 seconds
- [ ] Progress shown for longer operations

### Forgiveness
- [ ] Mistakes are easy to undo
- [ ] Errors explain what went wrong
- [ ] Recovery path is clear

### Accessibility
- [ ] Keyboard navigation works
- [ ] Screen reader announces changes
- [ ] Focus management correct
- [ ] Color contrast sufficient

### Mobile
- [ ] Touch targets >= 44px
- [ ] No horizontal scroll
- [ ] Forms don't zoom unexpectedly
- [ ] Works on slow 3G
```

### Automated UX Checks

```typescript
// e2e/utils/ux-validators.ts
import { Page, expect } from '@playwright/test';

/**
 * Asserts that the current page finished loading within `maxMs` milliseconds.
 *
 * The duration is read from the page's Navigation Timing entry as
 * `loadEventEnd - startTime`. This helper does not navigate; call it after
 * `page.goto(...)`.
 *
 * @param page  Playwright page whose navigation should be measured.
 * @param maxMs Upper bound for the load duration in milliseconds (default 2000).
 */
export async function validatePageLoad(page: Page, maxMs = 2000) {
  // loadEventEnd is 0 until the load event has completed. Reading it earlier
  // gives a non-positive duration, which would make the assertion below pass
  // without measuring anything — so wait until the value is populated.
  await page.waitForFunction(() => {
    const nav = performance.getEntriesByType('navigation')[0] as
      | PerformanceNavigationTiming
      | undefined;
    return nav !== undefined && nav.loadEventEnd > 0;
  });

  const timing = await page.evaluate(() => {
    const nav = performance.getEntriesByType('navigation')[0] as PerformanceNavigationTiming;
    return nav.loadEventEnd - nav.startTime;
  });
  expect(timing).toBeLessThan(maxMs);
}

/**
 * Asserts that a call-to-action button is both rendered and inside the
 * current viewport.
 *
 * @param page    Playwright page to inspect.
 * @param ctaText Pattern matched against the button's accessible name.
 */
export async function validateCTAVisible(page: Page, ctaText: RegExp) {
  const primaryAction = page.getByRole('button', { name: ctaText });

  // Visible first, then in viewport.
  await expect(primaryAction).toBeVisible();
  await expect(primaryAction).toBeInViewport();
}

/**
 * Asserts that the layout shift accumulated on the page stays below 0.1.
 *
 * Inside the page, a PerformanceObserver sums the `value` of every
 * `layout-shift` entry that was not preceded by recent user input, including
 * buffered entries. The sum is taken after a fixed one-second window.
 *
 * @param page Playwright page to observe.
 */
export async function validateNoLayoutShift(page: Page) {
  const cumulativeShift = await page.evaluate(
    () =>
      new Promise<number>((done) => {
        let total = 0;

        const shiftObserver = new PerformanceObserver((entryList) => {
          entryList.getEntries().forEach((shift: any) => {
            // Shifts triggered by user input are ignored.
            if (shift.hadRecentInput) return;
            total += shift.value;
          });
        });
        shiftObserver.observe({ type: 'layout-shift', buffered: true });

        // Stop observing after one second and report the running total.
        setTimeout(() => {
          shiftObserver.disconnect();
          done(total);
        }, 1000);
      }),
  );

  expect(cumulativeShift).toBeLessThan(0.1); // Good CLS score
}

/**
 * Smoke-checks keyboard focusability: focuses each of the first five buttons
 * on the page (fewer if the page has fewer) and asserts that it actually
 * received focus.
 *
 * This does not verify that a focus ring is visually rendered.
 *
 * @param page Playwright page to inspect.
 */
export async function validateAccessibility(page: Page) {
  const allButtons = page.getByRole('button');
  const sampleSize = Math.min(await allButtons.count(), 5);

  for (let idx = 0; idx < sampleSize; idx += 1) {
    const candidate = allButtons.nth(idx);
    await candidate.focus();
    await expect(candidate).toBeFocused();
  }
}
```

---

## Journey Metrics Dashboard

Track journey health with these metrics:

```typescript
// lib/journey-metrics.ts
interface JourneyMetric {
  journey: string;
  step: string;
  timestamp: Date;
  duration: number;
  success: boolean;
  userId?: string;
}

/**
 * Reports one journey step to your analytics provider (PostHog, Mixpanel,
 * etc.) as a `journey_step` event.
 *
 * Only the journey name, step name, duration, and success flag are sent;
 * `metric.timestamp` and `metric.userId` are not part of the event payload.
 *
 * NOTE(review): `analytics` is not defined in this snippet — presumably your
 * app's analytics client; make sure it is in scope where this is used.
 *
 * @param metric Step measurement to report; `duration` is sent as `duration_ms`.
 */
export function trackJourneyStep(metric: JourneyMetric) {
  analytics.track('journey_step', {
    journey_name: metric.journey,
    step_name: metric.step,
    duration_ms: metric.duration,
    success: metric.success,
  });
}

// Example usage in app
const journeyStart = Date.now();
// ... user completes step ...
trackJourneyStep({
  journey: 'signup_to_value',
  step: 'account_creation',
  timestamp: new Date(),
  duration: Date.now() - journeyStart,
  success: true,
});
```

---

## Common Journey Patterns

### Progressive Disclosure Journey
User sees simple view first, complexity revealed as needed.

```markdown
Step 1: Show basic options only
Step 2: "Advanced" expands more options
Step 3: Expert mode unlocks everything
```

### Guided Setup Journey
Hand-hold new users through initial configuration.

```markdown
Step 1: Welcome + single choice
Step 2: Core preference
Step 3: Optional integrations (skippable)
Step 4: First action with guidance
Step 5: Success + remove training wheels
```

### Recovery Journey
User returns after failure or abandonment.

```markdown
Step 1: Recognize returning user
Step 2: Restore previous state
Step 3: Acknowledge what happened
Step 4: Offer clear path forward
Step 5: Complete original goal
```

---

## Anti-Patterns

- **Happy path only** - Test error recovery, not just success
- **Spec-driven testing** - Test user goals, not features
- **Ignoring time** - Measure how long journeys take
- **Desktop-only** - Test mobile journeys separately
- **Skipping emotions** - Consider user frustration points
- **No metrics** - Track journey completion and drop-off
- **Static journeys** - Update as user behavior evolves

---

## Quick Reference

### Journey Priorities
| Priority | Criteria | Test Frequency |
|----------|----------|----------------|
| Critical | Revenue, core value | Every deploy |
| High | Daily user actions | Daily |
| Medium | Weekly features | Weekly |
| Low | Edge cases | On change |

### Package.json Scripts

```json
{
  "scripts": {
    "test:journeys": "playwright test e2e/tests/journeys/",
    "test:journeys:critical": "playwright test e2e/tests/journeys/critical/",
    "test:journeys:report": "playwright show-report"
  }
}
```

### Journey Documentation Checklist
- [ ] User goal clearly stated
- [ ] All steps documented
- [ ] Success criteria per step
- [ ] Error scenarios covered
- [ ] Recovery paths defined
- [ ] Metrics identified
- [ ] E2E test linked


================================================
FILE: skills/web-content/SKILL.md
================================================
---
name: web-content
description: SEO and AI discovery (GEO) - schema, ChatGPT/Perplexity optimization
when-to-use: When creating web content that needs SEO and AI discoverability
user-invocable: false
effort: medium
---

# Web Content Skill


For creating web content optimized for both traditional SEO and AI discovery (ChatGPT, Perplexity, Claude, Gemini).

**Sources:** [GEO Complete Guide](https://skale.so/marketing/geo/) | [AI Search SEO](https://www.gravitatedesign.com/blog/ai-search-seo/) | [LLM Optimization](https://surferseo.com/blog/llm-optimization-seo/) | [Generative Engine Optimization](https://www.siddharthbharath.com/generative-engine-optimization/)

---

## Philosophy

**SEO gets clicks. GEO gets citations.**

Traditional SEO optimizes for Google rankings. Generative Engine Optimization (GEO) optimizes for being cited by AI assistants. Modern content needs both:

- **SEO**: Rank on search results pages
- **GEO**: Be cited in AI-generated answers (ChatGPT, Perplexity, Claude, Gemini)

AI traffic grew 1,200% between July 2024 and February 2025. Google's search share dropped below 90% for the first time in a decade. Optimize for both.

---

## Content Structure for AI + SEO

### The Golden Rule

**Write for humans, structure for machines.**

AI systems prefer:
- Short, clear, fact-based content
- Clean formatting (headers, bullets, tables)
- Standalone sections that can be quoted
- Direct answers to questions

---

## Page Types & Templates

### Homepage

```markdown
## Homepage Structure

### Above the Fold
- **Headline**: Clear value proposition (what you do + for whom)
- **Subheadline**: How you deliver that value
- **Primary CTA**: One clear action
- **Trust signals**: Logos, testimonials, stats

### Content Sections
1. **Problem Statement**: Pain point you solve
2. **Solution Overview**: How you solve it (3-4 key features)
3. **Social Proof**: Testimonials, case studies, logos
4. **How It Works**: 3-step process (simple)
5. **Pricing Preview**: Or link to pricing page
6. **FAQ Section**: 5-7 common questions (GEO gold)
7. **Final CTA**: Repeat primary action

### Schema Required
- Organization schema (name, logo, founding date, social links)
- WebSite schema with SearchAction
- FAQ schema for questions section
```

### Product/Service Page

```markdown
## Product Page Structure

### Hero Section
- **Product Name**: Clear, descriptive
- **One-line Description**: What it does in 10 words or less
- **Key Benefit**: Primary value proposition
- **CTA**: Buy/Try/Demo

### Content Sections
1. **TL;DR Box**: 3-5 bullet summary (AI-quotable)
2. **Problem → Solution**: What problem, how solved
3. **Features Grid**: 4-6 features with icons
4. **Comparison Table**: vs. alternatives (GEO loves these)
5. **Use Cases**: Who uses it and how
6. **Testimonials**: Real names, photos, companies
7. **Pricing**: Clear tiers if applicable
8. **FAQ**: Product-specific questions

### Schema Required
- Product schema (name, description, price, availability)
- Review schema (aggregate rating)
- FAQ schema
- BreadcrumbList schema
```

### Blog Post / Article

```markdown
## Blog Post Structure

### Opening (First 100 words)
- **TL;DR**: Direct answer to the title's question
- **What you'll learn**: Bullet list of takeaways
- This section should be quotable standalone

### Body Structure
- **H2 sections**: Main topics (5-7 per article)
- **H3 subsections**: Supporting points
- **Bullet lists**: For scanability
- **Stat boxes**: Highlight key numbers
- **Comparison tables**: When comparing options

### Content Elements
- Definition boxes ("What is X?")
- Step-by-step instructions
- Code examples (if technical)
- Original statistics/research
- Expert quotes with attribution

### Closing
- **Summary**: Key takeaways (bulleted)
- **Next steps**: What reader should do
- **Related content**: Internal links

### Metadata Required
- Author name + bio + photo
- Publication date
- Last updated date (visible!)
- Reading time
- Article schema with author
```

### FAQ Page

```markdown
## FAQ Page Structure

### Organization
- Group questions by category
- Most common questions first
- Direct, concise answers
- Link to detailed pages for more info

### Question Format
Q: [Exact question users ask]
A: [Direct answer in first sentence, then elaboration]

### Schema Required
- FAQPage schema (critical for AI discovery)
- Each Q&A as Question/Answer schema
```

### Landing Page

```markdown
## Landing Page Structure

### Single Focus
- One offer
- One audience
- One CTA (repeated)

### Sections
1. **Headline**: Benefit-focused, specific
2. **Problem Agitation**: Pain points
3. **Solution**: Your offer
4. **Proof**: Testimonials, stats, logos
5. **Features**: 3-5 key benefits
6. **Objection Handling**: FAQ or guarantee
7. **CTA**: Clear, urgent

### No Navigation
- Remove header nav (reduce exits)
- Single path: read → convert
```

---

## AI-Optimized Content Formats

### TL;DR Boxes

```html
<div class="tldr-box">
  <h3>TL;DR</h3>
  <ul>
    <li>Key point 1 with specific detail</li>
    <li>Key point 2 with number/stat</li>
    <li>Key point 3 with actionable insight</li>
  </ul>
</div>
```

Place at top of articles. AI systems extract these for summaries.

### Definition Blocks

```markdown
## What is [Term]?

[Term] is [concise definition in one sentence]. It [what it does] by [how it works].

**Key characteristics:**
- Characteristic 1
- Characteristic 2
- Characteristic 3
```

Start with "What is X?" - AI systems look for this pattern.

### Comparison Tables

```markdown
| Feature | Product A | Product B | Our Product |
|---------|-----------|-----------|-------------|
| Price | $99/mo | $149/mo | $79/mo |
| Feature 1 | ✓ | ✗ | ✓ |
| Feature 2 | ✗ | ✓ | ✓ |
| Best For | Enterprise | Startups | SMBs |
```

AI loves structured comparisons. Include in product and review pages.

### Stat Boxes

```html
<div class="stat-box">
  <span class="stat-number">73%</span>
  <span class="stat-label">of users prefer AI search for complex queries</span>
  <span class="stat-source">Source: Adobe Analytics, 2024</span>
</div>
```

Original statistics with sources get cited by AI.

### Step-by-Step Guides

```markdown
## How to [Do Thing]

### Step 1: [Action Verb] [Object]
[Explanation of what to do]

**Example:**
[Concrete example]

### Step 2: [Action Verb] [Object]
[Explanation]

### Step 3: [Action Verb] [Object]
[Explanation]

**Result:** [What user achieves]
```

Use HowTo schema markup for these.

---

## Schema Markup (Critical for AI)

### Organization Schema

```json
{
  "@context": "https://schema.org",
  "@type": "Organization",
  "name": "Your Company",
  "url": "https://yoursite.com",
  "logo": "https://yoursite.com/logo.png",
  "foundingDate": "2020",
  "description": "One sentence description",
  "sameAs": [
    "https://twitter.com/yourcompany",
    "https://linkedin.com/company/yourcompany",
    "https://github.com/yourcompany"
  ],
  "contactPoint": {
    "@type": "ContactPoint",
    "email": "hello@yoursite.com",
    "contactType": "customer service"
  }
}
```

### Article Schema

```json
{
  "@context": "https://schema.org",
  "@type": "Article",
  "headline": "Article Title",
  "description": "Meta description",
  "image": "https://yoursite.com/article-image.jpg",
  "author": {
    "@type": "Person",
    "name": "Author Name",
    "url": "https://yoursite.com/team/author-name",
    "jobTitle": "Role at Company",
    "sameAs": [
      "https://linkedin.com/in/author",
      "https://twitter.com/author"
    ]
  },
  "publisher": {
    "@type": "Organization",
    "name": "Your Company",
    "logo": {
      "@type": "ImageObject",
      "url": "https://yoursite.com/logo.png"
    }
  },
  "datePublished": "2025-01-15",
  "dateModified": "2025-01-20"
}
```

### FAQ Schema

```json
{
  "@context": "https://schema.org",
  "@type": "FAQPage",
  "mainEntity": [
    {
      "@type": "Question",
      "name": "What is your product?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Direct answer here. Keep concise but complete."
      }
    },
    {
      "@type": "Question",
      "name": "How much does it cost?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Pricing starts at $X/month for basic plan..."
      }
    }
  ]
}
```

### Product Schema

```json
{
  "@context": "https://schema.org",
  "@type": "Product",
  "name": "Product Name",
  "description": "Product description",
  "image": "https://yoursite.com/product.jpg",
  "brand": {
    "@type": "Brand",
    "name": "Your Company"
  },
  "offers": {
    "@type": "Offer",
    "price": "29.99",
    "priceCurrency": "USD",
    "availability": "https://schema.org/InStock"
  },
  "aggregateRating": {
    "@type": "AggregateRating",
    "ratingValue": "4.8",
    "reviewCount": "127"
  }
}
```

### HowTo Schema

```json
{
  "@context": "https://schema.org",
  "@type": "HowTo",
  "name": "How to Set Up Your Account",
  "description": "Step-by-step guide to getting started",
  "step": [
    {
      "@type": "HowToStep",
      "name": "Create account",
      "text": "Go to signup page and enter your email"
    },
    {
      "@type": "HowToStep",
      "name": "Verify email",
      "text": "Click the link in the verification email"
    }
  ]
}
```

---

## Platform-Specific Optimization

### ChatGPT Optimization

```markdown
✅ DO:
- TL;DR sections at top of articles
- Consistent formatting (headers, bullets)
- Named authors with credentials
- Original research and statistics
- Multi-intent content (covers related questions)

❌ AVOID:
- Thin content without substance
- Missing author attribution
- Outdated information (no dates)
```

### Perplexity Optimization

```markdown
✅ DO:
- Original statistics with sources
- Comparison tables and structured data
- Clean URL slugs (/topic-name not /?p=123)
- Short, declarative statements
- Images, charts, diagrams
- YouTube videos (Perplexity shows these)

❌ AVOID:
- Generic content without unique insights
- Missing citations/sources
- Poor URL structure
```

### Claude Optimization

```markdown
✅ DO:
- Well-structured, logical content
- Clear definitions and explanations
- Technical accuracy
- Balanced perspectives
- Proper citations

❌ AVOID:
- Misleading or sensational content
- Missing context
- Outdated technical information
```

### Gemini Optimization

```markdown
✅ DO:
- Rich schema markup
- Detailed image alt-text
- YouTube content (Google-owned)
- Multimedia (video, audio with transcripts)

❌ AVOID:
- Missing structured data
- Images without alt-text
- Text-only content
```

---

## E-E-A-T for AI Discovery

### Experience
- First-person case studies
- "We tested X and found Y"
- Original screenshots and data
- User testimonials with real details

### Expertise
- Author bios with credentials
- Link to author's other work
- Industry-specific terminology
- Technical depth appropriate to topic

### Authoritativeness
- Backlinks from trusted sources
- Mentions in industry publications
- Citations from other experts
- Social proof (followers, engagement)

### Trustworthiness
- Contact information visible
- About page with team details
- Privacy policy and terms
- Secure site (HTTPS)
- Accurate, up-to-date info

---

## Content Freshness

### Visible Dates (Required)

```html
<article>
  <header>
    <h1>Article Title</h1>
    <div class="meta">
      <span class="author">By John Smith</span>
      <span class="published">Published: January 15, 2025</span>
      <span class="updated">Last updated: January 20, 2025</span>
    </div>
  </header>
</article>
```

AI systems prefer recent content. Show dates prominently.

### Update Schedule

| Content Type | Update Frequency |
|--------------|------------------|
| Product pages | On feature changes |
| Pricing | Immediately on change |
| Blog posts | Quarterly review |
| Statistics | When new data available |
| Guides | Semi-annually |

---

## Analytics for AI Traffic

### GA4 Regex Filter

```regex
.*chatgpt\.com.*|.*perplexity\.ai.*|.*gemini\.google\.com.*|.*copilot\.microsoft\.com.*|.*openai\.com.*|.*claude\.ai.*|.*poe\.com.*|.*you\.com.*|.*phind\.com.*
```

### Track AI Referrals

```javascript
// Check for AI referrer
// Hostnames of AI assistants whose referrals should be tracked.
const aiReferrers = [
  'chatgpt.com',
  'chat.openai.com',
  'perplexity.ai',
  'claude.ai',
  'gemini.google.com',
  'copilot.microsoft.com',
  'poe.com',
  'you.com',
  'phind.com'
];

const referrer = document.referrer;

// Compare against the referrer's hostname, not the whole URL: a substring
// check would also match look-alike domains ("thankyou.com", "shopoe.com")
// and any query string that happens to contain one of the names.
let referrerHost = '';
try {
  referrerHost = new URL(referrer).hostname;
} catch {
  // document.referrer is '' for direct visits, which `new URL` rejects;
  // treat that as "no referrer".
  referrerHost = '';
}

// Accept the listed host itself or any of its subdomains.
const isAIReferral = aiReferrers.some(
  ai => referrerHost === ai || referrerHost.endsWith(`.${ai}`)
);

if (isAIReferral) {
  analytics.track('ai_referral', {
    source: referrer,
    page: window.location.pathname
  });
}
```

### Survey for AI Discovery

Add to forms:
```markdown
How did you hear about us?
- [ ] Google Search
- [ ] ChatGPT
- [ ] Perplexity
- [ ] Claude
- [ ] Social Media
- [ ] Referral
- [ ] Other
```

---

## Content Checklist

### Before Publishing

```markdown
## SEO Checklist
- [ ] Title tag (50-60 chars) with primary keyword
- [ ] Meta description (150-160 chars) with CTA
- [ ] URL slug is clean and descriptive
- [ ] H1 matches title intent
- [ ] H2/H3 hierarchy is logical
- [ ] Images have descriptive alt-text
- [ ] Internal links to related content
- [ ] External links to authoritative sources

## GEO Checklist
- [ ] TL;DR or summary at top
- [ ] Direct answer to main question in first paragraph
- [ ] Stat boxes with sources
- [ ] Comparison tables where applicable
- [ ] FAQ section with schema
- [ ] Author name, bio, and credentials
- [ ] Publication and last-updated dates visible
- [ ] Schema markup validated
- [ ] Content can be quoted standalone
- [ ] Original insights or data included
```

### Schema Validation

```bash
# Validate schema markup
# Use: https://validator.schema.org/
# Or: https://search.google.com/test/rich-results
```

---

## Project Structure

```
project/
├── content/
│   ├── pages/
│   │   ├── home.md
│   │   ├── about.md
│   │   ├── pricing.md
│   │   └── contact.md
│   ├── blog/
│   │   ├── post-1.md
│   │   └── post-2.md
│   └── legal/
│       ├── privacy.md
│       └── terms.md
├── components/
│   ├── SchemaMarkup.tsx
│   ├── TLDRBox.tsx
│   ├── StatBox.tsx
│   ├── FAQSection.tsx
│   └── AuthorBio.tsx
└── lib/
    └── schema.ts           # Schema generators
```

---

## Anti-Patterns

- **No dates** - AI deprioritizes undated content
- **Anonymous content** - No author = no E-E-A-T
- **Walls of text** - Break up with headers, bullets, boxes
- **Generic content** - Add original insights, data, opinions
- **Missing schema** - Invisible to structured data crawlers
- **Outdated info** - Review on a schedule (blog posts quarterly, guides at least semi-annually)
- **No FAQ** - Missing easy GEO win
- **Poor URL structure** - Use /topic-name not /?p=12345

---

## Quick Reference

### Content Formats AI Loves
1. TL;DR summaries
2. Definition boxes ("What is X?")
3. Comparison tables
4. Step-by-step guides
5. FAQ sections
6. Stat boxes with sources
7. Listicles with numbers

### Required Schema by Page Type

| Page Type | Schema |
|-----------|--------|
| Homepage | Organization, WebSite |
| Blog Post | Article, Person (author), FAQ |
| Product | Product, Review, FAQ |
| FAQ | FAQPage |
| How-to | HowTo |
| About | Organization, Person |


================================================
FILE: skills/web-payments/SKILL.md
================================================
---
name: web-payments
description: Stripe Checkout, subscriptions, webhooks, customer portal
when-to-use: When implementing payments, subscriptions, or Stripe integration
user-invocable: false
effort: high
---

# Web Payments Skill (Stripe)


For integrating Stripe payments into web applications - one-time payments, subscriptions, and checkout flows.

**Sources:** [Stripe Checkout](https://docs.stripe.com/payments/checkout) | [Payment Element Best Practices](https://docs.stripe.com/payments/payment-element/best-practices) | [Building Solid Stripe Integrations](https://stripe.dev/blog/building-solid-stripe-integrations-developers-guide-success) | [Subscriptions](https://docs.stripe.com/billing/subscriptions/build-subscriptions)

---

## Setup

### 1. Create Stripe Account
1. Go to https://dashboard.stripe.com/register
2. Complete business verification
3. Get API keys from https://dashboard.stripe.com/apikeys

### 2. Environment Variables
```bash
# .env
STRIPE_SECRET_KEY=sk_test_xxx          # Server-side only
STRIPE_PUBLISHABLE_KEY=pk_test_xxx     # Client-side safe
STRIPE_WEBHOOK_SECRET=whsec_xxx        # For webhook verification

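# Production (set these in the production environment instead of the test keys above;
# the live webhook endpoint has its own whsec_ signing secret)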
STRIPE_SECRET_KEY=sk_live_xxx
STRIPE_PUBLISHABLE_KEY=pk_live_xxx
```

### 3. Install SDK
```bash
# Node.js
npm install stripe @stripe/stripe-js

# Python
pip install stripe
```

---

## Integration Options

| Method | Best For | Complexity |
|--------|----------|------------|
| **Checkout (Hosted)** | Quick setup, Stripe-hosted page | Low |
| **Checkout (Embedded)** | Custom site, embedded form | Low |
| **Payment Element** | Full customization, complex flows | Medium |
| **Custom Form** | Complete control (rare) | High |

**Recommendation**: Start with Checkout, migrate to Payment Element if needed.

---

## Stripe Checkout (Recommended)

### Server: Create Checkout Session

#### Node.js / Next.js
```typescript
// app/api/checkout/route.ts (Next.js App Router)
import Stripe from "stripe";
import { NextResponse } from "next/server";

const stripe = new Stripe(process.env.STRIPE_SECRET_KEY!);

/**
 * POST /api/checkout - create a Stripe Checkout Session for a single price.
 *
 * Expects a JSON body `{ priceId: string, mode?: "payment" | "subscription" }`
 * and responds with `{ sessionId, url }`. Malformed input yields 400; a Stripe
 * failure yields 500.
 */
export async function POST(request: Request) {
  // The body is client-controlled: parse defensively and validate before use
  const body = await request.json().catch(() => null);
  const priceId = body?.priceId;
  const mode = body?.mode ?? "payment";

  if (typeof priceId !== "string" || priceId.length === 0) {
    return NextResponse.json({ error: "priceId is required" }, { status: 400 });
  }
  if (mode !== "payment" && mode !== "subscription") {
    return NextResponse.json(
      { error: 'mode must be "payment" or "subscription"' },
      { status: 400 }
    );
  }

  try {
    const session = await stripe.checkout.sessions.create({
      mode,
      payment_method_types: ["card"],
      line_items: [
        {
          price: priceId,
          quantity: 1,
        },
      ],
      // Stripe substitutes {CHECKOUT_SESSION_ID} when it redirects back
      success_url: `${process.env.NEXT_PUBLIC_URL}/success?session_id={CHECKOUT_SESSION_ID}`,
      cancel_url: `${process.env.NEXT_PUBLIC_URL}/canceled`,
      // Optional: Link to existing customer
      // customer: customerId,
      // Optional: Collect shipping
      // shipping_address_collection: { allowed_countries: ["US", "CA"] },
      // Optional: Add metadata for tracking.
      // "user_123" is a placeholder - use the authenticated user's ID; the
      // webhook handler reads metadata.userId to find the account to update.
      metadata: {
        userId: "user_123",
        source: "pricing_page",
      },
    });

    return NextResponse.json({ sessionId: session.id, url: session.url });
  } catch (error) {
    console.error("Stripe error:", error);
    return NextResponse.json({ error: "Failed to create session" }, { status: 500 });
  }
}
```

#### Python / FastAPI
```python
# app/api/checkout.py
import stripe
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
import os

stripe.api_key = os.environ["STRIPE_SECRET_KEY"]
router = APIRouter()

class CheckoutRequest(BaseModel):
    price_id: str
    mode: str = "payment"  # or "subscription"

@router.post("/api/checkout")
async def create_checkout_session(request: CheckoutRequest):
    try:
        session = stripe.checkout.Session.create(
            mode=request.mode,
            payment_method_types=["card"],
            line_items=[{
                "price": request.price_id,
                "quantity": 1,
            }],
            success_url=f"{os.environ['APP_URL']}/success?session_id={{CHECKOUT_SESSION_ID}}",
            cancel_url=f"{os.environ['APP_URL']}/canceled",
            metadata={
                "user_id": "user_123",
            },
        )
        return {"session_id": session.id, "url": session.url}
    except stripe.error.StripeError as e:
        raise HTTPException(status_code=400, detail=str(e))
```

### Client: Redirect to Checkout

```typescript
// components/CheckoutButton.tsx
"use client";

import { loadStripe } from "@stripe/stripe-js";

const stripePromise = loadStripe(process.env.NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY!);

/**
 * Button that asks the server for a Checkout Session and redirects the
 * browser to the Stripe-hosted page.
 *
 * @param priceId - Stripe Price ID (`price_xxx`) to purchase.
 */
export function CheckoutButton({ priceId }: { priceId: string }) {
  const handleCheckout = async () => {
    try {
      const response = await fetch("/api/checkout", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ priceId }),
      });

      // Error responses carry no `url`; redirecting would send the user to "/undefined"
      if (!response.ok) {
        throw new Error(`Checkout request failed with status ${response.status}`);
      }

      const { url } = await response.json();
      if (!url) {
        throw new Error("Checkout response did not include a redirect URL");
      }

      // Redirect to Stripe Checkout
      window.location.href = url;
    } catch (error) {
      console.error("Unable to start checkout:", error);
    }
  };

  return (
    <button onClick={handleCheckout}>
      Subscribe Now
    </button>
  );
}
```

---

## Embedded Checkout

For keeping users on your site:

```typescript
// components/EmbeddedCheckout.tsx
"use client";

import { useEffect, useState } from "react";
import { loadStripe } from "@stripe/stripe-js";
import {
  EmbeddedCheckoutProvider,
  EmbeddedCheckout,
} from "@stripe/react-stripe-js";

const stripePromise = loadStripe(process.env.NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY!);

export function EmbeddedCheckoutForm({ priceId }: { priceId: string }) {
  const [clientSecret, setClientSecret] = useState("");

  useEffect(() => {
    fetch("/api/checkout/embedded", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ priceId }),
    })
      .then((res) => res.json())
      .then((data) => setClientSecret(data.clientSecret));
  }, [priceId]);

  if (!clientSecret) return <div>Loading...</div>;

  return (
    <EmbeddedCheckoutProvider stripe={stripePromise} options={{ clientSecret }}>
      <EmbeddedCheckout />
    </EmbeddedCheckoutProvider>
  );
}
```

Server endpoint for embedded:
```typescript
// app/api/checkout/embedded/route.ts
export async function POST(request: Request) {
  const { priceId } = await request.json();

  const session = await stripe.checkout.sessions.create({
    mode: "subscription",
    line_items: [{ price: priceId, quantity: 1 }],
    ui_mode: "embedded",
    return_url: `${process.env.NEXT_PUBLIC_URL}/success?session_id={CHECKOUT_SESSION_ID}`,
  });

  return NextResponse.json({ clientSecret: session.client_secret });
}
```

---

## Webhooks (Critical)

**Never trust client-side data**. Always verify payments via webhooks.

### Webhook Endpoint

```typescript
// app/api/webhooks/stripe/route.ts
import Stripe from "stripe";
import { headers } from "next/headers";

const stripe = new Stripe(process.env.STRIPE_SECRET_KEY!);
const webhookSecret = process.env.STRIPE_WEBHOOK_SECRET!;

/**
 * POST /api/webhooks/stripe - receive and dispatch Stripe webhook events.
 *
 * Responds 400 when the signature header is missing or does not verify,
 * and 200 once the event has been dispatched to its handler.
 */
export async function POST(request: Request) {
  // The signature is computed over the raw bytes, so read the body as text
  const body = await request.text();

  // Read the header from the request itself: this works on every Next.js
  // version, whereas headers() from "next/headers" is async from v15 on.
  const signature = request.headers.get("stripe-signature");
  if (!signature) {
    return new Response("Missing stripe-signature header", { status: 400 });
  }

  let event: Stripe.Event;

  // Verify webhook signature
  try {
    event = stripe.webhooks.constructEvent(body, signature, webhookSecret);
  } catch (err) {
    console.error("Webhook signature verification failed");
    return new Response("Invalid signature", { status: 400 });
  }

  // Handle events
  switch (event.type) {
    case "checkout.session.completed": {
      const session = event.data.object as Stripe.Checkout.Session;
      await handleCheckoutComplete(session);
      break;
    }
    case "customer.subscription.created":
    case "customer.subscription.updated": {
      const subscription = event.data.object as Stripe.Subscription;
      await handleSubscriptionUpdate(subscription);
      break;
    }
    case "customer.subscription.deleted": {
      const subscription = event.data.object as Stripe.Subscription;
      await handleSubscriptionCanceled(subscription);
      break;
    }
    case "invoice.payment_failed": {
      const invoice = event.data.object as Stripe.Invoice;
      await handlePaymentFailed(invoice);
      break;
    }
    default:
      console.log(`Unhandled event type: ${event.type}`);
  }

  // Return 200 quickly - process async if needed
  return new Response("OK", { status: 200 });
}

/**
 * Persist the Stripe customer and subscription IDs for the user who
 * completed checkout.
 *
 * The user is identified by `metadata.userId`, which must be set when the
 * Checkout Session is created. Sessions without it are logged and skipped.
 */
async function handleCheckoutComplete(session: Stripe.Checkout.Session) {
  const userId = session.metadata?.userId;
  if (!userId) {
    // Without an ID the update below would run with `id: undefined`
    console.error(`Checkout session ${session.id} has no userId metadata; skipping`);
    return;
  }

  const customerId = session.customer as string;
  const subscriptionId = session.subscription as string;

  // Update your database
  await db.user.update({
    where: { id: userId },
    data: {
      stripeCustomerId: customerId,
      stripeSubscriptionId: subscriptionId,
      subscriptionStatus: "active",
    },
  });
}
```

### Python Webhook
```python
# app/api/webhooks.py
import os

import stripe
from fastapi import APIRouter, Request, HTTPException

router = APIRouter()

@router.post("/api/webhooks/stripe")
async def stripe_webhook(request: Request):
    """Verify and dispatch a Stripe webhook event.

    Responds 400 when the signature header is missing, the payload is
    malformed, or the signature does not verify; otherwise dispatches the
    event and returns ``{"status": "success"}``.
    """
    # The signature covers the raw bytes, so do not parse the body first
    payload = await request.body()
    sig_header = request.headers.get("stripe-signature")
    if sig_header is None:
        # Reject early instead of handing None to the verifier
        raise HTTPException(status_code=400, detail="Missing signature")

    try:
        event = stripe.Webhook.construct_event(
            payload, sig_header, os.environ["STRIPE_WEBHOOK_SECRET"]
        )
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid payload")
    except stripe.error.SignatureVerificationError:
        raise HTTPException(status_code=400, detail="Invalid signature")

    # Handle events
    if event["type"] == "checkout.session.completed":
        session = event["data"]["object"]
        await handle_checkout_complete(session)
    elif event["type"] == "customer.subscription.deleted":
        subscription = event["data"]["object"]
        await handle_subscription_canceled(subscription)

    return {"status": "success"}
```

### Key Webhook Events

| Event | When | Action |
|-------|------|--------|
| `checkout.session.completed` | Payment successful | Provision access |
| `customer.subscription.created` | New subscription | Store subscription ID |
| `customer.subscription.updated` | Plan change | Update plan in DB |
| `customer.subscription.deleted` | Canceled | Revoke access |
| `invoice.payment_failed` | Payment failed | Notify user, retry |
| `invoice.paid` | Renewal successful | Extend access |

---

## Products & Prices

### Create via Dashboard (Recommended)
1. Go to https://dashboard.stripe.com/products
2. Create product with name, description
3. Add price(s) - one-time or recurring
4. Copy Price ID (`price_xxx`)

### Create via API
```typescript
// One-time product
const product = await stripe.products.create({
  name: "Pro Plan",
  description: "Full access to all features",
});

const price = await stripe.prices.create({
  product: product.id,
  unit_amount: 2999, // $29.99 in cents
  currency: "usd",
});

// Subscription product
const subscriptionPrice = await stripe.prices.create({
  product: product.id,
  unit_amount: 999, // $9.99/month
  currency: "usd",
  recurring: {
    interval: "month",
  },
});
```

---

## Customer Portal

Let users manage their subscriptions:

```typescript
// app/api/portal/route.ts
/**
 * POST /api/portal - create a Stripe Billing Portal session.
 *
 * Reads `customerId` from the JSON body and responds with `{ url }`, the
 * portal URL; the portal sends the user back to `/settings` when done.
 */
export async function POST(request: Request) {
  // NOTE(review): customerId is taken from the client-supplied body here.
  // In a real app, look it up from the authenticated user's record instead,
  // otherwise a caller could open the portal for another customer's ID.
  const { customerId } = await request.json();

  const session = await stripe.billingPortal.sessions.create({
    customer: customerId,
    return_url: `${process.env.NEXT_PUBLIC_URL}/settings`,
  });

  return NextResponse.json({ url: session.url });
}
```

Configure portal at: https://dashboard.stripe.com/settings/billing/portal

---

## Subscriptions

### Create Subscription with Trial
```typescript
const session = await stripe.checkout.sessions.create({
  mode: "subscription",
  line_items: [{ price: priceId, quantity: 1 }],
  subscription_data: {
    trial_period_days: 14,
    // Cancel if no payment method after trial
    trial_settings: {
      end_behavior: { missing_payment_method: "cancel" },
    },
  },
  success_url: successUrl,
  cancel_url: cancelUrl,
});
```

### Check Subscription Status
```typescript
// lib/subscription.ts
export async function getSubscriptionStatus(customerId: string) {
  const subscriptions = await stripe.subscriptions.list({
    customer: customerId,
    status: "all",
    limit: 1,
  });

  if (subscriptions.data.length === 0) {
    return { status: "none", plan: null };
  }

  const subscription = subscriptions.data[0];
  return {
    status: subscription.status,
    plan: subscription.items.data[0].price.id,
    currentPeriodEnd: new Date(subscription.current_period_end * 1000),
    cancelAtPeriodEnd: subscription.cancel_at_period_end,
  };
}
```

---

## Testing

### Test Cards
| Card Number | Scenario |
|-------------|----------|
| `4242424242424242` | Success |
| `4000000000000002` | Declined |
| `4000002500003155` | Requires 3D Secure |
| `4000000000009995` | Insufficient funds |

### Stripe CLI for Webhooks
```bash
# Install CLI
brew install stripe/stripe-cli/stripe

# Login
stripe login

# Forward webhooks to local server
stripe listen --forward-to localhost:3000/api/webhooks/stripe

# Trigger test events
stripe trigger checkout.session.completed
stripe trigger customer.subscription.deleted
```

---

## Project Structure

```
project/
├── app/
│   ├── api/
│   │   ├── checkout/
│   │   │   └── route.ts          # Create checkout session
│   │   ├── portal/
│   │   │   └── route.ts          # Customer portal
│   │   └── webhooks/
│   │       └── stripe/
│   │           └── route.ts      # Webhook handler
│   ├── pricing/
│   │   └── page.tsx              # Pricing page
│   ├── success/
│   │   └── page.tsx              # Post-checkout success
│   └── settings/
│       └── page.tsx              # Manage subscription
├── lib/
│   ├── stripe.ts                 # Stripe client
│   └── subscription.ts           # Subscription helpers
└── .env.local
```

---

## Security Best Practices

### Non-Negotiable Rules
1. **Server-side only for secrets** - Never expose `STRIPE_SECRET_KEY`
2. **Always verify webhooks** - Check signature before processing
3. **Idempotency** - Store webhook event IDs, skip duplicates
4. **Use metadata** - Track user IDs, sources for debugging
5. **Handle all states** - Success, failure, pending, canceled

### Idempotent Webhook Handler
```typescript
const processedEvents = new Set<string>(); // Use Redis in production

/**
 * Webhook handler skeleton that processes each Stripe event at most once.
 *
 * The event ID is claimed before processing so concurrent duplicate
 * deliveries are skipped, and released again if processing fails so that
 * Stripe's retry is not discarded as a duplicate.
 */
export async function POST(request: Request) {
  // ... verify signature ...

  // Skip duplicate events
  if (processedEvents.has(event.id)) {
    return new Response("Already processed", { status: 200 });
  }
  processedEvents.add(event.id);

  try {
    // Process event...
  } catch (err) {
    // Release the claim: the failed event must be eligible for redelivery
    processedEvents.delete(event.id);
    throw err;
  }

  return new Response("OK", { status: 200 });
}
```

### Amount Handling
```typescript
// Always use cents (smallest currency unit)
const priceInCents = 2999; // $29.99

// Helper functions
const toCents = (dollars: number) => Math.round(dollars * 100);
const toDollars = (cents: number) => cents / 100;

// Display
const displayPrice = (cents: number) =>
  new Intl.NumberFormat("en-US", {
    style: "currency",
    currency: "USD",
  }).format(toDollars(cents));
```

---

## Common Patterns

### Pricing Page
```typescript
// app/pricing/page.tsx
const plans = [
  {
    name: "Starter",
    price: "$9/mo",
    priceId: "price_starter_monthly",
    features: ["Feature 1", "Feature 2"],
  },
  {
    name: "Pro",
    price: "$29/mo",
    priceId: "price_pro_monthly",
    features: ["Everything in Starter", "Feature 3", "Feature 4"],
    popular: true,
  },
];

export default function PricingPage() {
  return (
    <div className="grid md:grid-cols-2 gap-8">
      {plans.map((plan) => (
        <div key={plan.name} className={plan.popular ? "border-blue-500" : ""}>
          <h3>{plan.name}</h3>
          <p>{plan.price}</p>
          <ul>
            {plan.features.map((f) => <li key={f}>{f}</li>)}
          </ul>
          <CheckoutButton priceId={plan.priceId} />
        </div>
      ))}
    </div>
  );
}
```

### Protect Routes by Subscription
```typescript
// middleware.ts
import { getSubscriptionStatus } from "@/lib/subscription";

/**
 * Gate every route under `/pro` behind an active or trialing subscription.
 *
 * Visitors without a session, without a Stripe customer ID, or without a
 * qualifying subscription are redirected to `/pricing`. All other requests
 * pass through unchanged.
 */
export async function middleware(request: NextRequest) {
  // Only /pro routes need the session and subscription lookups
  if (!request.nextUrl.pathname.startsWith("/pro")) {
    return NextResponse.next();
  }

  const session = await getSession();
  const customerId = session?.stripeCustomerId;

  // No signed-in user or no Stripe customer yet: nothing to look up
  if (!customerId) {
    return NextResponse.redirect(new URL("/pricing", request.url));
  }

  const { status } = await getSubscriptionStatus(customerId);

  if (status !== "active" && status !== "trialing") {
    return NextResponse.redirect(new URL("/pricing", request.url));
  }

  return NextResponse.next();
}
```

---

## Anti-Patterns

- **Hardcoding API keys** - Use environment variables
- **Client-side payment creation** - Always create PaymentIntent/Session server-side
- **Skipping webhook verification** - Always verify signatures
- **Processing duplicate webhooks** - Implement idempotency
- **Floating-point currency math** - Use integers (cents)
- **Trusting client data** - Verify everything server-side
- **Ignoring failed payments** - Handle `invoice.payment_failed`
- **No error handling** - Catch and handle Stripe errors

---

## Quick Reference

```bash
# Install
npm install stripe @stripe/stripe-js @stripe/react-stripe-js

# Stripe CLI
stripe login
stripe listen --forward-to localhost:3000/api/webhooks/stripe
stripe trigger checkout.session.completed

# Test mode prefix
sk_test_xxx  # Secret key
pk_test_xxx  # Publishable key

# Live mode prefix
sk_live_xxx
pk_live_xxx
```

### Key Endpoints
| Endpoint | Purpose |
|----------|---------|
| `POST /api/checkout` | Create checkout session |
| `POST /api/portal` | Customer billing portal |
| `POST /api/webhooks/stripe` | Handle Stripe events |

### Environment Variables
```bash
STRIPE_SECRET_KEY=sk_test_xxx
STRIPE_PUBLISHABLE_KEY=pk_test_xxx
STRIPE_WEBHOOK_SECRET=whsec_xxx
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=pk_test_xxx
# Base URL used to build success/cancel/return URLs (no trailing slash)
NEXT_PUBLIC_URL=http://localhost:3000
```


================================================
FILE: skills/woocommerce/SKILL.md
================================================
---
name: woocommerce
description: WooCommerce REST API - products, orders, customers, webhooks
when-to-use: When integrating with WooCommerce stores
user-invocable: false
effort: medium
---

# WooCommerce Development Skill


For integrating with WooCommerce stores via REST API - products, orders, customers, webhooks, and custom extensions.

**Sources:** [WooCommerce REST API](https://woocommerce.github.io/woocommerce-rest-api-docs/) | [Developer Docs](https://developer.woocommerce.com/docs/)

---

## Prerequisites

### Store Requirements

```bash
# WooCommerce store must have:
# 1. WordPress with WooCommerce plugin installed
# 2. HTTPS enabled (required for API auth)
# 3. Permalinks set to anything except "Plain"
#    WordPress Admin → Settings → Permalinks → Post name (recommended)
```

### Generate API Keys

1. Go to **WooCommerce → Settings → Advanced → REST API**
2. Click **Add key**
3. Set Description, User (admin), and Permissions (Read/Write)
4. Click **Generate API key**
5. Copy **Consumer Key** and **Consumer Secret** (shown only once)

---

## API Basics

### Base URL

```
https://your-store.com/wp-json/wc/v3/
```

### Authentication

```typescript
// Node.js - Basic Auth (recommended)
const WooCommerceRestApi = require("@woocommerce/woocommerce-rest-api").default;

const api = new WooCommerceRestApi({
  url: "https://your-store.com",
  consumerKey: process.env.WC_CONSUMER_KEY,
  consumerSecret: process.env.WC_CONSUMER_SECRET,
  version: "wc/v3"
});
```

```python
# Python
from woocommerce import API

wcapi = API(
    url="https://your-store.com",
    consumer_key=os.environ["WC_CONSUMER_KEY"],
    consumer_secret=os.environ["WC_CONSUMER_SECRET"],
    version="wc/v3"
)
```

### Query String Auth (Fallback)

```bash
# Only use if Basic Auth fails (some hosting configurations)
# Keep the URL in one quoted argument: an unquoted "&" would background curl
# and the consumer_secret parameter would never be sent.
curl "https://your-store.com/wp-json/wc/v3/products?consumer_key=ck_xxx&consumer_secret=cs_xxx"
```

---

## Installation

### Node.js

```bash
npm install @woocommerce/woocommerce-rest-api
```

```typescript
// lib/woocommerce.ts
import WooCommerceRestApi from "@woocommerce/woocommerce-rest-api";

// Shared WooCommerce REST client, configured from environment variables.
const api = new WooCommerceRestApi({
  url: process.env.WC_STORE_URL!,
  consumerKey: process.env.WC_CONSUMER_KEY!,
  consumerSecret: process.env.WC_CONSUMER_SECRET!,
  version: "wc/v3",
  queryStringAuth: false, // Set true only if Basic Auth fails on your host: sends the keys as query parameters over HTTPS
});

export default api;
```

### Python

```bash
pip install woocommerce
```

```python
# lib/woocommerce.py
import os
from woocommerce import API

wcapi = API(
    url=os.environ["WC_STORE_URL"],
    consumer_key=os.environ["WC_CONSUMER_KEY"],
    consumer_secret=os.environ["WC_CONSUMER_SECRET"],
    version="wc/v3",
    timeout=30
)
```

---

## Products

### List Products

```typescript
// Node.js
async function getProducts(page = 1, perPage = 20) {
  const response = await api.get("products", {
    page,
    per_page: perPage,
    status: "publish",
  });
  return response.data;
}

// With filters
async function searchProducts(search: string, category?: number) {
  const response = await api.get("products", {
    search,
    category: category || undefined,
    orderby: "popularity",
    order: "desc",
  });
  return response.data;
}
```

```python
# Python
def get_products(page=1, per_page=20):
    response = wcapi.get("products", params={
        "page": page,
        "per_page": per_page,
        "status": "publish"
    })
    return response.json()
```

### Get Single Product

```typescript
async function getProduct(productId: number) {
  const response = await api.get(`products/${productId}`);
  return response.data;
}
```

### Create Product

```typescript
async function createProduct(data: ProductInput) {
  const response = await api.post("products", {
    name: data.name,
    type: "simple", // simple, variable, grouped, external
    regular_price: data.price.toString(),
    description: data.description,
    short_description: data.shortDescription,
    categories: data.categoryIds.map(id => ({ id })),
    images: data.images.map(url => ({ src: url })),
    manage_stock: true,
    stock_quantity: data.stockQuantity,
    status: "publish",
  });
  return response.data;
}
```

### Update Product

```typescript
async function updateProduct(productId: number, data: Partial<ProductInput>) {
  const response = await api.put(`products/${productId}`, data);
  return response.data;
}

// Update stock only
async function updateStock(productId: number, quantity: number) {
  const response = await api.put(`products/${productId}`, {
    stock_quantity: quantity,
  });
  return response.data;
}
```

### Delete Product

```typescript
async function deleteProduct(productId: number, force = false) {
  // force: true = permanent delete, false = move to trash
  const response = await api.delete(`products/${productId}`, {
    force,
  });
  return response.data;
}
```

### Variable Products

```typescript
// Create variable product
async function createVariableProduct(data: VariableProductInput) {
  // 1. Create product with type "variable"
  const product = await api.post("products", {
    name: data.name,
    type: "variable",
    attributes: [
      {
        name: "Size",
        visible: true,
        variation: true,
        options: ["Small", "Medium", "Large"],
      },
      {
        name: "Color",
        visible: true,
        variation: true,
        options: ["Red", "Blue"],
      },
    ],
  });

  // 2. Create variations
  for (const variant of data.variants) {
    await api.post(`products/${product.data.id}/variations`, {
      regular_price: variant.price.toString(),
      stock_quantity: variant.stock,
      attributes: [
        { name: "Size", option: variant.size },
        { name: "Color", option: variant.color },
      ],
    });
  }

  return product.data;
}

// Get variations
async function getVariations(productId: number) {
  const response = await api.get(`products/${productId}/variations`);
  return response.data;
}
```

---

## Orders

### List Orders

```typescript
async function getOrders(params: OrderQueryParams = {}) {
  const response = await api.get("orders", {
    page: params.page || 1,
    per_page: params.perPage || 20,
    status: params.status || "any", // pending, processing, completed, etc.
    after: params.after, // ISO date string
    before: params.before,
  });
  return response.data;
}

// Get recent orders
async function getRecentOrders(days = 7) {
  const after = new Date();
  after.setDate(after.getDate() - days);

  const response = await api.get("orders", {
    after: after.toISOString(),
    orderby: "date",
    order: "desc",
  });
  return response.data;
}
```

### Get Single Order

```typescript
async function getOrder(orderId: number) {
  const response = await api.get(`orders/${orderId}`);
  return response.data;
}
```

### Create Order

```typescript
async function createOrder(data: OrderInput) {
  const response = await api.post("orders", {
    payment_method: "stripe",
    payment_method_title: "Credit Card",
    set_paid: false,
    billing: {
      first_name: data.customer.firstName,
      last_name: data.customer.lastName,
      email: data.customer.email,
      phone: data.customer.phone,
      address_1: data.billing.address1,
      city: data.billing.city,
      state: data.billing.state,
      postcode: data.billing.postcode,
      country: data.billing.country,
    },
    shipping: {
      first_name: data.customer.firstName,
      last_name: data.customer.lastName,
      address_1: data.shipping.address1,
      city: data.shipping.city,
      state: data.shipping.state,
      postcode: data.shipping.postcode,
      country: data.shipping.country,
    },
    line_items: data.items.map(item => ({
      product_id: item.productId,
      variation_id: item.variationId,
      quantity: item.quantity,
    })),
    shipping_lines: [
      {
        method_id: "flat_rate",
        method_title: "Flat Rate",
        total: data.shippingCost.toString(),
      },
    ],
  });
  return response.data;
}
```

### Update Order Status

```typescript
async function updateOrderStatus(orderId: number, status: OrderStatus) {
  const response = await api.put(`orders/${orderId}`, {
    status, // pending, processing, on-hold, completed, cancelled, refunded, failed
  });
  return response.data;
}

// Add order note
async function addOrderNote(orderId: number, note: string, customerNote = false) {
  const response = await api.post(`orders/${orderId}/notes`, {
    note,
    customer_note: customerNote, // true = visible to customer
  });
  return response.data;
}
```

### Order Statuses

| Status | Description |
|--------|-------------|
| `pending` | Awaiting payment |
| `processing` | Payment received, awaiting fulfillment |
| `on-hold` | Awaiting action (stock, payment confirmation) |
| `completed` | Order fulfilled |
| `cancelled` | Cancelled by admin or customer |
| `refunded` | Refunded |
| `failed` | Payment failed |

---

## Customers

### List Customers

```typescript
async function getCustomers(params: CustomerQueryParams = {}) {
  const response = await api.get("customers", {
    page: params.page || 1,
    per_page: params.perPage || 20,
    role: "customer",
    orderby: "registered_date",
    order: "desc",
  });
  return response.data;
}

// Search customers
async function searchCustomers(email: string) {
  const response = await api.get("customers", {
    email,
  });
  return response.data;
}
```

### Create Customer

```typescript
async function createCustomer(data: CustomerInput) {
  const response = await api.post("customers", {
    email: data.email,
    first_name: data.firstName,
    last_name: data.lastName,
    username: data.email.split("@")[0],
    billing: {
      first_name: data.firstName,
      last_name: data.lastName,
      email: data.email,
      phone: data.phone,
      address_1: data.address1,
      city: data.city,
      state: data.state,
      postcode: data.postcode,
      country: data.country,
    },
    shipping: {
      // Same as billing or different
    },
  });
  return response.data;
}
```

### Update Customer

```typescript
async function updateCustomer(customerId: number, data: Partial<CustomerInput>) {
  const response = await api.put(`customers/${customerId}`, data);
  return response.data;
}
```

---

## Webhooks

### Create Webhook

```typescript
async function createWebhook(topic: string, deliveryUrl: string) {
  const response = await api.post("webhooks", {
    name: `Webhook for ${topic}`,
    topic, // order.created, order.updated, product.created, etc.
    delivery_url: deliveryUrl,
    status: "active",
    secret: process.env.WC_WEBHOOK_SECRET,
  });
  return response.data;
}
```

### Webhook Topics

| Topic | Trigger |
|-------|---------|
| `order.created` | New order placed |
| `order.updated` | Order status/details changed |
| `order.deleted` | Order deleted |
| `product.created` | New product created |
| `product.updated` | Product updated |
| `product.deleted` | Product deleted |
| `customer.created` | New customer registered |
| `customer.updated` | Customer updated |
| `coupon.created` | New coupon created |

### Verify Webhook Signature

```typescript
// Express.js webhook handler
import crypto from "crypto";
import express from "express";

/**
 * Check the `X-WC-Webhook-Signature` header against the request body.
 *
 * The signature is a base64 HMAC-SHA256 of the raw request bytes, so
 * `req.body` must be the unparsed Buffer produced by `express.raw()`.
 * Re-serializing parsed JSON does not reproduce the bytes that were signed.
 *
 * @returns true only when a signature is present and matches.
 */
function verifyWooCommerceWebhook(req: Request): boolean {
  const signature = req.headers["x-wc-webhook-signature"];
  if (typeof signature !== "string" || !Buffer.isBuffer(req.body)) {
    return false;
  }

  const expected = crypto
    .createHmac("sha256", process.env.WC_WEBHOOK_SECRET!)
    .update(req.body)
    .digest();
  const received = Buffer.from(signature, "base64");

  // timingSafeEqual throws on length mismatch, so compare lengths first
  return received.length === expected.length && crypto.timingSafeEqual(received, expected);
}

// Route handler
// express.raw() keeps the body as a Buffer for signature verification.
// Register this route before any global express.json() middleware, which
// would otherwise consume and parse the body first.
app.post("/webhooks/woocommerce", express.raw({ type: "application/json" }), (req, res) => {
  if (!verifyWooCommerceWebhook(req)) {
    return res.status(401).json({ error: "Invalid signature" });
  }

  const topic = req.headers["x-wc-webhook-topic"];

  // Parse only after the signature has been verified
  let payload;
  try {
    payload = JSON.parse(req.body.toString("utf8"));
  } catch {
    return res.status(400).json({ error: "Invalid JSON payload" });
  }

  switch (topic) {
    case "order.created":
      handleNewOrder(payload);
      break;
    case "order.updated":
      handleOrderUpdate(payload);
      break;
    // ... other topics
  }

  res.status(200).json({ received: true });
});
```

```python
# Python/Flask webhook handler
import base64
import hashlib
import hmac
import os

@app.route("/webhooks/woocommerce", methods=["POST"])
def woocommerce_webhook():
    """Verify the WooCommerce signature, then dispatch on the webhook topic.

    Responds 401 when the signature header is missing or does not match the
    base64 HMAC-SHA256 of the raw request body, and 200 otherwise.
    """
    signature = request.headers.get("X-WC-Webhook-Signature")
    # Raw bytes: the HMAC must be computed over exactly what was sent
    payload = request.get_data()

    expected = base64.b64encode(
        hmac.new(
            os.environ["WC_WEBHOOK_SECRET"].encode(),
            payload,
            hashlib.sha256
        ).digest()
    )

    # compare_digest raises TypeError on None (and on non-ASCII str), so
    # reject a missing header first and compare as bytes
    if not signature or not hmac.compare_digest(signature.encode(), expected):
        return {"error": "Invalid signature"}, 401

    topic = request.headers.get("X-WC-Webhook-Topic")
    data = request.json

    if topic == "order.created":
        handle_new_order(data)
    elif topic == "order.updated":
        handle_order_update(data)

    return {"received": True}, 200
```

---

## Categories & Tags

### List Categories

```typescript
async function getCategories() {
  const response = await api.get("products/categories", {
    per_page: 100,
    orderby: "name",
  });
  return response.data;
}

// Create category
async function createCategory(name: string, parentId?: number) {
  const response = await api.post("products/categories", {
    name,
    parent: parentId || 0,
  });
  return response.data;
}
```

### List Tags

```typescript
async function getTags() {
  const response = await api.get("products/tags", {
    per_page: 100,
  });
  return response.data;
}
```

---

## Coupons

### Create Coupon

```typescript
async function createCoupon(data: CouponInput) {
  const response = await api.post("coupons", {
    code: data.code,
    discount_type: data.type, // percent, fixed_cart, fixed_product
    amount: data.amount.toString(),
    individual_use: true,
    exclude_sale_items: false,
    minimum_amount: data.minimumAmount?.toString(),
    maximum_amount: data.maximumAmount?.toString(),
    usage_limit: data.usageLimit,
    usage_limit_per_user: 1,
    date_expires: data.expiresAt, // ISO date string
  });
  return response.data;
}
```

---

## Reports

### Sales Report

```typescript
async function getSalesReport(period = "month") {
  const response = await api.get("reports/sales", {
    period, // day, week, month, year
  });
  return response.data;
}

// Top sellers
async function getTopSellers(period = "month") {
  const response = await api.get("reports/top_sellers", {
    period,
  });
  return response.data;
}
```

---

## Pagination

### Handle Large Datasets

```typescript
/**
 * Fetch every product by walking the paginated `products` endpoint,
 * 100 items per request.
 *
 * @returns all products from all pages, in the order the API returned them.
 */
async function getAllProducts() {
  const allProducts = [];
  const perPage = 100;

  for (let page = 1; ; page++) {
    const response = await api.get("products", {
      page,
      per_page: perPage,
    });

    allProducts.push(...response.data);

    // Check headers for total pages
    const totalPages = Number.parseInt(response.headers["x-wp-totalpages"], 10);

    if (Number.isFinite(totalPages)) {
      if (page >= totalPages) break;
    } else if (response.data.length < perPage) {
      // Header missing or unparsable (e.g. stripped by a proxy): NaN would
      // never end the loop, so stop at the first page that is not full
      break;
    }
  }

  return allProducts;
}
```

### Pagination Headers

| Header | Description |
|--------|-------------|
| `X-WP-Total` | Total number of items |
| `X-WP-TotalPages` | Total number of pages |

---

## Error Handling

```typescript
import WooCommerceRestApi from "@woocommerce/woocommerce-rest-api";

/**
 * Run a WooCommerce API call and translate HTTP failures into descriptive
 * errors, retrying a bounded number of times when rate limited.
 *
 * @param operation - Function performing the request; must resolve to `{ data }`.
 * @param maxRetries - How many times to retry after HTTP 429 (default 3).
 * @param retryDelayMs - Pause before each retry in milliseconds (default 5000).
 * @returns The `data` payload of the successful response.
 * @throws Error describing the HTTP failure, or the original error when it
 *         carries no `response` (e.g. network failure).
 */
async function safeApiCall<T>(
  operation: () => Promise<{ data: T }>,
  maxRetries = 3,
  retryDelayMs = 5000
): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      const response = await operation();
      return response.data;
    } catch (error: any) {
      // No HTTP response means the failure is not an API error
      if (!error.response) {
        throw error;
      }

      // API returned an error
      const { status, data } = error.response;
      const message = data?.message ?? "unknown error";

      switch (status) {
        case 400:
          throw new Error(`Bad request: ${message}`);
        case 401:
          throw new Error("Invalid API credentials");
        case 404:
          throw new Error("Resource not found");
        case 429:
          // Rate limited - wait and retry, but never loop forever
          if (attempt >= maxRetries) {
            throw new Error(`Rate limited: gave up after ${maxRetries} retries`);
          }
          await new Promise(r => setTimeout(r, retryDelayMs));
          continue;
        default:
          throw new Error(`API error: ${message}`);
      }
    }
  }
}

// Usage
const products = await safeApiCall(() => api.get("products"));
```

---

## Environment Variables

```bash
# .env
WC_STORE_URL=https://your-store.com
WC_CONSUMER_KEY=ck_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
WC_CONSUMER_SECRET=cs_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
WC_WEBHOOK_SECRET=your_webhook_secret
```

Add to `credentials.md`:
```python
'WC_CONSUMER_KEY': r'ck_[a-f0-9]{40}',
'WC_CONSUMER_SECRET': r'cs_[a-f0-9]{40}',
```

---

## Checklist

### Before Integration

- [ ] WooCommerce plugin installed and activated
- [ ] HTTPS enabled on store
- [ ] Permalinks set to non-Plain setting
- [ ] API keys generated with appropriate permissions
- [ ] Webhook secret configured

### Security

- [ ] API keys stored in environment variables
- [ ] Webhook signatures verified
- [ ] HTTPS used for all API calls
- [ ] Rate limiting handled

### Testing

- [ ] Test API connection
- [ ] Test product CRUD operations
- [ ] Test order creation/updates
- [ ] Test webhook delivery
- [ ] Test pagination for large datasets

---

## Anti-Patterns

- **Plain permalinks** - API won't work without pretty permalinks
- **HTTP in production** - Always use HTTPS
- **Ignoring rate limits** - WooCommerce may throttle requests
- **Large single requests** - Use pagination for bulk operations
- **Storing keys in code** - Use environment variables
- **Skipping webhook verification** - Always verify signatures


================================================
FILE: skills/workspace/SKILL.md
================================================
---
name: workspace
description: Dynamic multi-repo and monorepo awareness for Claude Code. Analyze workspace topology, track API contracts, and maintain cross-repo context.
when-to-use: When working across multiple repos or in a monorepo with shared dependencies
user-invocable: true
effort: high
---

# Workspace Analysis Skill

> Dynamic multi-repo and monorepo awareness for Claude Code. Analyze workspace topology, track API contracts, and maintain cross-repo context.

## The Problem

When you have separate frontend/backend repos (or monorepo with multiple apps), Claude Code operates in isolation. It doesn't know:

- API contracts between modules/repos
- Shared types and interfaces
- Full system architecture
- Cross-repo dependencies
- What changed in other parts of the system

This leads to:
- Duplicate type definitions
- API contract mismatches
- Breaking changes not caught until runtime
- Claude reimplementing things that exist elsewhere

---

## Solution: Dynamic Workspace Analysis

Instead of static manifests that get stale, Claude dynamically analyzes the workspace and generates context artifacts that stay fresh through hooks.

```
┌─────────────────────────────────────────────────────────────────┐
│  WORKSPACE ANALYSIS SYSTEM                                       │
├─────────────────────────────────────────────────────────────────┤
│                                                                  │
│  /analyze-workspace (Full Analysis - ~2 min)                     │
│  ├── Topology discovery (monorepo vs multi-repo)                │
│  ├── Dependency graph (who calls whom)                          │
│  ├── Contract extraction (OpenAPI, GraphQL, types)              │
│  └── Key file identification (what to load when)                │
│                                                                  │
│  /sync-contracts (Incremental - ~15 sec)                         │
│  ├── Check contract source files for changes                    │
│  ├── Update CONTRACTS.md with diffs                             │
│  └── Validate consistency                                       │
│                                                                  │
│  Hooks (Automatic)                                               │
│  ├── Session start: Staleness advisory (~5 sec)                 │
│  ├── Post-commit: Auto-sync if contracts changed (~15 sec)      │
│  └── Pre-push: Validation gate (~10 sec)                        │
│                                                                  │
└─────────────────────────────────────────────────────────────────┘
```

---

## Workspace Classification

### Detection Patterns

| Type | Indicators | File Access |
|------|------------|-------------|
| **Monorepo** | pnpm-workspace.yaml, nx.json, turbo.json, lerna.json | Direct (same tree) |
| **Multi-repo** | Sibling directories with separate .git | Via symlinks or paths |
| **Hybrid** | Monorepo + external repo dependencies | Mixed |
| **Single** | One app, no workspace config | N/A (use existing-repo) |

### Monorepo Detection

```bash
# Check for monorepo indicators
ls package.json pnpm-workspace.yaml lerna.json nx.json turbo.json 2>/dev/null
ls apps/ packages/ services/ libs/ modules/ 2>/dev/null
```

### Multi-Repo Detection

```bash
# Check sibling directories for related repos
# (each checkout keeps its metadata in <repo>/.git, so glob one level down)
ls -d ../*/.git 2>/dev/null
# Show each sibling's remote URL, prefixed with the config file it came from
grep -H "url" ../*/.git/config 2>/dev/null

# Look for naming patterns
ls .. | grep -E "(frontend|backend|api|web|shared|common)"
```

### Polyglot Detection

```bash
# Find all package manifests
# Prune dependency/VCS directories first so vendored manifests
# (e.g. node_modules/*/package.json) do not drown out real modules.
find . -maxdepth 4 \
  \( -name node_modules -o -name .git -o -name vendor -o -name .venv \) -prune -o \
  \( -name "package.json" -o -name "pyproject.toml" \
     -o -name "go.mod" -o -name "Cargo.toml" -o -name "pom.xml" \
     -o -name "build.gradle" -o -name "Gemfile" \) -print
```

---

## Analysis Protocol

### Phase 1: Topology Discovery (~30 seconds)

Determine workspace structure:

```markdown
## Discovery Checklist

1. [ ] Identify workspace root
2. [ ] Classify workspace type (monorepo/multi-repo/hybrid/single)
3. [ ] List all modules/apps/packages
4. [ ] Detect tech stack per module
5. [ ] Identify entry points per module
```

**Module Detection Pattern:**

```
workspace-root/
├── apps/           → Application modules
│   ├── web/        → Frontend app
│   └── api/        → Backend app
├── packages/       → Shared packages
│   ├── ui/         → Component library
│   ├── types/      → Shared types
│   └── db/         → Database layer
├── services/       → Microservices
└── libs/           → Internal libraries
```

### Phase 2: Dependency Graph (~60 seconds)

For each module, map:

**1. Internal Dependencies**
```bash
# TypeScript/JavaScript
# An explicit "." start path keeps this portable (BSD grep -r reads stdin
# when no path is given); node_modules is skipped to avoid vendored noise.
grep -r "from ['\"]@" --include="*.ts" --include="*.tsx" \
  --exclude-dir=node_modules . | head -50
# Look for workspace: protocol deps in every module's package.json
grep -r "workspace:" --include="package.json" --exclude-dir=node_modules .

# Python
grep -r "from \." --include="*.py" --exclude-dir=.venv . | head -50
```

**2. API Relationships**
```bash
# Find API calls
# Search from the current directory explicitly and skip vendored code.
grep -rE "fetch|axios|httpx|requests\." --include="*.ts" --include="*.py" \
  --exclude-dir=node_modules --exclude-dir=.venv . | \
  grep -E "/api|localhost|127\.0\.0\.1" | head -30
```

**3. Database Connections**
```bash
# Find DB access patterns
# Search from the current directory explicitly and skip vendored code.
grep -rE "prisma|drizzle|sqlalchemy|sequelize|typeorm" \
  --include="*.ts" --include="*.py" \
  --exclude-dir=node_modules --exclude-dir=.venv .
```

### Phase 3: Contract Extraction (~45 seconds)

Identify and parse API contracts:

| Contract Type | Detection | Extraction |
|---------------|-----------|------------|
| **OpenAPI** | openapi.json, swagger.yaml, /docs endpoint | Parse paths, schemas |
| **GraphQL** | schema.graphql, *.gql, /graphql endpoint | Parse types, queries, mutations |
| **tRPC** | trpc router files, @trpc/* imports | Parse router definitions |
| **Protobuf** | *.proto files | Parse services, messages |
| **TypeScript** | Shared .d.ts, exported interfaces | Parse exported types |
| **Pydantic** | schemas/, models/ with BaseModel | Parse model definitions |
| **Zod** | schemas/ with z.object | Parse schema definitions |

**Contract Source Priority:**

1. Generated specs (openapi.json) - most accurate
2. Schema definitions (Pydantic, Zod) - source of truth
3. Type exports (TypeScript .d.ts) - consumer contracts
4. Inferred from code - last resort

### Phase 4: Key File Identification (~30 seconds)

Identify files Claude MUST know about for each context:

| Category | Detection Pattern | Token Priority |
|----------|-------------------|----------------|
| **Route definitions** | `**/routes/**`, `**/api/**`, `@app.get`, `@router` | HIGH |
| **Type definitions** | `**/types/**`, `*.d.ts`, `schemas/`, `models/` | HIGH |
| **Config** | `.env.example`, `config/`, `settings.py` | MEDIUM |
| **Entry points** | `main.ts`, `index.ts`, `app.py`, `server.py` | MEDIUM |
| **API clients** | `**/api/client*`, `**/lib/api*` | HIGH |
| **Database schema** | `schema/`, `migrations/`, `prisma/schema.prisma` | MEDIUM |
| **Tests** | `__tests__/`, `*_test.py`, `*.spec.ts` | LOW (on-demand) |

---

## Generated Artifacts

All artifacts go in `_project_specs/workspace/`:

```
_project_specs/workspace/
├── TOPOLOGY.md           # What modules exist, their roles
├── CONTRACTS.md          # API specs, shared types (summarized)
├── DEPENDENCY_GRAPH.md   # Who calls whom (visual + list)
├── KEY_FILES.md          # What to load for each context
├── CROSS_REPO_INDEX.md   # Capabilities across all modules
└── .contract-sources     # Files to monitor for changes
```

### TOPOLOGY.md Format

```markdown
# Workspace Topology

Generated: 2026-01-20T14:32:00Z
Analyzer: maggy/workspace-analysis
Workspace Type: Monorepo (Turborepo)

## Overview

```
┌─────────────────────────────────────────────────┐
│ apps/web (Next.js) ←→ apps/api (FastAPI)        │
│      ↓                      ↓                   │
│ packages/shared-types ← packages/db             │
└─────────────────────────────────────────────────┘
```

## Modules

### apps/web
- **Path**: /apps/web
- **Tech**: Next.js 14, TypeScript, TailwindCSS
- **Role**: Customer-facing dashboard
- **Consumes**: apps/api (REST), packages/shared-types
- **Entry**: src/app/layout.tsx
- **Key files**:
  - `src/lib/api/client.ts` - API client (187 lines)
  - `src/types/` - Frontend-specific types (12 files)
- **Token estimate**: ~15K (full), ~4K (summarized)

### apps/api
- **Path**: /apps/api
- **Tech**: FastAPI, Python 3.12, SQLAlchemy
- **Role**: REST API, business logic
- **Exposes**: OpenAPI at /docs (47 endpoints)
- **Consumes**: packages/db
- **Entry**: app/main.py
- **Key files**:
  - `app/routes/` - All endpoints (8 routers)
  - `app/schemas/` - Pydantic models (23 files)
  - `openapi.json` - Generated spec
- **Token estimate**: ~22K (full), ~6K (summarized)

### packages/shared-types
- **Path**: /packages/shared-types
- **Tech**: TypeScript
- **Role**: Shared type definitions
- **Consumed by**: apps/web, apps/api (codegen)
- **Key files**:
  - `src/index.ts` - All exports (340 lines)
- **Token estimate**: ~3K

### packages/db
- **Path**: /packages/db
- **Tech**: Drizzle ORM, TypeScript
- **Role**: Database schema, migrations
- **Consumed by**: apps/api
- **Key files**:
  - `schema/` - Table definitions (8 files)
  - `migrations/` - Migration history (23 files)
- **Token estimate**: ~8K (full), ~2K (schema only)
```

### CONTRACTS.md Format

```markdown
# API Contracts

Generated: 2026-01-20T14:32:00Z
Last sync: 2026-01-20T16:45:00Z
Sources: 3 files monitored

## REST API: apps/api → apps/web

### Endpoints Summary (47 total)

| Domain | Count | Key Endpoints |
|--------|-------|---------------|
| /api/auth | 5 | POST /login, POST /register, POST /refresh |
| /api/users | 6 | GET /me, PATCH /me, GET /:id |
| /api/campaigns | 8 | CRUD + POST /bulk, GET /analytics |
| /api/analytics | 12 | GET /dashboard, GET /timeseries, GET /funnel |
| /api/settings | 4 | GET /, PATCH /, GET /integrations |

### Key Types

```typescript
// Campaign domain (from apps/api/app/schemas/campaign.py)
interface Campaign {
  id: string;
  name: string;
  status: 'draft' | 'active' | 'paused' | 'completed';
  budget: number;
  target_audience: TargetAudience;
  created_at: string;
  updated_at: string;
}

interface CampaignCreate {
  name: string;
  budget: number;
  target_audience?: TargetAudience;
}

// Auth domain (from apps/api/app/schemas/auth.py)
interface User {
  id: string;
  email: string;
  name: string;
  role: 'user' | 'admin';
}

interface TokenPair {
  access_token: string;
  refresh_token: string;
  expires_in: number;
}
```

### Contract Validation Status

| Check | Status | Details |
|-------|--------|---------|
| OpenAPI matches routes | ✅ | 47/47 endpoints documented |
| Types match schemas | ✅ | All Pydantic models exported |
| Frontend types current | ⚠️ | 2 types need regeneration |

## Shared Types: packages/shared-types

### Exported Types (34 total)

| Category | Types | Used By |
|----------|-------|---------|
| Domain models | Campaign, User, Analytics | web, api |
| API responses | ApiResponse<T>, PaginatedResponse<T> | web |
| Utilities | DateRange, FilterParams | web, api |

## Database Schema: packages/db

### Tables (12 total)

| Table | Key Columns | Relations |
|-------|-------------|-----------|
| users | id, email, name, role | campaigns, sessions |
| campaigns | id, user_id, name, status | analytics, targets |
| analytics | id, campaign_id, date, metrics | campaigns |
```

### DEPENDENCY_GRAPH.md Format

```markdown
# Dependency Graph

Generated: 2026-01-20T14:32:00Z

## Visual Overview

```
                    ┌─────────────────┐
                    │  packages/db    │
                    │  (Drizzle ORM)  │
                    └────────┬────────┘
                             │
                             ▼
┌─────────────────┐   ┌─────────────────┐
│    apps/web     │◄──│    apps/api     │
│   (Next.js)     │   │   (FastAPI)     │
└────────┬────────┘   └────────┬────────┘
         │                     │
         ▼                     ▼
┌─────────────────────────────────────────┐
│        packages/shared-types            │
│           (TypeScript)                  │
└─────────────────────────────────────────┘
```

## Dependency Matrix

| Module | Depends On | Depended By |
|--------|------------|-------------|
| apps/web | shared-types, apps/api (runtime) | - |
| apps/api | shared-types (codegen), db | apps/web |
| packages/shared-types | - | apps/web, apps/api |
| packages/db | - | apps/api |

## Import Analysis

### apps/web imports:
```
@repo/shared-types: 23 files
apps/api (via fetch): 15 files
```

### apps/api imports:
```
packages/db: 12 files
packages/shared-types (codegen): 8 files
```

## API Call Graph

```
apps/web                          apps/api
─────────                         ────────
src/lib/api/client.ts ──────────► app/routes/auth.py
  └── login()          POST /api/auth/login
  └── register()       POST /api/auth/register

src/app/campaigns/page.tsx ─────► app/routes/campaigns.py
  └── getCampaigns()   GET /api/campaigns
  └── createCampaign() POST /api/campaigns
```
```

### KEY_FILES.md Format

```markdown
# Key Files by Context

## Context: Frontend API Integration
**When**: Modifying API calls, response handling, or API types in frontend

Load these files (~8K tokens):
```
apps/web/src/lib/api/client.ts       # API client implementation
apps/web/src/types/api.d.ts          # Frontend API types
apps/api/openapi.json                # Full API spec (or summary)
packages/shared-types/src/index.ts   # Shared type definitions
```

## Context: Backend Endpoint Development
**When**: Adding/modifying API endpoints

Load these files (~12K tokens):
```
apps/api/app/routes/                 # Existing route patterns
apps/api/app/schemas/                # Pydantic models (relevant domain)
apps/api/app/dependencies/           # Auth, DB dependencies
packages/db/schema/                  # Relevant table definitions
```

## Context: Database Changes
**When**: Schema modifications, migrations, queries

Load these files (~6K tokens):
```
packages/db/schema/                  # All table definitions
packages/db/migrations/              # Last 5 migrations
apps/api/app/models/                 # ORM model usage
```

## Context: Shared Types
**When**: Modifying interfaces used across modules

Load these files (~4K tokens):
```
packages/shared-types/src/           # Type source files
apps/web/src/types/api.d.ts          # Consumer (frontend)
apps/api/app/schemas/                # Source (backend)
```

## Context: Authentication
**When**: Auth flow, sessions, tokens

Load these files (~5K tokens):
```
apps/api/app/routes/auth.py          # Auth endpoints
apps/api/app/dependencies/auth.py    # Auth middleware
apps/web/src/lib/auth/               # Frontend auth handling
packages/shared-types/src/auth.ts    # Auth types
```

## Load-on-Demand Triggers

| Claude detects... | Load additionally |
|-------------------|-------------------|
| "check the API contract" | Full OpenAPI spec |
| Import from another module | That module's exports |
| Database query pattern | Full schema definitions |
| Test failure in other module | That module's test files |
| "breaking change" | Both sides of the contract |
```

### CROSS_REPO_INDEX.md Format

```markdown
# Cross-Repository Capability Index

Generated: 2026-01-20T14:32:00Z

## Capabilities by Domain

### Authentication
| Capability | Location | Module | Type |
|------------|----------|--------|------|
| Login user | POST /api/auth/login | apps/api | endpoint |
| Register user | POST /api/auth/register | apps/api | endpoint |
| Refresh token | POST /api/auth/refresh | apps/api | endpoint |
| Auth context | src/contexts/AuthContext.tsx | apps/web | component |
| Auth hook | src/hooks/useAuth.ts | apps/web | hook |
| User type | src/auth.ts | shared-types | type |
| Session type | src/auth.ts | shared-types | type |

### Campaigns
| Capability | Location | Module | Type |
|------------|----------|--------|------|
| List campaigns | GET /api/campaigns | apps/api | endpoint |
| Create campaign | POST /api/campaigns | apps/api | endpoint |
| Campaign CRUD | app/routes/campaigns.py | apps/api | router |
| Campaign form | src/components/CampaignForm.tsx | apps/web | component |
| Campaign type | src/campaign.ts | shared-types | type |
| campaigns table | schema/campaigns.ts | packages/db | table |

### Analytics
| Capability | Location | Module | Type |
|------------|----------|--------|------|
| Dashboard data | GET /api/analytics/dashboard | apps/api | endpoint |
| Timeseries | GET /api/analytics/timeseries | apps/api | endpoint |
| Analytics hook | src/hooks/useAnalytics.ts | apps/web | hook |
| Chart components | src/components/charts/ | apps/web | components |

## Search Index

Before implementing new functionality, search this index:

```
Q: "How do I get the current user?"
A: Use useAuth() hook from apps/web/src/hooks/useAuth.ts
   Or GET /api/users/me endpoint from apps/api

Q: "Where are campaign types defined?"
A: Source of truth: packages/shared-types/src/campaign.ts
   Backend schema: apps/api/app/schemas/campaign.py
   Frontend types: apps/web/src/types/api.d.ts (generated)

Q: "How do I add a new API endpoint?"
A: Pattern in apps/api/app/routes/campaigns.py
   Register in apps/api/app/routes/__init__.py
   Add types to packages/shared-types
   Regenerate frontend types
```
```

---

## Token Budget Management

### Context Limits

```
┌─────────────────────────────────────────────────────────────────┐
│  TOKEN BUDGET ALLOCATION                                         │
├─────────────────────────────────────────────────────────────────┤
│  Total context: ~200K tokens                                     │
│  Reserve for output: ~50K tokens                                 │
│  Working budget: ~150K tokens                                    │
├─────────────────────────────────────────────────────────────────┤
│  P0 (Must have):     50K │ Current module (full)                │
│  P1 (Should have):   40K │ Directly related modules (summary)   │
│  P2 (Nice to have):  30K │ Contracts + shared types             │
│  P3 (If room):       20K │ Decisions, todos, history            │
│  Buffer:             10K │ Dynamic loading during session       │
└─────────────────────────────────────────────────────────────────┘
```

### Automatic Summarization

When loading cross-module context, summarize:

| Content Type | Full Load Threshold | Summarization Strategy |
|--------------|---------------------|------------------------|
| OpenAPI spec | < 50 endpoints | Endpoints + key types only |
| Type files | < 30 types | Exported types only |
| Route files | < 200 lines | Signatures + docstrings |
| Config files | < 50 lines | Keys only (no values/secrets) |
| Test files | Never full | Only on explicit request |

### Context Loading Strategy

```
┌─────────────────────────────────────────────────────────────────┐
│  CONTEXT LOADING HIERARCHY                                       │
├─────────────────────────────────────────────────────────────────┤
│  Level 1: Always loaded (~5K tokens)                             │
│  ├── TOPOLOGY.md (workspace structure)                          │
│  ├── CONTRACTS.md (API summary)                                 │
│  └── CROSS_REPO_INDEX.md (capability search)                    │
│                                                                  │
│  Level 2: Loaded based on current file (~15K tokens)            │
│  ├── KEY_FILES.md recommendations for current context           │
│  ├── Related module summaries                                   │
│  └── Relevant type definitions                                  │
│                                                                  │
│  Level 3: On-demand expansion (variable)                        │
│  ├── Full OpenAPI spec (when "check API contract")              │
│  ├── Full type files (when modifying interfaces)                │
│  └── Other module's full files (when cross-repo change)         │
└─────────────────────────────────────────────────────────────────┘
```

---

## Multi-Repo File Access

For multi-repo workspaces (separate .git directories):

### Option 1: Sibling Directory Convention (Recommended)

```
~/code/
├── myapp-frontend/     # git repo
├── myapp-backend/      # git repo
├── myapp-shared/       # git repo
└── .workspace/         # workspace config (optional)
    └── myapp.yaml
```

Claude accesses via relative paths: `../myapp-backend/`

### Option 2: Workspace Symlinks

```bash
# In frontend repo
mkdir -p .workspace/repos
# A relative symlink target is resolved from the directory that holds the
# link (.workspace/repos/), so climb three levels to reach the sibling repos.
ln -s ../../../myapp-backend .workspace/repos/backend
ln -s ../../../myapp-shared .workspace/repos/shared
```

### Option 3: Git Submodules

```bash
# Add related repos as submodules (read-only)
git submodule add --depth 1 ../myapp-shared .workspace/shared
```

### File Access Rules

```markdown
## Multi-Repo Access Protocol

WHEN accessing files from another repo:
1. Use relative paths from workspace root
2. Read-only access (never modify other repos)
3. Cache contract files locally in _project_specs/workspace/cache/
4. Log cross-repo reads in decisions.md

BEFORE making cross-repo changes:
1. Document the change in BOTH repos' decisions.md
2. Create linked todos in BOTH repos
3. Implement in dependency order (shared → backend → frontend)
```

---

## Cross-Repo Change Detection

When Claude detects changes that affect other modules:

```
┌─────────────────────────────────────────────────────────────────┐
│  ⚠️  CROSS-REPO CHANGE DETECTED                                  │
├─────────────────────────────────────────────────────────────────┤
│  This change affects: apps/api                                   │
│  Specifically: Endpoint POST /api/campaigns expects new field    │
│                                                                  │
│  Impact Analysis:                                                │
│  ├── apps/web/src/lib/api/client.ts - needs update              │
│  ├── packages/shared-types/src/campaign.ts - needs new field    │
│  └── apps/api/app/schemas/campaign.py - source of change        │
│                                                                  │
│  Recommended Order:                                              │
│  1. Update packages/shared-types first (source of truth)         │
│  2. Update apps/api schema                                       │
│  3. Regenerate frontend types                                    │
│  4. Update apps/web API client                                   │
│  5. Run /sync-contracts                                          │
│                                                                  │
│  [Proceed with guidance] [Load full context] [Cancel]            │
└─────────────────────────────────────────────────────────────────┘
```

### Change Impact Patterns

| Change Type | Impacts | Action |
|-------------|---------|--------|
| New API endpoint | Frontend client, types | Add to both, sync contracts |
| Modified response | Frontend types, tests | Regenerate types, update tests |
| New required field | All consumers | Breaking change protocol |
| Renamed field | All consumers | Migration + deprecation |
| New shared type | Consumers on next use | Export from shared-types |
| Schema migration | API models, queries | Run migration, verify queries |

---

## Contract Freshness System

### Staleness Detection

```bash
# .contract-sources file (auto-generated)
# Files that define contracts - monitored for changes

# OpenAPI specs
apps/api/openapi.json
apps/api/docs/openapi.yaml

# Type definitions
packages/shared-types/src/index.ts
packages/shared-types/src/api.ts

# Pydantic schemas
apps/api/app/schemas/*.py

# Database schema
packages/db/schema/*.ts
```

### Freshness Tiers

| Tier | Trigger | Action | Time | Blocking |
|------|---------|--------|------|----------|
| 1 | Session start | Staleness check | ~5s | No |
| 2 | Post-commit | Auto-sync if contracts changed | ~15s | No |
| 3 | Pre-push | Validation gate | ~10s | Yes (bypassable) |
| 4 | PR opened | CI validation | ~30s | Yes |
| 5 | Weekly cron | Full re-analysis | ~2min | No |

### Freshness Indicators

```markdown
## Contract Status (shown in CONTRACTS.md header)

Last full analysis: 2026-01-18T10:00:00Z
Last sync: 2026-01-20T14:32:00Z
Staleness: 🟢 Fresh (synced 2 hours ago)

## Confidence Levels

🟢 Fresh     - Synced within 24 hours, no source changes
🟡 Stale     - Sources changed since last sync
🔴 Outdated  - Over 7 days since last analysis
⚠️  Drift    - Validation found inconsistencies
```

---

## Integration with Existing Skills

### With existing-repo.md

The `workspace` skill runs the `existing-repo` skill's analysis for each module:

```markdown
## Module Analysis Delegation

For each module in workspace:
1. Run existing-repo analysis on that module
2. Extract: tech stack, conventions, guardrails status
3. Aggregate into TOPOLOGY.md
4. Don't duplicate - reference existing-repo output
```

### With session-management.md

```markdown
## Session State Integration

Workspace context files are part of session state:
- TOPOLOGY.md → structural context (rarely changes)
- CONTRACTS.md → API context (check freshness each session)
- KEY_FILES.md → loading guidance (static reference)

On session start:
1. Load _project_specs/workspace/*.md into context
2. Check contract freshness
3. Advise if sync needed
```

### With code-review.md

```markdown
## Cross-Repo Review Checks

When reviewing code that touches contracts:

1. Check if change affects other modules
2. Verify contract consistency
3. Flag if CONTRACTS.md needs update
4. Warn about breaking changes

Add to review output:
### 🔗 Cross-Repo Impact
- [ ] This change affects: apps/web (API client)
- [ ] Contract update needed: Yes
- [ ] Breaking change: No
```

---

## Commands

### /analyze-workspace

Full workspace analysis - run on first setup or major changes.

See `commands/analyze-workspace.md` for full specification.

### /sync-contracts

Lightweight incremental contract update - run frequently.

See `commands/sync-contracts.md` for full specification.

### /workspace-status

Quick status check:

```
📊 Workspace Status: myapp

Type: Monorepo (Turborepo)
Modules: 4 (2 apps, 2 packages)
Contracts: 🟢 Fresh (synced 2h ago)
Token estimate: 45K / 150K budget

Quick actions:
  /sync-contracts     - Update contracts
  /analyze-workspace  - Full refresh
```

---

## CI/CD Integration

### GitHub Actions: Contract Validation

```yaml
# .github/workflows/contracts.yml
# Fails a PR when contract sources change without a matching CONTRACTS.md
# update, and warns when the documented endpoint count drifts from the spec.
name: Contract Validation

on:
  pull_request:
    paths:
      - 'apps/api/**'
      - 'packages/shared-types/**'
      - 'packages/db/schema/**'

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is required: the default shallow clone has no
          # origin/main ref, so the git diff below would fail.
          fetch-depth: 0

      - name: Check contract freshness
        run: |
          CHANGED=$(git diff --name-only origin/main HEAD | \
            grep -E "openapi|schema|types" || true)

          if [ -n "$CHANGED" ]; then
            echo "Contract sources changed:"
            echo "$CHANGED"

            if ! git diff --name-only origin/main HEAD | grep -qF "CONTRACTS.md"; then
              echo "::error::Contract sources changed but CONTRACTS.md not updated"
              echo "Run /sync-contracts before merging"
              exit 1
            fi
          fi

      - name: Validate consistency
        run: |
          if [ -f "apps/api/openapi.json" ]; then
            ENDPOINTS=$(jq -r '.paths | keys | length' apps/api/openapi.json)
            # grep -c already prints 0 when nothing matches (and exits 1), so
            # "|| echo 0" would yield "0\n0"; swallow the status and default
            # only when grep printed nothing (e.g. CONTRACTS.md is missing).
            DOCUMENTED=$(grep -c "^| /" _project_specs/workspace/CONTRACTS.md 2>/dev/null || true)
            DOCUMENTED=${DOCUMENTED:-0}

            if [ "$ENDPOINTS" != "$DOCUMENTED" ]; then
              echo "::warning::Endpoint count mismatch"
            fi
          fi
```

### Pre-commit Hook

```bash
#!/bin/bash
# hooks/pre-commit-contracts
#
# Advisory pre-commit hook: prints a reminder when a staged file is listed
# in the workspace's .contract-sources file. It never blocks the commit.

WORKSPACE_DIR="_project_specs/workspace"

#######################################
# Print a reminder for each contract-source entry matched by a staged path.
# Blank lines and "#" comment lines in the sources file are ignored; every
# other line is treated as a shell glob (e.g. apps/api/app/schemas/*.py)
# and compared against whole staged paths.
# Arguments:
#   $1 - path to the .contract-sources file
#   $2 - newline-separated list of staged paths
# Outputs:
#   Reminder lines on stdout, once per matching entry.
#######################################
report_staged_contract_sources() {
  local sources_file=$1
  local staged=$2
  local pattern path

  # Read line by line: word-splitting the file would turn comment words
  # into patterns and produce false matches.
  while IFS= read -r pattern || [[ -n "$pattern" ]]; do
    # Trim leading and trailing whitespace.
    pattern="${pattern#"${pattern%%[![:space:]]*}"}"
    pattern="${pattern%"${pattern##*[![:space:]]}"}"
    [[ -z "$pattern" || "$pattern" == \#* ]] && continue

    while IFS= read -r path; do
      [[ -z "$path" ]] && continue
      # Right-hand side intentionally unquoted so the entry acts as a glob.
      # shellcheck disable=SC2053
      if [[ "$path" == $pattern ]]; then
        echo "📝 Contract source staged: $pattern"
        echo "Remember to run /sync-contracts before pushing"
        break
      fi
    done <<< "$staged"
  done < "$sources_file"
}

main() {
  local sources_file="$WORKSPACE_DIR/.contract-sources"
  local staged

  # Nothing to check when the workspace has not been analyzed yet.
  [[ -f "$sources_file" ]] || return 0

  # Check if staged files include contract sources
  staged=$(git diff --cached --name-only) || return 0
  report_staged_contract_sources "$sources_file" "$staged"
  return 0
}

main "$@"
```

---

## Troubleshooting

### "Workspace not detected"

```bash
# Check for workspace indicators
ls -la package.json pnpm-workspace.yaml turbo.json nx.json 2>/dev/null

# If multi-repo, check sibling directories
ls -la ../

# Manual classification
/analyze-workspace --type monorepo
/analyze-workspace --type multi-repo --repos "../backend,../shared"
```

### "Contract sync failed"

```bash
# Check contract sources exist
cat _project_specs/workspace/.contract-sources

# Verify file access
# Read the same file as above, skipping comment and blank lines; entries may
# be globs, so $f is left unquoted for ls to expand them.
grep -vE '^[[:space:]]*(#|$)' _project_specs/workspace/.contract-sources |
  while IFS= read -r f; do
    ls -la $f 2>/dev/null || echo "Missing: $f"
  done

# Force full refresh
/analyze-workspace --force
```

### "Token budget exceeded"

```bash
# Check current estimates
/workspace-status

# Reduce context loading
# Edit KEY_FILES.md to prioritize
# Or work on one module at a time
```

### "Cross-repo access denied"

```bash
# Check paths are correct
ls ../backend/  # or wherever related repo is

# Set up symlinks if needed
mkdir -p .workspace/repos
# The target is resolved relative to .workspace/repos/ (where the link
# lives), so three levels up are needed to reach the sibling ../backend.
ln -s ../../../backend .workspace/repos/backend

# Or configure in workspace
/analyze-workspace --repo-path backend=../myapp-backend
```


================================================
FILE: templates/AGENTS.md
================================================
# AGENTS.md

## Personality

You are a brilliant engineer who also happens to be genuinely funny. Think dry wit, clever observations, and well-timed one-liners. You:

- Drop a joke or witty remark naturally into your responses (not forced, not every single line)
- Use self-deprecating humor about AI when it fits ("I've reviewed 500 lines of code and my only complaint is that I can't drink coffee while doing it")
- Make cheeky comments about bad code patterns ("Ah yes, a 400-line function. Bold choice. I admire the confidence.")
- Celebrate wins with personality ("Tests passing. Chef's kiss. Gordon Ramsay would weep.")
- Keep the humor punchy, never at the user's expense, and never let it get in the way of actually being helpful
- Match energy: if the user is stressed about a deadline, read the room. If they're vibing, vibe back.
- No dad jokes. No "as an AI" disclaimers. No cringe. Think more "witty coworker" than "corporate chatbot trying to be relatable."

## Skills
@.agents/skills/base/SKILL.md
@.agents/skills/iterative-development/SKILL.md
@.agents/skills/security/SKILL.md
@.agents/skills/cross-agent-delegation/SKILL.md

## Project Context
- Language: [e.g., TypeScript]
- Framework: [e.g., Next.js 14 (App Router)]
- Database: [e.g., Supabase/PostgreSQL]
- ORM: [e.g., Drizzle]
- Testing: [e.g., Vitest]
- Auth: [e.g., Supabase Auth]

## Commands
[npm test]                     # run tests
[npm run test:coverage]        # tests with coverage
[npm run lint]                 # lint
[npm run typecheck]            # type check
[npm run dev]                  # local dev server

## Project Structure
[Fill in after project setup, e.g.:]
src/
  app/           # Pages / routes
  components/    # UI components
  lib/           # Shared utilities
  db/
    schema.ts    # Database schema — read before any DB code
    migrations/  # Database migrations
  api/           # API route handlers

## Key Decisions
[Document settled architectural choices so the agent doesn't re-litigate them, e.g.:]
- [ORM choice and why]
- [Auth approach]
- [State management approach]
- [Branch strategy: feature branches off main, squash merge via PR]
- [Environment variables validated at startup via src/lib/env.ts]

## Conventions
[Document patterns the agent should follow, e.g.:]
- Colocated tests: Component.test.tsx next to Component.tsx
- API routes return { data, error } shape
- Database queries go through src/db/queries/ — never raw SQL in routes
- Use existing utilities before creating new ones — check src/lib/ first

## Cross-Agent Workflow

### Codex Auto-Review (Stop Hook)
After tests pass, Codex automatically reviews changes for bugs/security.
Critical/High findings feed back to the agent for fixing. Requires: `codex` CLI installed.

### Kimi Delegation (Token Optimization)
The orchestrating agent delegates to Kimi automatically:
- Blast radius <= 3 files: Delegate to Kimi via `kimi --print -y -p "..."`
- Blast radius 4-8 files: Ask user, then delegate or handle directly
- Blast radius > 8 files: Handle directly (needs full context)
Context is passed via `mnemos checkpoint` + `mnemos resume` (not raw conversation).

### iCPG (Always-On for All Agents)
Before ANY code change in ANY tool (Claude, Kimi, Codex):
1. `icpg query prior "<goal>"` — check for duplicate work
2. `icpg query constraints <file>` — check invariants
3. `icpg query risk <symbol>` — check fragility

### Mnemos (Always-On for All Agents)
All agents use Mnemos for memory management:
- `mnemos add goal "<task>"` at task start
- `mnemos checkpoint` at sub-goal boundaries
- Session hooks auto-manage fatigue and checkpoints

## Don't
- Don't modify .env files
- Don't add packages without checking if existing deps cover the need
- Don't put secrets in client-exposed env vars (NEXT_PUBLIC_*, VITE_*)
- Don't skip the test phase


================================================
FILE: templates/CLAUDE.local.md
================================================
# CLAUDE.local.md - Private Developer Overrides
# This file is NOT checked into git. Use it for personal preferences.

# Uncomment and customize what applies to you:

# ## My Preferences
# - I prefer verbose explanations over terse responses
# - Skip type annotations in my PRs
# - I'm new to this codebase, explain more context

# ## Local Environment
# - My local DB runs on port 5433
# - Use `pnpm` instead of `npm` for my setup

# ## Override Quality Gates
# - Allow 30 lines per function (I prefer fewer files)
# - Skip coverage check for prototype work


================================================
FILE: templates/CLAUDE.md
================================================
# CLAUDE.md

## Personality

You are a brilliant engineer who also happens to be genuinely funny. Think dry wit, clever observations, and well-timed one-liners. You:

- Drop a joke or witty remark naturally into your responses (not forced, not every single line)
- Use self-deprecating humor about AI when it fits ("I've reviewed 500 lines of code and my only complaint is that I can't drink coffee while doing it")
- Make cheeky comments about bad code patterns ("Ah yes, a 400-line function. Bold choice. I admire the confidence.")
- Celebrate wins with personality ("Tests passing. Chef's kiss. Gordon Ramsay would weep.")
- Keep the humor punchy, never at the user's expense, and never let it get in the way of actually being helpful
- Match energy: if the user is stressed about a deadline, read the room. If they're vibing, vibe back.
- No dad jokes. No "as an AI" disclaimers. No cringe. Think more "witty coworker" than "corporate chatbot trying to be relatable."

## Skills
@.claude/skills/base/SKILL.md
@.claude/skills/iterative-development/SKILL.md
@.claude/skills/security/SKILL.md
@.claude/skills/mnemos/SKILL.md
@.claude/skills/cross-agent-delegation/SKILL.md
@.claude/skills/polyphony/SKILL.md

## Project Context
- Language: [e.g., TypeScript]
- Framework: [e.g., Next.js 14 (App Router)]
- Database: [e.g., Supabase/PostgreSQL]
- ORM: [e.g., Drizzle]
- Testing: [e.g., Vitest]
- Auth: [e.g., Supabase Auth]

## Commands
[npm test]                     # run tests
[npm run test:coverage]        # tests with coverage
[npm run lint]                 # lint
[npm run typecheck]            # type check
[npm run dev]                  # local dev server

## Project Structure
[Fill in after project setup, e.g.:]
src/
  app/           # Pages / routes
  components/    # UI components
  lib/           # Shared utilities
  db/
    schema.ts    # Database schema — read before any DB code
    migrations/  # Database migrations
  api/           # API route handlers

## Key Decisions
[Document settled architectural choices so Claude doesn't re-litigate them, e.g.:]
- [ORM choice and why]
- [Auth approach]
- [State management approach]
- [Branch strategy: feature branches off main, squash merge via PR]
- [Environment variables validated at startup via src/lib/env.ts]

## Conventions
[Document patterns Claude should follow, e.g.:]
- Colocated tests: Component.test.tsx next to Component.tsx
- API routes return { data, error } shape
- Database queries go through src/db/queries/ — never raw SQL in routes
- Use existing utilities before creating new ones — check src/lib/ first

## Cross-Agent Workflow

### Codex Auto-Review (Stop Hook)
After tests pass, Codex automatically reviews changes for bugs/security.
Critical/High findings feed back to Claude for fixing. Requires: `codex` CLI installed.

### Kimi Delegation (Token Optimization)
Claude orchestrates Kimi delegation automatically:
- Blast radius <= 3 files: Claude delegates to Kimi via `kimi --print -y -p "..."`
- Blast radius 4-8 files: Claude asks user, then delegates or handles directly
- Blast radius > 8 files: Claude handles it (needs full context)
Context is passed via `mnemos checkpoint` + `mnemos resume` (not raw conversation).

### Container Isolation (Polyphony)
When Docker is available, each feature agent runs in its own container with an independent git branch.
- `/spawn-team` uses Polyphony by default (fallback to native agents if no Docker)
- `polyphony status` to see running agents
- `polyphony cleanup` after completion

### iCPG (Always-On for All Agents)
Before ANY code change in ANY tool (Claude, Kimi, Codex):
1. `icpg query prior "<goal>"` — check for duplicate work
2. `icpg query constraints <file>` — check invariants
3. `icpg query risk <symbol>` — check fragility

### Mnemos (Always-On for All Agents)
All agents use Mnemos for memory management:
- `mnemos add goal "<task>"` at task start
- `mnemos checkpoint` at sub-goal boundaries
- Session hooks auto-manage fatigue and checkpoints

## Don't
- Don't modify .env files
- Don't add packages without checking if existing deps cover the need
- Don't put secrets in client-exposed env vars (NEXT_PUBLIC_*, VITE_*)
- Don't skip the test phase


================================================
FILE: templates/Dockerfile.polyphony
================================================
# Image for an agent container: Python 3.12 base plus git, Node.js 20 and the
# GitHub CLI, running as the unprivileged user `worker` in /workspace.
FROM python:3.12-slim AS base
# Base tooling. gnupg is needed below to convert the GitHub CLI signing key;
# the apt package lists are deleted in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git curl ca-certificates gnupg && rm -rf /var/lib/apt/lists/*

# Node.js 20 (JS/TS projects)
# The NodeSource setup script is piped straight into bash; it registers the
# repository from which `nodejs` is then installed.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && rm -rf /var/lib/apt/lists/*

# GitHub CLI
# Download the archive keyring, store it de-armored, register the apt source
# pinned to that key via `signed-by`, then install `gh`.
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
    | gpg --dearmor -o /usr/share/keyrings/githubcli.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/githubcli.gpg] \
    https://cli.github.com/packages stable main" \
    > /etc/apt/sources.list.d/github-cli.list && \
    apt-get update && apt-get install -y gh && rm -rf /var/lib/apt/lists/*

# Unprivileged account with a home directory (-m) and bash as login shell.
RUN useradd -m -s /bin/bash worker
USER worker
# NOTE(review): nothing here chowns /workspace to `worker`; presumably the
# project is bind-mounted over it at runtime — confirm the agent can write to it.
WORKDIR /workspace

# Auth mounted at runtime: -v ~/.claude:/home/worker/.claude:ro
# Agent CLI installed via volume or ARG at build time


================================================
FILE: templates/codex-auto-review.sh
================================================
#!/bin/bash
# codex-auto-review.sh — Stop hook: auto-review with Codex after tests pass
# Exit 0 = pass (no issues or codex not installed)
# Exit 2 = critical/high issues found (feeds back to Claude)
#
# Install: copy to .claude/scripts/codex-auto-review.sh
# Requires: codex CLI (npm i -g @openai/codex)

set -uo pipefail

# Codex writes its final message to REVIEW_FILE. The file lives inside a
# private directory created by `mktemp -d` (mode 0700, unpredictable name)
# instead of a guessable /tmp/codex-review-<pid>.txt, so another local user
# cannot pre-create or symlink it. The file itself does not exist until Codex
# writes it, which keeps the "no review file -> pass" check meaningful.
# If the directory cannot be created the hook passes (exit 0) rather than
# blocking the agent.
REVIEW_DIR=$(mktemp -d "${TMPDIR:-/tmp}/codex-review-XXXXXX") || exit 0
REVIEW_FILE="$REVIEW_DIR/review.txt"

# Remove the scratch directory on every exit path, including early exits.
trap 'rm -rf -- "$REVIEW_DIR"' EXIT

# Succeeds when a `codex` command can be resolved by the shell.
# Prints nothing; only the exit status is meaningful.
check_codex() {
    command -v codex >/dev/null 2>&1
}

# Prints the names of files that differ from HEAD, followed by the names of
# files with staged changes, one per line. Duplicates are possible; callers
# de-duplicate. git's error output is discarded.
get_changed_files() {
    {
        git diff --name-only HEAD
        git diff --cached --name-only
    } 2>/dev/null
}

# Succeeds when at least one changed file has a reviewable source extension
# (ts, tsx, js, jsx, py, go, rs, java, kt).
has_changes() {
    local code_file_count
    # grep -c prints 0 and exits non-zero when nothing matches; `|| true`
    # keeps that from being treated as a failure of the substitution.
    code_file_count=$(
        get_changed_files \
            | sort -u \
            | grep -cE '\.(ts|tsx|js|jsx|py|go|rs|java|kt)$' \
            || true
    )
    (( code_file_count > 0 ))
}

# Sends the current diff to `codex exec` for a critical/high-severity review.
#
# Globals:  REVIEW_FILE (read) — path Codex writes its final message to.
# Outputs:  nothing on stdout; Codex's stderr is discarded.
# Returns:  0 when there is nothing to review, otherwise Codex's exit status.
run_codex_review() {
    local diff_content
    # `git diff HEAD` already contains staged AND unstaged changes, so adding
    # `git diff --cached` on top would send staged hunks twice and waste the
    # size budget below. The staged-only diff is used as a fallback when
    # diffing against HEAD fails (e.g. a repository without commits).
    diff_content=$(git diff HEAD 2>/dev/null || git diff --cached 2>/dev/null)
    [ -z "$diff_content" ] && return 0

    # Truncate diff to avoid token limits (keep first 8000 chars).
    # Parameter expansion counts characters, so a multi-byte character is
    # never cut in half the way a byte-based `head -c` could.
    local truncated="${diff_content:0:8000}"

    # The prompt pins the reply format: findings carry a severity prefix and a
    # clean review is a single sentinel word. Without that, a reply such as
    # "No critical issues found" would itself contain the word "critical" and
    # be mistaken for a finding by a keyword scan of REVIEW_FILE.
    codex exec \
        --full-auto \
        --sandbox read-only \
        --output-last-message "$REVIEW_FILE" \
        "Review this diff for critical bugs and security issues only. Be concise. Flag only Critical or High severity, and start each finding with 'Critical:' or 'High severity:'. If there are no such findings, reply with exactly NO_ISSUES_FOUND and nothing else. Diff: $truncated" \
        2>/dev/null
}

# Scans the Codex review for blocking findings.
#
# Globals:  REVIEW_FILE (read) — review text; deleted before returning.
# Outputs:  on a finding, a headline plus the full review on stderr.
# Returns:  2 when critical or high-severity markers are present, else 0
#           (including when no review file exists).
check_findings() {
    [ -f "$REVIEW_FILE" ] || return 0

    # Critical markers take precedence over high-severity ones.
    local headline=""
    if grep -qiE 'critical|🔴|security vulnerability|injection' "$REVIEW_FILE"; then
        headline="CODEX AUTO-REVIEW: Critical issues found:"
    elif grep -qiE '🟠|high severity' "$REVIEW_FILE"; then
        headline="CODEX AUTO-REVIEW: High severity issues:"
    fi

    local verdict=0
    if [ -n "$headline" ]; then
        echo "$headline" >&2
        cat "$REVIEW_FILE" >&2
        verdict=2
    fi

    rm -f "$REVIEW_FILE"
    return "$verdict"
}

# Entry point. Exits 0 when there is nothing to report and 2 when the review
# contains critical/high findings.
main() {
    # The hook passes when codex is not installed, when no code files
    # changed, or when the review run itself fails.
    if ! check_codex || ! has_changes || ! run_codex_review; then
        exit 0
    fi

    # Hand check_findings' status (0 = clean, 2 = blocking) to the caller.
    local verdict=0
    check_findings || verdict=$?
    exit "$verdict"
}

main


================================================
FILE: templates/config.toml
================================================
# Agent CLI Configuration
# Compatible with Kimi CLI and OpenAI Codex CLI
# Generated from Maggy settings.json hooks
#
# Kimi: copy to ~/.kimi/config.toml or .kimi/config.toml
# Codex: copy to ~/.codex/config.toml or .codex/config.toml
#
# Shared pattern for the hook commands in this file: run the project-local
# script under .claude/scripts/ if it is executable, otherwise the copy under
# $HOME/.claude/templates/, otherwise succeed with `exit 0`. Because the
# script is started with `exec`, its exit status becomes the hook's status.
# NOTE(review): `timeout` values are presumably seconds — confirm against the
# hook documentation of the CLI in use.

# ─── Skills merge (Kimi-specific) ───────────────────────────
# Kimi reads skills from ~/.kimi/, ~/.claude/, ~/.codex/
# merge_all_available_skills = true  # default: merge all brands

# ─── Hook: Pre-Compact (Mnemos checkpoint) ──────────────────
# Runs mnemos-pre-compact.sh on the PreCompact event.
[[hooks]]
event = "PreCompact"
command = """
if [ -x ".claude/scripts/mnemos-pre-compact.sh" ]; then
  exec ".claude/scripts/mnemos-pre-compact.sh"
fi
if [ -x "$HOME/.claude/templates/mnemos-pre-compact.sh" ]; then
  exec "$HOME/.claude/templates/mnemos-pre-compact.sh"
fi
exit 0
"""
timeout = 8

# ─── Hook: Pre-Edit (Mnemos fatigue + intent check) ────────
# Restricted by `matcher` to the file-editing tools named below.
[[hooks]]
event = "PreToolUse"
matcher = "Edit|Write|StrReplaceFile|WriteFile"
command = """
if [ -x ".claude/scripts/mnemos-pre-edit.sh" ]; then
  exec ".claude/scripts/mnemos-pre-edit.sh"
fi
if [ -x "$HOME/.claude/templates/mnemos-pre-edit.sh" ]; then
  exec "$HOME/.claude/templates/mnemos-pre-edit.sh"
fi
exit 0
"""
timeout = 5

# ─── Hook: Post-Compact Restore ────────────────────────────
# Registered on PreToolUse with no matcher, so it is not limited to a
# particular tool.
[[hooks]]
event = "PreToolUse"
command = """
if [ -x ".claude/scripts/mnemos-post-compact-inject.sh" ]; then
  exec ".claude/scripts/mnemos-post-compact-inject.sh"
fi
if [ -x "$HOME/.claude/templates/mnemos-post-compact-inject.sh" ]; then
  exec "$HOME/.claude/templates/mnemos-post-compact-inject.sh"
fi
exit 0
"""
timeout = 2

# ─── Hook: Post-Tool (Mnemos logging) ──────────────────────
# Runs mnemos-post-tool.sh on the PostToolUse event.
[[hooks]]
event = "PostToolUse"
command = """
if [ -x ".claude/scripts/mnemos-post-tool.sh" ]; then
  exec ".claude/scripts/mnemos-post-tool.sh"
fi
if [ -x "$HOME/.claude/templates/mnemos-post-tool.sh" ]; then
  exec "$HOME/.claude/templates/mnemos-post-tool.sh"
fi
exit 0
"""
timeout = 1

# ─── Hook: TDD Loop Check (Stop) ───────────────────────────
# First of several hooks registered on the Stop event; it has the second
# largest timeout in this file.
[[hooks]]
event = "Stop"
command = """
if [ -x ".claude/scripts/tdd-loop-check.sh" ]; then
  exec ".claude/scripts/tdd-loop-check.sh"
fi
if [ -x "$HOME/.claude/templates/tdd-loop-check.sh" ]; then
  exec "$HOME/.claude/templates/tdd-loop-check.sh"
fi
exit 0
"""
timeout = 60

# ─── Hook: Codex Auto-Review (Stop) ──────────────────────────
# Runs codex-auto-review.sh only when the `codex` CLI is on PATH, preferring
# the project-local script over the copy in $HOME/.claude/templates/.
# The availability check uses the POSIX form `>/dev/null 2>&1`: the bash-only
# `&>` is parsed by a plain `sh` as "run in background, then redirect", which
# makes the guard always true and lets the command's output escape.
[[hooks]]
event = "Stop"
command = """
if command -v codex >/dev/null 2>&1; then
  if [ -x ".claude/scripts/codex-auto-review.sh" ]; then
    exec ".claude/scripts/codex-auto-review.sh"
  elif [ -x "$HOME/.claude/templates/codex-auto-review.sh" ]; then
    exec "$HOME/.claude/templates/codex-auto-review.sh"
  fi
fi
exit 0
"""
timeout = 120

# ─── Hook: ICPG Stop Record ────────────────────────────────
# Runs icpg-stop-record.sh on Stop: project-local script first, then the
# copy under $HOME/.claude/templates/, otherwise `exit 0`.
[[hooks]]
event = "Stop"
command = """
if [ -x ".claude/scripts/icpg-stop-record.sh" ]; then
  exec ".claude/scripts/icpg-stop-record.sh"
fi
if [ -x "$HOME/.claude/templates/icpg-stop-record.sh" ]; then
  exec "$HOME/.claude/templates/icpg-stop-record.sh"
fi
exit 0
"""
timeout = 5

# ─── Hook: Mnemos Stop Checkpoint ──────────────────────────
# Runs mnemos-stop-checkpoint.sh on Stop with the same lookup order.
[[hooks]]
event = "Stop"
command = """
if [ -x ".claude/scripts/mnemos-stop-checkpoint.sh" ]; then
  exec ".claude/scripts/mnemos-stop-checkpoint.sh"
fi
if [ -x "$HOME/.claude/templates/mnemos-stop-checkpoint.sh" ]; then
  exec "$HOME/.claude/templates/mnemos-stop-checkpoint.sh"
fi
exit 0
"""
timeout = 5

# ─── Hook: Session Start (Mnemos restore) ──────────────────
# Runs mnemos-session-start.sh on SessionStart with the same lookup order.
[[hooks]]
event = "SessionStart"
command = """
if [ -x ".claude/scripts/mnemos-session-start.sh" ]; then
  exec ".claude/scripts/mnemos-session-start.sh"
fi
if [ -x "$HOME/.claude/templates/mnemos-session-start.sh" ]; then
  exec "$HOME/.claude/templates/mnemos-session-start.sh"
fi
exit 0
"""
timeout = 5


================================================
FILE: templates/icpg-pre-edit.sh
================================================
#!/bin/bash
# iCPG PreToolUse Hook — injects intent context before Edit/Write operations.
#
# Shows the agent what iCPG reports for the file about to be modified:
# intent context, constraints, and file-scoped drift.
#
# Input:  the tool-call JSON. Taken from $CLAUDE_TOOL_INPUT when that variable
#         is set; otherwise read from stdin. The path is looked up under
#         `tool_input.file_path` / `tool_input.path` when the JSON has a
#         `tool_input` object, else at top-level `file_path` / `path`.
# Output: a context banner on stdout when at least one query returned text.
# Exit:   always 0 — the hook is advisory and must never block an edit.
#
# Install: add to .claude/settings.json under hooks.PreToolUse
# Timeout: 3 seconds max — never blocks

# Skip if there is no iCPG database (cheapest check, so it goes first)
if [ ! -f ".icpg/reason.db" ]; then
    exit 0
fi

# Resolve how to invoke icpg: the binary if present, else the Python module.
# The command is kept as an array so it is never subject to word-splitting.
# python3 is tried before python because many systems ship no `python`.
ICPG_CMD=()
if command -v icpg &>/dev/null; then
    ICPG_CMD=(icpg)
elif python3 -m icpg --version &>/dev/null; then
    ICPG_CMD=(python3 -m icpg)
elif python -m icpg --version &>/dev/null; then
    ICPG_CMD=(python -m icpg)
else
    exit 0
fi

# Obtain the tool-call JSON: environment variable first, stdin as fallback.
# stdin is only read when it is not a terminal, so a manual run cannot hang.
TOOL_INPUT="${CLAUDE_TOOL_INPUT:-}"
if [ -z "$TOOL_INPUT" ] && [ ! -t 0 ]; then
    TOOL_INPUT=$(cat)
fi

# Extract the file path; any parse problem simply yields an empty path.
FILE_PATH=""
if [ -n "$TOOL_INPUT" ]; then
    FILE_PATH=$(printf '%s' "$TOOL_INPUT" | python3 -c '
import sys, json
try:
    data = json.load(sys.stdin)
    inner = data.get("tool_input")
    if not isinstance(inner, dict):
        inner = data
    print(inner.get("file_path") or inner.get("path") or "")
except Exception:
    pass
' 2>/dev/null)
fi

if [ -z "$FILE_PATH" ]; then
    exit 0
fi

# Query context, constraints, and drift (file-scoped fast check)
CONTEXT=$("${ICPG_CMD[@]}" query context "$FILE_PATH")
CONSTRAINTS=$("${ICPG_CMD[@]}" query constraints "$FILE_PATH")
DRIFT=$("${ICPG_CMD[@]}" drift file "$FILE_PATH")

# Only output if we have something. printf '%s' prints icpg's text verbatim,
# whereas `echo -e` would interpret backslash sequences inside it.
if [ -n "$CONTEXT" ] || [ -n "$CONSTRAINTS" ] || [ -n "$DRIFT" ]; then
    echo "═══ iCPG CONTEXT ═══"
    if [ -n "$CONTEXT" ]; then
        printf '%s\n' "$CONTEXT"
    fi
    if [ -n "$CONSTRAINTS" ]; then
        printf '\n%s\n' "$CONSTRAINTS"
    fi
    if [ -n "$DRIFT" ]; then
        printf '\n%s\n' "$DRIFT"
    fi
    echo "PRESERVE function signatures unless your task requires changing them."
    echo "═══════════════════"
fi

exit 0


================================================
FILE: templates/icpg-stop-record.sh
================================================
#!/bin/bash
# iCPG Stop Hook Extension — auto-records symbols after implementation.
#
# Reads .icpg/.current-intent to know which ReasonNode is active.
# If set, records symbols from git diff to that intent.
#
# Environment:
#   ICPG_BASE_BRANCH  branch passed to `icpg record --base` (default: main)
#
# Exit: always 0 — a failed recording never blocks the Stop event.
#
# Chain this AFTER tdd-loop-check.sh in the Stop hook:
#   tdd-loop-check runs first → if tests pass → this records symbols

# Skip if no active intent
CURRENT_INTENT=$(cat .icpg/.current-intent 2>/dev/null)
if [ -z "$CURRENT_INTENT" ]; then
    exit 0
fi

# Skip if icpg not available. The command is an array so it is executed
# without relying on word-splitting; python3 is tried before python because
# many systems ship no `python` executable.
ICPG_CMD=()
if command -v icpg &>/dev/null; then
    ICPG_CMD=(icpg)
elif python3 -m icpg --version &>/dev/null; then
    ICPG_CMD=(python3 -m icpg)
elif python -m icpg --version &>/dev/null; then
    ICPG_CMD=(python -m icpg)
else
    exit 0
fi

# Record symbols from current diff; report only when recording succeeded.
BASE_BRANCH="${ICPG_BASE_BRANCH:-main}"
if OUTPUT=$("${ICPG_CMD[@]}" record --reason "$CURRENT_INTENT" --base "$BASE_BRANCH" 2>&1); then
    echo "iCPG: $OUTPUT" >&2
fi

exit 0


================================================
FILE: templates/mnemos-post-compact-inject.sh
================================================
#!/bin/bash
# Mnemos Post-Compaction Injection — Layer 2 of task restoration.
#
# This is a PreToolUse hook with NO matcher (fires on ALL tool calls).
# It detects when compaction just occurred and re-injects the full checkpoint
# into Claude's context, ensuring the task can be resumed seamlessly.
#
# Fast path: ~5ms when no compaction happened (just a file existence check).
# Slow path: ~100ms when injecting checkpoint (only fires once after compaction).
#
# How it works:
#   1. PreCompact hook writes ".mnemos/just-compacted" marker
#   2. This hook checks for that marker on every tool call
#   3. If marker exists and is fresh (<5 min), inject checkpoint and delete marker
#   4. Marker deletion is atomic (rename) to prevent parallel injection
#   5. A marker whose JSON cannot be read is aged by its mtime instead, so a
#      corrupt marker is still consumed or deleted rather than left behind
#      (which would force the slow path on every later tool call)
#
# Exit: always 0.
#
# Install: add to .claude/settings.json under hooks.PreToolUse (no matcher)

# ─── Fast path: no compaction marker = exit immediately ───

[ -f ".mnemos/just-compacted" ] || exit 0

# ─── Validate marker is fresh and atomically consume it ───
# Prints one of: consumed | stale | already_consumed | error

CONSUMED=$(python3 -c "
import json, os, time

marker = '.mnemos/just-compacted'
consumed = '.mnemos/just-compacted.consumed'


def marker_age():
    # Age in seconds from the recorded timestamp; falls back to the file
    # mtime when the content is not usable JSON. A missing file propagates.
    try:
        with open(marker) as f:
            stamp = json.load(f).get('timestamp', 0)
        return time.time() - float(stamp)
    except FileNotFoundError:
        raise
    except Exception:
        return time.time() - os.path.getmtime(marker)


try:
    if marker_age() > 300:
        # Stale marker (>5 min), just delete it
        os.unlink(marker)
        print('stale')
    else:
        # Fresh marker: the rename succeeds for exactly one process
        os.rename(marker, consumed)
        try:
            os.unlink(consumed)
        except OSError:
            pass
        print('consumed')
except FileNotFoundError:
    # Another hook already consumed it (parallel tool calls)
    print('already_consumed')
except Exception:
    print('error')
")

# Only inject if we successfully consumed the marker
if [ "$CONSUMED" != "consumed" ]; then
    exit 0
fi

# ─── Inject checkpoint into Claude's context ───
# The mnemos package directory is handed to Python as an argument instead of
# being pasted into the source, so a path containing quotes cannot break it.

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

python3 -c "
import sys, json
sys.path.insert(0, sys.argv[1])

try:
    from mnemos.checkpoint import format_for_post_compact_injection
    output = format_for_post_compact_injection('.')
    if output:
        print(output)
    else:
        print('=== MNEMOS: Compaction detected but no checkpoint found. ===')
        print('Previous context was lost. Ask the user what they were working on.')
except Exception as e:
    # Fallback: try to read checkpoint JSON directly
    try:
        with open('.mnemos/checkpoint-latest.json') as f:
            data = json.load(f)
        print('=== MNEMOS: CONTEXT RESTORED AFTER COMPACTION ===')
        print()
        print('Compaction just occurred. Resume from this checkpoint:')
        print()
        print('## Goal')
        print(data.get('goal', 'No goal recorded'))
        print()
        constraints = data.get('active_constraints', [])
        if constraints:
            print('## Active Constraints (DO NOT VIOLATE)')
            for c in constraints:
                print(f'- {c}')
            print()
        narrative = data.get('task_narrative', '')
        if narrative:
            print('## What You Were Working On')
            print(narrative)
            print()
        print('=== Resume work from this checkpoint. ===')
    except Exception:
        print('=== MNEMOS: Compaction detected but checkpoint unreadable. ===')
        print('Ask the user what they were working on.')
" "${SCRIPT_DIR%/templates}/scripts"

exit 0


================================================
FILE: templates/mnemos-post-tool.sh
================================================
#!/bin/bash
# Mnemos PostToolUse Hook — logs tool outcomes + auto-feeds token signal.
#
# 1. Logs success/failure signal to .mnemos/signals.jsonl (error density)
# 2. If fatigue.json is stale (>60s), estimates context usage from JSONL
#
# Receives JSON on stdin with tool_name, tool_input, tool_response.
# The JSON is piped to Python on stdin and parsed there. It is never pasted
# into the Python source: doing so re-interprets JSON escapes such as \n and
# \" (making the payload unparseable) and lets tool output inject code.
#
# Exit: always 0.
# Install: add to .claude/settings.json under hooks.PostToolUse
# Timeout: 1 second max — never blocks

# Skip if no .mnemos directory
if [ ! -d ".mnemos" ]; then
    exit 0
fi

# Read hook input from stdin
HOOK_INPUT=$(cat)

if [ -z "$HOOK_INPUT" ]; then
    exit 0
fi

# Log signal + update fatigue.json if stale
printf '%s' "$HOOK_INPUT" | python3 -c "
import json, sys, time, os, glob

try:
    data = json.load(sys.stdin)
except Exception:
    sys.exit(0)
if not isinstance(data, dict):
    sys.exit(0)

tool = data.get('tool_name', '')
tool_input = data.get('tool_input')
if not isinstance(tool_input, dict):
    tool_input = {}
response = data.get('tool_response', {})

# Extract file path
fp = tool_input.get('file_path', '') or tool_input.get('path', '')

# Determine success
success = True
if isinstance(response, dict):
    if response.get('error') or response.get('is_error'):
        success = False
    if 'exit_code' in response and response['exit_code'] != 0:
        success = False
elif isinstance(response, str):
    if response.startswith('Error:') or 'error' in response[:50].lower():
        success = False

# Append signal
signal = {
    'tool': tool,
    'event': 'post',
    'file_path': fp,
    'success': success,
    'ts': time.time()
}

os.makedirs('.mnemos', exist_ok=True)
with open('.mnemos/signals.jsonl', 'a') as f:
    f.write(json.dumps(signal) + '\n')

# ─── Auto-feed token signal from JSONL if fatigue.json is stale ───

fatigue_path = '.mnemos/fatigue.json'
stale = True
try:
    with open(fatigue_path) as f:
        fd = json.load(f)
    # Fresh if updated within last 60 seconds (statusline is feeding it)
    if time.time() - fd.get('timestamp', 0) < 60:
        stale = False
except Exception:
    pass

if stale:
    # Find the most recent session JSONL
    home = os.path.expanduser('~')
    cwd = os.getcwd()
    # Claude Code project hash: path with / replaced by -
    project_key = cwd.replace('/', '-')
    project_dir = os.path.join(home, '.claude', 'projects', project_key)

    if not os.path.isdir(project_dir):
        # Try parent directories (Claude Code may use git root)
        for parent in [os.path.dirname(cwd), os.path.dirname(os.path.dirname(cwd))]:
            pk = parent.replace('/', '-')
            pd = os.path.join(home, '.claude', 'projects', pk)
            if os.path.isdir(pd):
                project_dir = pd
                break

    try:
        jsonl_files = sorted(
            glob.glob(os.path.join(project_dir, '*.jsonl')),
            key=os.path.getmtime, reverse=True
        )
        if jsonl_files:
            # Read the last line of the most recent JSONL
            with open(jsonl_files[0], 'rb') as session:
                # Seek to end, then read back at most the final 8KB
                session.seek(0, 2)
                pos = session.tell()
                if pos > 0:
                    # Read last 8KB (enough for one JSON entry)
                    read_size = min(8192, pos)
                    session.seek(pos - read_size)
                    chunk = session.read().decode('utf-8', errors='replace')
                    lines = chunk.strip().split('\n')
                    last_line = lines[-1]
                    entry = json.loads(last_line)
                    usage = entry.get('message', {}).get('usage', {})
                    if usage:
                        input_tok = usage.get('input_tokens', 0)
                        cache_read = usage.get('cache_read_input_tokens', 0)
                        cache_create = usage.get('cache_creation_input_tokens', 0)
                        total_in_context = input_tok + cache_read + cache_create
                        # Opus/Sonnet context window = 200k
                        context_limit = 200000
                        # JSONL tokens overestimate actual context by ~25%
                        # due to cache overhead. Apply correction factor.
                        correction = 0.75
                        used_pct = min(100.0, (total_in_context * correction / context_limit) * 100)
                        fatigue_data = {
                            'used_percentage': round(used_pct, 1),
                            'remaining_percentage': round(100 - used_pct, 1),
                            'used_tokens': total_in_context,
                            'total_tokens': context_limit,
                            'remaining_tokens': max(0, context_limit - total_in_context),
                            'timestamp': time.time(),
                            'source': 'jsonl_estimate'
                        }
                        with open(fatigue_path, 'w') as out:
                            json.dump(fatigue_data, out)
    except Exception:
        pass  # Best effort — never fail the hook over the estimate
"

exit 0


================================================
FILE: templates/mnemos-pre-compact.sh
================================================
#!/bin/bash
# Mnemos PreCompact Hook — emergency checkpoint + typed preservation + compaction marker.
#
# TWO-LAYER DEFENSE against lossy compaction:
#   Layer 1 (this script): Write emergency checkpoint, output strong preservation
#           instructions with inline content for the summarizer.
#   Layer 2 (mnemos-post-compact-inject.sh): After compaction, the first tool call
#           re-injects the full checkpoint. See that script for details.
#
# The marker file (.mnemos/just-compacted) bridges the two layers.
#
# Install: add to .claude/settings.json under hooks.PreCompact
# This EXTENDS (not replaces) the existing pre-compact.sh

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# ─── 1. Write emergency checkpoint with task narrative ───

# Runs `<mnemos> checkpoint --force`, discarding its output.
# Uses the `mnemos` command when one exists; otherwise the Python module found
# via the scripts/ directory derived from SCRIPT_DIR (a trailing /templates is
# replaced by /scripts). Does nothing when neither is available.
# The command is built as an argv array and executed directly — no `eval` —
# so a SCRIPT_DIR containing spaces or shell metacharacters is passed through
# literally instead of being re-parsed by the shell.
# Globals: SCRIPT_DIR (read)
write_emergency_checkpoint() {
    local scripts_dir="${SCRIPT_DIR%/templates}/scripts"
    local -a mnemos_cmd=()

    if command -v mnemos &>/dev/null; then
        mnemos_cmd=(mnemos)
    elif PYTHONPATH="$scripts_dir" python3 -m mnemos --version &>/dev/null; then
        mnemos_cmd=(env "PYTHONPATH=$scripts_dir" python3 -m mnemos)
    fi

    [ "${#mnemos_cmd[@]}" -gt 0 ] || return 0
    "${mnemos_cmd[@]}" checkpoint --force &>/dev/null
}

write_emergency_checkpoint

# ─── 2. Write compaction marker for Layer 2 detection ───
# The marker is a small JSON object holding the current time and a fixed
# reason string; the .mnemos directory is created when missing.

python3 -c "
import json, os, time

marker_path = os.path.join('.mnemos', 'just-compacted')
os.makedirs(os.path.dirname(marker_path), exist_ok=True)
payload = {'timestamp': time.time(), 'reason': 'pre_compact_hook'}
with open(marker_path, 'w') as marker:
    marker.write(json.dumps(payload))
"

# ─── 3. Build inline checkpoint content for summarizer ───
# The Python source is read from a quoted here-document into a shell variable
# and run with `python3 -c`. bash expands nothing inside a quoted
# here-document, so there are no escaping issues, and no temporary script is
# written to /tmp (the former `mktemp ...XXXXXX.py` template is not portable:
# BSD/macOS mktemp only substitutes X's at the very end of the template).

# Prints a line-oriented summary of .mnemos/checkpoint-latest.json
# (GOAL / CONSTRAINT / ACTIVITY / CURRENT TASK / WORKING MEMORY / RESULT /
# FILES / GIT / UNCOMMITTED). Prints nothing when the checkpoint is missing;
# a parse error is reported on stderr only.
build_checkpoint_content() {
    [ -f ".mnemos/checkpoint-latest.json" ] || return 0

    local py_source
    # read returns non-zero at end of input; the variable is still filled.
    IFS= read -r -d '' py_source <<'PYSCRIPT'
import json, sys
try:
    with open('.mnemos/checkpoint-latest.json') as f:
        data = json.load(f)
    lines = []
    goal = data.get('goal', '')
    if goal:
        lines.append('GOAL: ' + goal)
    for c in data.get('active_constraints', []):
        lines.append('CONSTRAINT: ' + c)
    narrative = data.get('task_narrative', '')
    if narrative:
        lines.append('ACTIVITY: ' + narrative)
    subgoal = data.get('current_subgoal', '')
    if subgoal:
        lines.append('CURRENT TASK: ' + subgoal)
    working = data.get('working_memory', '')
    if working:
        lines.append('WORKING MEMORY: ' + working[:300])
    for r in data.get('active_results', [])[:5]:
        lines.append('RESULT: ' + r)
    files = data.get('recent_files', [])[:8]
    if files:
        file_parts = []
        for entry in files:
            p = entry.get('path', '?')
            e = entry.get('edits', 0)
            r = entry.get('reads', 0)
            parts = []
            if e:
                parts.append('edited ' + str(e) + 'x')
            if r:
                parts.append('read ' + str(r) + 'x')
            # Only add the parenthesised detail when there is any detail.
            if parts:
                p = p + ' (' + ', '.join(parts) + ')'
            file_parts.append(p)
        lines.append('FILES: ' + '; '.join(file_parts))
    git = data.get('git_state', {})
    if git.get('branch'):
        lines.append('GIT: branch=' + git['branch'])
        uncommitted = git.get('uncommitted', [])
        if uncommitted:
            lines.append('UNCOMMITTED: ' + ', '.join(uncommitted[:5]))
    print('\n'.join(lines))
except Exception as e:
    print('Error: ' + str(e), file=sys.stderr)
PYSCRIPT

    python3 -c "$py_source"
}

CHECKPOINT_CONTENT=$(build_checkpoint_content)

# ─── 4. Extract typed preservation priorities from MnemoGraph ───
# The Python source comes from a quoted here-document (nothing is expanded by
# bash) and the mnemos package directory is passed as an argument. Pasting the
# directory into the source would break on a path containing a quote, and no
# temporary script is needed (the former `mktemp ...XXXXXX.py` template is not
# portable to BSD/macOS mktemp).

# Prints goal / constraint / working / result nodes from the Mnemos store as
# an indented list. Prints nothing when .mnemos/mnemo.db is missing, when the
# store reports it does not exist, or when the mnemos package cannot be used.
# Globals: SCRIPT_DIR (read) — a trailing /templates is replaced by /scripts
#          to locate the mnemos package.
build_mnemos_priorities() {
    [ -f ".mnemos/mnemo.db" ] || return 0

    local py_source
    # read returns non-zero at end of input; the variable is still filled.
    IFS= read -r -d '' py_source <<'PYSCRIPT'
import sys
sys.path.insert(0, sys.argv[1])

try:
    from mnemos.store import MnemosStore
    store = MnemosStore('.')
    if not store.exists():
        sys.exit(0)

    goals = store.get_by_type('goal')
    constraints = store.get_by_type('constraint')
    working = store.get_by_type('working')
    results = store.get_by_type('result')

    lines = []
    if goals:
        lines.append('GOAL (NEVER DROP):')
        for g in goals[:5]:
            lines.append('  - ' + g.content[:200])

    if constraints:
        lines.append('CONSTRAINTS (NEVER DROP):')
        for c in constraints[:10]:
            lines.append('  - ' + c.content[:200])

    if working:
        lines.append('CURRENT TASK (HIGH PRIORITY):')
        for w in working[:3]:
            lines.append('  - ' + w.content[:200])

    if results:
        lines.append('RESULTS (KEEP SUMMARIES):')
        for r in results[:5]:
            summary = r.summary or r.content[:100]
            lines.append('  - ' + summary)

    print('\n'.join(lines))
except Exception:
    pass
PYSCRIPT

    python3 -c "$py_source" "${SCRIPT_DIR%/templates}/scripts"
}

MNEMOS_PRIORITIES=$(build_mnemos_priorities)

# ─── 5. Output preservation instructions for summarizer ───
# Everything to stdout becomes additional instructions for the compaction prompt
#
# The text is emitted as four here-documents: two fixed ones (quoted
# delimiter, printed literally) and two optional ones (unquoted delimiter, so
# the shell variable inside is expanded) that are skipped when their variable
# is empty.

# Fixed preamble.
cat <<'INSTRUCTIONS'
## CRITICAL: Mnemos Task State Preservation

An emergency checkpoint has been saved to disk (.mnemos/checkpoint-latest.json).
A post-compaction injection hook will re-inject this checkpoint after compaction.

However, your summary should ALSO preserve the following task state. Include
this section VERBATIM in your summary output under a "## Mnemos Task State" heading:

INSTRUCTIONS

# Inline checkpoint summary, only when step 3 produced one.
if [ -n "$CHECKPOINT_CONTENT" ]; then
cat <<INSTRUCTIONS

### Mnemos Task State (INCLUDE VERBATIM IN SUMMARY)

$CHECKPOINT_CONTENT

INSTRUCTIONS
fi

# Fixed eviction policy text.
cat <<'INSTRUCTIONS'

### Typed Eviction Policies

**NEVER EVICT** (include verbatim in summary):
- GoalNodes: The task's primary objective — without this the agent cannot continue
- ConstraintNodes: Invariants and contracts that must not be violated

**COMPRESS BUT KEEP** (include summary, not full content):
- WorkingNodes: Current in-progress reasoning
- ResultNodes: Completed sub-task results (keep summaries)

**OK TO DROP** (can be re-derived from disk):
- ContextNodes: File contents, tool outputs
- Full tool call results (keep only findings)
- Exploration that led nowhere

INSTRUCTIONS

# Active memory nodes, only when step 4 produced any.
if [ -n "$MNEMOS_PRIORITIES" ]; then
cat <<INSTRUCTIONS

### Active Memory Nodes (from MnemoGraph)

$MNEMOS_PRIORITIES

These nodes represent the agent's active working memory. The summarizer
MUST preserve Goal and Constraint nodes VERBATIM in the output.

INSTRUCTIONS
fi

# ─── 6. Run existing pre-compact.sh if present ───
# A pre-compact.sh sitting next to this script is run with bash after the
# Mnemos output; its exit status is ignored because the hook always exits 0.

LEGACY_PRE_COMPACT="$SCRIPT_DIR/pre-compact.sh"
[ -f "$LEGACY_PRE_COMPACT" ] && bash "$LEGACY_PRE_COMPACT"

exit 0


================================================
FILE: templates/mnemos-pre-edit.sh
================================================
#!/bin/bash
# Mnemos PreToolUse Hook — fatigue-aware pre-edit with iCPG context.
#
# 1. Logs file path to signals.jsonl (for scope scatter + re-read tracking)
# 2. Reads fatigue from observable signals + token data
# 3. Auto-checkpoint when fatigue >= 0.60
# 4. Auto-consolidation when fatigue >= 0.40
# 5. Injects iCPG context, constraints, drift
#
# Install: add to .claude/settings.json under hooks.PreToolUse
# Timeout: 5 seconds max

# ─── Read hook input from stdin ───

HOOK_INPUT=$(cat)

# ─── Extract file path and tool name ───

# Sets FILE_PATH and TOOL_NAME from the JSON held in HOOK_INPUT.
#
# FILE_PATH is tool_input.file_path, falling back to tool_input.path;
# TOOL_NAME is tool_name. Both are left empty when HOOK_INPUT is empty, is not
# valid JSON, or the fields are missing or not strings.
#
# The values come back from Python as NUL-terminated fields and are read with
# `read -d ''`. They are never turned into shell source and eval'ed, so a path
# containing quotes, backticks or $(...) is stored literally instead of being
# executed by the shell.
mnemos_parse_hook_input() {
    FILE_PATH=""
    TOOL_NAME=""
    [ -n "${HOOK_INPUT:-}" ] || return 0

    {
        IFS= read -r -d '' FILE_PATH
        IFS= read -r -d '' TOOL_NAME
    } < <(printf '%s' "$HOOK_INPUT" | python3 -c '
import json
import sys

fp = tn = ""
try:
    data = json.load(sys.stdin)
    tool_input = data.get("tool_input") or {}
    fp = tool_input.get("file_path", "") or tool_input.get("path", "")
    tn = data.get("tool_name", "")
except Exception:
    fp = tn = ""

def as_field(value):
    # NUL is the field separator and cannot be stored in a shell variable.
    return value.replace("\0", "") if isinstance(value, str) else ""

sys.stdout.write(as_field(fp) + "\0" + as_field(tn) + "\0")
')
    # A failed read (python missing, no output) just leaves the fields empty.
    return 0
}

mnemos_parse_hook_input

# Without a target path there is nothing to log or look up below, so finish
# here with a success status.
if [ -z "$FILE_PATH" ]; then
    exit 0
fi

# ─── Log signal for fatigue computation ───

# Appends one "pre" record ({tool, event, file_path, ts}) to
# .mnemos/signals.jsonl. Does nothing when the project has no .mnemos
# directory.
#
# TOOL_NAME and FILE_PATH reach Python through the environment rather than
# being spliced into the Python source, so a path containing an apostrophe or
# backslash is recorded verbatim instead of breaking (or altering) the program.
mnemos_log_signal() {
    # Checking the directory alone is enough: .mnemos/fatigue.json can only
    # exist when .mnemos itself is a directory.
    [ -d ".mnemos" ] || return 0

    MNEMOS_SIGNAL_TOOL="$TOOL_NAME" MNEMOS_SIGNAL_PATH="$FILE_PATH" python3 -c '
import json, os, time
os.makedirs(".mnemos", exist_ok=True)
signal = {
    "tool": os.environ.get("MNEMOS_SIGNAL_TOOL", ""),
    "event": "pre",
    "file_path": os.environ.get("MNEMOS_SIGNAL_PATH", ""),
    "ts": time.time(),
}
with open(".mnemos/signals.jsonl", "a") as f:
    f.write(json.dumps(signal) + "\n")
'
}

mnemos_log_signal

# ─── Fatigue check (full model from observable signals) ───

# Prints the current fatigue state on stdout: flow, compress, pre_sleep, rem or
# emergency.
#
# Prefers mnemos.fatigue (looked up with scripts/ first on sys.path). If that
# import or computation raises, falls back to thresholds on used_percentage
# from .mnemos/fatigue.json (>=90 emergency, >=75 rem, >=60 pre_sleep,
# >=40 compress). Any failure in the fallback yields "flow".
mnemos_fatigue_state() {
    python3 -c '
import json, sys
sys.path.insert(0, "scripts")

try:
    from mnemos.fatigue import compute_fatigue, read_fatigue_file
    data = read_fatigue_file(".")
    if not data:
        print("flow")
        sys.exit(0)

    fatigue = compute_fatigue(data, ".")
    print(fatigue.state)
except Exception:
    # Fallback: just use token utilization
    try:
        with open(".mnemos/fatigue.json") as f:
            data = json.load(f)
        used = data.get("used_percentage", 0)
        if used >= 90: print("emergency")
        elif used >= 75: print("rem")
        elif used >= 60: print("pre_sleep")
        elif used >= 40: print("compress")
        else: print("flow")
    except Exception:
        print("flow")
'
}

# Starts `mnemos <args...>` in the background with all output discarded, so the
# hook is not blocked.
#
# The way to invoke mnemos is resolved once and cached in _mnemos_mode:
#   cli    - a `mnemos` executable is on PATH
#   module - `python3 -m mnemos` works with scripts/ on PYTHONPATH
#   none   - neither is available; the call is a no-op
# The probe uses the same PYTHONPATH as the real invocation. Probing without
# it would report "unavailable" for a copy that lives only under scripts/, and
# the fallback would never run.
_mnemos_mode=""
mnemos_run_bg() {
    local module_path="scripts${PYTHONPATH:+:$PYTHONPATH}"

    if [ -z "$_mnemos_mode" ]; then
        if command -v mnemos >/dev/null 2>&1; then
            _mnemos_mode="cli"
        elif PYTHONPATH="$module_path" python3 -m mnemos --version >/dev/null 2>&1; then
            _mnemos_mode="module"
        else
            _mnemos_mode="none"
        fi
    fi

    case "$_mnemos_mode" in
        cli)
            mnemos "$@" >/dev/null 2>&1 &
            ;;
        module)
            PYTHONPATH="$module_path" python3 -m mnemos "$@" >/dev/null 2>&1 &
            ;;
    esac
    return 0
}

FATIGUE_WARNING=""
if [ -f ".mnemos/fatigue.json" ]; then
    FATIGUE_ACTION=$(mnemos_fatigue_state)

    # Auto-checkpoint at pre_sleep or higher
    case "$FATIGUE_ACTION" in
        pre_sleep|rem|emergency)
            # Write checkpoint in background (don't block the hook)
            mnemos_run_bg checkpoint --force

            case "$FATIGUE_ACTION" in
                emergency)
                    FATIGUE_WARNING="EMERGENCY: Context 90%+ full. Checkpoint written. Finish current task and hand off."
                    ;;
                rem)
                    FATIGUE_WARNING="WARNING: Context 75%+ full. Checkpoint written. Consider wrapping up."
                    ;;
                *)
                    FATIGUE_WARNING="NOTICE: Context 60%+ full. Checkpoint written. Keep changes focused."
                    ;;
            esac
            ;;
    esac

    # Auto-consolidate at compress, pre_sleep or rem.
    # NOTE(review): "emergency" is not in this list, so no consolidation is
    # started in that state — confirm that this is intentional.
    case "$FATIGUE_ACTION" in
        compress|pre_sleep|rem)
            mnemos_run_bg consolidate
            ;;
    esac
fi

# ─── iCPG context ───

CONTEXT=""
CONSTRAINTS=""
DRIFT=""

# The database check comes first: it is a single stat, whereas resolving the
# icpg command may start a Python interpreter. Projects without
# .icpg/reason.db skip the probe entirely.
if [ -f ".icpg/reason.db" ]; then
    ICPG_CMD=""
    if command -v icpg >/dev/null 2>&1; then
        ICPG_CMD="icpg"
    elif python3 -m icpg --version >/dev/null 2>&1; then
        ICPG_CMD="python3 -m icpg"
    fi

    if [ -n "$ICPG_CMD" ]; then
        # ICPG_CMD is intentionally unquoted so that "python3 -m icpg" is split
        # into separate words; FILE_PATH stays quoted as a single argument.
        CONTEXT=$($ICPG_CMD query context "$FILE_PATH")
        CONSTRAINTS=$($ICPG_CMD query constraints "$FILE_PATH")
        DRIFT=$($ICPG_CMD drift file "$FILE_PATH")
    fi
fi

# ─── Output ───

# Prints the collected hook context on stdout, framed by a header line and a
# closing "---". Prints nothing at all when FATIGUE_WARNING, CONTEXT,
# CONSTRAINTS and DRIFT are all empty.
#
# printf '%s' is used instead of `echo -e` so that backslashes inside the
# collected text (Windows paths, regexes, "\n" in a message) are emitted as
# written rather than being interpreted as escape sequences.
mnemos_emit_context() {
    [ -n "${FATIGUE_WARNING}${CONTEXT}${CONSTRAINTS}${DRIFT}" ] || return 0

    printf '%s\n' "--- Mnemos + iCPG Context ---"

    if [ -n "$FATIGUE_WARNING" ]; then
        printf '%s\n\n' "$FATIGUE_WARNING"
    fi

    if [ -n "$CONTEXT" ]; then
        printf '%s\n' "$CONTEXT"
    fi
    if [ -n "$CONSTRAINTS" ]; then
        printf '\n%s\n' "$CONSTRAINTS"
    fi
    if [ -n "$DRIFT" ]; then
        printf '\n%s\n' "$DRIFT"
    fi

    if [ -n "$CONTEXT" ] || [ -n "$CONSTRAINTS" ]; then
        printf '%s\n' "PRESERVE function signatures unless your task requires changing them."
    fi

    printf '%s\n' "---"
}

mnemos_emit_context

exit 0


================================================
FILE: templates/mnemos-session-start.sh
================================================
#!/bin/bash
# Mnemos SessionStart Hook — loads checkpoint on session resume.
#
# Checks for .mnemos/checkpoint-latest.json and injects it into context.
# Also bridges iCPG state if available.
#
# Install: add to .claude/settings.json under hooks.SessionStart

# ─── Load checkpoint if exists ───

# Prints the command line that runs tool $1: the bare name when an executable
# of that name is on PATH, otherwise "python3 -m <tool>" when the module
# answers --version. Prints nothing when neither works.
mnemos_resolve_cli() {
    local tool="$1"
    if command -v "$tool" >/dev/null 2>&1; then
        printf '%s' "$tool"
    elif python3 -m "$tool" --version >/dev/null 2>&1; then
        printf '%s' "python3 -m $tool"
    fi
}

# Resolve the mnemos command once, and only when one of the two steps below
# can use it; the module probe starts a Python interpreter, so it should not
# run twice (or at all in projects without Mnemos state).
MNEMOS_CMD=""
if [ -f ".mnemos/checkpoint-latest.json" ] ||
    { [ -f ".icpg/reason.db" ] && [ -f ".mnemos/mnemo.db" ]; }; then
    MNEMOS_CMD=$(mnemos_resolve_cli mnemos)
fi

# ─── Load checkpoint if exists ───

if [ -n "$MNEMOS_CMD" ] && [ -f ".mnemos/checkpoint-latest.json" ]; then
    # MNEMOS_CMD is intentionally unquoted so "python3 -m mnemos" word-splits.
    RESUME_OUTPUT=$($MNEMOS_CMD resume 2>/dev/null)
    if [ -n "$RESUME_OUTPUT" ]; then
        echo "=== MNEMOS SESSION RESUME ==="
        echo "$RESUME_OUTPUT"
        echo ""
        echo "You are resuming from a previous session checkpoint."
        echo "Review the goal and constraints above before proceeding."
        echo "============================="
    fi
fi

# ─── Bridge iCPG if available and Mnemos DB exists ───

if [ -n "$MNEMOS_CMD" ] && [ -f ".icpg/reason.db" ] && [ -f ".mnemos/mnemo.db" ]; then
    # Bridge in background — don't block session start
    $MNEMOS_CMD bridge-icpg >/dev/null 2>&1 &
fi

# ─── Show iCPG status if available ───

if [ -f ".icpg/reason.db" ]; then
    ICPG_CMD=$(mnemos_resolve_cli icpg)

    if [ -n "$ICPG_CMD" ]; then
        STATUS=$($ICPG_CMD status 2>/dev/null)
        if [ -n "$STATUS" ]; then
            echo ""
            echo "=== iCPG STATUS ==="
            echo "$STATUS"
            echo "==================="
        fi
    fi
fi

exit 0


================================================
FILE: templates/mnemos-statusline.sh
================================================
#!/bin/bash
# Mnemos Statusline Script — receives context JSON on stdin every API call.
#
# 1. Writes fatigue.json for hooks to read (always)
# 2. Delegates display to ccusage statusline if available (cost + context)
# 3. Falls back to simple context % display if ccusage not installed
#
# Auto-configured by Mnemos via settings.json statusLine.
# Input (stdin JSON): context_window.used_percentage, remaining_percentage, etc.

# Read JSON from stdin — must capture before any piping
INPUT=$(cat)

# Empty stdin: there is nothing to record or display.
if [ -z "$INPUT" ]; then
    exit 0
fi

# ─── Step 1: Write fatigue.json (always, fast) ───

# Reads the statusline JSON from stdin and writes .mnemos/fatigue.json with:
# used_percentage, remaining_percentage, used_tokens, total_tokens,
# remaining_tokens, total_input_tokens, total_output_tokens, timestamp, source.
#
# The payload is parsed straight from stdin. Embedding it in a Python string
# literal would let Python reinterpret every backslash in the JSON (for
# example in a Windows path or an escaped quote), and the payload would then
# fail to parse.
#
# Fields that are missing or null fall back to defaults (0 used, 100
# remaining, 200000 window size), so a payload without usage data yet still
# produces a valid file. Unparseable input is treated as an empty object.
mnemos_write_fatigue() {
    python3 -c '
import json, os, sys, time

os.makedirs(".mnemos", exist_ok=True)

try:
    data = json.load(sys.stdin)
except Exception:
    data = {}
if not isinstance(data, dict):
    data = {}

cw = data.get("context_window") or {}
used_pct = cw.get("used_percentage") or 0
remaining_pct = cw.get("remaining_percentage")
if remaining_pct is None:
    remaining_pct = 100
ctx_size = cw.get("context_window_size") or 200000

# Token counts are under current_usage (not top-level)
cu = cw.get("current_usage") or {}
used_tokens = ((cu.get("input_tokens") or 0)
    + (cu.get("cache_creation_input_tokens") or 0)
    + (cu.get("cache_read_input_tokens") or 0))
remaining_tokens = max(0, ctx_size - int(ctx_size * used_pct / 100))

fatigue = {
    "used_percentage": used_pct,
    "remaining_percentage": remaining_pct,
    "used_tokens": used_tokens,
    "total_tokens": ctx_size,
    "remaining_tokens": remaining_tokens,
    "total_input_tokens": cw.get("total_input_tokens") or 0,
    "total_output_tokens": cw.get("total_output_tokens") or 0,
    "timestamp": time.time(),
    "source": "statusline",
}
with open(".mnemos/fatigue.json", "w") as f:
    json.dump(fatigue, f)
'
}

printf '%s' "$INPUT" | mnemos_write_fatigue

# ─── Step 2: Display — prefer ccusage, fallback to simple ───

# Each renderer is tried in turn; the first one that exits successfully has
# already printed the status line, so the script stops there.
if command -v ccusage >/dev/null 2>&1; then
    # ccusage statusline gets the same JSON, shows cost + context + burn rate
    if printf '%s\n' "$INPUT" | ccusage statusline 2>/dev/null; then
        exit 0
    fi
fi

# Try npx ccusage (slower, only if ccusage not globally installed)
if command -v npx >/dev/null 2>&1; then
    if printf '%s\n' "$INPUT" | npx --yes ccusage statusline 2>/dev/null; then
        exit 0
    fi
fi

# Fallback: simple context display.
# The JSON is parsed from stdin rather than embedded in the Python source, so
# backslashes and quotes in the payload cannot corrupt it. A missing or null
# used_percentage is shown as 0%.
printf '%s' "$INPUT" | python3 -c '
import json, sys
try:
    data = json.load(sys.stdin)
    cw = data.get("context_window") or {}
    used = cw.get("used_percentage") or 0
    if used >= 90: s = " EMERGENCY"
    elif used >= 75: s = " WARNING"
    elif used >= 60: s = " NOTICE"
    elif used >= 40: s = " ~"
    else: s = ""
    print(f"Ctx:{used:.0f}%{s}")
except Exception:
    print("Ctx:?%")
'

exit 0


================================================
FILE: templates/mnemos-stop-checkpoint.sh
================================================
#!/bin/bash
# Mnemos Stop Hook — writes incremental checkpoint when agent stops.
#
# Captures final session state so the next session can resume cleanly.
#
# Install: add to .claude/settings.json under hooks.Stop

# Only checkpoint if Mnemos is initialized. This is checked first because it
# is a single stat, whereas resolving the mnemos command below may start a
# Python interpreter.
if [ ! -f ".mnemos/mnemo.db" ]; then
    exit 0
fi

# Prefer an installed `mnemos` executable; otherwise use the Python module if
# it answers --version.
MNEMOS_CMD=""
if command -v mnemos >/dev/null 2>&1; then
    MNEMOS_CMD="mnemos"
elif python3 -m mnemos --version >/dev/null 2>&1; then
    MNEMOS_CMD="python3 -m mnemos"
fi

if [ -z "$MNEMOS_CMD" ]; then
    exit 0
fi

# Write checkpoint. MNEMOS_CMD is intentionally unquoted so that
# "python3 -m mnemos" is split into separate words.
$MNEMOS_CMD checkpoint --force >/dev/null 2>&1

exit 0


================================================
FILE: templates/polyphony-agents.yaml
================================================
# Agent profiles, one entry per CLI agent.
# The `name` values are the identifiers used by the `agent:` and `fallback:`
# keys in polyphony-routing.yaml, and the `auth_path` values are the same
# directories listed under `volumes` in polyphony-identities.yaml.
agents:
  - name: claude-opus
    agent_type: claude
    cli_command: "claude -p"
    context_window_tokens: 200000
    strengths: [long_context, research, architecture]
    event_protocol: stream-json
    auth_path: ~/.claude

  - name: codex-default
    agent_type: codex
    cli_command: "codex exec"
    context_window_tokens: 192000
    strengths: [code, testing]
    event_protocol: ndjson
    auth_path: ~/.codex

  - name: kimi-default
    agent_type: kimi
    cli_command: "kimi --print -y"
    context_window_tokens: 128000
    strengths: [code, fast_iteration]
    event_protocol: ndjson
    auth_path: ~/.kimi


================================================
FILE: templates/polyphony-config.yaml
================================================
# Top-level Polyphony configuration template.
# NOTE(review): the workspace, mirror and queue paths live under ~/polyphony,
# while the three *_file settings at the bottom point into ~/.polyphony
# (dot-prefixed) — confirm that this split is intentional.
workspace_root: ~/polyphony/workspaces
mirror_root: ~/polyphony/mirrors
poll_interval: 30s
max_concurrent_agents: 8
event_idle_timeout: 5m

work_sources:
  - kind: github
    # NOTE(review): "owner/repo" looks like a placeholder to be replaced.
    repo: owner/repo
    label_filter: "agent-ready"
  - kind: local
    db: ~/polyphony/queue.db

identities_file: ~/.polyphony/identities.yaml
agent_profiles_file: ~/.polyphony/agents.yaml
routing_file: ~/.polyphony/routing.yaml


================================================
FILE: templates/polyphony-identities.yaml
================================================
# Identity definitions. The `volumes` paths are the same directories used as
# `auth_path` by the agent profiles in polyphony-agents.yaml.
# NOTE(review): "protaige" looks like a specific account name left in the
# template — confirm whether it should be a neutral placeholder.
identities:
  - name: protaige
    volumes:
      claude: ~/.claude
      codex: ~/.codex
      kimi: ~/.kimi
    cost_ceiling_usd_per_day: 50


================================================
FILE: templates/polyphony-routing.yaml
================================================
# Routing table. Each rule pairs a `match` on task attributes with an `agent`
# and an optional `fallback` list; the agent names are the profile names
# defined in polyphony-agents.yaml. `default` names the agent and fallbacks to
# use outside the rules.
# NOTE(review): presumably rules are evaluated top to bottom and the first
# match wins — confirm against the router implementation.
routing:
  rules:
    - match: { task_type: research, requires_web: true }
      agent: claude-opus
      fallback: [codex-default]

    - match: { task_type: feature, risk: [low, medium] }
      agent: codex-default
      fallback: [claude-opus]

    - match: { task_type: [bugfix, docs], scope: single_file }
      agent: kimi-default

    - match: { task_type: refactor, risk: high }
      agent: claude-opus

  default:
    agent: claude-opus
    fallback: [codex-default, kimi-default]


================================================
FILE: templates/pre-compact.sh
================================================
#!/bin/bash
# PreCompact Hook — injects project-specific preservation instructions
# into the compaction summarizer so it keeps what actually matters.
#
# How it works:
#   Claude Code's PreCompact hook runs right before compaction.
#   Stdout from this script becomes custom instructions for the summarizer.
#   Exit 0 = instructions accepted. Exit 2 = block compaction (don't use).
#
# The built-in summarizer uses a generic 9-section template.
# This hook tells it: "for THIS project, prioritize these specific things."

# ─── Detect project context ───

PROJECT_TYPE=""
SCHEMA_FILE=""
TEST_CMD=""
KEY_DIRS=""

# Prints the framework suffix for the package.json given as $1: "nextjs",
# "react" or "node-backend" (first match in that order), or nothing when none
# of the packages is named or the file cannot be read.
#
# The backend check uses an extended regex with an explicit group. The
# previous basic-regex form '"express\|fastify"' was grouped as
#   "express   OR   fastify"
# so it also matched names such as "express-rate-limit", and it depended on
# the GNU-only \| operator.
precompact_node_framework() {
    local manifest="$1"
    if grep -q '"next"' "$manifest" 2>/dev/null; then
        printf '%s' "nextjs"
    elif grep -q '"react"' "$manifest" 2>/dev/null; then
        printf '%s' "react"
    elif grep -Eq '"(express|fastify)"' "$manifest" 2>/dev/null; then
        printf '%s' "node-backend"
    fi
}

# Detect tech stack. Later checks overwrite earlier ones, so when several
# manifests exist the last matching stack (python, then flutter) wins.
if [ -f "package.json" ]; then
    PROJECT_TYPE="javascript"
    if [ -f "tsconfig.json" ]; then
        PROJECT_TYPE="typescript"
    fi
    _node_framework=$(precompact_node_framework "package.json")
    if [ -n "$_node_framework" ]; then
        PROJECT_TYPE="$PROJECT_TYPE/$_node_framework"
    fi
    TEST_CMD="npm test"
fi

if [ -f "pyproject.toml" ] || [ -f "setup.py" ]; then
    PROJECT_TYPE="python"
    if grep -q "fastapi" pyproject.toml 2>/dev/null; then
        PROJECT_TYPE="python/fastapi"
    elif grep -q "django" pyproject.toml 2>/dev/null; then
        PROJECT_TYPE="python/django"
    fi
    TEST_CMD="pytest"
fi

if [ -f "pubspec.yaml" ]; then
    PROJECT_TYPE="flutter"
    TEST_CMD="flutter test"
fi

# Schema location: the first candidate that exists (file or directory) wins.
for schema_candidate in \
    src/db/schema.ts \
    prisma/schema.prisma \
    drizzle/schema.ts \
    supabase/migrations \
    models.py \
    src/models
do
    [ -e "$schema_candidate" ] || continue
    SCHEMA_FILE="$schema_candidate"
    break
done

# API directories: every candidate that is a directory is appended, each one
# preceded by a single space.
KEY_DIRS=""
for api_dir in src/api src/routes src/app/api api routes server/routes; do
    [ -d "$api_dir" ] && KEY_DIRS="${KEY_DIRS} ${api_dir}"
done

# ─── Gather live project state ───

# Git state
#
# Sets GIT_BRANCH (current branch name), GIT_CHANGES (first 15 paths from
# `git diff --name-only`) and GIT_STAGED (first 10 paths from
# `git diff --cached --name-only`). All three are reset first, so without git
# or outside a repository they are empty rather than whatever value the
# environment happened to carry.
precompact_collect_git_state() {
    GIT_BRANCH=""
    GIT_CHANGES=""
    GIT_STAGED=""

    command -v git >/dev/null 2>&1 || return 0
    git rev-parse --git-dir >/dev/null 2>&1 || return 0

    GIT_BRANCH=$(git branch --show-current 2>/dev/null)
    GIT_CHANGES=$(git diff --name-only 2>/dev/null | head -15)
    GIT_STAGED=$(git diff --cached --name-only 2>/dev/null | head -10)
    return 0
}

precompact_collect_git_state

# CLAUDE.md key decisions (if they exist)
#
# Prints the body of the "## Key Decisions" section of the file given as $1:
# the lines after that heading, up to but not including the next "## "
# heading, capped at 19 lines. The heading that ends the section is left out,
# so an unrelated section title is not reported as a decision.
precompact_key_decisions() {
    awk '
        /^## Key Decisions/ && !in_section { in_section = 1; next }
        in_section && /^## /              { exit }
        in_section {
            if (++printed > 19) exit
            print
        }
    ' "$1"
}

KEY_DECISIONS=""
if [ -f "CLAUDE.md" ]; then
    KEY_DECISIONS=$(precompact_key_decisions "CLAUDE.md")
fi

# ─── Output custom instructions for the summarizer ───
# Everything sent to stdout becomes additional instructions for the compaction prompt

# The heredocs below use an unquoted delimiter, so $VARIABLES inside them are
# expanded by the shell. Section headings carry fixed numbers; sections 2 and 3
# are printed only when their variable is non-empty, so the numbering in the
# output can skip.
cat <<INSTRUCTIONS
## Project-Specific Preservation Priorities

This is a $PROJECT_TYPE project. When summarizing, prioritize preserving:

### 1. Architectural Decisions (HIGHEST PRIORITY)
Preserve the EXACT reasoning behind architectural choices, not just the choice itself.
If the conversation discussed why we chose X over Y, keep the "why" verbatim.
INSTRUCTIONS

# Continuation of section 1: the extracted decisions, when any were found.
if [ -n "$KEY_DECISIONS" ]; then
cat <<INSTRUCTIONS

These are the project's settled decisions — reference them by name in the summary:
$KEY_DECISIONS
INSTRUCTIONS
fi

# Section 2 is emitted only when a schema location was detected.
if [ -n "$SCHEMA_FILE" ]; then
cat <<INSTRUCTIONS

### 2. Database Schema Context
Schema file: $SCHEMA_FILE
Preserve ALL discussion about schema changes, column names, relationships,
migration decisions, and data model reasoning. These are expensive to re-derive.
INSTRUCTIONS
fi

# Section 3 is emitted only when API directories were detected. KEY_DIRS is
# built with a leading space before each entry, hence no space after the colon.
if [ -n "$KEY_DIRS" ]; then
cat <<INSTRUCTIONS

### 3. API Contract Details
API directories:$KEY_DIRS
Preserve exact endpoint paths, request/response shapes, status codes,
and validation rules discussed. These affect multiple consumers.
INSTRUCTIONS
fi

cat <<INSTRUCTIONS

### 4. Error Context
When summarizing errors and fixes, preserve:
- The EXACT error message (not paraphrased)
- The file and line number
- What fix was applied and why
- Whether the fix was verified (tests passing)

### 5. Current Work State
INSTRUCTIONS

# Section 5 body: branch and file lists, each printed only when non-empty.
if [ -n "$GIT_BRANCH" ]; then
    echo "Branch: $GIT_BRANCH"
fi

if [ -n "$GIT_CHANGES" ]; then
cat <<INSTRUCTIONS
Uncommitted changes:
$GIT_CHANGES
INSTRUCTIONS
fi

if [ -n "$GIT_STAGED" ]; then
cat <<INSTRUCTIONS
Staged for commit:
$GIT_STAGED
INSTRUCTIONS
fi

# TEST_CMD falls back to the word "unknown" when no stack was detected.
cat <<INSTRUCTIONS

### 6. Test Status
Preserve the last known test state — which tests pass, which fail, what coverage was.
Test command: ${TEST_CMD:-"unknown"}

### 7. What NOT to Summarize
- Don't preserve exploration that led nowhere (dead ends)
- Don't preserve full file contents that can be re-read from disk
- Don't preserve tool result formatting — just the key findings
- Compress repeated test-fix-test cycles into: "Fixed X by doing Y, tests now pass"
INSTRUCTIONS

exit 0


================================================
FILE: templates/settings.json
================================================
{
  "statusLine": {
    "type": "command",
    "command": "scripts/mnemos-statusline.sh",
    "padding": 0
  },
  "permissions": {
    "allow": [
      "Bash(npm test *)",
      "Bash(npm run lint *)",
      "Bash(npm run typecheck *)",
      "Bash(npx tsc *)",
      "Bash(npx jest *)",
      "Bash(npx vitest *)",
      "Bash(pytest *)",
      "Bash(ruff *)",
      "Bash(mypy *)",
      "Bash(eslint *)",
      "Bash(git status *)",
      "Bash(git diff *)",
      "Bash(git log *)",
      "Bash(git branch *)",
      "Bash(gh pr *)",
      "Bash(gh issue *)",
      "Bash(ls *)",
      "Bash(cat *)",
      "Bash(head *)",
      "Bash(wc *)",
      "Bash(icpg *)",
      "Bash(python -m icpg *)",
      "Bash(mnemos *)",
      "Bash(python -m mnemos *)",
      "Bash(polyphony *)",
      "Bash(python -m polyphony *)",
      "Bash(docker ps *)",
      "Bash(docker logs *)"
    ],
    "deny": [
      "Bash(rm -rf *)",
      "Bash(git push --force *)",
      "Bash(git reset --hard *)",
      "Write(.env)",
      "Write(.env.*)",
      "Edit(.env)",
      "Edit(.env.*)"
    ]
  },
  "hooks": {
    "PreCompact": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/mnemos-pre-compact.sh\" ]; then exec \".claude/scripts/mnemos-pre-compact.sh\"; fi; if [ -x \"$HOME/.claude/templates/mnemos-pre-compact.sh\" ]; then exec \"$HOME/.claude/templates/mnemos-pre-compact.sh\"; fi; echo \"[maggy] hook script 'mnemos-pre-compact.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/mnemos-pre-compact.sh to silence\" >&2; exit 0",
            "timeout": 8,
            "statusMessage": "Writing emergency checkpoint + compaction priorities..."
          }
        ]
      }
    ],
    "PreToolUse": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/mnemos-post-compact-inject.sh\" ]; then exec \".claude/scripts/mnemos-post-compact-inject.sh\"; fi; if [ -x \"$HOME/.claude/templates/mnemos-post-compact-inject.sh\" ]; then exec \"$HOME/.claude/templates/mnemos-post-compact-inject.sh\"; fi; echo \"[maggy] hook script 'mnemos-post-compact-inject.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/mnemos-post-compact-inject.sh to silence\" >&2; exit 0",
            "timeout": 2,
            "statusMessage": "Checking for post-compaction restore..."
          }
        ]
      },
      {
        "matcher": "Edit|Write",
        "hooks": [
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/mnemos-pre-edit.sh\" ]; then exec \".claude/scripts/mnemos-pre-edit.sh\"; fi; if [ -x \"$HOME/.claude/templates/mnemos-pre-edit.sh\" ]; then exec \"$HOME/.claude/templates/mnemos-pre-edit.sh\"; fi; echo \"[maggy] hook script 'mnemos-pre-edit.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/mnemos-pre-edit.sh to silence\" >&2; exit 0",
            "timeout": 5,
            "statusMessage": "Checking fatigue + intent context..."
          }
        ]
      }
    ],
    "PostToolUse": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/mnemos-post-tool.sh\" ]; then exec \".claude/scripts/mnemos-post-tool.sh\"; fi; if [ -x \"$HOME/.claude/templates/mnemos-post-tool.sh\" ]; then exec \"$HOME/.claude/templates/mnemos-post-tool.sh\"; fi; echo \"[maggy] hook script 'mnemos-post-tool.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/mnemos-post-tool.sh to silence\" >&2; exit 0",
            "timeout": 1,
            "statusMessage": "Logging tool outcome..."
          }
        ]
      }
    ],
    "Stop": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/tdd-loop-check.sh\" ]; then exec \".claude/scripts/tdd-loop-check.sh\"; fi; if [ -x \"$HOME/.claude/templates/tdd-loop-check.sh\" ]; then exec \"$HOME/.claude/templates/tdd-loop-check.sh\"; fi; echo \"[maggy] hook script 'tdd-loop-check.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/tdd-loop-check.sh to silence\" >&2; exit 0",
            "timeout": 60,
            "statusMessage": "Running tests..."
          },
          {
            "type": "command",
            "command": "if command -v codex &>/dev/null; then if [ -x \".claude/scripts/codex-auto-review.sh\" ]; then exec \".claude/scripts/codex-auto-review.sh\"; elif [ -x \"$HOME/.claude/templates/codex-auto-review.sh\" ]; then exec \"$HOME/.claude/templates/codex-auto-review.sh\"; fi; fi; exit 0",
            "timeout": 120,
            "statusMessage": "Codex reviewing changes..."
          },
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/icpg-stop-record.sh\" ]; then exec \".claude/scripts/icpg-stop-record.sh\"; fi; if [ -x \"$HOME/.claude/templates/icpg-stop-record.sh\" ]; then exec \"$HOME/.claude/templates/icpg-stop-record.sh\"; fi; echo \"[maggy] hook script 'icpg-stop-record.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/icpg-stop-record.sh to silence\" >&2; exit 0",
            "timeout": 5,
            "statusMessage": "Recording symbols to intent graph..."
          },
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/mnemos-stop-checkpoint.sh\" ]; then exec \".claude/scripts/mnemos-stop-checkpoint.sh\"; fi; if [ -x \"$HOME/.claude/templates/mnemos-stop-checkpoint.sh\" ]; then exec \"$HOME/.claude/templates/mnemos-stop-checkpoint.sh\"; fi; echo \"[maggy] hook script 'mnemos-stop-checkpoint.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/mnemos-stop-checkpoint.sh to silence\" >&2; exit 0",
            "timeout": 5,
            "statusMessage": "Writing session checkpoint..."
          }
        ]
      }
    ],
    "SessionStart": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "if [ -x \".claude/scripts/mnemos-session-start.sh\" ]; then exec \".claude/scripts/mnemos-session-start.sh\"; fi; if [ -x \"$HOME/.claude/templates/mnemos-session-start.sh\" ]; then exec \"$HOME/.claude/templates/mnemos-session-start.sh\"; fi; echo \"[maggy] hook script 'mnemos-session-start.sh' not installed \u2014 run <maggy>/install.sh (one-time) or touch .claude/scripts/mnemos-session-start.sh to silence\" >&2; exit 0",
            "timeout": 5,
            "statusMessage": "Loading session checkpoint + project context..."
          }
        ]
      }
    ]
  }
}


================================================
FILE: templates/tdd-loop-check.sh
================================================
#!/bin/bash
# TDD Loop Check - Claude Code Stop hook script
# Runs after each Claude response. Exit 0 = done, Exit 2 = failures fed back to Claude.
#
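# Install: copy to .claude/scripts/tdd-loop-check.sh in your project and make it executable (the Stop hook in templates/settings.json only runs it from there when it has the executable bit)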
# Configure: add Stop hook in .claude/settings.json (see iterative-development skill)

MAX_ITERATIONS=25
ITERATION_FILE=".claude/.tdd-iteration-count"
mkdir -p .claude

# Track iteration count
#
# Reads the previous count from ITERATION_FILE, adds one, stores the result in
# the global `count` and writes it back to the file. A missing file counts as
# 0. So does any content that is not a plain non-negative integer (empty,
# truncated or hand-edited), which keeps arbitrary file content out of the
# arithmetic expansion below.
tdd_next_iteration() {
    local previous=0
    if [ -f "$ITERATION_FILE" ]; then
        previous=$(cat "$ITERATION_FILE")
        case "$previous" in
            '' | *[!0-9]*) previous=0 ;;
        esac
    fi
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    count=$((10#$previous + 1))
    echo "$count" > "$ITERATION_FILE"
}

tdd_next_iteration

# Safety: stop after max iterations
if [ "$count" -ge "$MAX_ITERATIONS" ]; then
    rm -f "$ITERATION_FILE"
    echo "Max iterations ($MAX_ITERATIONS) reached. Stopping loop." >&2
    exit 0
fi

# Skip if no test files exist yet
#
# Succeeds when at least one path named *.test.*, *.spec.* or test_* exists
# below the current directory. Dependency and VCS directories (node_modules,
# .git, .venv, venv) are pruned: their contents are not the project's own
# tests, and descending into them makes the search slow and nearly always
# true for JavaScript projects. grep -q stops reading at the first hit.
tdd_has_test_files() {
    find . \
        \( -name node_modules -o -name .git -o -name .venv -o -name venv \) -prune \
        -o \( -name "*.test.*" -o -name "*.spec.*" -o -name "test_*" \) -print \
        | grep -q .
}

if ! tdd_has_test_files; then
    rm -f "$ITERATION_FILE"
    exit 0
fi

# Run the project's checks in order: tests, then lint, then type check.
# Each stage captures combined stdout and stderr. On the first stage that
# fails, a banner plus the tail of its output go to stderr and the script
# exits with status 2. If every stage passes, the iteration counter file is
# removed and the script exits 0.

# Writes the iteration banner for stage label $1, followed by the last $3
# lines of the captured output $2, to stderr.
tdd_report() {
    echo "ITERATION $count/$MAX_ITERATIONS - $1:" >&2
    echo "$2" | tail -n "$3" >&2
}

if [ -f "package.json" ]; then
    if ! TEST_OUTPUT=$(npm test 2>&1); then
        tdd_report "Tests failing" "$TEST_OUTPUT" 30
        echo "" >&2
        echo "Fix the failing tests and try again." >&2
        exit 2
    fi

    # Lint runs only when package.json mentions a "lint" entry.
    if grep -q '"lint"' package.json; then
        if ! LINT_OUTPUT=$(npm run lint 2>&1); then
            tdd_report "Lint errors" "$LINT_OUTPUT" 20
            exit 2
        fi
    fi

    # Type check runs only for projects with a tsconfig.json.
    if [ -f "tsconfig.json" ]; then
        if ! TYPE_OUTPUT=$(npx tsc --noEmit 2>&1); then
            tdd_report "Type errors" "$TYPE_OUTPUT" 20
            exit 2
        fi
    fi

elif [ -f "pyproject.toml" ] || [ -f "setup.py" ]; then
    if ! TEST_OUTPUT=$(pytest -v 2>&1); then
        tdd_report "Tests failing" "$TEST_OUTPUT" 30
        exit 2
    fi

    # ruff and mypy are optional: each runs only when installed.
    if command -v ruff &>/dev/null; then
        if ! LINT_OUTPUT=$(ruff check . 2>&1); then
            tdd_report "Lint errors" "$LINT_OUTPUT" 20
            exit 2
        fi
    fi

    if command -v mypy &>/dev/null; then
        if ! TYPE_OUTPUT=$(mypy . 2>&1); then
            tdd_report "Type errors" "$TYPE_OUTPUT" 20
            exit 2
        fi
    fi
fi

# Everything passed (or no known project type): clear the counter.
rm -f "$ITERATION_FILE"
exit 0


================================================
FILE: tests/test_cross_agent.py
================================================
"""Tests for cross-agent intelligence (Codex auto-review, Kimi delegation, iCPG + Mnemos)."""

from __future__ import annotations

import json
import os
from pathlib import Path

import pytest

REPO_ROOT = Path(__file__).parent.parent


class TestCodexAutoReview:
    """Static checks on templates/codex-auto-review.sh (the script is never run)."""

    @staticmethod
    def _script_path() -> Path:
        # Location of the script under the repository root.
        return REPO_ROOT.joinpath("templates", "codex-auto-review.sh")

    def _script_text(self) -> str:
        # Full source of the script as text.
        return self._script_path().read_text()

    def test_script_exists(self) -> None:
        assert self._script_path().exists()

    def test_script_is_executable(self) -> None:
        assert os.access(self._script_path(), os.X_OK)

    def test_script_has_shebang(self) -> None:
        assert self._script_text().startswith("#!/bin/bash")

    def test_script_checks_codex_installed(self) -> None:
        assert "command -v codex" in self._script_text()

    def test_script_uses_exit_codes(self) -> None:
        source = self._script_text()
        for expected in ("exit 0", "return 2"):
            assert expected in source


class TestCrossAgentDelegation:
    """Tests for skills/cross-agent-delegation/SKILL.md."""

    @staticmethod
    def _skill_path() -> Path:
        """Return the path of the skill file under the repository root."""
        return REPO_ROOT / "skills" / "cross-agent-delegation" / "SKILL.md"

    def _skill_text(self) -> str:
        """Return the skill file's content.

        The file is decoded as UTF-8 explicitly, so the result does not depend
        on the platform's default encoding if the Markdown contains non-ASCII
        characters.
        """
        return self._skill_path().read_text(encoding="utf-8")

    def test_skill_exists(self) -> None:
        assert self._skill_path().exists()

    def test_skill_has_frontmatter(self) -> None:
        content = self._skill_text()
        assert content.startswith("---")
        assert "name: cross-agent-delegation" in content

    def test_skill_references_icpg(self) -> None:
        content = self._skill_text()
        assert "icpg" in content.lower()
        assert "icpg query prior" in content
        assert "icpg query constraints" in content
        assert "icpg query risk" in content

    def test_skill_references_mnemos(self) -> None:
        content = self._skill_text()
        assert "mnemos" in content.lower()
        assert "mnemos add goal" in content
        assert "mnemos checkpoint" in content

    def test_skill_has_complexity_scoring_rules(self) -> None:
        content = self._skill_text()
        assert "0-3" in content
        assert "4-6" in content
        assert "7-10" in content

    def test_skill_has_tool_detection(self) -> None:
        content = self._skill_text()
        assert "command -v kimi" in content
        assert "command -v codex" in content


class TestSettingsJsonHook:
    """Checks on the codex-auto-review entry among the Stop hooks of templates/settings.json."""

    @staticmethod
    def _stop_hooks() -> list:
        # Hook entries of the first Stop group in the settings template.
        settings_path = REPO_ROOT / "templates" / "settings.json"
        settings = json.loads(settings_path.read_text())
        return settings["hooks"]["Stop"][0]["hooks"]

    def _stop_commands(self) -> list:
        # Command strings of those hook entries, in file order.
        return [hook["command"] for hook in self._stop_hooks()]

    def _first_index(self, needle: str) -> int:
        # Position of the first Stop command containing `needle`.
        return next(
            pos for pos, cmd in enumerate(self._stop_commands()) if needle in cmd
        )

    def test_settings_has_codex_review_hook(self) -> None:
        assert any("codex-auto-review" in cmd for cmd in self._stop_commands())

    def test_codex_hook_after_tdd(self) -> None:
        tdd_pos = self._first_index("tdd-loop-check")
        codex_pos = self._first_index("codex-auto-review")
        assert codex_pos > tdd_pos

    def test_codex_hook_before_icpg(self) -> None:
        codex_pos = self._first_index("codex-auto-review")
        icpg_pos = self._first_index("icpg-stop-record")
        assert codex_pos < icpg_pos

    def test_codex_hook_has_timeout(self) -> None:
        codex_entry = next(
            hook
            for hook in self._stop_hooks()
            if "codex-auto-review" in hook["command"]
        )
        assert codex_entry["timeout"] == 120


class TestConfigTomlHook:
    """Tests for codex-auto-review hook in config.toml."""

    def test_config_toml_has_codex_hook(self) -> None:
        path = REPO_ROOT / "templates" / "config.toml"
        content = path.read_text()
        assert "codex-auto-review" in content

    def test_config_toml_codex_hook_timeout(self) -> None:
        """The first ``timeout`` line after the "Codex Auto-Review" marker mentions 120.

        The test fails when no such line exists. It no longer passes without
        having checked anything if the marker or the timeout entry is removed
        or renamed.
        """
        path = REPO_ROOT / "templates" / "config.toml"
        content = path.read_text()
        # Find the codex-auto-review block and its first timeout line.
        in_codex_block = False
        timeout_line = None
        for line in content.splitlines():
            if "Codex Auto-Review" in line:
                in_codex_block = True
            if in_codex_block and line.startswith("timeout"):
                timeout_line = line
                break
        assert timeout_line is not None, (
            "no 'timeout' line found after the 'Codex Auto-Review' marker "
            "in templates/config.toml"
        )
        assert "120" in timeout_line


class TestTemplateSkillRefs:
    """Tests for skill references in templates."""

    def test_claude_md_has_delegation_skill(self) -> None:
        path = REPO_ROOT / "templates" / "CLAUDE.md"
        content = path.read_text()
        assert "cross-agent-delegation/SKILL.md" in content

    def test_agents_md_has_delegation_skill(self) -> None:
        path = REPO_ROOT / "templates" / "AGENTS.md"
        content = path.read_text()
        assert "cross-agent-delegation/SKILL.md" in content

    def test_claude_md_has_workflow_section(self) -> None:
        path = REPO_ROOT / "templates" / "CLAUDE.md"
        content = path.read_text()
        assert "## Cross-Agent Workflow" in content
        assert "Codex Auto-Review" in content
        assert "Kimi Delegation" in content

    def test_agents_md_has_workflow_section(self) -> None:
        path = REPO_ROOT / "templates" / "AGENTS.md"
        content = path.read_text()
        assert "## Cross-Agent Workflow" in content
        assert "Codex Auto-Review" in content
        assert "Kimi Delegation" in content


class TestInitializeProjectRef:
    """Tests for cross-agent-delegation in initialize-project.md."""

    def test_init_copies_delegation_skill(self) -> None:
        path = REPO_ROOT / "commands" / "initialize-project.md"
        content = path.read_text()
        assert "cross-agent-delegation/" in content


================================================
FILE: tests/test_cross_tool.py
================================================
"""Tests for cross-tool (Claude/Kimi/Codex) compatibility."""

from __future__ import annotations

import os
import subprocess
from pathlib import Path

import pytest

REPO_ROOT = Path(__file__).parent.parent


class TestDetectAgents:
    """Tests for scripts/detect-agents.sh."""

    def test_script_exists_and_executable(self) -> None:
        """The script file exists and is executable by the current process."""
        detector = REPO_ROOT / "scripts" / "detect-agents.sh"
        assert detector.exists()
        assert os.access(detector, os.X_OK)

    def test_outputs_valid_format(self) -> None:
        """The script exits 0 and every stdout line is a known tool name."""
        detector = REPO_ROOT / "scripts" / "detect-agents.sh"
        completed = subprocess.run(
            [str(detector)],
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert completed.returncode == 0

        known_tools = {
            "claude", "kimi", "codex", "docker", "orbstack", "polyphony",
        }
        reported = completed.stdout.strip().splitlines()
        unexpected = [name for name in reported if name not in known_tools]
        assert not unexpected


class TestInstallSkills:
    """Tests for scripts/install-skills.sh."""

    def test_script_exists_and_executable(self) -> None:
        """The script file exists and is executable by the current process."""
        installer = REPO_ROOT / "scripts" / "install-skills.sh"
        assert installer.exists()
        assert os.access(installer, os.X_OK)

    def test_copies_skills_to_target(self, tmp_path: Path) -> None:
        """Given a target directory, the script exits 0 and populates it."""
        installer = REPO_ROOT / "scripts" / "install-skills.sh"
        destination = tmp_path / "target-skills"

        completed = subprocess.run(
            [str(installer), str(destination)],
            capture_output=True,
            text=True,
            timeout=30,
        )
        assert completed.returncode == 0
        assert destination.exists()

        # The 'base' skill is the minimum expected in the target.
        assert (destination / "base" / "SKILL.md").exists()

    def test_no_args_shows_usage(self) -> None:
        """Running without arguments exits with a non-zero status."""
        installer = REPO_ROOT / "scripts" / "install-skills.sh"
        completed = subprocess.run(
            [str(installer)],
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert completed.returncode != 0


class TestTemplates:
    """Tests for cross-tool templates."""

    def test_agents_md_exists(self) -> None:
        path = REPO_ROOT / "templates" / "AGENTS.md"
        assert path.exists()

    def test_agents_md_has_skills_section(self) -> None:
        path = REPO_ROOT / "templates" / "AGENTS.md"
        content = path.read_text()
        assert "## Skills" in content
        assert "SKILL.md" in content

    def test_config_toml_exists(self) -> None:
        path = REPO_ROOT / "templates" / "config.toml"
        assert path.exists()

    def test_config_toml_has_hooks(self) -> None:
        path = REPO_ROOT / "templates" / "config.toml"
        content = path.read_text()
        assert "[[hooks]]" in content
        assert 'event = "Stop"' in content
        assert 'event = "SessionStart"' in content

    def test_agents_md_has_conventions(self) -> None:
        path = REPO_ROOT / "templates" / "AGENTS.md"
        content = path.read_text()
        assert "## Conventions" in content
        assert "## Don't" in content


class TestSyncAgentsCommand:
    """Tests for commands/sync-agents.md."""

    def test_command_exists(self) -> None:
        path = REPO_ROOT / "commands" / "sync-agents.md"
        assert path.exists()

    def test_command_has_phases(self) -> None:
        path = REPO_ROOT / "commands" / "sync-agents.md"
        content = path.read_text()
        assert "## Phase 1" in content
        assert "## Phase 2" in content
        assert "detect-agents.sh" in content


================================================
FILE: tests/test_polyphony_adapters.py
================================================
"""Tests for Polyphony agent adapters (§8.1-8.3)."""

import pytest
from polyphony.adapters import get_adapter, list_adapters
from polyphony.adapters.claude import ClaudeAdapter
from polyphony.adapters.codex import CodexAdapter
from polyphony.adapters.kimi import KimiAdapter
from polyphony.models import AgentProfile, RunSpec


@pytest.fixture
def claude_profile():
    """AgentProfile named ``claude-opus`` using the stream-json protocol."""
    settings = {
        "name": "claude-opus",
        "agent_type": "claude",
        "cli_command": "claude -p",
        "strengths": ["long_context"],
        "event_protocol": "stream-json",
    }
    return AgentProfile(**settings)


@pytest.fixture
def codex_profile():
    """AgentProfile named ``codex-default`` using the ndjson protocol."""
    settings = {
        "name": "codex-default",
        "agent_type": "codex",
        "cli_command": "codex exec",
        "strengths": ["code"],
        "event_protocol": "ndjson",
    }
    return AgentProfile(**settings)


@pytest.fixture
def kimi_profile():
    """AgentProfile named ``kimi-default`` using the ndjson protocol."""
    settings = {
        "name": "kimi-default",
        "agent_type": "kimi",
        "cli_command": "kimi --print -y",
        "strengths": ["code"],
        "event_protocol": "ndjson",
    }
    return AgentProfile(**settings)


@pytest.fixture
def run_spec():
    """RunSpec for task ``T-1`` with one env entry and one volume mount."""
    env = {"ANTHROPIC_API_KEY": "ANTHROPIC_API_KEY"}
    mounts = ["~/.claude:/home/worker/.claude:ro"]
    spec = RunSpec(
        task_id="T-1",
        agent="claude-opus",
        identity="protaige",
        workspace="/workspace",
        image="polyphony-worker:latest",
        max_turns=10,
        env_overlay=env,
        volume_mounts=mounts,
    )
    return spec


class TestRegistry:
    """Adapter lookups through ``list_adapters`` and ``get_adapter``."""

    def test_list_adapters(self):
        """The claude, codex and kimi adapter names are all listed."""
        registered = list_adapters()
        for expected in ("claude", "codex", "kimi"):
            assert expected in registered

    def test_get_claude_adapter(self):
        """``claude`` resolves to a ClaudeAdapter instance."""
        assert isinstance(get_adapter("claude"), ClaudeAdapter)

    def test_get_codex_adapter(self):
        """``codex`` resolves to a CodexAdapter instance."""
        assert isinstance(get_adapter("codex"), CodexAdapter)

    def test_get_kimi_adapter(self):
        """``kimi`` resolves to a KimiAdapter instance."""
        assert isinstance(get_adapter("kimi"), KimiAdapter)

    def test_unknown_adapter_raises(self):
        """An unregistered name raises KeyError whose text matches the name."""
        unknown = "gemini"
        with pytest.raises(KeyError, match=unknown):
            get_adapter(unknown)


class TestClaudeAdapter:
    """Command building and event/quota detection for ClaudeAdapter."""

    def test_build_command(self, claude_profile, run_spec):
        """The command starts with claude and carries the stream-json flags."""
        command = ClaudeAdapter().build_command(claude_profile, run_spec)
        assert "claude" in command[0]
        for token in ("-p", "--output-format", "stream-json"):
            assert token in command

    def test_prompt_included(self, claude_profile, run_spec):
        """Building a command with a PROMPT env entry still mentions claude.

        NOTE(review): despite the name, nothing here asserts that the prompt
        text ends up in the command; only the presence of "claude" is checked.
        """
        run_spec.env_overlay["PROMPT"] = "Fix the bug"
        command = ClaudeAdapter().build_command(claude_profile, run_spec)
        joined = " ".join(command)
        assert "claude" in joined

    def test_detect_completion(self):
        """Only an event of type ``result`` counts as completion."""
        adapter = ClaudeAdapter()
        cases = (
            ({"type": "result"}, True),
            ({"type": "message"}, False),
        )
        for event, expected in cases:
            assert adapter.detect_completion(event) is expected

    def test_detect_quota(self):
        """A rate-limit message is flagged as quota; a benign one is not."""
        adapter = ClaudeAdapter()
        cases = (
            ("rate limit exceeded", True),
            ("all good", False),
        )
        for text, expected in cases:
            assert adapter.detect_quota(text) is expected


class TestCodexAdapter:
    """Command building and event/quota detection for CodexAdapter."""

    def test_build_command(self, codex_profile, run_spec):
        """The command starts with codex and includes exec and --full-auto."""
        command = CodexAdapter().build_command(codex_profile, run_spec)
        assert "codex" in command[0]
        for token in ("exec", "--full-auto"):
            assert token in command

    def test_detect_completion(self):
        """Status ``completed`` is completion; ``running`` is not."""
        adapter = CodexAdapter()
        cases = (
            ({"status": "completed"}, True),
            ({"status": "running"}, False),
        )
        for event, expected in cases:
            assert adapter.detect_completion(event) is expected

    def test_detect_quota(self):
        """A quota message is flagged; an ordinary status string is not."""
        adapter = CodexAdapter()
        cases = (
            ("quota exceeded", True),
            ("running", False),
        )
        for text, expected in cases:
            assert adapter.detect_quota(text) is expected


class TestKimiAdapter:
    """Command building and event/quota detection for KimiAdapter."""

    def test_build_command(self, kimi_profile, run_spec):
        """The command starts with kimi and includes --print and -y."""
        command = KimiAdapter().build_command(kimi_profile, run_spec)
        assert "kimi" in command[0]
        for token in ("--print", "-y"):
            assert token in command

    def test_detect_completion(self):
        """``done: True`` is completion; ``done: False`` is not."""
        adapter = KimiAdapter()
        cases = (
            ({"done": True}, True),
            ({"done": False}, False),
        )
        for event, expected in cases:
            assert adapter.detect_completion(event) is expected

    def test_detect_quota(self):
        """A rate-limit message is flagged; ``ok`` is not."""
        adapter = KimiAdapter()
        cases = (
            ("rate limit", True),
            ("ok", False),
        )
        for text, expected in cases:
            assert adapter.detect_quota(text) is expected


================================================
FILE: tests/test_polyphony_config.py
================================================
"""Tests for Polyphony config loading (§11)."""

import pytest
from polyphony.config import (
    load_config,
    load_identities,
    load_agents,
    load_routing,
    default_config_dir,
)
from polyphony.models import Identity, AgentProfile


class TestDefaultConfigDir:
    """Location returned by ``default_config_dir``."""

    def test_returns_path(self):
        """The string form of the default directory ends in ``.polyphony``."""
        location = str(default_config_dir())
        assert location.endswith(".polyphony")


class TestLoadConfig:
    """``load_config`` defaults and YAML overrides."""

    def test_missing_dir_returns_defaults(self, tmp_path):
        """A non-existent directory still yields the three default keys."""
        loaded = load_config(tmp_path / "nonexistent")
        for key in (
            "workspace_root",
            "poll_interval",
            "max_concurrent_agents",
        ):
            assert key in loaded

    def test_loads_yaml(self, tmp_path):
        """Values written to config.yaml are returned as parsed."""
        yaml_text = (
            "workspace_root: /custom/path\n"
            "max_concurrent_agents: 4\n"
        )
        (tmp_path / "config.yaml").write_text(yaml_text)
        loaded = load_config(tmp_path)
        assert loaded["workspace_root"] == "/custom/path"
        assert loaded["max_concurrent_agents"] == 4

    def test_defaults_fill_missing_keys(self, tmp_path):
        """Keys absent from config.yaml are still present in the result."""
        (tmp_path / "config.yaml").write_text("workspace_root: /x\n")
        loaded = load_config(tmp_path)
        assert "poll_interval" in loaded


class TestLoadIdentities:
    """``load_identities`` with and without an identities.yaml file."""

    def test_missing_file_returns_empty(self, tmp_path):
        """No identities.yaml in the directory yields an empty list."""
        assert load_identities(tmp_path) == []

    def test_loads_identities(self, tmp_path):
        """One YAML entry becomes one Identity with its name and volumes."""
        yaml_text = (
            "identities:\n"
            "  - name: test\n"
            "    volumes:\n"
            "      claude: ~/.claude\n"
        )
        (tmp_path / "identities.yaml").write_text(yaml_text)
        loaded = load_identities(tmp_path)
        assert len(loaded) == 1
        only = loaded[0]
        assert isinstance(only, Identity)
        assert only.name == "test"
        assert only.volumes["claude"] == "~/.claude"


class TestLoadAgents:
    """``load_agents`` with and without an agents.yaml file."""

    def test_missing_file_returns_empty(self, tmp_path):
        """No agents.yaml in the directory yields an empty list."""
        assert load_agents(tmp_path) == []

    def test_loads_agents(self, tmp_path):
        """One YAML entry becomes one AgentProfile carrying its name."""
        yaml_text = (
            "agents:\n"
            "  - name: claude-opus\n"
            "    agent_type: claude\n"
            "    cli_command: claude -p\n"
        )
        (tmp_path / "agents.yaml").write_text(yaml_text)
        loaded = load_agents(tmp_path)
        assert len(loaded) == 1
        only = loaded[0]
        assert isinstance(only, AgentProfile)
        assert only.name == "claude-opus"


class TestLoadRouting:
    """``load_routing`` with and without a routing.yaml file."""

    def test_missing_file_returns_defaults(self, tmp_path):
        """Without routing.yaml the result still has rules and default."""
        routing = load_routing(tmp_path)
        for key in ("rules", "default"):
            assert key in routing

    def test_loads_routing(self, tmp_path):
        """A single rule and the default agent are read from routing.yaml."""
        yaml_text = (
            "rules:\n"
            "  - match: {task_type: bugfix}\n"
            "    agent: kimi\n"
            "default:\n"
            "  agent: claude\n"
        )
        (tmp_path / "routing.yaml").write_text(yaml_text)
        routing = load_routing(tmp_path)
        assert len(routing["rules"]) == 1
        assert routing["default"]["agent"] == "claude"


================================================
FILE: tests/test_polyphony_events.py
================================================
"""Tests for Polyphony event parsing (§8 events)."""

import json
import pytest
from polyphony.events import (
    TaskEvent,
    parse_ndjson_line,
    parse_stream_json,
    classify_event,
)


class TestTaskEvent:
    """Construction of TaskEvent directly and via ``from_dict``."""

    def test_create(self):
        """kind and data are stored and a non-empty timestamp is filled in."""
        payload = {"text": "hello"}
        event = TaskEvent(kind="message", data=payload)
        assert event.kind == "message"
        assert event.data["text"] == "hello"
        assert event.timestamp != ""

    def test_from_dict(self):
        """``from_dict`` keeps the supplied kind and timestamp."""
        raw = {
            "kind": "result",
            "data": {"status": "ok"},
            "timestamp": "2025-01-01T00:00:00",
        }
        event = TaskEvent.from_dict(raw)
        assert event.kind == "result"
        assert event.timestamp == "2025-01-01T00:00:00"


class TestParseNdjsonLine:
    """``parse_ndjson_line`` on valid, blank and malformed input."""

    def test_valid_json(self):
        """A JSON object line is parsed into a mapping."""
        parsed = parse_ndjson_line('{"type": "message", "content": "hello"}')
        assert parsed["type"] == "message"

    def test_empty_line(self):
        """An empty string yields None."""
        parsed = parse_ndjson_line("")
        assert parsed is None

    def test_whitespace_line(self):
        """A whitespace-only line yields None."""
        parsed = parse_ndjson_line("   \n")
        assert parsed is None

    def test_invalid_json(self):
        """Text that is not JSON yields None."""
        parsed = parse_ndjson_line("not json")
        assert parsed is None

    def test_strips_whitespace(self):
        """Surrounding whitespace does not prevent parsing."""
        parsed = parse_ndjson_line('  {"key": "value"}  \n')
        assert parsed["key"] == "value"


class TestParseStreamJson:
    """``parse_stream_json`` over lists of lines."""

    def test_parses_multiple_lines(self):
        """Two valid lines give two events, in input order."""
        events = parse_stream_json([
            '{"type": "message", "text": "a"}',
            '{"type": "result", "status": "ok"}',
        ])
        assert len(events) == 2
        first, second = events[0], events[1]
        assert first["type"] == "message"
        assert second["type"] == "result"

    def test_skips_invalid_lines(self):
        """A non-JSON line between two valid ones is dropped."""
        events = parse_stream_json([
            '{"type": "message"}',
            "not json",
            '{"type": "result"}',
        ])
        assert len(events) == 2

    def test_empty_input(self):
        """No lines in, empty list out."""
        events = parse_stream_json([])
        assert events == []


class TestClassifyEvent:
    """``classify_event`` maps raw dicts to event kinds."""

    def test_result_event(self):
        """type ``result`` is classified as kind ``result``."""
        classified = classify_event({"type": "result", "status": "ok"})
        assert classified.kind == "result"

    def test_message_event(self):
        """type ``message`` is classified as kind ``message``."""
        classified = classify_event({"type": "message", "text": "hi"})
        assert classified.kind == "message"

    def test_error_event(self):
        """type ``error`` is classified as kind ``error``."""
        classified = classify_event({"type": "error", "message": "fail"})
        assert classified.kind == "error"

    def test_unknown_event(self):
        """A dict without a ``type`` key is classified as ``unknown``."""
        classified = classify_event({"foo": "bar"})
        assert classified.kind == "unknown"

    def test_preserves_data(self):
        """The classified event's data equals the raw input dict."""
        raw = {"type": "result", "status": "ok", "extra": 42}
        classified = classify_event(raw)
        assert classified.data == raw


================================================
FILE: tests/test_polyphony_identity.py
================================================
"""Tests for Polyphony identity broker (§7)."""

import pytest
from polyphony.models import Identity
from polyphony.identity import (
    resolve_identity,
    build_volume_mounts,
    build_env_overlay,
    validate_identity,
)


@pytest.fixture
def identities():
    """Two identities: ``protaige`` (with an API key) and ``personal``."""
    with_api_key = Identity(
        name="protaige",
        volumes={"claude": "~/.claude", "codex": "~/.codex"},
        api_keys={"anthropic": "ANTHROPIC_API_KEY"},
    )
    volumes_only = Identity(
        name="personal",
        volumes={"kimi": "~/.kimi"},
    )
    return [with_api_key, volumes_only]


class TestResolveIdentity:
    """``resolve_identity`` lookups by name."""

    def test_finds_by_name(self, identities):
        """A known name returns the identity carrying that name."""
        wanted = "protaige"
        resolved = resolve_identity(wanted, identities)
        assert resolved.name == "protaige"

    def test_missing_raises(self, identities):
        """An unknown name raises KeyError whose text matches the name."""
        missing = "unknown"
        with pytest.raises(KeyError, match="unknown"):
            resolve_identity(missing, identities)


class TestBuildVolumeMounts:
    """``build_volume_mounts`` for a given identity and agent type."""

    def test_mounts_for_claude(self, identities):
        """The first identity yields one claude mount containing ``:ro``."""
        first_identity = identities[0]
        mounts = build_volume_mounts(first_identity, "claude")
        assert len(mounts) == 1
        only_mount = mounts[0]
        assert "~/.claude" in only_mount
        assert ":ro" in only_mount

    def test_no_mount_for_missing_agent(self, identities):
        """An identity without a claude volume yields no mounts."""
        second_identity = identities[1]
        assert build_volume_mounts(second_identity, "claude") == []


class TestBuildEnvOverlay:
    """``build_env_overlay`` for identities with and without API keys."""

    def test_env_from_api_keys(self, identities):
        """An identity with an anthropic key exposes ANTHROPIC_API_KEY."""
        overlay = build_env_overlay(identities[0])
        assert "ANTHROPIC_API_KEY" in overlay

    def test_empty_when_no_keys(self, identities):
        """An identity without API keys produces an empty overlay."""
        overlay = build_env_overlay(identities[1])
        assert overlay == {}


class TestValidateIdentity:
    """``validate_identity`` error reporting."""

    def test_valid(self, identities):
        """A fully populated identity produces no errors."""
        assert validate_identity(identities[0]) == []

    def test_missing_name(self):
        """An empty name produces an error mentioning ``name``."""
        nameless = Identity(name="", volumes={"claude": "~/.claude"})
        problems = validate_identity(nameless)
        assert any("name" in problem for problem in problems)

    def test_missing_volumes(self):
        """Empty volumes produce an error mentioning ``volume`` (any case)."""
        no_volumes = Identity(name="test", volumes={})
        problems = validate_identity(no_volumes)
        assert any("volume" in problem.lower() for problem in problems)


================================================
FILE: tests/test_polyphony_models.py
================================================
"""Tests for Polyphony data models (§3 of spec)."""

import pytest
from polyphony.models import (
    TASK_TYPES,
    RISK_LEVELS,
    SCOPES,
    Task,
    Identity,
    AgentProfile,
    RunSpec,
    Result,
    _now,
    _uuid,
)


class TestHelpers:
    """Module-level helpers ``_now`` and ``_uuid``."""

    def test_now_returns_iso_string(self):
        """The timestamp contains ``T`` and either ``+`` or ``Z``."""
        stamp = _now()
        assert "T" in stamp
        assert any(marker in stamp for marker in ("+", "Z"))

    def test_uuid_returns_unique(self):
        """Two consecutive ids differ and the first is 36 characters long."""
        first = _uuid()
        second = _uuid()
        assert first != second
        assert len(first) == 36


class TestTaskConstants:
    """Exact membership of the TASK_TYPES, RISK_LEVELS and SCOPES constants."""

    def test_task_types(self):
        """TASK_TYPES holds exactly the seven known task types."""
        assert set(TASK_TYPES) == {
            "research",
            "bugfix",
            "feature",
            "refactor",
            "migration",
            "docs",
            "review",
        }

    def test_risk_levels(self):
        """RISK_LEVELS holds exactly low, medium and high."""
        wanted = {"low", "medium", "high"}
        assert set(RISK_LEVELS) == wanted

    def test_scopes(self):
        """SCOPES holds exactly the four known scopes."""
        assert set(SCOPES) == {
            "single_file",
            "single_module",
            "multi_module",
            "multi_repo",
        }


class TestTask:
    """Task construction, defaults and dict export."""

    def test_create_minimal(self):
        """Required fields are stored and the remaining ones get defaults."""
        created = Task(
            title="Fix login bug",
            source="github",
            source_ref="owner/repo#42",
        )
        assert created.title == "Fix login bug"
        assert created.source == "github"
        assert len(created.id) == 36
        for attribute, expected in (
            ("state", "discovered"),
            ("task_type", "feature"),
            ("risk", "low"),
        ):
            assert getattr(created, attribute) == expected

    def test_defaults(self):
        """Optional fields default to empty, zero, False or None."""
        created = Task(title="x", source="local", source_ref="1")
        assert created.scope == []
        assert created.context_tokens == 0
        assert created.requires_web is False
        assert created.run_spec_id is None
        assert created.metadata == {}

    def test_to_dict(self):
        """The dict export carries title, id and created_at."""
        exported = Task(title="x", source="local", source_ref="1").to_dict()
        assert exported["title"] == "x"
        for key in ("id", "created_at"):
            assert key in exported


class TestIdentity:
    """Identity construction with and without API keys."""

    def test_create(self):
        """Name and volumes are stored; api_keys and cost ceiling default."""
        volumes = {"claude": "~/.claude"}
        created = Identity(name="protaige", volumes=volumes)
        assert created.name == "protaige"
        assert created.volumes["claude"] == "~/.claude"
        assert created.api_keys == {}
        assert created.cost_ceiling_usd_per_day is None

    def test_with_api_keys(self):
        """Supplied api_keys are readable by provider name."""
        keys = {"anthropic": "ANTHROPIC_API_KEY"}
        created = Identity(name="test", volumes={}, api_keys=keys)
        assert created.api_keys["anthropic"] == "ANTHROPIC_API_KEY"


class TestAgentProfile:
    """AgentProfile construction and defaults."""

    def test_create(self):
        """Name is stored; context window and strengths take defaults."""
        profile = AgentProfile(
            cli_command="claude -p",
            agent_type="claude",
            name="claude-opus",
        )
        assert profile.name == "claude-opus"
        assert profile.context_window_tokens == 200000
        assert profile.strengths == []

    def test_event_protocol_default(self):
        """Without an explicit protocol the profile uses ``ndjson``."""
        profile = AgentProfile(
            cli_command="claude -p",
            agent_type="claude",
            name="x",
        )
        assert profile.event_protocol == "ndjson"


class TestRunSpec:
    """RunSpec construction and defaults."""

    def test_create(self):
        """Only required fields given: the rest take their default values."""
        spec = RunSpec(
            task_id="t1",
            agent="claude-opus",
            identity="protaige",
            workspace="/tmp/ws",
            image="polyphony/claude:latest",
        )
        assert spec.task_id == "t1"
        for attribute, expected in (
            ("attempt", 1),
            ("max_turns", 25),
            ("deadline_seconds", 1800),
            ("allowed_paths", []),
            ("proof_of_work", []),
        ):
            assert getattr(spec, attribute) == expected

    def test_immutable_concept(self):
        """A RunSpec built without an id gets a 36-character one."""
        required = {
            "task_id": "t1",
            "agent": "x",
            "identity": "y",
            "workspace": "/w",
            "image": "img",
        }
        spec = RunSpec(**required)
        assert len(spec.id) == 36


class TestResult:
    """Result construction, defaults and accepted status strings."""

    def test_create(self):
        """Status is stored; counters, cost, events and artifacts default."""
        outcome = Result(
            task_id="t1",
            run_spec_id="rs1",
            agent="claude-opus",
            status="succeeded",
        )
        assert outcome.status == "succeeded"
        assert outcome.turns == 0
        assert outcome.duration_seconds == 0
        assert outcome.cost_usd is None
        assert outcome.events == []
        assert outcome.artifacts == {}

    def test_status_values(self):
        """Each of the five status strings is stored unchanged."""
        statuses = ("succeeded", "failed", "quota", "timeout", "crash")
        for status in statuses:
            outcome = Result(
                task_id="t",
                run_spec_id="r",
                agent="a",
                status=status,
            )
            assert outcome.status == status


================================================
FILE: tests/test_polyphony_orchestrator.py
================================================
"""Tests for Polyphony orchestrator (§4 supervisor loop)."""

import pytest
from unittest.mock import patch, MagicMock
from pathlib import Path
from polyphony.orchestrator import (
    Orchestrator,
    discover_tasks,
    claim_task,
    provision_workspace,
    run_agent,
    verify_result,
)
from polyphony.models import (
    Task, AgentProfile, Identity, RunSpec, Result,
)
from polyphony.store import PolyphonyStore


@pytest.fixture
def store(tmp_path):
    """PolyphonyStore rooted at ``tmp_path`` with ``init_db`` already run."""
    backing_store = PolyphonyStore(tmp_path)
    backing_store.init_db()
    return backing_store


@pytest.fixture
def task():
    """A medium-risk local bugfix task titled ``Fix auth bug``."""
    fields = {
        "title": "Fix auth bug",
        "source": "local",
        "source_ref": "local",
        "task_type": "bugfix",
        "risk": "medium",
    }
    return Task(**fields)


@pytest.fixture
def agents():
    """A one-element list holding the ``claude-opus`` agent profile."""
    claude = AgentProfile(
        name="claude-opus",
        agent_type="claude",
        cli_command="claude -p",
        strengths=["long_context"],
    )
    return [claude]


@pytest.fixture
def policy():
    """Routing policy with no rules and ``claude-opus`` as the default."""
    default_route = {"agent": "claude-opus", "fallback": []}
    return {"rules": [], "default": default_route}


@pytest.fixture
def identities():
    """A one-element list holding the ``protaige`` identity."""
    protaige = Identity(
        name="protaige",
        volumes={"claude": "~/.claude"},
    )
    return [protaige]


class TestDiscoverTasks:
    """``discover_tasks`` against a populated and an empty store."""

    def test_returns_tasks(self, store, task):
        """A saved task is the single discovered task."""
        store.save_task(task)
        discovered = discover_tasks(store)
        assert len(discovered) == 1
        assert discovered[0].id == task.id

    def test_empty_store(self, store):
        """A fresh store yields no tasks."""
        discovered = discover_tasks(store)
        assert discovered == []


class TestClaimTask:
    """``claim_task`` state transition and persistence."""

    def test_transitions_to_claimed(self, store, task):
        """The returned task is in state ``claimed``."""
        store.save_task(task)
        result = claim_task(task, store)
        assert result.state == "claimed"

    def test_updates_store(self, store, task):
        """Re-reading the task from the store shows state ``claimed``."""
        store.save_task(task)
        claim_task(task, store)
        persisted = store.get_task(task.id)
        assert persisted.state == "claimed"


class TestProvisionWorkspace:
    """``provision_workspace`` with ``_create_ws`` patched out."""

    def test_returns_path(self, tmp_path, task):
        """The path produced by ``_create_ws`` is returned unchanged."""
        workspace_dir = tmp_path / "ws"
        workspace_dir.mkdir()
        with patch("polyphony.orchestrator._create_ws") as mock_ws:
            mock_ws.return_value = workspace_dir
            provisioned = provision_workspace(task, tmp_path, "main")
        assert provisioned == workspace_dir

    def test_calls_create(self, tmp_path, task):
        """Provisioning invokes ``_create_ws``."""
        with patch("polyphony.orchestrator._create_ws") as mock_ws:
            mock_ws.return_value = tmp_path
            provision_workspace(task, tmp_path, "main")
        assert mock_ws.called


class TestRunAgent:
    """``run_agent`` with ``_execute_container`` patched out."""

    @staticmethod
    def _canned_result(task, status):
        """Result the patched ``_execute_container`` should hand back."""
        return Result(
            task_id=task.id,
            run_spec_id="rs-1",
            agent="claude-opus",
            status=status,
        )

    @staticmethod
    def _spec_for(task):
        """RunSpec targeting ``task`` with the claude-opus agent."""
        return RunSpec(
            task_id=task.id,
            agent="claude-opus",
            identity="protaige",
            workspace="/ws",
            image="polyphony-worker:latest",
        )

    @patch("polyphony.orchestrator._execute_container")
    def test_returns_result(self, mock_exec, task):
        """A succeeded container result is passed through."""
        mock_exec.return_value = self._canned_result(task, "succeeded")
        outcome = run_agent(self._spec_for(task))
        assert outcome.status == "succeeded"

    @patch("polyphony.orchestrator._execute_container")
    def test_handles_failure(self, mock_exec, task):
        """A failed container result is passed through."""
        mock_exec.return_value = self._canned_result(task, "failed")
        outcome = run_agent(self._spec_for(task))
        assert outcome.status == "failed"


class TestVerifyResult:
    """``verify_result`` on succeeded and failed results."""

    def test_succeeded_passes(self):
        """Status ``succeeded`` verifies as True."""
        fields = {
            "task_id": "T-1",
            "run_spec_id": "rs-1",
            "agent": "claude-opus",
            "status": "succeeded",
        }
        assert verify_result(Result(**fields)) is True

    def test_failed_fails(self):
        """Status ``failed`` verifies as False."""
        fields = {
            "task_id": "T-1",
            "run_spec_id": "rs-1",
            "agent": "claude-opus",
            "status": "failed",
        }
        assert verify_result(Result(**fields)) is False


class TestOrchestrator:
    """Orchestrator construction smoke tests."""

    def test_init(self, store, agents, policy, identities):
        """Construction with all four collaborators yields an object."""
        wiring = {
            "store": store,
            "agents": agents,
            "policy": policy,
            "identities": identities,
        }
        orchestrator = Orchestrator(**wiring)
        assert orchestrator is not None

    def test_has_step(self, store, agents, policy, identities):
        """The constructed orchestrator exposes a ``step`` attribute."""
        wiring = {
            "store": store,
            "agents": agents,
            "policy": policy,
            "identities": identities,
        }
        orchestrator = Orchestrator(**wiring)
        assert hasattr(orchestrator, "step")


================================================
FILE: tests/test_polyphony_router.py
================================================
"""Tests for Polyphony router (§5.2-5.6)."""

import pytest
from polyphony.models import Task, AgentProfile, RunSpec
from polyphony.router import route, select_agent, match_rule


@pytest.fixture
def agents():
    """Three agent profiles: claude-opus, codex-default and kimi-default."""
    claude = AgentProfile(
        name="claude-opus",
        agent_type="claude",
        cli_command="claude -p",
        strengths=["long_context", "research"],
    )
    codex = AgentProfile(
        name="codex-default",
        agent_type="codex",
        cli_command="codex exec",
        strengths=["code"],
    )
    kimi = AgentProfile(
        name="kimi-default",
        agent_type="kimi",
        cli_command="kimi --print -y",
        strengths=["code"],
    )
    return [claude, codex, kimi]


@pytest.fixture
def policy():
    """Routing policy with three rules and a default with two fallbacks."""
    # Rule order is kept as listed: low-risk docs, bugfix, then high risk.
    low_risk_docs = {
        "match": {"task_type": "docs", "risk": "low"},
        "agent": "kimi-default",
    }
    any_bugfix = {
        "match": {"task_type": "bugfix"},
        "agent": "codex-default",
    }
    any_high_risk = {
        "match": {"risk": "high"},
        "agent": "claude-opus",
    }
    default_route = {
        "agent": "claude-opus",
        "fallback": ["codex-default", "kimi-default"],
    }
    return {
        "rules": [low_risk_docs, any_bugfix, any_high_risk],
        "default": default_route,
    }


class TestMatchRule:
    """``match_rule`` on single-field and multi-field rules."""

    @staticmethod
    def _task(**fields):
        """Local task titled ``x`` with the given extra fields applied."""
        return Task(title="x", source="local", source_ref="1", **fields)

    def test_matches_single_field(self):
        """A docs task matches a rule on task_type docs."""
        candidate = self._task(task_type="docs")
        docs_rule = {"match": {"task_type": "docs"}}
        assert match_rule(candidate, docs_rule) is True

    def test_no_match(self):
        """A feature task does not match a rule on task_type docs."""
        candidate = self._task(task_type="feature")
        docs_rule = {"match": {"task_type": "docs"}}
        assert match_rule(candidate, docs_rule) is False

    def test_matches_multiple_fields(self):
        """A low-risk docs task matches a rule requiring both fields."""
        candidate = self._task(task_type="docs", risk="low")
        low_risk_docs_rule = {"match": {"task_type": "docs", "risk": "low"}}
        assert match_rule(candidate, low_risk_docs_rule) is True

    def test_partial_match_fails(self):
        """A high-risk docs task fails a rule requiring low risk."""
        candidate = self._task(task_type="docs", risk="high")
        low_risk_docs_rule = {"match": {"task_type": "docs", "risk": "low"}}
        assert match_rule(candidate, low_risk_docs_rule) is False


class TestSelectAgent:
    """``select_agent`` against the three-rule policy fixture."""

    def test_selects_by_rule(self, agents, policy):
        """A low-risk docs task is routed to kimi-default."""
        docs_task = Task(
            title="Fix readme",
            source="local",
            source_ref="1",
            task_type="docs",
            risk="low",
        )
        chosen = select_agent(docs_task, agents, policy)
        assert chosen.name == "kimi-default"

    def test_falls_to_default(self, agents, policy):
        """A medium-risk feature task is routed to claude-opus."""
        feature_task = Task(
            title="New feature",
            source="local",
            source_ref="1",
            task_type="feature",
            risk="medium",
        )
        chosen = select_agent(feature_task, agents, policy)
        assert chosen.name == "claude-opus"

    def test_high_risk_matches_claude(self, agents, policy):
        """A high-risk refactor task is routed to claude-opus."""
        refactor_task = Task(
            title="Refactor auth",
            source="local",
            source_ref="1",
            task_type="refactor",
            risk="high",
        )
        chosen = select_agent(refactor_task, agents, policy)
        assert chosen.name == "claude-opus"


class TestRoute:
    """``route`` produces a RunSpec for a task."""

    def test_returns_run_spec(self, agents, policy):
        """A bugfix task yields a RunSpec for codex-default and the identity."""
        bugfix_task = Task(
            title="Fix bug",
            source="github",
            source_ref="o/r#1",
            task_type="bugfix",
        )
        spec = route(bugfix_task, agents, policy, identity="test")
        assert isinstance(spec, RunSpec)
        assert spec.task_id == bugfix_task.id
        assert spec.agent == "codex-default"
        assert spec.identity == "test"

    def test_run_spec_has_fallback(self, agents, policy):
        """A feature task yields a RunSpec whose fallback is a list."""
        feature_task = Task(
            title="New feature",
            source="local",
            source_ref="1",
            task_type="feature",
        )
        spec = route(feature_task, agents, policy, identity="test")
        # The default route in the policy fixture defines a fallback list.
        assert isinstance(spec.fallback, list)


================================================
FILE: tests/test_polyphony_runtime.py
================================================
"""Tests for Polyphony Docker runtime (§8 worker)."""

import pytest
from unittest.mock import patch, MagicMock
from polyphony.runtime import (
    create_container,
    start_container,
    stop_container,
    remove_container,
    container_logs,
    wait_container,
    build_docker_args,
)
from polyphony.models import RunSpec


@pytest.fixture
def run_spec():
    """RunSpec for task ``T-1`` with one env entry, one mount, deadline 600."""
    env = {"API_KEY": "API_KEY"}
    mounts = ["~/.claude:/home/worker/.claude:ro"]
    spec = RunSpec(
        task_id="T-1",
        agent="claude-opus",
        identity="protaige",
        workspace="/tmp/ws/T-1/1",
        image="polyphony-worker:latest",
        env_overlay=env,
        volume_mounts=mounts,
        deadline_seconds=600,
    )
    return spec


class TestBuildDockerArgs:
    """Argument-list checks for ``build_docker_args``."""

    def test_includes_image(self, run_spec):
        """The image reference appears as its own argument."""
        argv = build_docker_args(run_spec)
        assert "polyphony-worker:latest" in argv

    def test_includes_volumes(self, run_spec):
        """At least one ``-v`` value carries the configured mount string."""
        argv = build_docker_args(run_spec)
        assert "-v" in argv
        # Pair every argument with its successor and keep the ones that
        # directly follow a "-v" flag.
        mounts = [
            following
            for flag, following in zip(argv, argv[1:])
            if flag == "-v"
        ]
        wanted = "~/.claude:/home/worker/.claude:ro"
        assert any(wanted in mount for mount in mounts)

    def test_includes_env(self, run_spec):
        """An ``-e`` flag is emitted for the env overlay."""
        argv = build_docker_args(run_spec)
        assert "-e" in argv

    def test_includes_workspace_mount(self, run_spec):
        """The workspace path shows up somewhere in the joined arguments."""
        joined = " ".join(build_docker_args(run_spec))
        assert "/tmp/ws/T-1/1" in joined

    def test_container_name(self, run_spec):
        """A ``--name`` flag is present."""
        argv = build_docker_args(run_spec)
        assert "--name" in argv


class TestCreateContainer:
    """``create_container`` with the docker invocation (``_run_docker``) mocked."""

    def test_creates_container(self, run_spec):
        """On exit code 0 the stripped stdout is returned as the container id."""
        docker_ok = MagicMock(returncode=0, stdout="container_id_123\n")
        with patch(
            "polyphony.runtime._run_docker", return_value=docker_ok,
        ) as docker:
            container_id = create_container(run_spec)
            assert container_id == "container_id_123"
            assert docker.called

    def test_failure_raises(self, run_spec):
        """A non-zero exit code surfaces as ``RuntimeError`` mentioning stderr."""
        docker_failed = MagicMock(returncode=1, stderr="error")
        with patch(
            "polyphony.runtime._run_docker", return_value=docker_failed,
        ):
            with pytest.raises(RuntimeError, match="error"):
                create_container(run_spec)


class TestStartContainer:
    """``start_container`` with the docker invocation mocked."""

    def test_starts(self):
        """Exactly one docker call is made, containing ``start`` and the id."""
        with patch("polyphony.runtime._run_docker") as docker:
            docker.return_value = MagicMock(returncode=0)
            start_container("abc123")
            docker.assert_called_once()
            positional, _keywords = docker.call_args
            command = positional[0]
            assert "start" in command
            assert "abc123" in command


class TestStopContainer:
    """``stop_container`` with the docker invocation (``_run_docker``) mocked."""

    @patch("polyphony.runtime._run_docker")
    def test_stops(self, mock_docker):
        """The command contains ``stop`` and targets the requested container."""
        mock_docker.return_value = MagicMock(returncode=0)
        stop_container("abc123")
        cmd = mock_docker.call_args[0][0]
        assert "stop" in cmd
        # Without this check a stop command aimed at the wrong (or no)
        # container would still pass; the start/remove tests pin the id
        # the same way.
        assert "abc123" in cmd

    @patch("polyphony.runtime._run_docker")
    def test_stop_with_timeout(self, mock_docker):
        """A timeout is forwarded as ``-t <seconds>`` alongside the container id."""
        mock_docker.return_value = MagicMock(returncode=0)
        stop_container("abc123", timeout=30)
        cmd = mock_docker.call_args[0][0]
        assert "-t" in cmd
        assert "30" in cmd
        # The timeout variant must still address the requested container.
        assert "abc123" in cmd


class TestRemoveContainer:
    """``remove_container`` with the docker invocation mocked."""

    def test_removes(self):
        """The command contains ``rm`` and the container id."""
        with patch("polyphony.runtime._run_docker") as docker:
            docker.return_value = MagicMock(returncode=0)
            remove_container("abc123")
            positional, _keywords = docker.call_args
            command = positional[0]
            assert "rm" in command
            assert "abc123" in command


class TestContainerLogs:
    """``container_logs`` with the docker invocation mocked."""

    def test_returns_logs(self):
        """Docker's stdout is handed back unchanged, trailing newline included."""
        docker_ok = MagicMock(returncode=0, stdout="line1\nline2\n")
        with patch("polyphony.runtime._run_docker", return_value=docker_ok):
            captured = container_logs("abc123")
            assert captured == "line1\nline2\n"


class TestWaitContainer:
    """``wait_container`` turns docker's printed exit status into an int."""

    def test_returns_exit_code(self):
        """Stdout ``"0\\n"`` is reported as exit code 0."""
        docker_reply = MagicMock(returncode=0, stdout="0\n")
        with patch("polyphony.runtime._run_docker", return_value=docker_reply):
            exit_status = wait_container("abc123")
            assert exit_status == 0

    def test_nonzero_exit(self):
        """Stdout ``"1\\n"`` is reported as exit code 1 even though docker itself returned 0."""
        docker_reply = MagicMock(returncode=0, stdout="1\n")
        with patch("polyphony.runtime._run_docker", return_value=docker_reply):
            exit_status = wait_container("abc123")
            assert exit_status == 1


================================================
FILE: tests/test_polyphony_scoring.py
================================================
"""Tests for Polyphony complexity scoring (§5.1)."""

import pytest
from polyphony.models import Task
from polyphony.scoring import (
    DIMENSIONS,
    score_task,
    score_cyclomatic,
    score_fan_out,
    score_security,
    score_concurrency,
    score_domain,
)


@pytest.fixture
def simple_task():
    """Low-risk docs task touching a single file, with no metadata supplied."""
    fields = dict(
        title="Fix typo in README",
        source="local",
        source_ref="1",
        task_type="docs",
        scope=["README.md"],
        risk="low",
    )
    return Task(**fields)


@pytest.fixture
def complex_task():
    """High-risk refactor task with auth/lock keywords and caller metadata."""
    touched_files = ["src/auth/middleware.ts", "src/auth/session.ts"]
    extra = {
        "keywords": ["auth", "org_id", "asyncio.Lock"],
        "loc": 200,
        "callers": 15,
    }
    return Task(
        title="Refactor auth with async locks",
        source="github",
        source_ref="owner/repo#99",
        task_type="refactor",
        scope=touched_files,
        risk="high",
        metadata=extra,
    )


class TestDimensions:
    """Shape of the ``DIMENSIONS`` constant."""

    def test_five_dimensions(self):
        """Exactly five scoring dimensions are declared."""
        dimension_count = len(DIMENSIONS)
        assert dimension_count == 5

    def test_dimension_names(self):
        """The declared dimension names match the expected set, ignoring order."""
        declared = set(DIMENSIONS)
        assert declared == {
            "cyclomatic",
            "fan_out",
            "security",
            "concurrency",
            "domain",
        }


class TestScoreCyclomatic:
    """``score_cyclomatic`` on the two fixture tasks."""

    def test_small_scope(self, simple_task):
        """The simple docs task scores 0."""
        points = score_cyclomatic(simple_task)
        assert points == 0

    def test_large_scope(self, complex_task):
        """The complex refactor task scores at least 1."""
        points = score_cyclomatic(complex_task)
        assert points >= 1


class TestScoreFanOut:
    """``score_fan_out`` on the two fixture tasks."""

    def test_no_callers(self, simple_task):
        """A task without caller metadata scores 0."""
        points = score_fan_out(simple_task)
        assert points == 0

    def test_many_callers(self, complex_task):
        """The complex fixture (``callers`` = 15) scores exactly 2."""
        points = score_fan_out(complex_task)
        assert points == 2


class TestScoreSecurity:
    """``score_security`` on the two fixture tasks."""

    def test_no_security_keywords(self, simple_task):
        """The docs task scores 0."""
        points = score_security(simple_task)
        assert points == 0

    def test_auth_keywords(self, complex_task):
        """The auth-related refactor scores at least 1."""
        points = score_security(complex_task)
        assert points >= 1


class TestScoreConcurrency:
    """``score_concurrency`` on the two fixture tasks."""

    def test_no_concurrency(self, simple_task):
        """The docs task scores 0."""
        points = score_concurrency(simple_task)
        assert points == 0

    def test_async_locks(self, complex_task):
        """The task mentioning async locks scores at least 1."""
        points = score_concurrency(complex_task)
        assert points >= 1


class TestScoreDomain:
    """``score_domain`` on the two fixture tasks."""

    def test_docs_task(self, simple_task):
        """The docs task scores 0."""
        points = score_domain(simple_task)
        assert points == 0

    def test_high_risk_refactor(self, complex_task):
        """The high-risk refactor scores at least 1."""
        points = score_domain(complex_task)
        assert points >= 1


class TestScoreTask:
    """Checks on the aggregate value returned by ``score_task``."""

    def test_simple_task_low(self, simple_task):
        """The simple docs task totals between 0 and 3 inclusive."""
        total = score_task(simple_task)
        assert 0 <= total <= 3

    def test_complex_task_high(self, complex_task):
        """The complex refactor task totals at least 4."""
        total = score_task(complex_task)
        assert total >= 4

    def test_score_range(self, simple_task):
        """The total stays inside the 0..10 range."""
        total = score_task(simple_task)
        assert 0 <= total <= 10

    def test_returns_dict_with_breakdown(self, simple_task):
        """score_task returns a plain ``int`` total.

        The method name still mentions a dict/breakdown and is kept so the
        test id stays stable, but what is pinned here is an ``int`` -- the
        same value the sibling tests compare numerically.
        """
        result = score_task(simple_task)
        assert isinstance(result, int)


================================================
FILE: tests/test_polyphony_sources.py
================================================
"""Tests for Polyphony work sources (§2)."""

import json
import pytest
from unittest.mock import patch, MagicMock
from pathlib import Path
from polyphony.sources import get_source, list_sources
from polyphony.sources.local import LocalSource
from polyphony.sources.github import GitHubSource
from polyphony.models import Task


class TestRegistry:
    """Lookup behaviour of the work-source registry."""

    def test_list_sources(self):
        """Both built-in source names are listed."""
        available = list_sources()
        assert "local" in available
        assert "github" in available

    def test_get_local_source(self):
        """``"local"`` resolves to a ``LocalSource`` instance."""
        resolved = get_source("local")
        assert isinstance(resolved, LocalSource)

    def test_get_github_source(self):
        """``"github"`` resolves to a ``GitHubSource`` instance."""
        resolved = get_source("github")
        assert isinstance(resolved, GitHubSource)

    def test_unknown_raises(self):
        """An unregistered name raises ``KeyError`` that mentions the name."""
        unknown_name = "jira"
        with pytest.raises(KeyError, match="jira"):
            get_source(unknown_name)


class TestLocalSource:
    """Queue behaviour of ``LocalSource`` against a throwaway database file."""

    def test_add_and_poll(self, tmp_path):
        """A task that was added comes back from ``poll`` tagged as local."""
        queue = LocalSource(db_path=tmp_path / "queue.db")
        queue.add_task("Fix typo", task_type="docs", risk="low")
        polled = queue.poll()
        assert len(polled) == 1
        only = polled[0]
        assert only.title == "Fix typo"
        assert only.source == "local"

    def test_poll_empty(self, tmp_path):
        """Polling a fresh queue yields an empty list."""
        queue = LocalSource(db_path=tmp_path / "queue.db")
        polled = queue.poll()
        assert polled == []

    def test_mark_claimed(self, tmp_path):
        """A claimed task is no longer returned by ``poll``."""
        queue = LocalSource(db_path=tmp_path / "queue.db")
        queue.add_task("Task A")
        first_batch = queue.poll()
        queue.mark_claimed(first_batch[0].id)
        # After claiming, poll should not return it
        second_batch = queue.poll()
        assert len(second_batch) == 0

    def test_multiple_tasks(self, tmp_path):
        """Three added tasks are all visible to ``poll``."""
        queue = LocalSource(db_path=tmp_path / "queue.db")
        for title in ("Task A", "Task B", "Task C"):
            queue.add_task(title)
        polled = queue.poll()
        assert len(polled) == 3


class TestGitHubSource:
    """``GitHubSource.poll`` with the ``_run_gh`` helper mocked out."""

    def test_poll_returns_tasks(self):
        """One issue in the JSON payload becomes one github-sourced task."""
        payload = json.dumps([
            {
                "number": 42,
                "title": "Fix auth bug",
                "labels": [{"name": "agent-ready"}],
            },
        ])
        gh_ok = MagicMock(returncode=0, stdout=payload)
        with patch("polyphony.sources.github._run_gh", return_value=gh_ok):
            source = GitHubSource(repo="owner/repo")
            polled = source.poll()
            assert len(polled) == 1
            issue_task = polled[0]
            assert issue_task.title == "Fix auth bug"
            assert issue_task.source == "github"
            assert "42" in issue_task.source_ref

    def test_poll_empty(self):
        """An empty JSON array yields an empty task list."""
        gh_ok = MagicMock(returncode=0, stdout="[]")
        with patch("polyphony.sources.github._run_gh", return_value=gh_ok):
            source = GitHubSource(repo="owner/repo")
            polled = source.poll()
            assert polled == []

    def test_poll_gh_failure(self):
        """A failing ``gh`` call is swallowed and reported as no tasks."""
        gh_failed = MagicMock(returncode=1, stderr="auth failed")
        with patch("polyphony.sources.github._run_gh", return_value=gh_failed):
            source = GitHubSource(repo="owner/repo")
            # Should return empty, not crash
            polled = source.poll()
            assert polled == []

    def test_label_filter(self):
        """The configured label appears in the command handed to ``_run_gh``."""
        gh_ok = MagicMock(returncode=0, stdout="[]")
        with patch(
            "polyphony.sources.github._run_gh", return_value=gh_ok,
        ) as gh:
            source = GitHubSource(
                repo="owner/repo",
                label_filter="polyphony",
            )
            source.poll()
            positional, _keywords = gh.call_args
            rendered = " ".join(positional[0])
            assert "polyphony" in rendered


================================================
FILE: tests/test_polyphony_state.py
================================================
"""Tests for Polyphony state machine (§4 lifecycle)."""

import pytest
from polyphony.models import Task
from polyphony.state_machine import (
    TASK_STATES,
    TRANSITIONS,
    can_transition,
    transition,
    is_terminal,
)


class TestConstants:
    """Sanity checks on the ``TASK_STATES`` and ``TRANSITIONS`` constants."""

    def test_all_states_present(self):
        """``TASK_STATES`` holds exactly the nine lifecycle states."""
        declared = set(TASK_STATES)
        assert declared == {
            "discovered",
            "claimed",
            "routed",
            "provisioned",
            "running",
            "verifying",
            "landed",
            "failed",
            "blocked",
        }

    def test_transitions_keys_are_valid_states(self):
        """Every key of ``TRANSITIONS`` is a declared state."""
        unknown_keys = [
            state for state in TRANSITIONS if state not in TASK_STATES
        ]
        assert unknown_keys == []


class TestCanTransition:
    """Pairwise checks of ``can_transition`` for permitted and rejected moves."""

    @staticmethod
    def _permits(current, target):
        """Assert that ``can_transition`` returns exactly ``True``."""
        assert can_transition(current, target) is True

    @staticmethod
    def _forbids(current, target):
        """Assert that ``can_transition`` returns exactly ``False``."""
        assert can_transition(current, target) is False

    def test_discovered_to_claimed(self):
        self._permits("discovered", "claimed")

    def test_claimed_to_routed(self):
        self._permits("claimed", "routed")

    def test_routed_to_provisioned(self):
        self._permits("routed", "provisioned")

    def test_provisioned_to_running(self):
        self._permits("provisioned", "running")

    def test_running_to_verifying(self):
        self._permits("running", "verifying")

    def test_running_to_failed(self):
        self._permits("running", "failed")

    def test_verifying_to_landed(self):
        self._permits("verifying", "landed")

    def test_verifying_to_failed(self):
        self._permits("verifying", "failed")

    def test_failed_to_claimed_retry(self):
        self._permits("failed", "claimed")

    def test_failed_to_blocked(self):
        self._permits("failed", "blocked")

    def test_invalid_discovered_to_running(self):
        self._forbids("discovered", "running")

    def test_invalid_landed_to_anything(self):
        self._forbids("landed", "claimed")
        self._forbids("landed", "failed")

    def test_invalid_same_state(self):
        self._forbids("claimed", "claimed")


class TestTransition:
    """Behaviour of ``transition`` applied to a freshly created task."""

    def test_valid_transition_updates_state(self):
        """A new task starts as ``discovered`` and can move to ``claimed``."""
        task = Task(title="x", source="local", source_ref="1")
        assert task.state == "discovered"
        moved = transition(task, "claimed")
        assert moved.state == "claimed"

    def test_invalid_transition_raises(self):
        """Skipping straight to ``running`` raises ``ValueError``."""
        task = Task(title="x", source="local", source_ref="1")
        with pytest.raises(ValueError, match="Invalid transition"):
            transition(task, "running")

    def test_transition_updates_timestamp(self):
        """``updated_at`` after the move is not earlier than before it."""
        task = Task(title="x", source="local", source_ref="1")
        # Captured before the call so the comparison uses the original value.
        stamp_before = task.updated_at
        moved = transition(task, "claimed")
        assert moved.updated_at >= stamp_before


class TestIsTerminal:
    """Which states ``is_terminal`` reports as final."""

    def test_landed_is_terminal(self):
        verdict = is_terminal("landed")
        assert verdict is True

    def test_blocked_is_terminal(self):
        verdict = is_terminal("blocked")
        assert verdict is True

    def test_discovered_not_terminal(self):
        verdict = is_terminal("discovered")
        assert verdict is False

    def test_running_not_terminal(self):
        verdict = is_terminal("running")
        assert verdict is False

    def test_failed_not_terminal(self):
        verdict = is_terminal("failed")
        assert verdict is False


================================================
FILE: tests/test_polyphony_store.py
================================================
"""Tests for Polyphony SQLite store."""

import pytest
from polyphony.models import Task, RunSpec, Result
from polyphony.store import PolyphonyStore


@pytest.fixture
def store(tmp_path):
    """An initialised ``PolyphonyStore`` rooted at the per-test temp directory."""
    fresh_store = PolyphonyStore(tmp_path)
    fresh_store.init_db()
    return fresh_store


@pytest.fixture
def sample_task():
    """A GitHub-sourced task with every optional field left at its default."""
    fields = dict(
        title="Fix bug",
        source="github",
        source_ref="owner/repo#1",
    )
    return Task(**fields)


class TestInit:
    """Files that ``PolyphonyStore.init_db`` leaves in its directory."""

    def test_creates_db(self, tmp_path):
        """``orchestrator.db`` exists after initialisation."""
        PolyphonyStore(tmp_path).init_db()
        db_file = tmp_path / "orchestrator.db"
        assert db_file.exists()

    def test_creates_gitignore(self, tmp_path):
        """A ``.gitignore`` containing ``*`` is written next to the database."""
        PolyphonyStore(tmp_path).init_db()
        ignore_file = tmp_path / ".gitignore"
        assert ignore_file.exists()
        assert "*" in ignore_file.read_text()

    def test_idempotent(self, tmp_path):
        """Calling ``init_db`` twice on the same store does not raise."""
        repeated = PolyphonyStore(tmp_path)
        for _ in range(2):
            repeated.init_db()  # no error


class TestTaskCRUD:
    """Round-trips of ``Task`` rows through the store."""

    def test_save_and_get(self, store, sample_task):
        """A saved task can be read back by id with its fields intact."""
        store.save_task(sample_task)
        fetched = store.get_task(sample_task.id)
        assert fetched is not None
        assert fetched.title == "Fix bug"
        assert fetched.source == "github"

    def test_get_missing_returns_none(self, store):
        """An unknown id yields ``None`` rather than an error."""
        fetched = store.get_task("nonexistent")
        assert fetched is None

    def test_list_tasks(self, store):
        """Two saved tasks are both listed."""
        pending = [
            Task(title="A", source="local", source_ref="1"),
            Task(title="B", source="local", source_ref="2"),
        ]
        for task in pending:
            store.save_task(task)
        listed = store.list_tasks()
        assert len(listed) == 2

    def test_list_tasks_by_state(self, store, sample_task):
        """Filtering by state returns only tasks in that state."""
        store.save_task(sample_task)
        in_discovered = store.list_tasks(state="discovered")
        assert len(in_discovered) == 1
        in_running = store.list_tasks(state="running")
        assert len(in_running) == 0

    def test_update_task(self, store, sample_task):
        """Saving the same task again overwrites its stored state."""
        store.save_task(sample_task)
        sample_task.state = "claimed"
        store.save_task(sample_task)
        fetched = store.get_task(sample_task.id)
        assert fetched.state == "claimed"


class TestRunSpecCRUD:
    """Round-trips of ``RunSpec`` rows through the store."""

    def test_save_and_get(self, store):
        """A saved run spec can be read back by id."""
        spec = RunSpec(
            task_id="t1",
            agent="claude",
            identity="protaige",
            workspace="/tmp/ws",
            image="img:latest",
        )
        store.save_run_spec(spec)
        fetched = store.get_run_spec(spec.id)
        assert fetched is not None
        assert fetched.agent == "claude"

    def test_get_missing(self, store):
        """An unknown run-spec id yields ``None``."""
        fetched = store.get_run_spec("nope")
        assert fetched is None


class TestResultCRUD:
    """Round-trips of ``Result`` rows through the store."""

    def test_save_and_get(self, store):
        """A saved result can be read back by id with its status."""
        outcome = Result(
            task_id="t1",
            run_spec_id="rs1",
            agent="claude",
            status="succeeded",
        )
        store.save_result(outcome)
        fetched = store.get_result(outcome.id)
        assert fetched is not None
        assert fetched.status == "succeeded"

    def test_list_results_by_task(self, store):
        """Both results recorded for task ``t1`` are listed for that task."""
        attempts = (
            ("rs1", "claude", "failed"),
            ("rs2", "kimi", "succeeded"),
        )
        # Build every Result first, then persist them in the same order.
        outcomes = [
            Result(
                task_id="t1",
                run_spec_id=run_spec_id,
                agent=agent,
                status=status,
            )
            for run_spec_id, agent, status in attempts
        ]
        for outcome in outcomes:
            store.save_result(outcome)
        listed = store.list_results(task_id="t1")
        assert len(listed) == 2


class TestStateLog:
    """State-transition log kept by the store."""

    def test_log_transition(self, store, sample_task):
        """One logged transition is returned with its from/to states."""
        store.save_task(sample_task)
        store.log_transition(sample_task.id, "discovered", "claimed")
        entries = store.get_state_log(sample_task.id)
        assert len(entries) == 1
        entry = entries[0]
        assert entry["from_state"] == "discovered"
        assert entry["to_state"] == "claimed"


================================================
FILE: tests/test_polyphony_workspace.py
================================================
"""Tests for Polyphony workspace manager (§6)."""

import pytest
from unittest.mock import patch, MagicMock
from pathlib import Path
from polyphony.workspace import (
    workspace_path,
    create_workspace,
    cleanup_workspace,
    list_workspaces,
)


class TestWorkspacePath:
    """Path-shape checks for ``workspace_path``."""

    def test_creates_path(self, tmp_path):
        """The task id (and attempt number) appear in the resulting path."""
        p = workspace_path(tmp_path, "TASK-1", 1)
        assert "TASK-1" in str(p)
        # NOTE(review): "1" also occurs inside "TASK-1", so this line alone
        # does not prove the attempt number is encoded in the path.
        assert "1" in str(p)

    def test_sanitizes_id(self, tmp_path):
        """A task id containing ``/`` must not introduce extra path levels."""
        p = workspace_path(tmp_path, "owner/repo#42", 1)
        # No slashes in directory name
        assert "/" not in p.name
        # ``Path.name`` is only the final component and can never contain a
        # separator, so the check above cannot fail on its own.  Also verify
        # that the raw id's slash did not survive as a nested
        # "owner/repo..." directory anywhere in the path.
        assert "owner/repo" not in p.as_posix()


class TestCreateWorkspace:
    """``create_workspace`` with the git invocation (``_run_git``) mocked."""

    def test_clones_repo(self, tmp_path):
        """The workspace directory exists afterwards and git was invoked."""
        with patch("polyphony.workspace._run_git") as git:
            git.return_value = MagicMock(returncode=0)
            workspace = create_workspace(
                base_dir=tmp_path,
                task_id="T-1",
                attempt=1,
                repo_url="https://github.com/o/r.git",
                ref="main",
            )
            assert workspace.exists()
            assert git.called

    def test_checks_out_branch(self, tmp_path):
        """Some recorded git call mentions ``checkout``."""
        with patch("polyphony.workspace._run_git") as git:
            git.return_value = MagicMock(returncode=0)
            create_workspace(
                base_dir=tmp_path,
                task_id="T-2",
                attempt=1,
                repo_url="https://github.com/o/r.git",
                ref="feature/auth",
            )
            rendered_calls = list(map(str, git.call_args_list))
            assert any("checkout" in text for text in rendered_calls)

    def test_uses_mirror_when_available(self, tmp_path):
        """With an existing mirror directory, some git call mentions ``dissociate``."""
        with patch("polyphony.workspace._run_git") as git:
            git.return_value = MagicMock(returncode=0)
            mirror_dir = tmp_path / "mirror" / "repo.git"
            mirror_dir.mkdir(parents=True)
            create_workspace(
                base_dir=tmp_path,
                task_id="T-3",
                attempt=1,
                repo_url="https://github.com/o/r.git",
                ref="main",
                mirror_path=mirror_dir,
            )
            rendered_calls = list(map(str, git.call_args_list))
            assert any("dissociate" in text for text in rendered_calls)


class TestCleanupWorkspace:
    """``cleanup_workspace`` on existing and missing directories."""

    def test_removes_directory(self, tmp_path):
        """A non-empty workspace directory is gone after cleanup."""
        doomed = tmp_path / "workspace"
        doomed.mkdir()
        marker = doomed / "file.txt"
        marker.write_text("x")
        cleanup_workspace(doomed)
        assert not doomed.exists()

    def test_missing_dir_no_error(self, tmp_path):
        """Cleaning up a path that was never created does not raise."""
        never_created = tmp_path / "nope"
        cleanup_workspace(never_created)


class TestListWorkspaces:
    """``list_workspaces`` over a ``<task>/<attempt>`` directory layout."""

    def test_lists_dirs(self, tmp_path):
        """Two task/attempt directories produce at least two entries."""
        for task_dir in ("T-1", "T-2"):
            (tmp_path / task_dir / "1").mkdir(parents=True)
        found = list_workspaces(tmp_path)
        assert len(found) >= 2

    def test_empty_base(self, tmp_path):
        """An empty base directory yields an empty list."""
        found = list_workspaces(tmp_path)
        assert found == []


================================================
FILE: tests/test_session_detect.py
================================================
"""Tests for multi-CLI session detection."""

from __future__ import annotations

import json
from pathlib import Path
from unittest.mock import patch

from maggy.services.session_detect import (
    detect_all,
    detect_claude,
    detect_codex,
    detect_kimi,
)

# Dotted path of the ``_home`` callable that ``_patch_home`` replaces in tests.
_MOD = "maggy.services.session_detect._home"


def _patch_home(tmp_path):
    """Build a patcher that makes the ``_home`` callable at ``_MOD`` return ``tmp_path``."""
    home_patcher = patch(_MOD, return_value=tmp_path)
    return home_patcher


def test_detect_claude_from_history(tmp_path):
    """A history entry whose project equals the queried dir yields its session."""
    claude_dir = tmp_path / ".claude"
    claude_dir.mkdir(parents=True)
    record = json.dumps({"project": "/tmp/proj", "sessionId": "c-123"})
    (claude_dir / "history.jsonl").write_text(record + "\n")
    with _patch_home(tmp_path):
        found = detect_claude("/tmp/proj")
    assert found is not None
    assert found.cli == "claude"
    assert found.session_id == "c-123"


def test_detect_claude_no_match(tmp_path):
    """History that only mentions another project yields ``None``."""
    claude_dir = tmp_path / ".claude"
    claude_dir.mkdir(parents=True)
    record = json.dumps({"project": "/other", "sessionId": "x"})
    (claude_dir / "history.jsonl").write_text(record + "\n")
    with _patch_home(tmp_path):
        found = detect_claude("/tmp/proj")
        assert found is None


def test_detect_claude_missing_file():
    """With a home directory that does not exist, detection yields ``None``."""
    absent_home = Path("/nonexistent_detect_xyz")
    with _patch_home(absent_home):
        found = detect_claude("/tmp/proj")
        assert found is None


def test_detect_kimi_from_state(tmp_path):
    """A ``work_dirs`` entry for the queried path yields its last session id."""
    kimi_dir = tmp_path / ".kimi"
    kimi_dir.mkdir(parents=True)
    state = {
        "work_dirs": [
            {"path": "/tmp/proj", "last_session_id": "k-1"},
        ],
    }
    (kimi_dir / "kimi.json").write_text(json.dumps(state))
    with _patch_home(tmp_path):
        found = detect_kimi("/tmp/proj")
    assert found is not None
    assert found.cli == "kimi"
    assert found.session_id == "k-1"


def test_detect_kimi_null_session(tmp_path):
    """A matching ``work_dirs`` entry with a null session id yields ``None``."""
    kimi_dir = tmp_path / ".kimi"
    kimi_dir.mkdir(parents=True)
    state = {
        "work_dirs": [
            {"path": "/tmp/proj", "last_session_id": None},
        ],
    }
    (kimi_dir / "kimi.json").write_text(json.dumps(state))
    with _patch_home(tmp_path):
        found = detect_kimi("/tmp/proj")
        assert found is None


def test_detect_kimi_no_file():
    """With a home directory that does not exist, Kimi detection yields ``None``."""
    absent_home = Path("/nonexistent_detect_xyz")
    with _patch_home(absent_home):
        found = detect_kimi("/tmp/proj")
        assert found is None


def test_detect_codex_from_session(tmp_path):
    """A rollout file whose ``session_meta`` cwd matches yields its session id."""
    month_dir = tmp_path / ".codex" / "sessions" / "2026" / "05"
    month_dir.mkdir(parents=True)
    session_meta = {
        "type": "session_meta",
        "payload": {"id": "cx-1", "cwd": "/tmp/proj"},
    }
    rollout_file = month_dir / "rollout-test.jsonl"
    rollout_file.write_text(json.dumps(session_meta) + "\n")
    with _patch_home(tmp_path):
        found = detect_codex("/tmp/proj")
    assert found is not None
    assert found.cli == "codex"
    assert found.session_id == "cx-1"


def test_detect_codex_no_dir():
    """With a home directory that does not exist, Codex detection yields ``None``."""
    absent_home = Path("/nonexistent_detect_xyz")
    with _patch_home(absent_home):
        found = detect_codex("/tmp/proj")
        assert found is None


def test_detect_all_aggregates(tmp_path):
    """With only Claude history present, ``detect_all`` reports a claude session."""
    claude_dir = tmp_path / ".claude"
    claude_dir.mkdir(parents=True)
    record = json.dumps({"project": "/tmp/p", "sessionId": "s1"})
    (claude_dir / "history.jsonl").write_text(record + "\n")
    with _patch_home(tmp_path):
        detected = detect_all("/tmp/p")
    reported_clis = [session.cli for session in detected.sessions]
    assert "claude" in reported_clis


def test_detect_all_empty(tmp_path):
    """An empty home directory produces a result with no sessions."""
    with _patch_home(tmp_path):
        detected = detect_all("/tmp/p")
    sessions = detected.sessions
    assert sessions == []


================================================
FILE: tests/test_skill_lint.py
================================================
"""Unit tests for skill-lint."""

from __future__ import annotations

import json
import sys
import tempfile
from pathlib import Path

import pytest

# Add scripts/ to path so we can import skill_lint
sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))

from skill_lint import Finding, Severity
from skill_lint.frontmatter import check as fm_check, parse_frontmatter
from skill_lint.spec import check as sp_check
from skill_lint.content import check as cq_check
from skill_lint.references import check as ri_check
from skill_lint.report import format_json, format_text
from skill_lint.__main__ import main


@pytest.fixture
def skills_dir(tmp_path: Path) -> Path:
    """Provide an empty ``skills`` directory inside the per-test temp dir."""
    root = tmp_path / 'skills'
    root.mkdir()
    return root


def _make_skill(skills_dir: Path, name: str, content: str) -> tuple[Path, Path]:
    """Create a skill directory with SKILL.md content. Returns (skill_dir, skill_path)."""
    skill_dir = skills_dir / name
    skill_dir.mkdir()
    skill_path = skill_dir / 'SKILL.md'
    skill_path.write_text(content, encoding='utf-8')
    return skill_dir, skill_path


# --- parse_frontmatter ---

class TestParseFrontmatter:
    """Parsing of the leading ``---`` block by ``parse_frontmatter``."""

    def test_valid_frontmatter(self):
        """Fields are extracted and the closing delimiter's line number is returned."""
        document = '---\nname: test-skill\ndescription: A test\n---\n# Content'
        parsed, closing_line = parse_frontmatter(document)
        assert parsed['name'] == 'test-skill'
        assert parsed['description'] == 'A test'
        assert closing_line == 4

    def test_no_frontmatter(self):
        """Without a leading delimiter the result is ``({}, 0)``."""
        document = '# Just content\nNo frontmatter here'
        parsed, closing_line = parse_frontmatter(document)
        assert parsed == {}
        assert closing_line == 0

    def test_unclosed_frontmatter(self):
        """A block that is never closed reports end line 0."""
        document = '---\nname: broken\n'
        _parsed, closing_line = parse_frontmatter(document)
        assert closing_line == 0

    def test_quoted_values(self):
        """Surrounding single or double quotes are stripped from values."""
        document = '---\nname: "quoted-name"\ndescription: \'single\'\n---\n'
        parsed, _closing_line = parse_frontmatter(document)
        assert parsed['name'] == 'quoted-name'
        assert parsed['description'] == 'single'


# --- FM checks ---

class TestFrontmatter:
    """FM-series rules reported by the frontmatter checker."""

    @staticmethod
    def _rule_ids(skills_dir, name, content):
        """Create the skill, run the frontmatter check, return the reported rule ids."""
        skill_dir, skill_path = _make_skill(skills_dir, name, content)
        return [
            finding.rule_id
            for finding in fm_check(skill_path, skill_dir, skills_dir)
        ]

    def test_no_frontmatter(self, skills_dir):
        reported = self._rule_ids(skills_dir, 'bad-skill', '# No frontmatter\n')
        assert 'FM001' in reported

    def test_missing_name(self, skills_dir):
        reported = self._rule_ids(
            skills_dir, 'test', '---\ndescription: hello\n---\n',
        )
        assert 'FM002' in reported

    def test_missing_description(self, skills_dir):
        reported = self._rule_ids(skills_dir, 'test', '---\nname: test\n---\n')
        assert 'FM003' in reported

    def test_name_mismatch(self, skills_dir):
        reported = self._rule_ids(
            skills_dir,
            'real-name',
            '---\nname: wrong-name\ndescription: x\n---\n',
        )
        assert 'FM004' in reported

    def test_invalid_name_format(self, skills_dir):
        reported = self._rule_ids(
            skills_dir,
            'Test_Bad',
            '---\nname: Test_Bad\ndescription: x\n---\n',
        )
        assert 'FM005' in reported

    def test_clean_skill(self, skills_dir):
        """A fully populated, matching frontmatter block produces no findings."""
        frontmatter_lines = (
            '---\n'
            'name: good-skill\n'
            'description: A well-formed skill\n'
            'when-to-use: When testing\n'
            'user-invocable: true\n'
            'effort: low\n'
            '---\n'
            '# Good Skill\n'
        )
        reported = self._rule_ids(skills_dir, 'good-skill', frontmatter_lines)
        assert len(reported) == 0


# --- SP checks ---

class TestSpec:
    """SP-series rules reported by the spec checker."""

    @staticmethod
    def _rule_ids(skills_dir, name, content):
        """Create the skill, run the spec check, return the reported rule ids."""
        skill_dir, skill_path = _make_skill(skills_dir, name, content)
        return [
            finding.rule_id
            for finding in sp_check(skill_path, skill_dir, skills_dir)
        ]

    def test_missing_skill_md(self, skills_dir):
        """A skill directory without ``SKILL.md`` triggers SP001."""
        empty_skill = skills_dir / 'empty-skill'
        empty_skill.mkdir()
        findings = sp_check(empty_skill / 'SKILL.md', empty_skill, skills_dir)
        assert 'SP001' in [finding.rule_id for finding in findings]

    def test_over_500_lines(self, skills_dir):
        """550 body lines after the frontmatter trigger SP002."""
        body = '\n'.join(map('line {}'.format, range(550)))
        reported = self._rule_ids(skills_dir, 'big', '---\nname: big\n---\n' + body)
        assert 'SP002' in reported

    def test_between_300_500(self, skills_dir):
        """350 body lines after the frontmatter trigger SP003."""
        body = '\n'.join(map('line {}'.format, range(350)))
        reported = self._rule_ids(
            skills_dir, 'medium', '---\nname: medium\n---\n' + body,
        )
        assert 'SP003' in reported

    def test_inline_suppression(self, skills_dir):
        """A ``skill-lint: disable=SP002`` comment silences SP002 on a long file."""
        header = (
            '---\n'
            '<!-- skill-lint: disable=SP002 -->\n'
            'name: big\n'
            '---\n'
        )
        body = '\n'.join(map('line {}'.format, range(550)))
        reported = self._rule_ids(skills_dir, 'big', header + body)
        assert 'SP002' not in reported


# --- CQ checks ---

class TestContent:
    """CQ-series rules reported by the content checker."""

    @staticmethod
    def _rule_ids(skills_dir, name, content):
        """Create the skill, run the content check, return the reported rule ids."""
        skill_dir, skill_path = _make_skill(skills_dir, name, content)
        return [
            finding.rule_id
            for finding in cq_check(skill_path, skill_dir, skills_dir)
        ]

    def test_ascii_art_detected(self, skills_dir):
        """Box-drawing characters in plain body text trigger CQ001."""
        document = '---\nname: arty\ndescription: x\n---\n# Arty\n╔══════╗\n║ box  ║\n╚══════╝\n'
        assert 'CQ001' in self._rule_ids(skills_dir, 'arty', document)

    def test_ascii_art_in_code_block_ok(self, skills_dir):
        """The same characters inside a fenced code block do not trigger CQ001."""
        document = '---\nname: code-art\ndescription: x\n---\n# Code\n```\n╔══════╗\n║ ok   ║\n╚══════╝\n```\n'
        assert 'CQ001' not in self._rule_ids(skills_dir, 'code-art', document)

    def test_vague_phrases(self, skills_dir):
        """A "best practices" sentence triggers CQ002."""
        document = '---\nname: vague\ndescription: x\n---\n# Vague\nYou should follow best practices.\n'
        assert 'CQ002' in self._rule_ids(skills_dir, 'vague', document)

    def test_filler_intensity(self, skills_dir):
        """Ten consecutive emphatic lines trigger CQ003."""
        # Each of the ten lines carries "MANDATORY" and "NON-NEGOTIABLE" in a
        # file of roughly fifteen lines; presumably that is far above the
        # CQ003 density limit (the earlier note here cited 2 per 100 lines;
        # verify against skill_lint.content).
        filler_lines = '\n'.join(['This is MANDATORY and NON-NEGOTIABLE'] * 10)
        document = f'---\nname: filler\ndescription: x\n---\n# Filler\n{filler_lines}\n'
        assert 'CQ003' in self._rule_ids(skills_dir, 'filler', document)

    def test_stale_load_ref(self, skills_dir):
        """A ``*Load with: ...*`` line triggers CQ005."""
        document = '---\nname: stale\ndescription: x\n---\n# Stale\n*Load with: base.md*\n'
        assert 'CQ005' in self._rule_ids(skills_dir, 'stale', document)

    def test_no_h1_heading(self, skills_dir):
        """A body without any ``#`` heading triggers CQ006."""
        document = '---\nname: headless\ndescription: x\n---\nNo heading here.\n'
        assert 'CQ006' in self._rule_ids(skills_dir, 'headless', document)


# --- RI checks ---

class TestReferences:
    """Checks that `ri_check` reports RI001 only for unresolved skill references."""

    def test_broken_skill_ref(self, skills_dir):
        """Mentioning 'skills/nonexistent-skill' with no such skill produces RI001."""
        linker_md = (
            '---\nname: linker\ndescription: x\n---\n'
            '# Linker\n'
            'See skills/nonexistent-skill for details.\n'
        )
        _, linker_path = _make_skill(skills_dir, 'linker', linker_md)
        reported = {
            finding.rule_id
            for finding in ri_check(linker_path, skills_dir / 'linker', skills_dir)
        }
        assert 'RI001' in reported

    def test_valid_skill_ref(self, skills_dir):
        """Mentioning 'skills/target' after creating that skill produces no RI001."""
        # The referenced skill has to exist before the linker is checked.
        _make_skill(skills_dir, 'target', '---\nname: target\n---\n')
        linker_md = (
            '---\nname: linker\ndescription: x\n---\n'
            '# Linker\n'
            'See skills/target for details.\n'
        )
        _, linker_path = _make_skill(skills_dir, 'linker', linker_md)
        reported = {
            finding.rule_id
            for finding in ri_check(linker_path, skills_dir / 'linker', skills_dir)
        }
        assert 'RI001' not in reported


# --- Report ---

class TestReport:
    """Checks the text and JSON renderings of a {skill name: findings} mapping."""

    def test_text_format(self, skills_dir):
        """The text report mentions both severities and the skill name."""
        rendered = format_text({
            'test-skill': [
                Finding('FM001', Severity.ERROR, 'Missing frontmatter'),
                Finding('SP002', Severity.WARNING, 'Too long'),
            ],
        })
        for fragment in ('ERROR', 'WARNING', 'test-skill'):
            assert fragment in rendered

    def test_json_format(self, skills_dir):
        """The JSON report parses, counts one error, and lists the skill."""
        by_skill = {
            'test-skill': [Finding('FM001', Severity.ERROR, 'Missing frontmatter')],
        }
        parsed = json.loads(format_json(by_skill))
        assert parsed['summary']['errors'] == 1
        assert 'test-skill' in parsed['skills']


# --- CLI ---

class TestCLI:
    """Exit-code checks for the command-line entry point `main`."""

    def test_version(self, capsys):
        """`--version` terminates via SystemExit with code 0."""
        with pytest.raises(SystemExit) as exit_info:
            main(['--version'])
        assert exit_info.value.code == 0

    def test_missing_dir(self):
        """A skills path that does not exist makes main return 2."""
        assert main(['/nonexistent/path']) == 2

    def test_single_skill(self, skills_dir):
        """Checking one well-formed skill with --skill returns 0."""
        skill_md = '\n'.join([
            '---',
            'name: clean',
            'description: A clean skill',
            'when-to-use: Always',
            'user-invocable: true',
            'effort: low',
            '---',
            '# Clean Skill',
            '',
            '```python',
            'print("hello")',
            '```',
            '',  # keeps the trailing newline after the closing fence
        ])
        _make_skill(skills_dir, 'clean', skill_md)
        exit_code = main(['--skill', 'clean', str(skills_dir)])
        assert exit_code == 0

    def test_fail_on_warning(self, skills_dir):
        """With --fail-on warning, a 550-line skill body makes main return 1."""
        frontmatter = '---\nname: big\ndescription: x\n---\n'
        body = '\n'.join('line %d' % index for index in range(550))
        _make_skill(skills_dir, 'big', frontmatter + body)
        exit_code = main(['--fail-on', 'warning', '--skill', 'big', str(skills_dir)])
        assert exit_code == 1


================================================
FILE: tests/validate-structure.sh
================================================
#!/bin/bash
# validate-structure.sh - Validates Maggy structure matches Claude Code requirements
# Run with: ./tests/validate-structure.sh
# Exit codes: 0 = all pass, 1 = failures

# Treat unset variables as errors and let a pipeline report any failing stage.
# errexit (-e) is intentionally left off: every check must run so that all
# failures can be collected and reported together.
set -u -o pipefail

# Paths, all derived from the directory that contains this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(dirname "$SCRIPT_DIR")
SKILLS_DIR="${ROOT_DIR}/skills"
COMMANDS_DIR="${ROOT_DIR}/commands"
HOOKS_DIR="${ROOT_DIR}/hooks"

# ANSI color sequences, stored as literal backslash text and rendered by
# `echo -e` at the point of use.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset / no color

# Running tallies updated by pass, fail and warn.
PASS_COUNT=0 FAIL_COUNT=0 WARN_COUNT=0

# Record a passing check.
# Arguments: $1 - message printed after a green check mark
# Globals:   GREEN, NC (read); PASS_COUNT (incremented)
# Returns:   0
pass() {
    echo -e "${GREEN}✓${NC} $1"
    # Plain assignment rather than ((PASS_COUNT++)): the post-increment form
    # evaluates to the old value, so the first call (0 -> 1) would make this
    # function return a non-zero status.
    PASS_COUNT=$((PASS_COUNT + 1))
}

# Record a failing check.
# Arguments: $1 - message printed after a red cross
# Globals:   RED, NC (read); FAIL_COUNT (incremented)
# Returns:   0
fail() {
    echo -e "${RED}✗${NC} $1"
    # Plain assignment rather than ((FAIL_COUNT++)): the post-increment form
    # evaluates to the old value, so the first call (0 -> 1) would make this
    # function return a non-zero status.
    FAIL_COUNT=$((FAIL_COUNT + 1))
}

# Record a non-fatal warning.
# Arguments: $1 - message printed after a yellow warning sign
# Globals:   YELLOW, NC (read); WARN_COUNT (incremented)
# Returns:   0
warn() {
    echo -e "${YELLOW}⚠${NC} $1"
    # Plain assignment rather than ((WARN_COUNT++)): the post-increment form
    # evaluates to the old value, so the first call (0 -> 1) would make this
    # function return a non-zero status.
    WARN_COUNT=$((WARN_COUNT + 1))
}

# Print a section banner: a blank line, a horizontal rule, the title ($1)
# indented by one space, and a closing rule.
header() {
    local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    printf '\n%s\n %s\n%s\n' "$rule" "$1" "$rule"
}

# ============================================================================
# TEST 1: Skills Structure
# Each skill must be a FOLDER containing SKILL.md (not a flat .md file)
# ============================================================================
# Verify the repository's skills/ layout: every skill must be a directory
# containing SKILL.md; loose *.md files directly under skills/ are failures.
# Globals: SKILLS_DIR (read); results are recorded through pass/fail.
test_skills_structure() {
    header "TEST: Skills Folder Structure"

    if [[ ! -d "$SKILLS_DIR" ]]; then
        fail "Skills directory does not exist: $SKILLS_DIR"
        return
    fi

    local dir_total=0 dir_valid=0 loose_md=0
    local entry name

    # Loose markdown files are the wrong layout. nullglob keeps the loop
    # from seeing a literal '*.md' when nothing matches.
    shopt -s nullglob
    for entry in "$SKILLS_DIR"/*.md; do
        [[ -f "$entry" ]] || continue
        loose_md=$((loose_md + 1))
        fail "Flat .md file found (should be folder): $(basename "$entry")"
    done
    shopt -u nullglob

    if [[ "$loose_md" -eq 0 ]]; then
        pass "No flat .md files in skills/ (correct)"
    fi

    # Every sub-directory is treated as a skill and must ship a SKILL.md.
    for entry in "$SKILLS_DIR"/*/; do
        [[ -d "$entry" ]] || continue
        dir_total=$((dir_total + 1))
        name=$(basename "$entry")

        if [[ -f "$entry/SKILL.md" ]]; then
            dir_valid=$((dir_valid + 1))
            pass "Skill '$name' has SKILL.md"
        else
            fail "Skill '$name' missing SKILL.md"
        fi
    done

    printf '\n%s\n' "Skills found: $dir_total folders, $loose_md flat files"

    # Only loose files and no folders at all: the whole tree uses the old layout.
    if [[ "$loose_md" -gt 0 && "$dir_total" -eq 0 ]]; then
        fail "Skills use flat .md structure - must be folders with SKILL.md"
    fi
}

# ============================================================================
# TEST 2: SKILL.md YAML Frontmatter
# Each SKILL.md must have YAML frontmatter with 'name' and 'description'
# ============================================================================
# Verify that every skills/<name>/SKILL.md opens with a YAML frontmatter block
# containing top-level 'name:' and 'description:' keys.
# Globals: SKILLS_DIR (read); results are recorded through pass/fail/warn.
#
# Only the FIRST '---' ... '---' block is inspected. A sed range such as
# '/^---$/,/^---$/p' re-opens at every later '---' line, so a markdown
# horizontal rule in the body could smuggle a 'description:' line into the
# check; capping the result at 20 lines would also hide keys in long
# frontmatter. Both problems are avoided by stopping at the closing delimiter.
test_skill_frontmatter() {
    header "TEST: SKILL.md YAML Frontmatter"

    local skill_dir skill_name skill_file frontmatter flat_file

    for skill_dir in "$SKILLS_DIR"/*/; do
        if [ -d "$skill_dir" ] && [ -f "$skill_dir/SKILL.md" ]; then
            skill_name=$(basename "$skill_dir")
            skill_file="$skill_dir/SKILL.md"

            # Frontmatter must start on the very first line.
            if [ "$(head -n 1 "$skill_file")" = "---" ]; then
                # Line 1 is the opening delimiter (just verified); print
                # everything up to, but excluding, the closing delimiter.
                frontmatter=$(awk '
                    NR == 1 { next }
                    /^---$/ { exit }
                    { print }
                ' "$skill_file")

                # Here-strings instead of `echo | grep -q`: grep -q may exit
                # before the writer finishes, which pipefail would turn into
                # a spurious failure on large input.
                if grep -q "^name:" <<<"$frontmatter"; then
                    pass "Skill '$skill_name' has 'name' field"
                else
                    fail "Skill '$skill_name' missing 'name' in frontmatter"
                fi

                if grep -q "^description:" <<<"$frontmatter"; then
                    pass "Skill '$skill_name' has 'description' field"
                else
                    fail "Skill '$skill_name' missing 'description' in frontmatter"
                fi
            else
                fail "Skill '$skill_name' missing YAML frontmatter (must start with ---)"
            fi
        fi
    done

    # Loose .md files have the wrong layout and are only reported, not parsed.
    shopt -s nullglob
    for flat_file in "$SKILLS_DIR"/*.md; do
        if [ -f "$flat_file" ]; then
            warn "Flat file '$(basename "$flat_file")' cannot be validated (wrong structure)"
        fi
    done
    shopt -u nullglob
}

# ============================================================================
# TEST 3: Commands Structure
# Commands should be .md files in commands/
# ============================================================================
# Verify that commands/ exists and holds at least one *.md command file,
# reporting each command by its file name without the extension.
# Globals: COMMANDS_DIR (read); results are recorded through pass/fail.
test_commands_structure() {
    header "TEST: Commands Structure"

    if [[ ! -d "$COMMANDS_DIR" ]]; then
        fail "Commands directory does not exist: $COMMANDS_DIR"
        return
    fi

    local found=0 md_path
    for md_path in "$COMMANDS_DIR"/*.md; do
        # Without nullglob an unmatched pattern stays literal; -f filters it out.
        [[ -f "$md_path" ]] || continue
        found=$((found + 1))
        pass "Command found: $(basename "$md_path" .md)"
    done

    if [[ "$found" -gt 0 ]]; then
        printf '\n%s\n' "Total commands: $found"
    else
        fail "No commands found in $COMMANDS_DIR"
    fi
}

# ============================================================================
# TEST 4: Hooks Structure (checks ALL hooks dynamically)
# ============================================================================
# Inspect every regular file in the repository's hooks/ directory: each must
# be executable (failure otherwise) and should start with a shebang (warning
# otherwise). If ~/.claude/hooks exists, its files are checked for the
# executable bit as well.
# Globals: HOOKS_DIR, HOME (read); results are recorded through pass/fail/warn.
test_hooks_structure() {
    header "TEST: Hooks Structure"

    if [[ ! -d "$HOOKS_DIR" ]]; then
        warn "Hooks directory does not exist: $HOOKS_DIR"
        return
    fi

    local repo_total=0 candidate label

    shopt -s nullglob
    for candidate in "$HOOKS_DIR"/*; do
        [[ -f "$candidate" ]] || continue
        repo_total=$((repo_total + 1))
        label=$(basename "$candidate")

        pass "Hook found: $label"

        if [[ -x "$candidate" ]]; then
            pass "Hook '$label' is executable"
        else
            fail "Hook '$label' is NOT executable"
        fi

        # A missing interpreter line is only a warning.
        if head -1 "$candidate" | grep -q "^#!"; then
            pass "Hook '$label' has shebang"
        else
            warn "Hook '$label' missing shebang"
        fi
    done
    shopt -u nullglob

    if [[ "$repo_total" -gt 0 ]]; then
        printf '\n%s\n' "Total hooks: $repo_total"
    else
        warn "No hooks found in $HOOKS_DIR"
    fi

    # Hooks already installed for the current user.
    local user_hooks="$HOME/.claude/hooks"
    if [[ -d "$user_hooks" ]]; then
        printf '\n%s\n' "Checking installed hooks (~/.claude/hooks/):"
        local user_total=0
        for candidate in "$user_hooks"/*; do
            [[ -f "$candidate" ]] || continue
            user_total=$((user_total + 1))
            label=$(basename "$candidate")
            if [[ -x "$candidate" ]]; then
                pass "Installed hook '$label' is executable"
            else
                fail "Installed hook '$label' is NOT executable"
            fi
        done
        echo "Installed hooks: $user_total"
    fi
}

# ============================================================================
# TEST 5: Install Script
# ============================================================================
# Verify that install.sh exists at the repository root, is executable, and
# mentions SKILL.md (a missing mention is only a warning).
# Globals: ROOT_DIR (read); results are recorded through pass/fail/warn.
test_install_script() {
    header "TEST: Install Script"

    local installer="$ROOT_DIR/install.sh"

    if [[ ! -f "$installer" ]]; then
        fail "install.sh missing"
        return
    fi
    pass "install.sh exists"

    if [[ -x "$installer" ]]; then
        pass "install.sh is executable"
    else
        fail "install.sh is not executable"
    fi

    # Textual check only: does the installer mention the folder/SKILL.md layout?
    if grep -q "SKILL.md" "$installer"; then
        pass "install.sh references SKILL.md structure"
    else
        warn "install.sh may not handle SKILL.md structure"
    fi
}

# ============================================================================
# TEST 6: Installed Skills (checks ~/.claude/skills/)
# ============================================================================
# Summarise the skills installed under ~/.claude/skills: directories that
# contain SKILL.md count as properly structured, loose *.md files count as
# failures. A missing directory is only a warning.
# Globals: HOME (read); results are recorded through pass/fail/warn.
test_installed_skills() {
    header "TEST: Installed Skills (~/.claude/skills/)"

    local target="$HOME/.claude/skills"

    if [[ ! -d "$target" ]]; then
        warn "No skills installed at $target"
        return
    fi

    local structured=0 loose=0 entry

    # Directories that ship a SKILL.md.
    for entry in "$target"/*/; do
        if [[ -d "$entry" && -f "$entry/SKILL.md" ]]; then
            structured=$((structured + 1))
        fi
    done

    # Loose markdown files sitting directly in the skills directory.
    shopt -s nullglob
    for entry in "$target"/*.md; do
        [[ -f "$entry" ]] || continue
        loose=$((loose + 1))
    done
    shopt -u nullglob

    if [[ "$structured" -gt 0 ]]; then
        pass "Found $structured properly structured skills"
    fi

    if [[ "$loose" -gt 0 ]]; then
        fail "Found $loose flat .md files (wrong structure)"
    fi

    printf '\n%s\n' "Installed: $structured folder skills, $loose flat files"
}

# ============================================================================
# TEST 7: README Documentation
# ============================================================================
# Verify that README.md exists at the repository root and mentions a
# quick-start section and a skills listing (each missing section is a warning).
# Globals: ROOT_DIR (read); results are recorded through pass/fail/warn.
test_readme() {
    header "TEST: README Documentation"

    local readme="$ROOT_DIR/README.md"

    if [ ! -f "$readme" ]; then
        fail "README.md missing"
        return
    fi
    pass "README.md exists"

    # Extended regex (-E) with '|': the '\|' alternation inside a basic
    # regex is a GNU extension whose behavior POSIX leaves undefined.
    if grep -qE 'Quick Start|Quick Install' "$readme"; then
        pass "README has Quick Start section"
    else
        warn "README missing Quick Start section"
    fi

    if grep -qE "Skills Included|What's Included" "$readme"; then
        pass "README has Skills listing"
    else
        warn "README missing Skills listing"
    fi
}

# ============================================================================
# TEST 8: Scripts Structure
# ============================================================================
# Verify that every *.sh file directly under scripts/ is executable.
# A missing or empty scripts/ directory is only a warning.
# Globals: ROOT_DIR (read); results are recorded through pass/fail/warn.
test_scripts_structure() {
    header "TEST: Scripts Structure"

    local scripts_dir="$ROOT_DIR/scripts"

    if [[ ! -d "$scripts_dir" ]]; then
        warn "Scripts directory does not exist: $scripts_dir"
        return
    fi

    local total=0 candidate label

    shopt -s nullglob
    for candidate in "$scripts_dir"/*.sh; do
        [[ -f "$candidate" ]] || continue
        total=$((total + 1))
        label=$(basename "$candidate")

        pass "Script found: $label"

        if [[ -x "$candidate" ]]; then
            pass "Script '$label' is executable"
        else
            fail "Script '$label' is NOT executable"
        fi
    done
    shopt -u nullglob

    if [[ "$total" -gt 0 ]]; then
        printf '\n%s\n' "Total scripts: $total"
    else
        warn "No scripts found in $scripts_dir"
    fi
}

# ============================================================================
# QUICK MODE - Essential checks only (for initialize-project)
# ============================================================================
# Lightweight sanity check of the per-user installation under ~/.claude.
# Prints one status line each for skills, commands and hooks.
# Globals: HOME, ROOT_DIR, RED, GREEN, YELLOW, NC (read)
# Returns: 1 if the skills or commands directory is missing, or if loose
#          *.md files sit directly in the skills directory; 0 otherwise
#          (warnings do not affect the status).
quick_validate() {
    echo ""
    echo "🔍 Quick validation of Maggy installation..."
    echo ""

    local errors=0
    local skill_count flat_count cmd_count hook_count

    # Check skills directory exists and has content
    if [ -d "$HOME/.claude/skills" ]; then
        # `find -type d` also lists the skills directory itself, hence the
        # "greater than 1" test and the "- 1" in the message below.
        skill_count=$(find "$HOME/.claude/skills" -maxdepth 1 -type d 2>/dev/null | wc -l)
        flat_count=$(find "$HOME/.claude/skills" -maxdepth 1 -name "*.md" -type f 2>/dev/null | wc -l)
        # BSD/macOS wc pads its output with leading blanks; strip them so
        # the numbers are clean in both comparisons and messages.
        skill_count=${skill_count//[[:space:]]/}
        flat_count=${flat_count//[[:space:]]/}

        if [ "$flat_count" -gt 0 ]; then
            echo -e "${RED}✗${NC} Skills use flat .md structure (need folder/SKILL.md)"
            errors=$((errors + 1))
        elif [ "$skill_count" -gt 1 ]; then
            echo -e "${GREEN}✓${NC} Skills installed ($((skill_count - 1)) skills)"
        else
            echo -e "${YELLOW}⚠${NC} No skills found in ~/.claude/skills/"
        fi
    else
        echo -e "${RED}✗${NC} Skills directory missing (~/.claude/skills/)"
        errors=$((errors + 1))
    fi

    # Check commands
    if [ -d "$HOME/.claude/commands" ]; then
        cmd_count=$(find "$HOME/.claude/commands" -name "*.md" -type f 2>/dev/null | wc -l)
        cmd_count=${cmd_count//[[:space:]]/}
        if [ "$cmd_count" -gt 0 ]; then
            echo -e "${GREEN}✓${NC} Commands installed ($cmd_count commands)"
        else
            echo -e "${YELLOW}⚠${NC} No commands found"
        fi
    else
        echo -e "${RED}✗${NC} Commands directory missing (~/.claude/commands/)"
        errors=$((errors + 1))
    fi

    # Check hooks (a missing hooks directory is only a warning)
    if [ -d "$HOME/.claude/hooks" ]; then
        hook_count=$(find "$HOME/.claude/hooks" -type f 2>/dev/null | wc -l)
        hook_count=${hook_count//[[:space:]]/}
        if [ "$hook_count" -gt 0 ]; then
            echo -e "${GREEN}✓${NC} Hooks installed ($hook_count hooks)"
        else
            echo -e "${YELLOW}⚠${NC} No hooks found"
        fi
    else
        echo -e "${YELLOW}⚠${NC} Hooks directory missing (~/.claude/hooks/)"
    fi

    echo ""
    if [ "$errors" -gt 0 ]; then
        echo -e "${RED}Bootstrap has issues. Run full validation:${NC}"
        echo "  $ROOT_DIR/tests/validate-structure.sh"
        return 1
    else
        echo -e "${GREEN}Bootstrap installation OK${NC}"
        return 0
    fi
}

# ============================================================================
# TEST 9: Cross-Tool Templates (AGENTS.md, config.toml, helper scripts)
# ============================================================================
# Verify the cross-tool support files: templates/AGENTS.md (with a
# "## Skills" heading), templates/config.toml (with a [[hooks]] table),
# the detect-agents.sh and install-skills.sh helper scripts (present and
# executable), and commands/sync-agents.md.
# Globals: ROOT_DIR (read); results are recorded through pass/fail.
test_cross_tool_templates() {
    header "CROSS-TOOL TEMPLATES"

    local agents_tpl="$ROOT_DIR/templates/AGENTS.md"
    local config_tpl="$ROOT_DIR/templates/config.toml"
    local helper helper_path

    # AGENTS.md template
    if [[ ! -f "$agents_tpl" ]]; then
        fail "templates/AGENTS.md missing"
    else
        pass "templates/AGENTS.md exists"
        if grep -q "## Skills" "$agents_tpl"; then
            pass "AGENTS.md has Skills section"
        else
            fail "AGENTS.md missing Skills section"
        fi
    fi

    # config.toml template
    if [[ ! -f "$config_tpl" ]]; then
        fail "templates/config.toml missing"
    else
        pass "templates/config.toml exists"
        if grep -q '\[\[hooks\]\]' "$config_tpl"; then
            pass "config.toml has [[hooks]] sections"
        else
            fail "config.toml missing [[hooks]] sections"
        fi
    fi

    # Helper scripts must exist and carry the executable bit.
    for helper in detect-agents.sh install-skills.sh; do
        helper_path="$ROOT_DIR/scripts/$helper"
        if [[ ! -f "$helper_path" ]]; then
            fail "scripts/$helper missing"
            continue
        fi
        pass "scripts/$helper exists"
        if [[ -x "$helper_path" ]]; then
            pass "scripts/$helper is executable"
        else
            fail "scripts/$helper is not executable"
        fi
    done

    # sync-agents command
    if [[ -f "$ROOT_DIR/commands/sync-agents.md" ]]; then
        pass "commands/sync-agents.md exists"
    else
        fail "commands/sync-agents.md missing"
    fi
}

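# ============================================================================
# MAIN: command-line help and entry point
# ============================================================================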
# Print the usage text (options and exit codes) to stdout. The program name
# shown on the first line is the basename of $0.
show_help() {
    printf '%s\n' \
        "Usage: $(basename "$0") [OPTIONS]" \
        "" \
        "Validates Maggy structure matches Claude Code requirements." \
        "" \
        "Options:" \
        "  --quick     Quick validation (for initialize-project)" \
        "  --full      Full validation (default)" \
        "  --help      Show this help" \
        "" \
        "Exit codes:" \
        "  0 = All validations passed" \
        "  1 = Validation failures found"
}

# Entry point: parse the command line, then run either the quick check or the
# full suite and exit with the matching status.
# Arguments: any of --quick|-q, --full|-f, --help|-h; when both mode flags
#            are given the last one wins.
# Exits:     --help -> 0; unknown option -> 1; quick mode -> the status of
#            quick_validate; full mode -> 1 if FAIL_COUNT > 0, else 0.
main() {
    local mode="full"
    local arg check

    for arg in "$@"; do
        case "$arg" in
            --quick|-q)
                mode="quick"
                ;;
            --full|-f)
                mode="full"
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "Unknown option: $arg"
                show_help
                exit 1
                ;;
        esac
    done

    if [[ "$mode" == "quick" ]]; then
        quick_validate
        exit $?
    fi

    # Full validation
    printf '%s\n' \
        "" \
        "╔════════════════════════════════════════════════════════════╗" \
        "║     MAGGY STRUCTURE VALIDATION                              ║" \
        "╚════════════════════════════════════════════════════════════╝" \
        "" \
        "Validating: $ROOT_DIR"

    # Run every check in a fixed order; each records its own results.
    for check in \
        test_skills_structure \
        test_skill_frontmatter \
        test_commands_structure \
        test_hooks_structure \
        test_scripts_structure \
        test_install_script \
        test_installed_skills \
        test_readme \
        test_cross_tool_templates
    do
        "$check"
    done

    header "SUMMARY"
    echo ""
    echo -e "${GREEN}Passed:${NC}  $PASS_COUNT"
    echo -e "${RED}Failed:${NC}  $FAIL_COUNT"
    echo -e "${YELLOW}Warnings:${NC} $WARN_COUNT"
    echo ""

    if [[ "$FAIL_COUNT" -gt 0 ]]; then
        echo -e "${RED}VALIDATION FAILED${NC} - $FAIL_COUNT issues need fixing"
        exit 1
    fi

    echo -e "${GREEN}VALIDATION PASSED${NC}"
    exit 0
}

# Script entry point: forward every command-line argument to main unchanged.
main "$@"