[
  {
    "path": ".coda/coda-start.service",
    "content": "[Unit]\nDescription=Coda Alloy Scenario Start\nAfter=network-online.target docker.service\nWants=network-online.target\nRequires=docker.service\n\n[Service]\nType=oneshot\nExecStart=/usr/local/bin/coda-start.sh\nWorkingDirectory=/opt/alloy-scenarios\nStandardOutput=journal\nStandardError=journal\nRemainAfterExit=yes\n\n[Install]\nWantedBy=multi-user.target\n"
  },
  {
    "path": ".coda/coda-start.sh",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\nSCENARIO_FILE=\"/etc/coda/scenario\"\nREPO_DIR=\"/opt/alloy-scenarios\"\n\n# Wait for the scenario file to be written by user_data\necho \"Waiting for ${SCENARIO_FILE}...\"\ntimeout=120\nelapsed=0\nwhile [[ ! -f \"$SCENARIO_FILE\" ]]; do\n  sleep 2\n  elapsed=$((elapsed + 2))\n  if [[ $elapsed -ge $timeout ]]; then\n    echo \"Timed out waiting for ${SCENARIO_FILE} after ${timeout}s\" >&2\n    exit 1\n  fi\ndone\n\nSCENARIO=\"$(cat \"$SCENARIO_FILE\")\"\necho \"Scenario: ${SCENARIO}\"\n\n# Pull latest changes from main so new scenarios are always available.\n# Explicitly fetch+reset main to handle AMIs built from non-main branches.\necho \"Updating alloy-scenarios repo...\"\ngit -C \"$REPO_DIR\" fetch origin main 2>/dev/null \\\n  && git -C \"$REPO_DIR\" checkout main 2>/dev/null \\\n  && git -C \"$REPO_DIR\" reset --hard origin/main 2>/dev/null \\\n  || echo \"Warning: git update failed, using baked version\"\n\n# Start the scenario (builds images on demand)\nexec \"$REPO_DIR/coda\" start \"$SCENARIO\"\n"
  },
  {
    "path": ".coda/packer-install.sh",
    "content": "#!/usr/bin/env bash\n# Packer provisioner: set up coda CLI and systemd services on an AMI.\n#\n# Expects the alloy-scenarios repo to already be cloned to /opt/alloy-scenarios.\n# This script is called by the consuming Packer template after cloning.\n#\n# It intentionally does NOT pre-build scenario images. Scenarios are built\n# on demand by `coda start`, so new scenarios work without re-baking the AMI.\nset -euo pipefail\n\nINSTALL_DIR=\"${1:-/opt/alloy-scenarios}\"\n\necho \"==> Adding host aliases for alloy\"\ngrep -qxF '127.0.0.1 alloy' /etc/hosts || echo '127.0.0.1 alloy' >> /etc/hosts\n\necho \"==> Symlinking coda CLI\"\nchmod +x \"${INSTALL_DIR}/coda\"\nln -sf \"${INSTALL_DIR}/coda\" /usr/local/bin/coda\n\necho \"==> Pre-pulling common base images\"\n# Only pull widely-shared base images to speed up first boot.\n# Scenario-specific images are built on demand by `coda start`.\ndocker pull \"python:3.11-slim\" || true\ndocker pull \"apache/kafka:3.9.0\" || true\n\necho \"==> Installing systemd services\"\ncp \"${INSTALL_DIR}/.coda/coda-start.service\" /etc/systemd/system/coda-start.service\ninstall -m 0755 \"${INSTALL_DIR}/.coda/coda-start.sh\" /usr/local/bin/coda-start.sh\nsystemctl daemon-reload\n\necho \"==> Done\"\n"
  },
  {
    "path": ".cursor/docker-example.mdc",
    "content": "---\ndescription: creating a new alloy docker example\nglobs: \nalwaysApply: false\n---\n# Grafana Alloy Docker Example Template\n\nThis template provides a comprehensive structure for creating a new Grafana Alloy example using Docker Compose. It includes all the necessary components to monitor your application or system with the LGMT stack (Loki, Grafana, Metrics/Prometheus, Tempo).\n\n## Directory Structure\n\n```\nyour-example-name/\n├── config.alloy            # Alloy configuration file\n├── docker-compose.yml      # Docker Compose configuration\n├── loki-config.yaml        # Loki configuration\n├── prom-config.yaml        # Prometheus configuration\n├── tempo-config.yaml       # Tempo configuration (optional)\n├── README.md               # Documentation for your example\n└── [additional files...]   # Any additional files needed for your example\n```\n\n## Docker Compose Template\n\nBelow is a template for your `docker-compose.yml` file that includes all components of the LGMT stack. 
You can customize it based on your specific needs.\n\n```yaml\nversion: '3.8'\n\nservices:\n  # Loki for log aggregation\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.7}\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.10.0}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --enable-feature=exemplar-storage\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Memcached for Tempo\n  memcached:\n    image: memcached:1.6.29\n    container_name: memcached\n    ports:\n      - \"11211:11211\"\n    environment:\n      - MEMCACHED_MAX_MEMORY=64m  # Set the maximum memory usage\n      - MEMCACHED_THREADS=4       # Number of threads to use\n\n  # Tempo initialization (required for file permissions)\n  tempo-init:\n    image: &tempoImage grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.1}\n    user: root\n    entrypoint:\n      - \"chown\"\n      - \"10001:10001\"\n      - \"/var/tempo\"\n    volumes:\n      - ./tempo-data:/var/tempo\n\n  # Tempo for tracing\n  tempo:\n    image: *tempoImage\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp    # tempo\n      - 4317:4317/tcp    # otlp grpc\n      - 4318:4318/tcp    # otlp http\n      - 14268:14268/tcp  # jaeger thrift http\n      - 14250:14250/tcp  # jaeger grpc\n      - 6831:6831/udp    # jaeger thrift compact\n      - 6832:6832/udp    # jaeger thrift binary\n      - 9411:9411/tcp    # zipkin\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n      - ./tempo-data:/var/tempo\n    depends_on:\n      - tempo-init\n      - memcached\n      - 
prometheus\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-12.4.0}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n      - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-exploretraces-app/grafana-exploretraces-app-latest.zip;grafana-traces-app\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Alloy for telemetry pipeline\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.14.0}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - /var/run/docker.sock:/var/run/docker.sock  # For Docker monitoring (optional)\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data 
/etc/alloy/config.alloy\n```\n\n## Configuration Files\n\n### Loki Configuration (loki-config.yaml)\n\n```yaml\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 2h\n```\n\n### Prometheus Configuration (prom-config.yaml)\n\n```yaml\nglobal:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n\nscrape_configs:\n  - job_name: 'prometheus'\n    static_configs:\n      - targets: ['localhost:9090']\n\n  - job_name: 'alloy'\n    static_configs:\n      - targets: ['alloy:12345']\n\notlp:\n  # Recommended attributes to be promoted to labels.\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - cloud.availability_zone\n    - cloud.region\n    - container.name\n    - deployment.environment\n    - deployment.environment.name\n    - k8s.cluster.name\n    - k8s.container.name\n    - k8s.namespace.name\n    - k8s.pod.name\n\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n```\n\n### Tempo Configuration (tempo-config.yaml)\n\n```yaml\nserver:\n  http_listen_port: 3200\n  log_level: info\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search  \n    memcached: \n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 
5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h                # maximum duration of a metrics query, increase for local setups\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:                           \n    jaeger:                            \n      protocols:                       \n        thrift_http:                   \n          endpoint: \"tempo:14268\"      \n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m               \n\ncompactor:\n  compaction:\n    block_retention: 720h              \n\n# Note: The metrics_generator section below can be enabled for built-in service graphs.\n# Alternatively, use Alloy's servicegraph connector as shown in alloy-service-graphs example.\n# metrics_generator:\n#   registry:\n#     external_labels:\n#       source: tempo\n#       cluster: docker-compose\n#   storage:\n#     path: /var/tempo/generator/wal\n#     remote_write:\n#       - url: http://prometheus:9090/api/v1/write\n#         send_exemplars: true\n#   traces_storage:\n#     path: /var/tempo/generator/traces\n#   processor:\n#     local_blocks:\n#       filter_server_spans: false\n#       flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local                     \n    wal:\n      path: /var/tempo/wal             \n    local:\n      path: /var/tempo/blocks\n\n# Note: Service graph generation is commented out to allow using Alloy for this purpose.\n# overrides:\n#   defaults:\n#     metrics_generator:\n#       processors: 
[service-graphs, span-metrics, local-blocks]\n#       generate_native_histograms: both\n```\n\n### Alloy Configuration with Service Graph Generation (config.alloy)\n\n```river\n/*\n * Alloy Configuration for OpenTelemetry Trace Collection with Service Graph Generation\n */\n\n// Receive OpenTelemetry traces\notelcol.receiver.otlp \"default\" {\n  http {}\n  grpc {}\n\n  output {\n    traces = [otelcol.processor.batch.default.input]\n  }\n}\n\n// Batch processor to improve performance\notelcol.processor.batch \"default\" {\n  output {\n    traces = [\n      otelcol.connector.servicegraph.default.input,\n      otelcol.exporter.otlp.tempo.input,\n    ]\n  }\n}\n\n// Service Graph Generator \notelcol.connector.servicegraph \"default\" {\n  metrics_flush_interval = \"10s\"\n  dimensions = [\"http.method\"]\n  \n  output {\n    metrics = [otelcol.exporter.otlphttp.prometheus.input]\n  }\n}\n\n// Send service graph metrics to Prometheus via OTLP\notelcol.exporter.otlphttp \"prometheus\" {\n  client {\n    endpoint = \"http://prometheus:9090/api/v1/otlp\"\n    tls {\n      insecure = true\n    }\n  }\n}\n\n// Send traces to Tempo for storage and visualization\notelcol.exporter.otlp \"tempo\" {\n  client {\n    endpoint = \"tempo:4317\"\n    tls {\n      insecure = true\n    }\n  }\n}\n```\n\n## README Template\n\nThe README.md file for your example should include:\n\n1. A brief description of what the example demonstrates\n2. Instructions for running the example\n3. What to expect after running the example\n4. Any additional steps or configuration needed\n\nExample:\n\n```markdown\n# Your Example Name\n\nBrief description of what this example demonstrates and its purpose.\n\n## Overview\n\nThe example includes:\n- Component 1 (brief description)\n- Component 2 (brief description)\n- ...\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. 
Navigate to this example directory:\n   ```\n   cd your-example-name\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n   \n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh your-example-name\n   ```\n\n4. Access Grafana at http://localhost:3000\n\n## What to Expect\n\nDescribe what the user should see after running the example, including:\n- What metrics/logs are being collected\n- Any dashboards that are automatically set up\n- How to interact with the example\n\n## Service Graphs (if applicable)\n\nIf your example includes service graph visualization capabilities:\n\n1. Open Grafana (http://localhost:3000)\n2. Navigate to Explore\n3. Select the Tempo data source\n4. Click on the \"Service Graph\" tab\n5. You should see a visual representation of the relationships between services\n\n## Architecture\n\n```\n┌────────────┐     ┌──────────┐      ┌───────┐      ┌─────────┐\n│ Component1 │────▶│ Component2│─────▶│Component3│──▶│ Grafana │\n└────────────┘     └──────────┘      └───┬───┘      └─────────┘\n                                         │                ▲\n                                         ▼                │\n                                    ┌─────────┐           │\n                                    │Component4│───────────┘\n                                    └─────────┘\n```\n\nBrief explanation of the architecture and data flow.\n\n## Additional Configuration\n\nAny additional steps or configuration that might be needed.\n```\n\n## Customizing Your Example\n\nTo create your own example:\n\n1. Create a new directory with your example name at the root of the repository\n2. Copy the template files from this template\n3. Customize the files for your specific use case\n4. Update the README.md with specific instructions for your example\n5. Add your example to the main README.md table with a link and description\n"
  },
  {
    "path": ".cursor/k8s-example.mdc",
    "content": "---\ndescription: creating a new alloy kubernetes example\nglobs: \nalwaysApply: false\n---\n# Grafana Alloy Kubernetes Example Template\n\nThis template provides a comprehensive structure for creating a new Grafana Alloy example using Kubernetes. It is based on the Kubernetes Monitoring Helm chart, which abstracts the need to configure Alloy and deploys it with best practices for monitoring Kubernetes clusters.\n\n## Directory Structure\n\n```\nyour-k8s-example-name/\n├── k8s-monitoring-values.yml   # K8s monitoring helm chart values\n├── loki-values.yml             # Loki helm chart values\n├── grafana-values.yml          # Grafana helm chart values\n├── kind.yml                    # Kind cluster configuration (optional)\n├── README.md                   # Documentation for your example\n└── [additional files...]       # Any additional files needed for your example\n```\n\n## Kubernetes Configuration Files\n\n### Kind Cluster Configuration (kind.yml)\n\nIf you're using Kind for local development, you can use this configuration:\n\n```yaml\nkind: Cluster\napiVersion: kind.x-k8s.io/v1alpha4\nnodes:\n- role: control-plane\n  extraPortMappings:\n  - containerPort: 30000\n    hostPort: 30000\n```\n\n### K8s Monitoring Helm Chart Values (k8s-monitoring-values.yml)\n\nThis file configures the Kubernetes Monitoring Helm chart with Alloy settings:\n\n```yaml\n---\ncluster:\n  name: example-monitoring\n\ndestinations:\n  - name: loki\n    type: loki\n    url: http://loki-gateway.meta.svc.cluster.local/loki/api/v1/push\n\n# Cluster Events Collection\nclusterEvents:\n  enabled: true\n  collector: alloy-logs\n  namespaces:\n    - meta\n    - default\n\n# Node Logs Collection\nnodeLogs:\n  enabled: true\n  collector: alloy-logs\n\n# Pod Logs Collection\npodLogs:\n  enabled: true\n  gatherMethod: kubernetesApi\n  collector: alloy-logs\n  labelsToKeep: 
[\"app_kubernetes_io_name\",\"container\",\"instance\",\"job\",\"level\",\"namespace\",\"service_name\",\"service_namespace\",\"deployment_environment\",\"deployment_environment_name\"]\n  structuredMetadata:\n    pod: pod  # Set structured metadata \"pod\" from label \"pod\"\n  namespaces:\n    - meta\n    - default\n\n# Node Metrics Collection\nnodeMetrics:\n  enabled: true\n  collector: alloy-metrics\n\n# Pod Metrics Collection\npodMetrics:\n  enabled: true\n  collector: alloy-metrics\n  namespaces:\n    - meta\n    - default\n\n# Kubernetes API Server Metrics\nkubernetesMetrics:\n  enabled: true\n  collector: alloy-metrics\n\n# Traces Collection (if applicable)\ntraces:\n  enabled: true\n  collector: alloy-receiver\n  namespaces:\n    - meta\n    - default\n\n# Profiles Collection (if applicable)\nprofiles:\n  enabled: true\n  collector: alloy-profiles\n  namespaces:\n    - meta\n    - default\n\n# Collectors Configuration\nalloy-singleton:\n  enabled: false\n\nalloy-metrics:\n  enabled: true\n  alloy:\n    clustering:\n      enabled: true\n\nalloy-logs:\n  enabled: true\n  alloy:\n    mounts:\n      varlog: true\n    clustering:\n      enabled: true\n\nalloy-profiles:\n  enabled: true\n  alloy:\n    clustering:\n      enabled: true\n\nalloy-receiver:\n  enabled: true\n  alloy:\n    clustering:\n      enabled: true\n```\n\n### Loki Helm Chart Values (loki-values.yml)\n\nConfiguration for the Loki Helm chart:\n\n```yaml\n---\nloki:\n  auth_enabled: false\n  commonConfig:\n    replication_factor: 1\n  schemaConfig:\n    configs:\n      - from: 2024-01-01\n        store: tsdb\n        object_store: s3\n        schema: v13\n        index:\n          prefix: loki_index_\n          period: 24h\n  ingester:\n    chunk_encoding: snappy\n  tracing:\n    enabled: true\n  pattern_ingester:\n      enabled: true\n  limits_config:\n    allow_structured_metadata: true\n    volume_enabled: true\n  ruler:\n    enable_api: true\n  querier:\n    max_concurrent: 4\n\nminio:\n  
enabled: true\n      \ndeploymentMode: SingleBinary\nsingleBinary:\n  replicas: 1\n  resources:\n    limits:\n      cpu: 4\n      memory: 4Gi\n    requests:\n      cpu: 2\n      memory: 2Gi\n  extraEnv:\n    - name: GOMEMLIMIT\n      value: 3750MiB\n\nchunksCache:\n  writebackSizeLimit: 10MB\n\n# Zero out replica counts of other deployment modes\nbackend:\n  replicas: 0\nread:\n  replicas: 0\nwrite:\n  replicas: 0\n\ningester:\n  replicas: 0\nquerier:\n  replicas: 0\nqueryFrontend:\n  replicas: 0\nqueryScheduler:\n  replicas: 0\ndistributor:\n  replicas: 0\ncompactor:\n  replicas: 0\nindexGateway:\n  replicas: 0\nbloomCompactor:\n  replicas: 0\nbloomGateway:\n  replicas: 0\n```\n\n### Grafana Helm Chart Values (grafana-values.yml)\n\nConfiguration for the Grafana Helm chart:\n\n```yaml\n---\npersistence:\n  type: pvc\n  enabled: true\n\n# DO NOT DO THIS IN PRODUCTION USECASES\nadminUser: admin\nadminPassword: adminadminadmin\n# CONSIDER USING AN EXISTING SECRET\n# admin:\n#  existingSecret: \"\"\n#  userKey: admin-user\n#  passwordKey: admin-password\n\nservice:\n  enabled: true\n  type: ClusterIP\n\ndatasources:\n  datasources.yaml:\n    apiVersion: 1\n    datasources:\n    - name: Loki\n      type: loki\n      access: proxy\n      orgId: 1\n      url: http://loki-gateway.meta.svc.cluster.local:80\n      basicAuth: false\n      isDefault: false\n      version: 1\n      editable: false\n    - name: Prometheus\n      type: prometheus\n      access: proxy\n      orgId: 1\n      url: http://prometheus-server.meta.svc.cluster.local:80\n      basicAuth: false\n      isDefault: true\n      version: 1\n      editable: false\n    - name: Tempo\n      type: tempo\n      access: proxy\n      orgId: 1\n      url: http://tempo.meta.svc.cluster.local:80\n      basicAuth: false\n      isDefault: false\n      version: 1\n      editable: false\n```\n\n## README Template\n\nHere's a template for your example's README.md:\n\n```markdown\n# Your Kubernetes Example Name\n\nBrief 
description of what this example demonstrates and its purpose.\n\n## Prerequisites\n\n- Kubernetes cluster (or Kind for local development)\n- Helm (v3.x)\n- kubectl\n\n## Setup\n\n### 1. Create a Kubernetes Cluster (Optional, if using Kind)\n\n```bash\nkind create cluster --config kind.yml\n```\n\n### 2. Create a Namespace for Monitoring\n\n```bash\nkubectl create namespace meta\n```\n\n### 3. Install Loki\n\nAdd the Grafana Helm repository if you haven't already:\n\n```bash\nhelm repo add grafana https://grafana.github.io/helm-charts\nhelm repo update\n```\n\nInstall Loki:\n\n```bash\nhelm install --values loki-values.yml loki grafana/loki -n meta\n```\n\n### 4. Install Grafana\n\n```bash\nhelm install --values grafana-values.yml grafana grafana/grafana --namespace meta\n```\n\n### 5. Install Kubernetes Monitoring (with Alloy)\n\n```bash\nhelm install --values ./k8s-monitoring-values.yml k8s grafana/k8s-monitoring -n meta\n```\n\n## Accessing the Dashboard\n\n### Port Forward Grafana\n\n```bash\nkubectl port-forward -n meta svc/grafana 3000:80\n```\n\nNavigate to http://localhost:3000 in your browser. The default credentials are:\n- Username: admin\n- Password: adminadminadmin\n\n## What to Expect\n\nDescribe what the user should see after setting up the example, including:\n- What metrics/logs are being collected\n- Any dashboards that are automatically set up\n- How to interact with the example\n\n## Cleanup\n\nTo remove the deployed resources:\n\n```bash\nhelm uninstall k8s -n meta\nhelm uninstall grafana -n meta\nhelm uninstall loki -n meta\nkubectl delete namespace meta\n```\n\nIf you created a Kind cluster:\n\n```bash\nkind delete cluster\n```\n```\n\n## Customizing Your Example\n\nTo create your own example:\n\n1. Create a new directory with your example name at the root of the repository\n2. Copy the template files from this template\n3. Customize the files for your specific use case\n4. Update the README.md with specific instructions for your example\n5. 
Add your example to the main README.md table with a link and description\n\n## Typical Use Cases for Kubernetes Examples\n\n1. **Logs Collection**: Collecting and analyzing logs from applications running in Kubernetes\n2. **Metrics Monitoring**: Monitoring application and infrastructure metrics\n3. **Tracing**: Distributed tracing for microservices\n4. **Profiling**: Performance profiling of applications\n5. **Combined Observability**: Demonstrating how to use all telemetry types together\n\n## Special Considerations for Kubernetes\n\n- **Resource Limits**: Adjust resource requests and limits based on your cluster capacity\n- **Persistent Storage**: Configure appropriate storage classes for your environment\n- **Security**: In production environments, use proper authentication methods\n- **Network Policies**: Consider adding network policies if required for your environment\n"
  },
  {
    "path": ".github/k8s-scenarios.json",
    "content": "{\n  \"metrics\": [\n    { \"release\": \"prometheus\", \"chart\": \"prometheus-community/prometheus\", \"values\": \"prometheus-values.yml\" },\n    { \"release\": \"grafana\",    \"chart\": \"grafana/grafana\",                 \"values\": \"grafana-values.yml\" },\n    { \"release\": \"k8s\",        \"chart\": \"grafana/k8s-monitoring\",          \"values\": \"k8s-monitoring-values.yml\", \"version\": \"^4.0.0\" }\n  ],\n  \"logs\": [\n    { \"release\": \"loki\",       \"chart\": \"grafana/loki\",                    \"values\": \"loki-values.yml\" },\n    { \"release\": \"grafana\",    \"chart\": \"grafana/grafana\",                 \"values\": \"grafana-values.yml\" },\n    { \"release\": \"k8s\",        \"chart\": \"grafana/k8s-monitoring\",          \"values\": \"k8s-monitoring-values.yml\", \"version\": \"^4.0.0\" }\n  ],\n  \"tracing\": [\n    { \"release\": \"tempo\",      \"chart\": \"grafana/tempo\",                   \"values\": \"tempo-values.yml\" },\n    { \"release\": \"grafana\",    \"chart\": \"grafana/grafana\",                 \"values\": \"grafana-values.yml\" },\n    { \"release\": \"k8s\",        \"chart\": \"grafana/k8s-monitoring\",          \"values\": \"k8s-monitoring-values.yml\", \"version\": \"^4.0.0\" }\n  ],\n  \"profiling\": [\n    { \"release\": \"pyroscope\",  \"chart\": \"grafana/pyroscope\",               \"values\": \"pyroscope-values.yml\" },\n    { \"release\": \"grafana\",    \"chart\": \"grafana/grafana\",                 \"values\": \"grafana-values.yml\" },\n    { \"release\": \"k8s\",        \"chart\": \"grafana/k8s-monitoring\",          \"values\": \"k8s-monitoring-values.yml\", \"version\": \"^4.0.0\" }\n  ],\n  \"events\": [\n    { \"release\": \"loki\",       \"chart\": \"grafana/loki\",                    \"values\": \"loki-values.yml\" },\n    { \"release\": \"grafana\",    \"chart\": \"grafana/grafana\",                 \"values\": \"grafana-values.yml\" }\n  ]\n}\n"
  },
  {
    "path": ".github/scenario-list.txt",
    "content": "aws-firehose-logs\nblackbox-probing\ncontinuous-profiling\ndocker-monitoring\nelasticsearch-monitoring\nfaro-frontend-observability\ngame-of-tracing\ngelf-log-ingestion\nkafka\nlinux\nlog-api-gateway\nlog-secret-filtering\nlogs-file\nlogs-tcp\nmail-house\nmemcached-monitoring\nmysql-monitoring\nnginx-monitoring\notel-basic-tracing\notel-metrics-pipeline\notel-span-metrics\notel-tail-sampling\notel-tracing-service-graphs\npostgres-monitoring\nredis-monitoring\nrouting\nself-monitoring\nsnmp\nsyslog\nsystemd-journal\ntrace-delivery\nvault-secrets\nwindows\nwindows-events\n"
  },
  {
    "path": ".github/workflows/check-image-versions.yml",
    "content": "name: check-image-versions\n\n# Drift guard: every ${VAR:-default} fallback in a docker-compose file\n# must match the value of VAR in image-versions.env.\n#\n# Without this check, renovate's docker manager (which updates fallbacks\n# in compose files) and the customManager in renovate.json (which\n# updates image-versions.env) can fall out of lockstep — leaving anyone\n# who runs `docker compose up` without `--env-file image-versions.env`\n# on stale versions.\n\non:\n  pull_request:\n    paths:\n      - '**/docker-compose.yml'\n      - '**/docker-compose.yaml'\n      - '**/docker-compose.coda.yml'\n      - '**/docker-compose.coda.yaml'\n      - 'image-versions.env'\n      - '.github/workflows/check-image-versions.yml'\n  push:\n    branches: [main]\n\npermissions:\n  contents: read\n\njobs:\n  check:\n    name: Compose fallbacks vs image-versions.env\n    runs-on: ubuntu-latest\n    timeout-minutes: 3\n    steps:\n      - name: Harden runner\n        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1\n        with:\n          egress-policy: block\n          allowed-endpoints: >\n            api.github.com:443\n            github.com:443\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n\n      - name: Compare fallbacks against image-versions.env\n        run: |\n          set -euo pipefail\n\n          # Build a map of VAR=value from image-versions.env\n          declare -A want\n          while IFS='=' read -r k v; do\n            [[ \"$k\" =~ ^[A-Z_]+_VERSION$ ]] || continue\n            want[$k]=\"$v\"\n          done < <(grep -E '^[A-Z_]+_VERSION=' image-versions.env)\n\n          echo \"Tracking ${#want[@]} version variables:\"\n          for k in \"${!want[@]}\"; do\n            echo \"  $k=${want[$k]}\"\n          done\n          echo\n\n          # Scan every fallback. 
Pattern: ${VAR:-default}\n          mismatches=0\n          while IFS= read -r -d '' f; do\n            while IFS= read -r line; do\n              if [[ \"$line\" =~ \\$\\{([A-Z_]+_VERSION):-([^}]+)\\} ]]; then\n                var=\"${BASH_REMATCH[1]}\"\n                fallback=\"${BASH_REMATCH[2]}\"\n                expected=\"${want[$var]:-}\"\n                if [ -z \"$expected\" ]; then\n                  echo \"::warning file=$f::unknown variable $var (not in image-versions.env)\"\n                  continue\n                fi\n                if [ \"$fallback\" != \"$expected\" ]; then\n                  echo \"::error file=$f::\\${$var:-$fallback} should be \\${$var:-$expected}\"\n                  mismatches=$((mismatches+1))\n                fi\n              fi\n            done < \"$f\"\n          done < <(find . -type f \\\n                    \\( -name 'docker-compose.yml' -o -name 'docker-compose.yaml' \\\n                       -o -name 'docker-compose.coda.yml' -o -name 'docker-compose.coda.yaml' \\) \\\n                    -not -path '*/k8s/*' -not -path '*/.git/*' -print0)\n\n          if [ \"$mismatches\" -gt 0 ]; then\n            echo\n            echo \"::error::Found $mismatches drift(s). Update either the fallback in the compose file or image-versions.env.\"\n            exit 1\n          fi\n          echo \"OK — all fallbacks match image-versions.env\"\n"
  },
  {
    "path": ".github/workflows/validate-k8s-scenarios.yml",
    "content": "name: validate-k8s-scenarios\n\n# Lightweight validation for k8s scenarios under k8s/. Mirrors the\n# defense-in-depth posture of validate-scenarios.yml (docker), but\n# without paying the cost of a real cluster on every PR:\n#\n#   validate (every PR):  helm template + kubeconform per chart per scenario.\n#                         Renders offline, validates against k8s API schemas.\n#   kind-integration:     opt-in via workflow_dispatch only. Boots kind,\n#                         helm-installs all charts, waits for pods Ready.\n#\n# Defense-in-depth (same as the docker workflow):\n#   - permissions: contents: read       (no token write, no secrets)\n#   - harden-runner egress allowlist    (compromised tool can't phone home)\n#   - third-party actions SHA-pinned    (tag pushes can't sneak in)\n#   - direct binary downloads, version-pinned (helm, kubeconform)\n#   - github-hosted ephemeral runners\n#   - pull_request, not pull_request_target\n\non:\n  pull_request:\n    paths:\n      - 'k8s/**'\n      - '.github/k8s-scenarios.json'\n      - '.github/workflows/validate-k8s-scenarios.yml'\n  workflow_dispatch:\n    inputs:\n      kind_integration:\n        description: 'Run the kind-cluster integration job after validation'\n        type: boolean\n        default: false\n      scenario:\n        description: 'Which scenario(s) to run kind-integration for (\"all\" or comma-separated subset, e.g. \"metrics,logs\")'\n        type: string\n        default: 'all'\n\npermissions:\n  contents: read\n\nconcurrency:\n  group: validate-k8s-${{ github.event.pull_request.number || github.run_id }}\n  cancel-in-progress: true\n\nenv:\n  HELM_VERSION: 'v4.1.4'\n  KUBECONFORM_VERSION: 'v0.6.7'\n  KUBERNETES_VERSION: '1.31.0'\n\njobs:\n  # ──────────────────────────────────────────────────────────────────\n  # validate: helm template + kubeconform per chart for each of the\n  # 5 scenarios. 
Pure offline — no API server, no real cluster.\n  # ──────────────────────────────────────────────────────────────────\n  validate:\n    name: Validate ${{ matrix.scenario }}\n    runs-on: ubuntu-latest\n    timeout-minutes: 8\n    strategy:\n      fail-fast: false\n      matrix:\n        scenario: [metrics, logs, tracing, profiling, events]\n    steps:\n      - name: Harden runner\n        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1\n        with:\n          egress-policy: block\n          allowed-endpoints: >\n            api.github.com:443\n            github.com:443\n            objects.githubusercontent.com:443\n            release-assets.githubusercontent.com:443\n            raw.githubusercontent.com:443\n            get.helm.sh:443\n            grafana.github.io:443\n            prometheus-community.github.io:443\n            charts.bitnami.com:443\n            pypi.org:443\n            files.pythonhosted.org:443\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n\n      - name: Install helm + kubeconform + yamllint\n        run: |\n          set -euo pipefail\n          # Helm — pinned by version. Upstream tarball, verify by sha would\n          # be ideal but Helm doesn't publish stable per-tag checksums in a\n          # consumable way; pinning the version + restricting egress is the\n          # workable compromise.\n          curl -fsSL \"https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz\" \\\n            | tar -xz -C /tmp\n          sudo install -m 0755 /tmp/linux-amd64/helm /usr/local/bin/helm\n\n          # kubeconform — pinned. 
The release archive contains just the\n          # binary; we extract it directly.\n          curl -fsSL \"https://github.com/yannh/kubeconform/releases/download/${KUBECONFORM_VERSION}/kubeconform-linux-amd64.tar.gz\" \\\n            | sudo tar -xz -C /usr/local/bin/ kubeconform\n\n          # yamllint — preinstalled python3 + pip on ubuntu-latest.\n          sudo pip install --quiet yamllint\n\n          helm version --short\n          kubeconform -v\n          yamllint --version\n\n      - name: Helm repo bootstrap\n        run: |\n          set -euo pipefail\n          helm repo add grafana https://grafana.github.io/helm-charts\n          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts\n          helm repo update\n\n      - name: yamllint values files\n        # Loose ruleset — values files commonly use long datasource URLs and\n        # don't need a leading `---`. Run as advisory: don't fail the job on\n        # style; we want it for hygiene signal, not blocking.\n        continue-on-error: true\n        run: |\n          yamllint -d \"{extends: relaxed, rules: {line-length: disable, document-start: disable}}\" \\\n            k8s/${{ matrix.scenario }}/\n\n      - name: Helm template + kubeconform per chart\n        run: |\n          set -euo pipefail\n          mkdir -p /tmp/rendered\n          fail=0\n\n          # Iterate the scenario's chart list. 
helm template against the\n          # remote chart triggers values.schema.json validation upstream\n          # (most grafana charts ship a schema), then kubeconform validates\n          # the rendered Kubernetes API objects against the target version.\n          while IFS= read -r entry; do\n            release=$(jq -r '.release' <<<\"$entry\")\n            chart=$(jq -r '.chart'    <<<\"$entry\")\n            values=$(jq -r '.values'  <<<\"$entry\")\n            version=$(jq -r '.version // \"\"' <<<\"$entry\")\n            values_path=\"k8s/${{ matrix.scenario }}/$values\"\n\n            ver_arg=()\n            [ -n \"$version\" ] && ver_arg=(--version \"$version\")\n\n            echo \"::group::helm template $release ($chart${version:+ @$version})\"\n            out=\"/tmp/rendered/${{ matrix.scenario }}-$release.yaml\"\n            if ! helm template \"$release\" \"$chart\" \"${ver_arg[@]}\" \\\n                  -f \"$values_path\" > \"$out\" 2> \"/tmp/rendered/${{ matrix.scenario }}-$release.err\"; then\n              echo \"::error::helm template failed for $release\"\n              cat \"/tmp/rendered/${{ matrix.scenario }}-$release.err\"\n              fail=1\n              echo \"::endgroup::\"\n              continue\n            fi\n            lines=$(wc -l < \"$out\")\n            echo \"Rendered $lines lines to $out\"\n            echo \"::endgroup::\"\n\n            echo \"::group::kubeconform $release\"\n            # -ignore-missing-schemas: skip CRDs whose schemas aren't in the\n            # datree catalog (catching built-in K8s API drift is the real\n            # signal; CRD validation is the chart maintainer's responsibility).\n            if ! 
kubeconform -strict -summary \\\n                  -kubernetes-version \"$KUBERNETES_VERSION\" \\\n                  -schema-location default \\\n                  -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \\\n                  -ignore-missing-schemas \\\n                  \"$out\"; then\n              echo \"::error::kubeconform failed for $release\"\n              fail=1\n            fi\n            echo \"::endgroup::\"\n          done < <(jq -c --arg s \"${{ matrix.scenario }}\" '.[$s][]' .github/k8s-scenarios.json)\n\n          exit $fail\n\n      - name: Upload rendered manifests\n        if: always()\n        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1\n        with:\n          name: rendered-${{ matrix.scenario }}\n          path: /tmp/rendered/\n          retention-days: 7\n\n  # ──────────────────────────────────────────────────────────────────\n  # kind-integration: Boots a real kind cluster and helm-installs all\n  # charts for the scenario. 
Heavy — only on workflow_dispatch.\n  # ──────────────────────────────────────────────────────────────────\n  kind-integration:\n    name: Kind integration ${{ matrix.scenario }}\n    if: github.event_name == 'workflow_dispatch' && inputs.kind_integration == true\n    runs-on: ubuntu-latest\n    timeout-minutes: 25\n    strategy:\n      fail-fast: false\n      matrix:\n        scenario: [metrics, logs, tracing, profiling, events]\n    steps:\n      - name: Harden runner\n        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1\n        with:\n          egress-policy: block\n          # Adds image registries on top of the validate allowlist —\n          # helm install actually pulls images for kind to schedule.\n          allowed-endpoints: >\n            api.github.com:443\n            github.com:443\n            objects.githubusercontent.com:443\n            release-assets.githubusercontent.com:443\n            raw.githubusercontent.com:443\n            get.helm.sh:443\n            grafana.github.io:443\n            prometheus-community.github.io:443\n            charts.bitnami.com:443\n            registry-1.docker.io:443\n            auth.docker.io:443\n            production.cloudflare.docker.com:443\n            ghcr.io:443\n            quay.io:443\n            cdn.quay.io:443\n            grafana.com:443\n            mcr.microsoft.com:443\n            public.ecr.aws:443\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n\n      - name: Filter by scenario input\n        id: filter\n        # User-supplied workflow_dispatch input is passed via env, NOT\n        # interpolated directly into the run block, to prevent\n        # template-injection (zizmor: template-injection rule).\n        # matrix.scenario IS safe to interpolate directly because it's\n        # constrained to the static list above.\n        env:\n          USER_SCENARIO: ${{ inputs.scenario }}\n         
 MATRIX_SCENARIO: ${{ matrix.scenario }}\n        run: |\n          set -euo pipefail\n          if [ \"$USER_SCENARIO\" = \"all\" ]; then\n            echo \"run=true\" >> \"$GITHUB_OUTPUT\"\n            exit 0\n          fi\n          if grep -qx \"$MATRIX_SCENARIO\" <(tr ',' '\\n' <<<\"$USER_SCENARIO\"); then\n            echo \"run=true\" >> \"$GITHUB_OUTPUT\"\n          else\n            echo \"run=false\" >> \"$GITHUB_OUTPUT\"\n            echo \"::notice::Skipping $MATRIX_SCENARIO (not in user-selected subset '$USER_SCENARIO')\"\n          fi\n\n      - name: Install helm\n        if: steps.filter.outputs.run == 'true'\n        run: |\n          curl -fsSL \"https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz\" \\\n            | tar -xz -C /tmp\n          sudo install -m 0755 /tmp/linux-amd64/helm /usr/local/bin/helm\n\n      - name: Create kind cluster\n        if: steps.filter.outputs.run == 'true'\n        uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0\n        with:\n          config: k8s/${{ matrix.scenario }}/kind.yml\n          cluster_name: ${{ matrix.scenario }}\n\n      - name: Helm bootstrap + install all charts\n        if: steps.filter.outputs.run == 'true'\n        run: |\n          set -euo pipefail\n          helm repo add grafana https://grafana.github.io/helm-charts\n          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts\n          helm repo update\n          kubectl create namespace meta || true\n\n          while IFS= read -r entry; do\n            release=$(jq -r '.release' <<<\"$entry\")\n            chart=$(jq -r '.chart'    <<<\"$entry\")\n            values=$(jq -r '.values'  <<<\"$entry\")\n            version=$(jq -r '.version // \"\"' <<<\"$entry\")\n            values_path=\"k8s/${{ matrix.scenario }}/$values\"\n\n            ver_arg=()\n            [ -n \"$version\" ] && ver_arg=(--version \"$version\")\n\n            echo \"::group::helm install 
$release ($chart)\"\n            helm install \"$release\" \"$chart\" \"${ver_arg[@]}\" \\\n              -f \"$values_path\" -n meta --create-namespace \\\n              --wait --timeout 5m\n            echo \"::endgroup::\"\n          done < <(jq -c --arg s \"${{ matrix.scenario }}\" '.[$s][]' .github/k8s-scenarios.json)\n\n      - name: Wait for pods Ready in meta namespace\n        if: steps.filter.outputs.run == 'true'\n        run: |\n          if ! kubectl wait --for=condition=Ready pods --all -n meta --timeout=10m; then\n            echo \"::error::Pods did not become Ready\"\n            kubectl get pods -n meta -o wide\n            kubectl describe pods -n meta\n            exit 1\n          fi\n          kubectl get pods -n meta -o wide\n"
  },
  {
    "path": ".github/workflows/validate-scenarios.yml",
    "content": "name: validate-scenarios\n\n# Boots every scenario whose files were touched by the PR, after a CVE\n# scan of every image the scenario will run. Designed to make renovate\n# dependency PRs reviewable on signal rather than diff-eyeballing alone.\n#\n# Defense-in-depth (intentional, not paranoia):\n#   - permissions: contents: read       — no token write, no secrets\n#   - third-party actions SHA-pinned    — tag pushes can't sneak in\n#   - trivy advisory scan before boot   — known-bad images flagged in PR\n#   - github-hosted ephemeral runners   — runner state is not persisted\n#\n# Triggered on pull_request (NOT pull_request_target): fork PRs run\n# without secrets, which is the safe default. Updating this file\n# requires the same scrutiny as updating any third-party action SHA.\n\non:\n  pull_request:\n    paths:\n      - '*/docker-compose.yml'\n      - '*/docker-compose.yaml'\n      - '*/docker-compose.coda.yml'\n      - '*/Dockerfile'\n      - '*/config.alloy'\n      - '*/app/**'\n      - '*/*/Dockerfile'\n      - '*/*/requirements.txt'\n      - '*/*/package.json'\n      - '*/*/*.csproj'\n      - 'image-versions.env'\n      - '.github/scenario-list.txt'\n      - '.github/workflows/validate-scenarios.yml'\n  # Manual trigger — runs the full matrix without the sampling cap, so a\n  # maintainer can validate a cross-cutting change (e.g. an LGMT bump\n  # that touches every scenario) before merging. 
PRs auto-sample when\n  # affected count exceeds MATRIX_CAP; workflow_dispatch always runs all.\n  workflow_dispatch: {}\n\nenv:\n  # Maximum scenarios to validate on a PR before sampling kicks in.\n  # Picked so a typical big update finishes within ~30 min wall-clock\n  # at the configured max-parallel; bypassed by workflow_dispatch.\n  MATRIX_CAP: '8'\n\npermissions:\n  contents: read\n\nconcurrency:\n  # `pull_request.number || run_id` keeps PR runs grouped (and superseded\n  # by force-pushes) while still giving every workflow_dispatch run its\n  # own slot — manual full-matrix runs shouldn't cancel each other.\n  group: validate-scenarios-${{ github.event.pull_request.number || github.run_id }}\n  cancel-in-progress: true\n\njobs:\n  # ──────────────────────────────────────────────────────────────────\n  # detect: Map changed files to top-level scenario directories.\n  # Pure shell — no third-party action — to keep the supply-chain\n  # surface minimal.\n  # ──────────────────────────────────────────────────────────────────\n  detect:\n    name: Detect affected scenarios\n    runs-on: ubuntu-latest\n    timeout-minutes: 5\n    outputs:\n      scenarios: ${{ steps.filter.outputs.scenarios }}\n      count: ${{ steps.filter.outputs.count }}\n      count_full: ${{ steps.filter.outputs.count_full }}\n      sampled: ${{ steps.filter.outputs.sampled }}\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n        with:\n          fetch-depth: 0\n\n      - name: Compute affected scenarios\n        id: filter\n        env:\n          EVENT_NAME: ${{ github.event_name }}\n          BASE_SHA: ${{ github.event.pull_request.base.sha }}\n          HEAD_SHA: ${{ github.event.pull_request.head.sha }}\n        run: |\n          set -euo pipefail\n\n          if [ \"$EVENT_NAME\" = \"workflow_dispatch\" ]; then\n            # Manual run: validate every scenario in the canonical list.\n            # No diff to 
compute; sampling cap is bypassed.\n            cp .github/scenario-list.txt /tmp/affected.txt\n          else\n            # The base sha may not be in the local clone with a shallow\n            # checkout; fetch-depth: 0 avoids that, but be belt-and-braces.\n            git fetch origin \"$BASE_SHA\" \"$HEAD_SHA\" --depth=200 2>/dev/null || true\n\n            # Map every changed file to its first path segment. Empty lines\n            # come from root-level files (no segment); awk drops those.\n            git diff --name-only \"$BASE_SHA\" \"$HEAD_SHA\" \\\n              | awk -F/ 'NF>1 {print $1}' \\\n              | sort -u > /tmp/segments.txt\n\n            # Intersect with the canonical scenario list. `|| true` keeps\n            # the pipeline alive when there's no overlap (e.g. a PR that\n            # only touches docs/CI).\n            grep -Fxf /tmp/segments.txt .github/scenario-list.txt \\\n              | sort -u > /tmp/affected.txt || true\n          fi\n\n          count_full=$(wc -l < /tmp/affected.txt | tr -d ' ')\n          sampled=false\n\n          # Sampling cap: when a single PR touches more than MATRIX_CAP\n          # scenarios (typical for image-versions.env / shared-base\n          # changes), validate a deterministic representative subset\n          # rather than the full matrix. Maintainers can run the full\n          # matrix via workflow_dispatch before merging if signal on\n          # every scenario is wanted.\n          #\n          # Determinism: sort by the SHA-256 of \"<scenario><commit>\".\n          # Same commit → same subset, so re-runs are stable. 
Different\n          # commits get different subsets, so coverage rotates over\n          # time across many big-update PRs.\n          if [ \"$EVENT_NAME\" != \"workflow_dispatch\" ] \\\n             && [ \"$count_full\" -gt \"$MATRIX_CAP\" ]; then\n            sampled=true\n            commit_hash=\"${HEAD_SHA:-$GITHUB_SHA}\"\n            while read -r line; do\n              [ -z \"$line\" ] && continue\n              key=$(printf \"%s%s\" \"$line\" \"$commit_hash\" \\\n                    | sha256sum | head -c 16)\n              printf \"%s\\t%s\\n\" \"$key\" \"$line\"\n            done < /tmp/affected.txt \\\n              | sort | head -n \"$MATRIX_CAP\" | cut -f2 > /tmp/active.txt\n          else\n            cp /tmp/affected.txt /tmp/active.txt\n          fi\n\n          count=$(wc -l < /tmp/active.txt | tr -d ' ')\n          scenarios=$(jq -Rsc 'split(\"\\n\") | map(select(length>0))' /tmp/active.txt)\n\n          echo \"scenarios=$scenarios\"     >> \"$GITHUB_OUTPUT\"\n          echo \"count=$count\"             >> \"$GITHUB_OUTPUT\"\n          echo \"count_full=$count_full\"   >> \"$GITHUB_OUTPUT\"\n          echo \"sampled=$sampled\"         >> \"$GITHUB_OUTPUT\"\n\n          {\n            echo \"## Affected scenarios\"\n            echo\n            if [ \"$count_full\" = \"0\" ]; then\n              echo \"_None — PR does not touch any scenario directory._\"\n            elif [ \"$sampled\" = \"true\" ]; then\n              echo \"**$count_full** scenarios affected; sampled **$count** for validation (cap is \\`$MATRIX_CAP\\`).\"\n              echo\n              echo \"Trigger \\`workflow_dispatch\\` on this branch to validate the full matrix.\"\n              echo\n              echo \"Sampled subset:\"\n              echo '```'\n              cat /tmp/active.txt\n              echo '```'\n              echo\n              echo \"<details><summary>Full affected list ($count_full)</summary>\"\n              echo\n              echo '```'\n           
   cat /tmp/affected.txt\n              echo '```'\n              echo\n              echo \"</details>\"\n            else\n              echo \"Count: \\`$count\\`\"\n              echo\n              echo '```'\n              cat /tmp/active.txt\n              echo '```'\n            fi\n          } >> \"$GITHUB_STEP_SUMMARY\"\n\n          if [ \"$sampled\" = \"true\" ]; then\n            echo \"::warning::Sampled $count of $count_full affected scenarios. Run workflow_dispatch on this branch to validate them all.\"\n          fi\n\n  # ──────────────────────────────────────────────────────────────────\n  # scan: For each affected scenario, resolve every explicit `image:`\n  # reference via `docker compose config`, then trivy-scan each one.\n  # Advisory only: HIGH/CRITICAL CVEs that have a fix available are\n  # reported in the step summary but never fail the job.\n  # ──────────────────────────────────────────────────────────────────\n  scan:\n    name: Scan images\n    needs: detect\n    if: needs.detect.outputs.count != '0'\n    runs-on: ubuntu-latest\n    timeout-minutes: 10\n    strategy:\n      fail-fast: false\n      max-parallel: 6\n      matrix:\n        scenario: ${{ fromJSON(needs.detect.outputs.scenarios) }}\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n\n      - name: Compute today's UTC date for cache key\n        id: date\n        run: echo \"today=$(date -u +%Y-%m-%d)\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Restore trivy DB cache\n        # Trivy fetches a fresh vulnerability DB on every cold scan\n        # (~30 MB, ~5-10 s per scenario from mirror.gcr.io). Caching\n        # the DB shaves the cold-pull off every matrix entry after the\n        # first one of the day. 
Key rotates daily so the DB stays\n        # fresh; the restore-keys fallback is intentional — even a\n        # stale-by-hours DB is far better than a cold fetch.\n        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5\n        with:\n          path: /tmp/trivy-cache\n          key: trivy-db-${{ steps.date.outputs.today }}\n          restore-keys: |\n            trivy-db-\n\n      - name: Resolve images for ${{ matrix.scenario }}\n        id: images\n        run: |\n          set -euo pipefail\n          # Try .yml first, fall back to .yaml (some scenarios use either).\n          compose_file=\"\"\n          for ext in yml yaml; do\n            f=\"${{ matrix.scenario }}/docker-compose.$ext\"\n            if [ -f \"$f\" ]; then compose_file=\"$f\"; break; fi\n          done\n          if [ -z \"$compose_file\" ]; then\n            echo \"No docker-compose found for ${{ matrix.scenario }}\" >&2\n            exit 1\n          fi\n\n          # `docker compose config --images` returns service-name defaults\n          # like `game-of-tracing-ai-opponent` for `build:`-only services\n          # — those don't exist in any registry, so trivy fails with\n          # UNAUTHORIZED. Filter to services with an explicit `image:`\n          # field (third-party registry artifacts only). Locally-built\n          # images aren't directly scanned; their FROM base image (e.g.\n          # python:3.11-slim) lives in the Dockerfile and is tracked\n          # separately by renovate's docker manager.\n          docker compose -f \"$compose_file\" \\\n            --env-file image-versions.env \\\n            config --format json \\\n            | jq -r '.services | to_entries[]\n                     | select(.value.image != null)\n                     | .value.image' \\\n            | sort -u > /tmp/images.txt\n          echo \"Images to scan:\"\n          cat /tmp/images.txt\n          if [ ! 
-s /tmp/images.txt ]; then\n            echo \"::notice::No third-party images to scan in this scenario (all services build locally).\"\n          fi\n\n      - name: Trivy scan each image (advisory)\n        # Run trivy via its own docker image (digest-pinned). No\n        # docker.sock mount: trivy pulls the target image itself rather\n        # than reaching into the host's docker — keeps the trivy\n        # container from gaining root-equivalent access on the runner.\n        #\n        # Advisory mode: HIGH/CRITICAL findings are reported via the job\n        # log + step summary table + ::warning:: annotations, but the\n        # step always exits 0. These are demo scenarios; upstream LGMT\n        # images regularly carry HIGH-with-fix findings between releases\n        # and blocking every PR until they catch up isn't useful. Treat\n        # the report as a signal to bump base images, not a merge gate.\n        env:\n          # Suppress ANSI escapes so the log + summary parse cleanly\n          NO_COLOR: '1'\n        run: |\n          set -euo pipefail\n          TRIVY_IMAGE='aquasec/trivy:0.66.0@sha256:086971aaf400beebd94e8300fd8ea623774419597169156cec56eec5b00dfb1e'\n\n          # Pre-pull once so loop iterations don't re-resolve.\n          docker pull \"$TRIVY_IMAGE\"\n\n          mkdir -p /tmp/trivy-cache\n          report_log=/tmp/trivy-output.log\n          : > \"$report_log\"\n\n          while IFS= read -r img; do\n            [ -z \"$img\" ] && continue\n            echo \"::group::Scanning $img\"\n            echo \"=== $img ===\" >> \"$report_log\"\n            # `|| true` so a non-zero trivy exit (had findings) doesn't\n            # abort the loop — we want to scan every image.\n            docker run --rm \\\n                -e NO_COLOR=1 \\\n                -v /tmp/trivy-cache:/root/.cache/trivy \\\n                \"$TRIVY_IMAGE\" image \\\n                --severity HIGH,CRITICAL \\\n                --ignore-unfixed \\\n                
--no-progress \\\n                --timeout 5m \\\n                \"$img\" 2>&1 | tee -a \"$report_log\" || true\n            echo \"::endgroup::\"\n          done < /tmp/images.txt\n\n          # Per-image summary table for the PR's step summary.\n          {\n            echo \"## CVE scan: ${{ matrix.scenario }}\"\n            echo\n            if [ ! -s /tmp/images.txt ]; then\n              echo \"_No third-party images to scan (all services build locally)._\"\n            else\n              echo \"| Image | HIGH | CRITICAL |\"\n              echo \"|---|---:|---:|\"\n              current=\"\"\n              h=0; c=0\n              while IFS= read -r line; do\n                if [[ \"$line\" =~ ^===\\ (.+)\\ ===$ ]]; then\n                  if [ -n \"$current\" ]; then\n                    echo \"| \\`$current\\` | $h | $c |\"\n                  fi\n                  current=\"${BASH_REMATCH[1]}\"\n                  h=0; c=0\n                elif [[ \"$line\" =~ Total:\\ [0-9]+\\ \\(HIGH:\\ ([0-9]+),\\ CRITICAL:\\ ([0-9]+)\\) ]]; then\n                  h=$((h + ${BASH_REMATCH[1]}))\n                  c=$((c + ${BASH_REMATCH[2]}))\n                fi\n              done < \"$report_log\"\n              if [ -n \"$current\" ]; then\n                echo \"| \\`$current\\` | $h | $c |\"\n              fi\n              echo\n              echo \"_HIGH+CRITICAL counts are unfixed CVEs with patches available upstream. Findings here don't block merge — see the job log for the full per-CVE table. Upgrade base images via the relevant renovate PR when fixes appear in a published release._\"\n            fi\n          } >> \"$GITHUB_STEP_SUMMARY\"\n\n          # Emit a single ::warning:: if anything was found, so the PR\n          # gets an inline annotation pointing at the job summary.\n          if grep -qE 'Total:\\ [^0]' \"$report_log\"; then\n            echo \"::warning::trivy found HIGH/CRITICAL unfixed CVEs in scanned images for ${{ matrix.scenario }}. 
See job summary for per-image counts and the log for details.\"\n          fi\n\n  # ──────────────────────────────────────────────────────────────────\n  # smoke: For each affected scenario, boot it via run-example.sh,\n  # wait until something healthy answers (Grafana, then Alloy, then\n  # Prometheus), then tear down.\n  # ──────────────────────────────────────────────────────────────────\n  smoke:\n    name: Smoke test\n    needs: [detect, scan]\n    runs-on: ubuntu-latest\n    timeout-minutes: 15\n    strategy:\n      fail-fast: false\n      max-parallel: 4\n      matrix:\n        scenario: ${{ fromJSON(needs.detect.outputs.scenarios) }}\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n\n      - name: Boot ${{ matrix.scenario }}\n        run: |\n          set -euo pipefail\n          chmod +x ./run-example.sh\n          ./run-example.sh \"${{ matrix.scenario }}\"\n\n      - name: Wait for a healthy endpoint (Grafana, Alloy, or Prometheus)\n        run: |\n          set -euo pipefail\n          # Probe in priority order. Most scenarios expose Grafana on\n          # :3000; self-monitoring exposes Alloy on :12345 instead;\n          # routing remaps Alloy. 
Grafana wins when present, else any\n          # ready endpoint counts as bring-up success.\n          probes=(\n            \"http://localhost:3000/api/health\"\n            \"http://localhost:12345/-/ready\"\n            \"http://localhost:9090/-/ready\"\n          )\n\n          deadline=$(( $(date +%s) + 180 ))   # 3 min total\n          while [ \"$(date +%s)\" -lt \"$deadline\" ]; do\n            for url in \"${probes[@]}\"; do\n              code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 \"$url\" 2>/dev/null || true)\n              if [ \"$code\" = \"200\" ]; then\n                echo \"Healthy: $url\"\n                exit 0\n              fi\n            done\n            sleep 5\n          done\n\n          echo \"::error::No probe endpoint became healthy within 3 min\"\n          exit 1\n\n      - name: Verify no exited containers\n        run: |\n          set -euo pipefail\n          cd \"${{ matrix.scenario }}\"\n          # `docker compose ps --status exited` lists any container that\n          # crashed during bring-up. An empty list is the pass case.\n          exited=$(docker compose ps --status exited --format '{{.Name}}' || true)\n          if [ -n \"$exited\" ]; then\n            echo \"::error::Exited containers detected:\"\n            echo \"$exited\"\n            exit 1\n          fi\n\n      - name: Dump container logs on failure\n        if: failure()\n        run: |\n          cd \"${{ matrix.scenario }}\"\n          docker compose logs --no-color || true\n\n      - name: Tear down\n        if: always()\n        run: |\n          cd \"${{ matrix.scenario }}\"\n          docker compose down --volumes --remove-orphans || true\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   
https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control\n.pdm.toml\n.pdm-python\n.pdm-build/\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n"
  },
  {
    "path": "CLAUDE.md",
    "content": "# CLAUDE.md\n\nThis file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.\n\n## Project Overview\n\nThis is a collection of self-contained demonstration scenarios for **Grafana Alloy**, the telemetry collection and processing pipeline. Each scenario lives in its own top-level directory and showcases a specific monitoring use case using the **LGMT stack** (Loki, Grafana, Metrics/Prometheus, Tempo).\n\n## Running Scenarios\n\n```bash\n# Option 1: Direct (uses default image versions in docker-compose.yml)\ncd <scenario-dir> && docker compose up -d\n\n# Option 2: Centralized image versions (from repo root)\n./run-example.sh <scenario-dir>\n\n# Stop a scenario\ncd <scenario-dir> && docker compose down\n```\n\nImage versions are centralized in `image-versions.env` at the repo root. Docker-compose files reference these via `${VAR:-default}` syntax.\n\nKubernetes scenarios (under `k8s/`) use Helm charts instead of Docker Compose — see their individual READMEs.\n\n## Scenario Structure\n\nEvery Docker-based scenario follows this layout:\n\n```\nscenario-name/\n├── docker-compose.yml      # LGMT stack + Alloy (infrastructure only)\n├── docker-compose.coda.yml # Demo app services (run via coda CLI or -f flag)\n├── config.alloy             # Alloy pipeline configuration (River/HCL syntax)\n├── loki-config.yaml         # Loki backend config\n├── prom-config.yaml         # Prometheus backend config\n├── tempo-config.yaml        # Tempo config (if tracing is involved)\n├── README.md                # What the scenario demonstrates and how to use it\n└── app/                     # Optional demo application (typically Python/Flask)\n```\n\n## Alloy Configuration Language\n\n`config.alloy` files use Alloy's River syntax (HCL-like). Pipelines follow a consistent pattern:\n\n1. **Receivers/Sources** — ingest data (`loki.source.*`, `otelcol.receiver.*`, `prometheus.exporter.*`)\n2. 
**Processors/Transformers** — parse, relabel, batch (`loki.process.*`, `discovery.relabel`, `otelcol.processor.*`)\n3. **Writers/Exporters** — send to backends (`loki.write.*`, `prometheus.remote_write.*`, `otelcol.exporter.*`)\n\nComponents are wired together by passing outputs to inputs (e.g., `forward_to = [loki.write.default.receiver]`).\n\n## Creating a New Scenario\n\nTemplates exist in `.cursor/docker-example.mdc` (Docker) and `.cursor/k8s-example.mdc` (Kubernetes) with full boilerplate for all config files.\n\nChecklist for a new scenario:\n1. Create a new top-level directory named after the scenario\n2. Include `docker-compose.yml`, `config.alloy`, backend configs, and `README.md`\n3. Use `${VAR:-default}` for image versions matching `image-versions.env` keys\n4. Grafana service should auto-provision datasources via entrypoint script (see template)\n5. Add the scenario to the main `README.md` table\n6. Alloy UI is available at `http://localhost:12345` for debugging pipelines\n\n## Key Conventions\n\n- Grafana runs on port 3000 with anonymous admin auth enabled (no login required)\n- Alloy HTTP server runs on port 12345\n- Python demo apps use OpenTelemetry SDK for instrumentation (`telemetry.py` pattern)\n- Backend configs (loki, prometheus, tempo) are minimal single-instance dev configs — not production-ready\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "<p align=\"center\">\n  <img src=\"./img/banner.png\" alt=\"Grafana Alloy Scenarios Banner\" width=\"300\"/>\n</p>\n\n# Grafana Alloy Scenarios\n\nA collection of self-contained, runnable scenarios demonstrating how to use [Grafana Alloy](https://grafana.com/docs/alloy/) for telemetry collection and processing. Each scenario includes a full LGMT stack (Loki, Grafana, Mimir, Tempo) with pre-configured dashboards so you can explore immediately.\n\n## Getting Started\n\n### Prerequisites\n\n- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/)\n\n### Run a scenario\n\n```bash\n# Option 1: Navigate to the scenario directory\ncd <scenario-dir> && docker compose up -d\n\n# Option 2: Use centralized image management (from repo root)\n./run-example.sh <scenario-directory>\n```\n\nThe centralized approach manages all Docker image versions in a single `image-versions.env` file, making it easy to update images across all scenarios.\n\n### Access the stack\n\nOnce a scenario is running:\n\n- **Grafana**: [http://localhost:3000](http://localhost:3000) (no login required)\n- **Alloy UI**: [http://localhost:12345](http://localhost:12345) (pipeline debugging)\n\n### Run with the Coda app overlay\n\nEach scenario includes a `docker-compose.coda.yml` file that defines the demo application services separately from the infrastructure stack. 
This lets you run just the observability backend on its own, or layer in the app when you're ready:\n\n```bash\n# Infrastructure only\ncd <scenario-dir> && docker compose up -d\n\n# Infrastructure + demo app\ncd <scenario-dir> && docker compose -f docker-compose.yml -f docker-compose.coda.yml up -d\n```\n\nIf you have the `coda` CLI installed, it manages the app overlay automatically:\n\n```bash\ncoda start <scenario-dir>   # Start app containers\ncoda stop <scenario-dir>    # Stop app containers\ncoda status <scenario-dir>  # Show container status\ncoda list                   # List all available scenarios\n```\n\n### Stop a scenario\n\n```bash\ncd <scenario-dir> && docker compose down\n```\n\n## Scenarios\n\n### Logs\n\n| Scenario | Description |\n| -------- | ----------- |\n| [GELF log ingestion](gelf-log-ingestion/) | Ingest structured logs from applications using the GELF (Graylog Extended Log Format) protocol over UDP. |\n| [Kafka logs](kafka/) | Consume and process logs from Apache Kafka topics. |\n| [Log API gateway](log-api-gateway/) | Use Alloy as a centralized log gateway that accepts logs via a Loki-compatible push API endpoint. |\n| [Log routing](routing/) | Route logs from multiple sources to different Loki tenants based on log content and origin. |\n| [Log secret filtering](log-secret-filtering/) | Automatically redact sensitive credentials and secrets from logs using pattern matching before storage. |\n| [Logs from file](logs-file/) | Monitor and tail log files using Alloy. |\n| [Logs over TCP](logs-tcp/) | Receive and process TCP logs in JSON format. |\n| [Popular logging frameworks](app-instrumentation/logging/popular-logging-frameworks/) | Parse logs from popular logging frameworks across 7 programming languages. |\n| [Structured log parsing](mail-house/) | Parse structured logs into labels and structured metadata. |\n| [Syslog monitoring](syslog/) | Monitor non-RFC5424 compliant syslog messages using `rsyslog` and Alloy. 
|\n\n### Tracing\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Distributed tracing](trace-delivery/) | Learn distributed tracing through a sofa delivery workflow from order to doorstep. |\n| [Game of tracing](game-of-tracing/) | An interactive strategy game teaching distributed tracing, sampling, and service graphs. |\n| [OpenTelemetry basic tracing](otel-basic-tracing/) | Collect and visualize OpenTelemetry traces using Alloy and Tempo. |\n| [OpenTelemetry service graphs](otel-tracing-service-graphs/) | Generate service graphs using the Alloy `servicegraph` connector. |\n| [OpenTelemetry span metrics](otel-span-metrics/) | Generate RED metrics (Request rate, Error rate, Duration) from OpenTelemetry traces using the span metrics connector. |\n| [OpenTelemetry tail sampling](otel-tail-sampling/) | Apply tail sampling policies to OpenTelemetry traces with Alloy and Tempo. |\n\n### Metrics\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Blackbox probing](blackbox-probing/) | Monitor endpoint availability and response times using synthetic HTTP probes. |\n| [OTel metrics pipeline](otel-metrics-pipeline/) | Forward OpenTelemetry metrics from applications through Alloy with batching and transformation into Prometheus. |\n\n### Profiling\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Continuous profiling](continuous-profiling/) | Collect and visualize CPU, memory, and goroutine profiles from Go applications using Grafana Pyroscope. |\n\n### Secrets and configuration\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Vault secrets](vault-secrets/) | Pull `prometheus.remote_write` basic_auth credentials from HashiCorp Vault at runtime using `remote.vault`, with hot-reload on rotation. 
|\n\n### Frontend\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Faro frontend observability](faro-frontend-observability/) | Collect frontend web telemetry (logs, errors, web vitals) from browser applications using the Faro Web SDK. |\n\n### Cloud Monitoring\n\n| Scenario | Description |\n| -------- | ----------- |\n| [CloudWatch metrics](cloudwatch-metrics/) | Pull AWS CloudWatch metrics into Prometheus via `prometheus.exporter.cloudwatch`. Uses LocalStack for offline reproducibility — no AWS account required. |\n\n### Infrastructure Monitoring\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Docker monitoring](docker-monitoring/) | Monitor Docker container metrics and logs. |\n| [Monitor Linux](linux/) | Monitor a Linux server's system metrics using Alloy. |\n| [Monitor Windows](windows/) | Monitor Windows system metrics and Event Logs. |\n| [Self-monitoring](self-monitoring/) | Configure Alloy to monitor itself, collecting its own metrics and logs. |\n| [SNMP monitoring](snmp/) | Monitor SNMP devices using the Alloy SNMP exporter. |\n\n### Database and Cache Monitoring\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Elasticsearch monitoring](elasticsearch-monitoring/) | Monitor Elasticsearch cluster health, node status, and performance metrics. |\n| [Memcached monitoring](memcached-monitoring/) | Monitor Memcached instance metrics including connections, memory usage, and command performance. |\n| [MySQL monitoring](mysql-monitoring/) | Monitor MySQL database server metrics and performance indicators. |\n| [PostgreSQL monitoring](postgres-monitoring/) | Monitor PostgreSQL transaction statistics, connections, and server configuration. |\n| [RabbitMQ monitoring](rabbitmq-monitoring/) | Monitor RabbitMQ queue, connection, and channel metrics plus broker container logs. |\n| [Redis monitoring](redis-monitoring/) | Monitor Redis instance metrics including connections, memory usage, and command throughput. 
|\n\n### Kubernetes\n\n| Scenario | Description |\n| -------- | ----------- |\n| [Kubernetes](k8s/) | A series of scenarios demonstrating Alloy setup using the Kubernetes monitoring Helm chart. See subdirectories for telemetry-specific examples. |\n\n### OTel Engine Examples (Experimental)\n\nAlloy v1.14+ includes an experimental **OTel Engine** that runs standard OpenTelemetry Collector YAML configs directly. These scenarios use `alloy otel` instead of River/HCL syntax. See the [OTel examples README](otel-examples/) for details.\n\n| Scenario | Description |\n| -------- | ----------- |\n| [File log processing](otel-examples/filelog-processing/) | Collect and parse mixed-format log files using the OTel `filelog` receiver with operator chains. |\n| [PII redaction](otel-examples/pii-redaction/) | Scrub credit cards, emails, and IPs from traces and logs using OTTL `replace_pattern`. |\n| [Multi-tenant routing](otel-examples/routing-multi-tenant/) | Route logs to different Loki tenants based on resource attributes using fan-out and filter. |\n| [Cost control](otel-examples/cost-control/) | Drop health checks, filter debug logs, and apply probabilistic sampling to cut telemetry volume. |\n| [Resource enrichment](otel-examples/resource-enrichment/) | Auto-attach host, OS, and Docker metadata to all signals via `resourcedetection`. |\n| [Count connector](otel-examples/count-connector/) | Derive request rate and error rate metrics from traces and logs using the `count` connector. |\n| [OTTL transform cookbook](otel-examples/ottl-transform/) | A cookbook of OTTL patterns: JSON parsing, severity mapping, attribute promotion, truncation. |\n| [Host metrics](otel-examples/host-metrics/) | Collect CPU, memory, disk, and network metrics using the `hostmetrics` receiver. |\n| [Multi-pipeline fan-out](otel-examples/multi-pipeline-fanout/) | Send traces to two backends with different processing per destination. 
|\n| [Kafka buffer](otel-examples/kafka-buffer/) | Buffer traces through Kafka for durability and backpressure handling. |\n\n## Contributing\n\nContributions of scenarios or improvements to scenarios are welcome. You can contribute in several ways:\n\n### Suggest a scenario\n\nIf you have an idea for a scenario but don't have time to implement it:\n\n1. Open an [issue](https://github.com/grafana/alloy-scenarios/issues/new) with the label `scenario-suggestion`\n2. Describe the scenario and what it would demonstrate\n3. Explain why this would be valuable to the community\n4. Outline any special requirements or considerations\n\n### Contribute a scenario\n\nIf you'd like to contribute a complete scenario:\n\n1. Fork this repository and create a branch\n2. Create a directory in the root of this repository with a descriptive name for your scenario\n3. Follow the [scenario template](#scenario-template) below\n4. Submit a pull request with your scenario\n\n### Improve a scenario\n\nTo improve a scenario:\n\n1. Fork this repository and create a branch\n2. Make your improvements to the scenario\n3. 
Submit a pull request with a clear description of your changes\n\n### Scenario template\n\nWhen creating a scenario, include the following files:\n\n- `docker-compose.yml` - Docker Compose file with the LGMT stack\n- `docker-compose.coda.yml` - Docker Compose override with the demo app services (for use with the `coda` CLI or `-f` flag)\n- `config.alloy` - Alloy configuration file for the scenario\n- `README.md` - Documentation explaining the scenario\n- Any additional files needed for your scenario, such as scripts or data files\n\n### Scenario checklist\n\nBefore submitting your scenario, ensure that you have:\n\n- [ ] Created a directory in the root of this repository with a descriptive name\n- [ ] Included a docker-compose.yml file with the necessary components, such as LGMT stack or subset\n- [ ] Created a complete config.alloy file that demonstrates the monitoring approach\n- [ ] Written a README.md with:\n  - A clear description of what the scenario demonstrates\n  - Prerequisites for running the demo\n  - Step-by-step instructions for running the demo\n  - Expected output and what to look for\n  - Screenshots if applicable\n  - Explanation of key configuration elements\n- [ ] Added the scenario to the table in this README.md\n- [ ] Ensured the scenario works with the centralized image management system\n- [ ] Verified all components start correctly with `docker compose up -d`\n\n### Best practices for scenarios\n\n- Keep the scenario focused on demonstrating one concept\n- Use clear, descriptive component and variable names\n- Add comments to explain complex parts of your Alloy configuration\n- Consider including a \"Customizing\" section in your README.md\n- Provide sample queries for Grafana/Prometheus/Loki/Tempo that work with your scenario\n- Use environment variables for versions and configurable parameters\n\n## Get help\n\nIf you have questions about creating a scenario or need help with Alloy:\n\n- Join the [Grafana Labs Community 
Forums](https://community.grafana.com/)\n- Check the [Grafana Alloy documentation](https://grafana.com/docs/alloy/)\n\n## License\n\nThis repository is licensed under the Apache License, Version 2.0. Refer to [LICENSE](LICENSE) for the full license text.\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/README.md",
    "content": "# App Instrumentation - Structured Logging with Alloy Parsing\n\nThis directory contains a comprehensive **Alloy tutorial** demonstrating how to parse structured logs from 7 popular programming languages using modern logging frameworks. Each language uses industry-standard structured logging libraries, and all logs are processed through a unified Alloy pipeline for collection, parsing, and storage in Loki.\n\n## 🎯 Tutorial Objectives\n\n- **Learn Alloy log parsing**: Understand how to parse different log formats using `loki.process` stages\n- **Multi-language support**: Handle logs from 7 different programming languages in a single pipeline\n- **Structured logging**: Demonstrate modern logging practices with structured data\n- **Real-world scenarios**: Show practical log parsing for containerized applications\n\n## Languages and Modern Logging Frameworks\n\n| Language | Logging Framework | Type | Key Features | Docker Base Image |\n|----------|------------------|------|--------------|-------------------|\n| **JavaScript** | `Pino` | JSON structured | High performance, child loggers, ndjson output | `node:22-alpine` |\n| **Python** | `logging` module | Structured text | Built-in standard library with custom formatting | `python:3.12-slim` |\n| **Java** | `SLF4J + Logback` | Structured text | Parameterized messages, MDC context, thread info | `openjdk:26-slim` |\n| **C#** | `Microsoft.Extensions.Logging` | Structured text | .NET standard framework, event IDs, structured data | `mcr.microsoft.com/dotnet/*:9.0` |\n| **C++** | `spdlog` | Structured text | High performance, source location, thread-safe | `ubuntu:24.04` |\n| **Go** | `Zap` | JSON structured | High performance, named loggers, structured fields | `golang:1.23-alpine` |\n| **PHP** | `Monolog` | Structured text | Context arrays, processors, multiple handlers | `php:8.3-cli-alpine` |\n\n## Directory Structure\n\n```\napp-instrumentation/logging/popular-logging-frameworks/\n├── alloy/\n│   ├── 
config.alloy          # Main Alloy configuration\n│   └── helper.alloy           # Language-specific log parsers\n├── javascript/\n│   ├── app.js                 # Pino structured logging\n│   └── Dockerfile\n├── python/\n│   ├── app.py                 # Python logging with custom format\n│   └── Dockerfile\n├── java/\n│   ├── App.java               # SLF4J + Logback\n│   ├── logback.xml\n│   └── Dockerfile\n├── csharp/\n│   ├── Program.cs            # Microsoft.Extensions.Logging\n│   ├── LoggingExample.csproj\n│   └── Dockerfile\n├── cpp/\n│   ├── main.cpp              # spdlog structured logging\n│   ├── CMakeLists.txt\n│   └── Dockerfile\n├── go/\n│   ├── main.go               # Zap JSON logging\n│   ├── go.mod\n│   ├── go.sum\n│   └── Dockerfile\n├── php/\n│   ├── app.php               # Monolog with context\n│   └── Dockerfile\n├── docker-compose.yml         # Complete stack with Loki + Grafana\n├── loki-config.yaml\n└── README.md\n```\n\n## 🔍 Alloy Parsing Features Demonstrated\n\n### Core Alloy Components Used\n- **`loki.source.docker`**: Automatic Docker container log discovery\n- **`loki.process`**: Multi-stage log parsing pipeline\n- **`discovery.docker`**: Container metadata extraction\n- **`discovery.relabel`**: Label transformation and routing\n\n### Advanced Parsing Techniques\nEach language parser demonstrates different Alloy parsing capabilities:\n\n- **Regex parsing** (`stage.regex`): Extract structured fields from text logs\n- **JSON parsing** (`stage.json`): Handle native JSON log formats  \n- **Multiline handling** (`stage.multiline`): Process stack traces and exception logs\n- **Label management** (`stage.labels`): Efficient indexing for filtering\n- **Structured metadata** (`stage.structured_metadata`): Searchable non-indexed data\n- **Timestamp parsing** (`stage.timestamp`): Multiple timestamp format support\n- **Template formatting** (`stage.template`): Custom output formatting\n- **Conditional logic**: Level conversion, error 
prioritization\n\n### Language-Specific Parsing Examples\n\n| Language | Primary Challenge | Alloy Solution |\n|----------|------------------|----------------|\n| **JavaScript (Pino)** | JSON numeric levels | Template stage for level conversion |\n| **Python** | Custom text format | Regex extraction with line numbers |\n| **Java (Logback)** | Multi-line stack traces | Multiline stage + regex parsing |\n| **C#** | Event IDs and namespaces | Regex parsing with structured metadata |\n| **C++** | Source location details | Complex regex for file:line extraction |\n| **Go (Zap)** | Unix timestamps | Timestamp parsing with fractional seconds |\n| **PHP (Monolog)** | Nested JSON context | Multiple JSON parsing stages |\n\n## 🚀 Quick Start Tutorial\n\n### Step 1: Clone the Repository\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/app-instrumentation/logging/popular-logging-frameworks\n```\n\n### Step 2: Launch the Complete Stack\n\n```bash\n# Build and run all applications with Alloy + Loki + Grafana\ndocker compose up --build\n\n# Run in detached mode to see clean output\ndocker compose up --build -d\n```\n\nThis starts:\n- **7 language applications** generating structured logs\n- **Alloy** parsing and forwarding logs to Loki\n- **Loki** storing parsed logs with labels and metadata\n- **Grafana** for log visualization and querying\n\n### Step 3: Explore the Logs\n\n- Head to http://localhost:3000/a/grafana-lokiexplore-app to see the logs in Grafana\n- Each language has its own service name / app so you can identify which language you would like to see the parsed logs for\n\n## 📚 Learning Outcomes\n\nAfter completing this tutorial, you'll understand:\n\n### Alloy Concepts\n- **Multi-stage processing**: How to chain `loki.process` stages for complex parsing\n- **Component composition**: Using `import.file` to modularize configurations\n- **Discovery patterns**: Automatic service discovery with Docker integration\n- **Label vs. 
metadata strategy**: When to use indexed labels vs. structured metadata\n\n### Log Parsing Techniques\n- **Regex mastery**: Complex pattern matching for text log formats\n- **JSON handling**: Extracting nested fields from structured logs\n- **Timestamp parsing**: Supporting multiple timestamp formats across languages\n- **Multiline processing**: Handling stack traces and exception logs\n- **Conditional formatting**: Template logic for log transformation\n\n### Real-World Patterns\n- **Language-specific challenges**: Understanding unique parsing requirements per language\n- **Performance considerations**: Efficient labeling and metadata strategies\n- **Observability best practices**: Structured logging principles across tech stacks\n- **Container log collection**: Production-ready log aggregation patterns\n\n## 🔧 Configuration Details\n\n### Language-Specific Parsing Challenges\n\nEach language presents unique parsing requirements:\n\n#### JavaScript (Pino)\n```alloy\n// Challenge: Numeric log levels (10, 20, 30, 40, 50, 60)\nstage.template {\n  source = \"level\"\n  template = \"{{- if eq .level_num \\\"30\\\" -}}info{{- else if eq .level_num \\\"50\\\" -}}error{{- end -}}\"\n}\n```\n\n#### Java (Logback)  \n```alloy\n// Challenge: Multi-line stack traces\nstage.multiline {\n  firstline = \"^\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\"\n}\n```\n\n#### Go (Zap)\n```alloy\n// Challenge: Unix timestamp with fractional seconds\nstage.timestamp {\n  source = \"ts\"\n  format = \"1750342991.0445938\"\n}\n```"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/alloy/config.alloy",
    "content": "// ###############################\n// #### Main Logging Configuration ####\n// ###############################\n\n// Import the custom log parsing helper module.\n// This gives us access to the \"app_logs_parser\" component that handles\n// language-specific log parsing for Python, JavaScript, Go, Java, C#, PHP, and C++.\nimport.file \"helper\" {\n  filename = \"/etc/alloy/helper.alloy\"\n}\n\n// Discover Docker containers running on the local Docker daemon.\n// This component continuously monitors the Docker socket for container changes,\n// providing real-time discovery of log sources without manual configuration.\ndiscovery.docker \"linux\" {\n  host = \"unix:///var/run/docker.sock\"  // Connect to local Docker daemon via Unix socket\n}\n\n// Transform Docker container metadata into useful labels for log routing.\n// This creates a \"service_name\" label from the container name, which is used\n// by our parsing stages to determine which language parser to apply.\ndiscovery.relabel \"logs_integrations_docker\" {\n  targets = []  // Start with empty targets (will be populated by discovery.docker)\n  \n  // Extract container name and use it as service identifier\n  // Example: container \"/python-app\" becomes service_name=\"python-app\"\n  rule {\n    source_labels = [\"__meta_docker_container_name\"]  // Docker provides this metadata\n    regex = \"/(.*)\"                                   // Remove leading slash from container name\n    target_label = \"service_name\"                     // Create clean service identifier\n  }\n}\n\n// Instantiate our custom log parser with output destination.\n// This creates the processing pipeline that will parse logs from all supported languages\n// and forward them to Loki for storage and querying.\nhelper.app_logs_parser \"default\" {\n  write_to = [loki.write.local.receiver]  // Send parsed logs to our Loki instance\n}\n\n// Collect logs from all discovered Docker containers.\n// This is the main log 
collection engine that streams container logs in real-time\n// and feeds them into our language-specific parsing pipeline.\nloki.source.docker \"default\" {\n  host       = \"unix:///var/run/docker.sock\"                           // Connect to Docker daemon\n  targets    = discovery.docker.linux.targets                          // Use discovered containers\n  labels     = {\"platform\" = \"docker\"}                                 // Add platform label to all logs\n  relabel_rules = discovery.relabel.logs_integrations_docker.rules     // Apply container name transformation\n  forward_to = [helper.app_logs_parser.default.parser_input]           // Send raw logs to our parser\n}\n\n// Configure Loki write endpoint for log storage.\n// This is where all parsed and enriched logs are finally stored for querying,\n// alerting, and analysis in Grafana or other tools.\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"  // Loki's standard push API endpoint\n  }\n} "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/alloy/helper.alloy",
    "content": "declare \"app_logs_parser\" {\n  // argument.write_to is a required argument that specifies where parsed\n  // log lines are sent.\n  //\n  // The value of the argument is retrieved in this file with\n  // argument.write_to.value.\n  argument \"write_to\" {\n    optional = false\n  }\n\n  // loki.process.app_logs_parser is our component which executes the parsing,\n  // passing parsed logs to argument.write_to.value.\n  loki.process \"app_logs_parser\" {\n\n    // ## Python Processing ##\n    // Let only python logs pass through this stage. This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"python\"\n      selector = \"{service_name=\\\"python\\\"}\"\n      \n      // Extract the timestamp, file, line number, level, and message from the log line.\n      // Python logs format: \"2025-06-17 09:54:15,283 - main.py:25 - INFO - Starting application\"\n      stage.regex {\n        expression = \"^(?P<timestamp>\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2},\\\\d{3}) - (?P<file>[^:]+):(?P<line_num>\\\\d+) - (?P<level>[^ ]+) - (?P<msg>.*)\"\n      }\n      \n      // Set the file and level as labels for efficient filtering and querying in Loki.\n      // Labels are indexed, so they should be reserved for low-cardinality values.\n      stage.labels {\n        values = {\n          file = \"\",\n          level = \"\",\n        }\n      }\n\n      // Set the timestamp to the timestamp extracted from the log line.\n      // This ensures proper chronological ordering in Loki.\n      stage.timestamp {\n        source = \"timestamp\"\n        format = \"2006-01-02 15:04:05,000\"\n      }\n\n      // Set the line number as structured metadata in Loki (non-indexed).\n      // Structured metadata is searchable but not indexed, reducing storage costs.\n      stage.structured_metadata {\n        values = {\n          line_num = \"\",\n        }\n      }\n\n      // We want to maintain a similar format to the 
original log line so we use template to create a new\n      // temporary variable called output. This creates a clean, consistent format across all Python logs.\n      stage.template {\n        source = \"output\"\n        template = \"{{.file}} - {{.line_num}} - {{.level}} - {{.msg}}\"\n      }\n\n      // We use the new output variable to create a new log body. This is the log line that will be sent to loki.\n      // The output stage replaces the original log message with our formatted version.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // ## Node.js Processing ##\n    // Let only node.js logs pass through this stage. This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"javascript\"\n      selector = \"{service_name=\\\"javascript\\\"}\"\n      \n      // Extract fields from JSON-formatted Pino logs.\n      // Pino outputs structured JSON logs with fields like level (numeric), time (timestamp), msg, etc.\n      stage.json {\n        expressions = {\n          level_num        = \"level\",\n          time             = \"time\",\n          pid              = \"pid\",\n          hostname         = \"hostname\",\n          msg              = \"msg\",\n          obj              = \"obj\",\n          counter          = \"counter\",\n          component        = \"component\",\n          query            = \"query\",\n          duration         = \"duration\",\n          version          = \"version\",\n          method           = \"method\",\n          path             = \"path\",\n          status           = \"status\",\n          nested_obj       = \"nested.obj\",\n          nested_timestamp = \"nested.timestamp\",\n          err_type         = \"err.type\",\n          err_message      = \"err.message\",\n          err_stack        = \"err.stack\",\n        }\n      }\n\n      // Convert Pino's numeric log levels to human-readable strings.\n      // Pino uses numbers: 10=trace, 
20=debug, 30=info, 40=warn, 50=error, 60=fatal\n      stage.template {\n        source   = \"level\"\n        template = \"{{- if eq .level_num \\\"10\\\" -}}trace{{- else if eq .level_num \\\"20\\\" -}}debug{{- else if eq .level_num \\\"30\\\" -}}info{{- else if eq .level_num \\\"40\\\" -}}warn{{- else if eq .level_num \\\"50\\\" -}}error{{- else if eq .level_num \\\"60\\\" -}}fatal{{- else -}}unknown{{- end -}}\"\n      }\n\n      // Set important fields as labels for efficient querying.\n      // hostname and component help identify log sources, level enables filtering by severity.\n      stage.labels {\n        values = {\n          file      = \"\",\n          hostname  = \"\",\n          component = \"\",\n          level     = \"\",\n        }\n      }\n\n      // Set the timestamp from Pino's Unix millisecond timestamp.\n      // Pino logs include precise timestamps for accurate log ordering.\n      stage.timestamp {\n        source = \"time\"\n        format = \"UnixMs\"\n      }\n\n      // Store all extracted fields as structured metadata for searchability without indexing costs.\n      // This includes process info, request details, and error information.\n      stage.structured_metadata {\n        values = {\n          level_num        = \"\",\n          pid              = \"\",\n          query            = \"\",\n          duration         = \"\",\n          version          = \"\",\n          method           = \"\",\n          path             = \"\",\n          status           = \"\",\n          nested_obj       = \"\",\n          nested_timestamp = \"\",\n          err_type         = \"\",\n          err_message      = \"\",\n          err_stack        = \"\",\n        }\n      }\n\n      // Create a consistent output format prioritizing error messages over regular messages.\n      // This provides better visibility of errors while maintaining standard log structure.\n      stage.template {\n        source   = \"output\"\n        template = 
\"{{.hostname}} - {{.level}} - {{ if .err_message }}{{ .err_message }}{{ else }}{{ .msg }}{{ end }}\"\n      }\n\n      // Apply the formatted output as the final log message sent to Loki.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // ## Go Processing ##\n    // Let only go logs pass through this stage. This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"go\"\n      selector = \"{service_name=\\\"go\\\"}\"\n      \n      // Extract fields from Zap's JSON-structured logs.\n      // Zap outputs detailed JSON logs with structured fields for better observability.\n      stage.json {\n        expressions = {\n          level            = \"level\",\n          ts               = \"ts\",\n          logger           = \"logger\",\n          caller           = \"caller\",\n          msg              = \"msg\",\n          answer           = \"answer\",\n          obj              = \"obj\",\n          counter          = \"counter\",\n          feature          = \"feature\",\n          query            = \"query\",\n          duration         = \"duration\",\n          method           = \"method\",\n          path             = \"path\",\n          status           = \"status\",\n          requestId        = \"requestId\",\n          context1         = \"context1\",\n          context2         = \"context2\",\n          error            = \"error\",\n          stacktrace       = \"stacktrace\",\n          nested_obj       = \"nested.obj\",\n          nested_timestamp = \"nested.timestamp\",\n        }\n      }\n\n      // Set logger name and level as indexed labels for efficient filtering.\n      // This enables quick filtering by specific loggers (e.g., database, api) and log levels.\n      stage.labels {\n        values = {\n          logger = \"\",\n          level  = \"\",\n        }\n      }\n\n      // Parse Zap's Unix timestamp with fractional seconds.\n      // Zap provides 
high-precision timestamps for accurate log correlation.\n      stage.timestamp {\n        source = \"ts\"\n        format = \"1750342991.0445938\"\n      }\n\n      // Store all contextual information as structured metadata.\n      // This includes caller info, request details, errors, and application-specific data.\n      stage.structured_metadata {\n        values = {\n          caller           = \"caller\",\n          answer           = \"answer\",\n          obj              = \"obj\",\n          counter          = \"counter\",\n          feature          = \"feature\",\n          query            = \"query\",\n          duration         = \"duration\",\n          method           = \"method\",\n          path             = \"path\",\n          status           = \"status\",\n          requestId        = \"requestId\",\n          context1         = \"context1\",\n          context2         = \"context2\",\n          error            = \"error\",\n          stacktrace       = \"stacktrace\",\n          nested_obj       = \"nested.obj\",\n          nested_timestamp = \"nested.timestamp\",\n        }\n      }\n\n      // Create a clean, consistent output format showing logger, level, and message.\n      // This maintains readability while preserving structured data in metadata.\n      stage.template {\n        source   = \"output\"\n        template = \"{{.logger}} - {{.level}} - {{.msg}}\"\n      }\n\n      // Apply the formatted output as the final log message.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // ## Java Processing ##\n    // Let only java logs pass through this stage. 
This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"java\"\n      selector = \"{service_name=\\\"java\\\"}\"\n      \n      // Handle multi-line Java stack traces by identifying the start of new log entries.\n      // Java exceptions often span multiple lines, so we need to group them properly.\n      stage.multiline {\n        firstline = \"^\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}\\\\+\\\\d{4}\\\\[[^\\\\]]+\\\\]\\\\s+[A-Z]+\\\\s+\\\\w+\\\\s+-\\\\s+\"\n      }\n\n      // Parse Logback's structured log format including timestamps, threads, levels, and stack traces.\n      // Format: \"2024-01-15T14:41:02.423+0000[main] INFO App - Starting application\"\n      stage.regex {\n        expression = \"^(?P<timestamp>\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}\\\\+\\\\d{4})\\\\[(?P<thread>[^\\\\]]+)\\\\] (?P<level>[A-Z]+)\\\\s+(?P<logger>[^ ]+) - (?P<msg>[^\\n]*)(?:\\\\n(?P<stacktrace>.*))?\"\n      }\n\n      // Set logger and level as indexed labels for efficient log filtering.\n      // This enables filtering by specific Java classes/packages and log severity.\n      stage.labels {\n        values = {\n          logger = \"\",\n          level  = \"\",\n        }\n      }\n\n      // Parse ISO 8601 timestamp with timezone for accurate time correlation.\n      // Java's Logback uses precise timestamps with timezone information.\n      stage.timestamp {\n        source = \"timestamp\"\n        format = \"2006-01-02T15:04:05.000-0700\"\n      }\n\n      // Store thread information and stack traces as structured metadata.\n      // Thread info helps with concurrent debugging, stack traces provide error context.\n      stage.structured_metadata {\n        values = {\n          thread     = \"\",\n          stacktrace = \"\",\n        }\n      }\n\n      // Format output to show essential information: logger, level, and message.\n      // Stack traces are preserved in 
metadata for when they're needed.\n      stage.template {\n        source   = \"output\"\n        template = \"{{.logger}} - {{.level}} - {{.msg}}\"\n      }\n\n      // Apply the clean formatted output while preserving detailed metadata.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // ## C# Processing ##\n    // Let only c# logs pass through this stage. This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"csharp\"\n      selector = \"{service_name=\\\"csharp\\\"}\"\n\n      // Handle multi-line .NET logs and exception stack traces.\n      // .NET logging can span multiple lines, especially with structured logging and exceptions.\n      stage.multiline {\n        firstline = \"^\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3} [a-z]+: [^\\\\[]+\\\\[\\\\d+\\\\]\"\n      }\n\n      // Parse .NET's structured logging format with event IDs.\n      // Format (two lines): \"2024-01-15 14:41:02.423 info: Microsoft.Extensions.Hosting[1]\" followed by the indented message \"Starting application\" on the next line.\n      stage.regex {\n        expression = \"^(?P<timestamp>\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) (?P<level>[a-z]+): (?P<logger>[^\\\\[]+)\\\\[(?P<event_id>\\\\d+)\\\\]\\\\n\\\\s+(?P<msg>.*)\"\n      }\n\n      // Set logger namespace and level as indexed labels for filtering.\n      // .NET uses hierarchical logger names (e.g., Microsoft.Extensions.Hosting) for categorization.\n      stage.labels {\n        values = {\n          logger = \"\",\n          level  = \"\",\n        }\n      }\n\n      // Parse .NET's standard timestamp format (no timezone).\n      // .NET logging typically uses local time format.\n      stage.timestamp {\n        source = \"timestamp\"\n        format = \"2006-01-02 15:04:05.000\"\n      }\n\n      // Store .NET-specific event IDs as structured metadata.\n      // Event IDs help categorize and filter specific types of .NET framework events.\n      stage.structured_metadata 
{\n        values = {\n          event_id = \"\",\n        }\n      }\n\n      // Create consistent output format showing logger namespace, level, and message.\n      stage.template {\n        source = \"output\"\n        template = \"{{.logger}} - {{.level}} - {{.msg}}\"\n      }\n\n      // Apply the formatted output to maintain consistency with other language logs.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // ## PHP Processing ##\n    // Let only php logs pass through this stage. This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"php\"\n      selector = \"{service_name=\\\"php\\\"}\"\n\n      // Parse Monolog's default line format with timestamp, logger, level, message, context, and extra data.\n      // Format: \"[2024-01-15T14:41:02.123456+00:00] app.INFO: hello world {\"counter\":42} {\"environment\":\"production\"}\"\n      stage.regex {\n        expression = \"^\\\\[(?P<timestamp>[^\\\\]]+)\\\\] (?P<logger>[^.]+)\\\\.(?P<level>[A-Z]+): (?P<msg>.*?) 
(?P<context_json>\\\\[\\\\]|\\\\{.*?\\\\}) (?P<extra_json>\\\\{.*?\\\\})$\"\n      }\n\n      // Set logger name and level as indexed labels for efficient querying.\n      // PHP applications often use multiple named loggers (app, database, api, etc.).\n      stage.labels {\n        values = {\n          logger = \"\",\n          level  = \"\",\n        }\n      }\n\n      // Parse Monolog's ISO 8601 timestamp with microseconds and timezone.\n      // Monolog provides high-precision timestamps for accurate log correlation.\n      stage.timestamp {\n        source = \"timestamp\"\n        format = \"2006-01-02T15:04:05.000000-07:00\"\n      }\n\n      // Extract application-specific data from the context JSON.\n      // Context contains request-specific data like counters, query info, API details, etc.\n      stage.json {\n        source = \"context_json\"\n        expressions = {\n          counter          = \"counter\",\n          obj              = \"obj\",\n          query            = \"query\",\n          duration         = \"duration\",\n          method           = \"method\",\n          path             = \"path\",\n          status           = \"status\",\n          exception        = \"exception\",\n          error_code       = \"error_code\",\n          affected_service = \"affected_service\",\n        }\n      }\n      \n      // Extract environment and system-level data from the extra JSON.\n      // Extra data typically contains environment info, process details, etc.\n      stage.json {\n        source = \"extra_json\"\n        expressions = {\n          environment = \"environment\",\n        }\n      }\n\n      // Store all extracted PHP context and environment data as structured metadata.\n      // This provides rich searchability for PHP application debugging and monitoring.\n      stage.structured_metadata {\n        values = {\n          counter          = \"\",\n          obj              = \"\",\n          query            = \"\",\n          
duration         = \"\",\n          method           = \"\",\n          path             = \"\",\n          status           = \"\",\n          exception        = \"\",\n          error_code       = \"\",\n          affected_service = \"\",\n          environment      = \"\",\n        }\n      }\n\n      // Create clean output format showing logger, level, and message.\n      // Detailed context remains accessible in structured metadata.\n      stage.template {\n        source = \"output\"\n        template = \"{{.logger}} - {{.level}} - {{.msg}}\"\n      }\n\n      // Apply the standardized output format while preserving rich PHP context data.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // ## C++ Processing ##\n    // Let only cpp logs pass through this stage. This is done via the label match on the service_name label.\n    stage.match {\n      pipeline_name = \"cpp\"\n      selector = \"{service_name=\\\"cpp\\\"}\"\n\n      // Parse C++ structured logging format with detailed source location information.\n      // Format: \"2024-01-15 14:41:02.423 [info] [logger] [thread 1] [main.cpp:25 main] - Starting application\"\n      stage.regex {\n        expression = \"^(?P<timestamp>\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) \\\\[(?P<level>[^\\\\]]+)\\\\] \\\\[(?P<logger>[^\\\\]]+)\\\\] \\\\[(?P<thread>[^\\\\]]+)\\\\] \\\\[(?P<file>[^:]+):(?P<line_num>\\\\d+) (?P<function>[^\\\\]]+)\\\\] - (?P<msg>.*)\"\n      }\n\n      // Set logger, level, and source file as indexed labels for debugging.\n      // C++ logs benefit from file-based filtering for debugging specific modules.\n      stage.labels {\n        values = {\n          logger = \"\",\n          level  = \"\",\n          file   = \"\",\n        }\n      }\n\n      // Parse standard timestamp format used by C++ logging libraries.\n      stage.timestamp {\n        source = \"timestamp\"\n        format = \"2006-01-02 15:04:05.000\"\n      }\n\n      // Store 
detailed C++ debugging information as structured metadata.\n      // Thread info, line numbers, and function names are crucial for C++ debugging.\n      stage.structured_metadata {\n        values = {\n          thread   = \"\",\n          line_num = \"\",\n          function = \"\",\n        }\n      }\n\n      // Create detailed output showing file location, function, level, and message.\n      // C++ debugging often requires precise source location information.\n      stage.template {\n        source = \"output\"\n        template = \"{{.file}}:{{.line_num}} {{.function}} - {{.level}} - {{.msg}}\"\n      }\n\n      // Apply the detailed C++ format optimized for debugging and troubleshooting.\n      stage.output {\n        source = \"output\"\n      }\n    }\n\n    // Send processed logs to our argument.\n    forward_to = argument.write_to.value\n  }\n\n  // export.parser_input exports a value to the module consumer.\n  export \"parser_input\" {\n    // Expose the receiver of loki.process so the module importer can send\n    // logs to our loki.process component.\n    value = loki.process.app_logs_parser.receiver\n  }\n}"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/cpp/CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.16)\nproject(LoggingExample)\n\nset(CMAKE_CXX_STANDARD 17)\nset(CMAKE_CXX_STANDARD_REQUIRED ON)\n\n# Find required packages\nfind_package(PkgConfig REQUIRED)\nfind_package(Threads REQUIRED)\n\n# Add spdlog\ninclude(FetchContent)\nFetchContent_Declare(\n    spdlog\n    GIT_REPOSITORY https://github.com/gabime/spdlog.git\n    GIT_TAG v1.12.0\n)\nFetchContent_MakeAvailable(spdlog)\n\n# Create executable\nadd_executable(logging_example main.cpp)\n\n# Link libraries\ntarget_link_libraries(logging_example \n    PRIVATE \n    spdlog::spdlog\n    Threads::Threads\n)\n\n# Compiler-specific options\nif(MSVC)\n    target_compile_options(logging_example PRIVATE /W4)\nelse()\n    target_compile_options(logging_example PRIVATE -Wall -Wextra -Wpedantic)\nendif() "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/cpp/Dockerfile",
    "content": "FROM ubuntu:26.04@sha256:f3d28607ddd78734bb7f71f117f3c6706c666b8b76cbff7c9ff6e5718d46ff64\n\n# Install build dependencies\nRUN apt-get update && apt-get install -y \\\n    cmake \\\n    g++ \\\n    make \\\n    git \\\n    pkg-config \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /app\n\nCOPY . .\n\n# Build the application\nRUN cmake -B build -S . && \\\n    cmake --build build --config Release\n\n# Run the application\nCMD [\"./build/logging_example\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/cpp/main.cpp",
    "content": "#include <spdlog/spdlog.h>\n#include <spdlog/sinks/stdout_color_sinks.h>\n#include <chrono>\n#include <thread>\n\nint main() {\n    auto console = spdlog::stdout_color_mt(\"logger\");\n    spdlog::set_default_logger(console);\n    spdlog::set_level(spdlog::level::debug);\n    spdlog::set_pattern(\n        \"%Y-%m-%d %H:%M:%S.%e [%^%l%$] [%n] [thread %t] [%s:%# %!] - %v\"\n    );\n\n    int counter = 0;\n\n    SPDLOG_LOGGER_INFO(console, \"Starting C++ basic logging example\");\n    SPDLOG_LOGGER_INFO(console, \"Demonstrating spdlog formatting\");\n\n    while (true) {\n        counter++;\n        int logType = counter % 5;\n\n        switch (logType) {\n            case 0:\n                SPDLOG_LOGGER_DEBUG(console, \"Basic debug message, counter: {}\", counter);\n                break;\n            case 1:\n                SPDLOG_LOGGER_INFO(console, \"Information message, counter: {}\", counter);\n                break;\n            case 2:\n                SPDLOG_LOGGER_WARN(console, \"Warning message, counter: {}\", counter);\n                break;\n            case 3:\n                SPDLOG_LOGGER_ERROR(console, \"Error message, counter: {}\", counter);\n                break;\n            case 4:\n                SPDLOG_LOGGER_CRITICAL(console, \"Critical message, counter: {}\", counter);\n                break;\n        }\n\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    return 0;\n}\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/csharp/Dockerfile",
    "content": "FROM mcr.microsoft.com/dotnet/sdk:9.0@sha256:0300d42309afd86168fa57d62db79020a34ee396d39c9634844b9c0ab285ea55 AS build\nWORKDIR /app\n\nCOPY *.csproj .\nRUN dotnet restore\n\nCOPY . .\nRUN dotnet publish -c Release -o out\n\nFROM mcr.microsoft.com/dotnet/runtime:9.0@sha256:7590f1b7e124fe7a4b7cffa5f6f9958f2c02a22bf5bd7a0387a84b88cddf4057\nWORKDIR /app\nCOPY --from=build /app/out .\n\nENTRYPOINT [\"dotnet\", \"LoggingExample.dll\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/csharp/LoggingExample.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\n  <PropertyGroup>\n    <OutputType>Exe</OutputType>\n    <TargetFramework>net9.0</TargetFramework>\n    <Nullable>enable</Nullable>\n  </PropertyGroup>\n\n  <ItemGroup>\n    <PackageReference Include=\"Microsoft.Extensions.Hosting\" Version=\"10.0.7\" />\n    <PackageReference Include=\"Microsoft.Extensions.Logging\" Version=\"10.0.7\" />\n    <PackageReference Include=\"Microsoft.Extensions.Logging.Console\" Version=\"10.0.7\" />\n  </ItemGroup>\n\n</Project> "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/csharp/Program.cs",
    "content": "using Microsoft.Extensions.Logging;\nusing Microsoft.Extensions.DependencyInjection;\nusing Microsoft.Extensions.Hosting;\nusing System;\nusing System.Threading.Tasks;\n\nnamespace LoggingExample\n{\n    class Program\n    {\n        private static ILogger<Program>? _logger;\n\n        static async Task Main(string[] args)\n        {\n            // Configure logging with proper formatting\n            using var host = Host.CreateDefaultBuilder(args)\n                .ConfigureLogging(logging =>\n                {\n                    logging.ClearProviders();\n                    logging.AddConsole(options =>\n                    {\n                        options.TimestampFormat = \"yyyy-MM-dd HH:mm:ss.fff \";\n                        options.IncludeScopes = false;\n                    });\n                    logging.SetMinimumLevel(LogLevel.Debug);\n                })\n                .Build();\n\n            _logger = host.Services.GetRequiredService<ILogger<Program>>();\n\n            int counter = 0;\n\n            _logger.LogInformation(\"Starting C# basic logging example\");\n            _logger.LogInformation(\"Demonstrating Microsoft.Extensions.Logging\");\n\n            // Infinite loop with different log levels\n            while (true)\n            {\n                counter++;\n\n                // Cycle through different log levels\n                int logType = counter % 5;\n\n                switch (logType)\n                {\n                    case 0:\n                        _logger.LogDebug(\"Basic debug message, counter: {Counter}\", counter);\n                        break;\n                    case 1:\n                        _logger.LogInformation(\"Information message, counter: {Counter}\", counter);\n                        break;\n                    case 2:\n                        _logger.LogWarning(\"Warning message, counter: {Counter}\", counter);\n                        break;\n                    case 3:\n       
                 _logger.LogError(\"Error message, counter: {Counter}\", counter);\n                        break;\n                    case 4:\n                        _logger.LogCritical(\"Critical message, counter: {Counter}\", counter);\n                        break;\n                }\n\n                // Wait 1 second before next log\n                await Task.Delay(1000);\n            }\n        }\n    }\n} "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/docker-compose.coda.yml",
    "content": "services:\n  javascript-logging:\n    build:\n      context: ./javascript\n      dockerfile: Dockerfile\n    container_name: javascript\n    environment:\n      - NODE_ENV=production\n    restart: unless-stopped\n\n  python-logging:\n    build:\n      context: ./python\n      dockerfile: Dockerfile\n    container_name: python\n    environment:\n      - PYTHON_ENV=production\n    restart: unless-stopped\n\n  java-logging:\n    build:\n      context: ./java\n      dockerfile: Dockerfile\n    container_name: java\n    environment:\n      - JAVA_ENV=production\n    restart: unless-stopped\n\n  csharp-logging:\n    build:\n      context: ./csharp\n      dockerfile: Dockerfile\n    container_name: csharp\n    environment:\n      - DOTNET_ENVIRONMENT=Production\n    restart: unless-stopped\n\n  cpp-logging:\n    build:\n      context: ./cpp\n      dockerfile: Dockerfile\n    container_name: cpp\n    environment:\n      - CPP_ENV=production\n    restart: unless-stopped\n\n  go-logging:\n    build:\n      context: ./go\n      dockerfile: Dockerfile\n    container_name: go\n    environment:\n      - GO_ENV=production\n    restart: unless-stopped\n\n  php-logging:\n    build:\n      context: ./php\n      dockerfile: Dockerfile\n    container_name: php\n    environment:\n      - PHP_ENV=production\n    restart: unless-stopped\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  javascript-logging:\n    build:\n      context: ./javascript\n      dockerfile: Dockerfile\n    container_name: javascript\n    environment:\n      - NODE_ENV=production\n    restart: unless-stopped\n\n  python-logging:\n    build:\n      context: ./python\n      dockerfile: Dockerfile\n    container_name: python\n    environment:\n      - PYTHON_ENV=production\n    restart: unless-stopped\n\n  java-logging:\n    build:\n      context: ./java\n      dockerfile: Dockerfile\n    container_name: java\n    environment:\n      - JAVA_ENV=production\n    restart: unless-stopped\n\n  csharp-logging:\n    build:\n      context: ./csharp\n      dockerfile: Dockerfile\n    container_name: csharp\n    environment:\n      - DOTNET_ENVIRONMENT=Production\n    restart: unless-stopped\n\n  cpp-logging:\n    build:\n      context: ./cpp\n      dockerfile: Dockerfile\n    container_name: cpp\n    environment:\n      - CPP_ENV=production\n    restart: unless-stopped\n\n  go-logging:\n    build:\n      context: ./go\n      dockerfile: Dockerfile\n    container_name: go\n    environment:\n      - GO_ENV=production\n    restart: unless-stopped\n\n  php-logging:\n    build:\n      context: ./php\n      dockerfile: Dockerfile\n    container_name: php\n    environment:\n      - PHP_ENV=production\n    restart: unless-stopped\n\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    container_name: loki\n    ports:\n      - \"3100:3100\"\n    volumes:\n     - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n  grafana:\n   image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n   container_name: grafana\n   environment:\n     - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n     - GF_AUTH_ANONYMOUS_ENABLED=true\n     - GF_AUTH_BASIC_ENABLED=false\n   ports:\n     - 3000:3000/tcp\n   entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n     
   cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n   image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n   container_name: alloy\n   ports:\n     - 12345:12345\n     - 4317:4317\n     - 4318:4318\n   volumes:\n     - ./alloy/:/etc/alloy/\n     - /var/run/docker.sock:/var/run/docker.sock\n   command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n\n\nnetworks:\n  default:\n    name: logging-examples-network "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/go/Dockerfile",
    "content": "FROM golang:1.26-alpine@sha256:91eda9776261207ea25fd06b5b7fed8d397dd2c0a283e77f2ab6e91bfa71079d\n\nWORKDIR /app\n\n# Copy go.mod and go.sum for better caching\nCOPY go.mod go.sum ./\nRUN go mod download\n\n# Copy source code\nCOPY main.go .\n\nRUN go build -o logging_example main.go\n\nCMD [\"./logging_example\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/go/go.mod",
    "content": "module logging-example\n\ngo 1.23\n\nrequire go.uber.org/zap v1.28.0\n\nrequire go.uber.org/multierr v1.10.0 // indirect\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/go/go.sum",
    "content": "github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=\ngithub.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=\ngo.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=\ngo.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=\ngo.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=\ngo.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=\ngo.uber.org/zap v1.28.0 h1:IZzaP1Fv73/T/pBMLk4VutPl36uNC+OSUh3JLG3FIjo=\ngo.uber.org/zap v1.28.0/go.mod h1:rDLpOi171uODNm/mxFcuYWxDsqWSAVkFdX4XojSKg/Q=\ngo.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=\ngo.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/go/main.go",
    "content": "package main\n\nimport (\n\t\"errors\"\n\t\"time\"\n\n\t\"go.uber.org/zap\"\n\t\"go.uber.org/zap/zapcore\"\n)\n\nfunc main() {\n\t// Configure Zap logger for JSON output to stdout\n\tconfig := zap.NewProductionConfig()\n\tconfig.Level = zap.NewAtomicLevelAt(zap.DebugLevel)\n\tconfig.OutputPaths = []string{\"stdout\"}\n\n\tlogger, err := config.Build()\n\tif err != nil {\n\t\tpanic(err)\n\t}\n\tdefer logger.Sync()\n\n\t// Create child loggers for different components\n\tappLogger := logger.Named(\"app\")\n\tdbLogger := logger.Named(\"database\")\n\tapiLogger := logger.Named(\"api\")\n\n\tcounter := 0\n\n\tappLogger.Info(\"Starting Go basic logging example with Zap\")\n\tappLogger.Info(\"Demonstrating Zap structured logging features\")\n\n\t// Infinite loop with different logging examples\n\tfor {\n\t\tcounter++\n\n\t\t// Cycle through different logging examples\n\t\tlogType := counter % 12\n\n\t\tswitch logType {\n\t\tcase 0:\n\t\t\tappLogger.Info(\"hello world\")\n\t\tcase 1:\n\t\t\tappLogger.Error(\"this is at error level\")\n\t\tcase 2:\n\t\t\tappLogger.Info(\"the answer is 42\", zap.Int(\"answer\", 42))\n\t\tcase 3:\n\t\t\tappLogger.Info(\"hello world\", zap.Int(\"obj\", 42))\n\t\tcase 4:\n\t\t\tappLogger.Info(\"hello world with counter\",\n\t\t\t\tzap.Int(\"obj\", 42),\n\t\t\t\tzap.Int(\"counter\", counter))\n\t\tcase 5:\n\t\t\tappLogger.Info(\"nested object\",\n\t\t\t\tzap.Object(\"nested\", zapcore.ObjectMarshalerFunc(func(enc zapcore.ObjectEncoder) error {\n\t\t\t\t\tenc.AddInt(\"obj\", 42)\n\t\t\t\t\tenc.AddTime(\"timestamp\", time.Now())\n\t\t\t\t\treturn nil\n\t\t\t\t})))\n\t\tcase 6:\n\t\t\tappLogger.Error(\"simulated error\", zap.Error(errors.New(\"kaboom\")))\n\t\tcase 7:\n\t\t\tappLogger.Info(\"hello from app component!\")\n\t\tcase 8:\n\t\t\tdbLogger.Warn(\"slow query detected\",\n\t\t\t\tzap.String(\"query\", \"SELECT * FROM users\"),\n\t\t\t\tzap.Duration(\"duration\", 250*time.Millisecond))\n\t\tcase 9:\n\t\t\tapiLogger.Info(\"API 
request completed\",\n\t\t\t\tzap.String(\"method\", \"GET\"),\n\t\t\t\tzap.String(\"path\", \"/api/users\"),\n\t\t\t\tzap.Int(\"status\", 200))\n\t\tcase 10:\n\t\t\ttempChild := appLogger.With(zap.String(\"requestId\", \"req-\"+string(rune(counter))))\n\t\t\ttempChild.Debug(\"this is a debug statement via child\")\n\t\tcase 11:\n\t\t\tappLogger.Error(\"error with additional context\",\n\t\t\t\tzap.Error(errors.New(\"kaboom\")),\n\t\t\t\tzap.String(\"context1\", \"additional\"),\n\t\t\t\tzap.String(\"context2\", \"information\"))\n\t\t}\n\n\t\t// Occasionally demonstrate sugar logger\n\t\tif counter%15 == 0 {\n\t\t\tsugar := logger.Sugar()\n\t\t\tsugar.Infow(\"using sugar logger\",\n\t\t\t\t\"counter\", counter,\n\t\t\t\t\"feature\", \"sugar\")\n\t\t}\n\n\t\t// Occasionally demonstrate different log levels\n\t\tif counter%20 == 0 {\n\t\t\tappLogger.Debug(\"this is a debug message\", zap.Int(\"counter\", counter))\n\t\t\tappLogger.Warn(\"this is a warning message\", zap.Int(\"counter\", counter))\n\t\t}\n\n\t\t// Wait 1 second before next log\n\t\ttime.Sleep(1 * time.Second)\n\t}\n}\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/java/App.java",
    "content": "import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;\nimport org.slf4j.MDC;\n\npublic class App {\n    private static final Logger logger = LoggerFactory.getLogger(App.class);\n    private static final Logger appLogger = LoggerFactory.getLogger(\"app\");\n    private static final Logger dbLogger = LoggerFactory.getLogger(\"database\");\n    private static final Logger apiLogger = LoggerFactory.getLogger(\"api\");\n    \n    public static void main(String[] args) {\n        int counter = 0;\n        \n        logger.info(\"Starting Java basic logging example with SLF4J + Logback\");\n        logger.info(\"Demonstrating SLF4J structured logging features\");\n        \n        // Infinite loop with different logging examples\n        while (true) {\n            counter++;\n            \n            // Cycle through different logging examples\n            int logType = counter % 12;\n            \n            switch (logType) {\n                case 0:\n                    logger.info(\"hello world\");\n                    break;\n                case 1:\n                    logger.error(\"this is at error level\");\n                    break;\n                case 2:\n                    logger.info(\"the answer is {}\", 42);\n                    break;\n                case 3:\n                    logger.info(\"hello world with obj {}\", 42);\n                    break;\n                case 4:\n                    logger.info(\"hello world with counter {} and obj {}\", counter, 42);\n                    break;\n                case 5:\n                    logger.info(\"nested object with timestamp {} and value {}\", \n                               java.time.LocalDateTime.now(), 42);\n                    break;\n                case 6:\n                    Exception simulatedError = new RuntimeException(\"kaboom\");\n                    logger.error(\"simulated error\", simulatedError);\n                    break;\n                case 7:\n      
              appLogger.info(\"hello from app component!\");\n                    break;\n                case 8:\n                    dbLogger.warn(\"slow query detected: {} took {}ms\", \n                                 \"SELECT * FROM users\", 250);\n                    break;\n                case 9:\n                    apiLogger.info(\"API request completed: {} {} status={}\", \n                                  \"GET\", \"/api/users\", 200);\n                    break;\n                case 10:\n                    // Using MDC (Mapped Diagnostic Context) for contextual logging\n                    MDC.put(\"requestId\", \"req-\" + counter);\n                    logger.debug(\"this is a debug statement with MDC context\");\n                    MDC.clear();\n                    break;\n                case 11:\n                    Exception error = new RuntimeException(\"kaboom\");\n                    logger.error(\"error with additional context: {} {}\", \n                               \"additional\", \"information\", error);\n                    break;\n            }\n            \n            // Occasionally demonstrate different log levels\n            if (counter % 15 == 0) {\n                logger.debug(\"this is a debug message with counter {}\", counter);\n                logger.warn(\"this is a warning message with counter {}\", counter);\n            }\n            \n            // Occasionally demonstrate MDC usage\n            if (counter % 20 == 0) {\n                MDC.put(\"userId\", \"user123\");\n                MDC.put(\"sessionId\", \"session456\");\n                logger.info(\"using MDC for contextual logging\");\n                MDC.clear();\n            }\n            \n            // Wait 1 second before next log\n            try {\n                Thread.sleep(1000);\n            } catch (InterruptedException e) {\n                Thread.currentThread().interrupt();\n                logger.warn(\"Thread interrupted: {}\", 
e.getMessage());\n                break;\n            }\n        }\n    }\n} "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/java/Dockerfile",
    "content": "FROM openjdk:26-slim@sha256:63814a9d8bbea6d39d5ce9c91843bec5e9d9d1d1bc2bade4bb57ba70c0839553\n\nWORKDIR /app\n\n# Download SLF4J API, Logback dependencies, and Jackson for JSON encoding\nRUN apt-get update && apt-get install -y wget && \\\n    wget https://repo1.maven.org/maven2/org/slf4j/slf4j-api/2.0.9/slf4j-api-2.0.9.jar && \\\n    wget https://repo1.maven.org/maven2/ch/qos/logback/logback-classic/1.4.14/logback-classic-1.4.14.jar && \\\n    wget https://repo1.maven.org/maven2/ch/qos/logback/logback-core/1.4.14/logback-core-1.4.14.jar && \\\n    wget https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.16.1/jackson-core-2.16.1.jar && \\\n    wget https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.16.1/jackson-databind-2.16.1.jar && \\\n    wget https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-annotations/2.16.1/jackson-annotations-2.16.1.jar && \\\n    apt-get clean && rm -rf /var/lib/apt/lists/*\n\nCOPY App.java .\nCOPY logback.xml .\n\nRUN javac -cp \"slf4j-api-2.0.9.jar:logback-classic-1.4.14.jar:logback-core-1.4.14.jar:jackson-core-2.16.1.jar:jackson-databind-2.16.1.jar:jackson-annotations-2.16.1.jar\" App.java\n\nCMD [\"java\", \"-cp\", \".:slf4j-api-2.0.9.jar:logback-classic-1.4.14.jar:logback-core-1.4.14.jar:jackson-core-2.16.1.jar:jackson-databind-2.16.1.jar:jackson-annotations-2.16.1.jar\", \"App\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/java/logback.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<configuration>\n    <!-- Console appender with standard format -->\n    <appender name=\"STDOUT\" class=\"ch.qos.logback.core.ConsoleAppender\">\n        <encoder>\n            <pattern>%d{yyyy-MM-dd'T'HH:mm:ss.SSSZ}[%thread] %-5level %logger{36} - %msg%n</pattern>\n        </encoder>\n    </appender>\n\n    <!-- Root logger configuration -->\n    <root level=\"DEBUG\">\n        <appender-ref ref=\"STDOUT\" />\n    </root>\n    \n    <!-- Specific logger configurations -->\n    <logger name=\"app\" level=\"DEBUG\" />\n    <logger name=\"database\" level=\"DEBUG\" />\n    <logger name=\"api\" level=\"DEBUG\" />\n</configuration> "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/javascript/Dockerfile",
    "content": "FROM node:24-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f\n\nWORKDIR /app\n\n# Create package.json and install pino with pino-pretty for better output formatting\nRUN echo '{\"name\": \"logging-example\", \"version\": \"1.0.0\", \"dependencies\": {\"pino\": \"^8.17.2\", \"pino-pretty\": \"^10.3.1\"}}' > package.json\nRUN npm install\n\nCOPY app.js .\n\nRUN chmod +x app.js\n\nCMD [\"node\", \"app.js\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/javascript/app.js",
    "content": "#!/usr/bin/env node\n\n// Pino's primary usage writes ndjson to `stdout`:\nconst pino = require('pino')()\n\n// However, if \"human readable\" output is desired,\n// `pino-pretty` can be provided as the destination\n// stream by uncommenting the following line in place\n// of the previous declaration:\n// const pino = require('pino')(require('pino-pretty')())\n\nlet counter = 0;\n\npino.info('Starting JavaScript basic logging example with Pino');\npino.info('Demonstrating various Pino logging features');\n\n// Create child loggers with different contexts\nconst appLogger = pino.child({ component: 'app' });\nconst dbLogger = pino.child({ component: 'database' });\nconst apiLogger = pino.child({ component: 'api', version: '1.0' });\n\n// Function to demonstrate various logging features\nfunction logMessage() {\n    counter++;\n    \n    // Cycle through different logging examples\n    const logType = counter % 12;\n    \n    switch (logType) {\n        case 0:\n            pino.info('hello world');\n            break;\n        case 1:\n            pino.error('this is at error level');\n            break;\n        case 2:\n            pino.info('the answer is %d', 42);\n            break;\n        case 3:\n            pino.info({ obj: 42 }, 'hello world');\n            break;\n        case 4:\n            pino.info({ obj: 42, counter: counter }, 'hello world with counter');\n            break;\n        case 5:\n            pino.info({ nested: { obj: 42, timestamp: new Date() } }, 'nested object');\n            break;\n        case 6:\n            pino.error(new Error('simulated error'));\n            break;\n        case 7:\n            appLogger.info('hello from app component!');\n            break;\n        case 8:\n            dbLogger.warn({ query: 'SELECT * FROM users', duration: 250 }, 'slow query detected');\n            break;\n        case 9:\n            apiLogger.info({ method: 'GET', path: '/api/users', status: 200 }, 'API request 
completed');\n            break;\n        case 10:\n            const tempChild = pino.child({ requestId: `req-${counter}` });\n            tempChild.debug('this is a debug statement via child');\n            break;\n        case 11:\n            pino.info(new Error('kaboom'), 'with', 'additional', 'context');\n            break;\n    }\n    \n    // Occasionally demonstrate level changes\n    if (counter % 20 === 0) {\n        pino.level = 'debug';\n        pino.debug('switched to debug level - this should now be visible');\n        setTimeout(() => {\n            pino.level = 'info';\n            pino.info('switched back to info level');\n        }, 500);\n    }\n    \n    // Occasionally demonstrate trace level\n    if (counter % 25 === 0) {\n        const originalLevel = pino.level;\n        pino.level = 'trace';\n        pino.trace('this is a trace statement');\n        pino.level = originalLevel;\n    }\n}\n\n// Log every 1 second infinitely\nsetInterval(logMessage, 1000); "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\n\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/php/Dockerfile",
    "content": "FROM php:8.5-cli-alpine@sha256:6ca76906d789edfac74e5f109c800b71e571bd313277133eaddc079733ee0b65\n\nWORKDIR /app\n\n# Install Composer\nRUN curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/local/bin --filename=composer\n\n# Create composer.json for Monolog\nRUN echo '{\"require\": {\"monolog/monolog\": \"^3.5\"}}' > composer.json\n\n# Install dependencies\nRUN composer install --no-dev --optimize-autoloader\n\nCOPY app.php .\n\nCMD [\"php\", \"app.php\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/php/app.php",
    "content": "<?php\n\nrequire_once 'vendor/autoload.php';\n\nuse Monolog\\Logger;\nuse Monolog\\Handler\\StreamHandler;\nuse Exception;\n\n// Create the main logger\n$logger = new Logger('app');\n\n// Create a console handler that writes to stdout\n$consoleHandler = new StreamHandler('php://stdout', Logger::DEBUG);\n\n// Push the handler onto the logger\n$logger->pushHandler($consoleHandler);\n\n// Add a processor to inject an 'environment' extra field into every log entry\n$logger->pushProcessor(function ($record) {\n    $record['extra']['environment'] = 'production'; // You can set any value or use getenv() etc.\n    return $record;\n});\n\n// Create component-specific loggers if you want\n$appLogger = $logger->withName('app');\n$dbLogger = $logger->withName('database');\n$apiLogger = $logger->withName('api');\n\n$counter = 0;\n\n$logger->info(\"Starting PHP basic logging example with Monolog\");\n$logger->info(\"Demonstrating Monolog structured logging features\");\n\nwhile (true) {\n    $counter++;\n    $logType = $counter % 6;\n\n    switch ($logType) {\n        case 0:\n            $logger->info(\"hello world\");\n            break;\n        case 1:\n            $logger->error(\"this is at error level\");\n            break;\n        case 2:\n            $logger->info(\"hello world with counter\", [\n                'counter' => $counter,\n                'obj' => 42\n            ]);\n            break;\n        case 3:\n            $dbLogger->warning(\"slow query detected\", [\n                'query' => 'SELECT * FROM users',\n                'duration' => 250\n            ]);\n            break;\n        case 4:\n            $apiLogger->info(\"API request completed\", [\n                'method' => 'GET',\n                'path' => '/api/users',\n                'status' => 200\n            ]);\n            break;\n        case 5:\n            // Fatal error with stack trace\n            $fatalException = new Exception(\"Critical system failure - 
database connection lost\");\n            $logger->emergency(\"System encountered a fatal error\", [\n                'exception' => $fatalException,\n                'error_code' => 'DB_CONNECTION_LOST',\n                'affected_service' => 'user_authentication'\n            ]);\n            break;\n    }\n\n    // Wait 1 second before next log\n    sleep(1);\n}\n"
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/python/Dockerfile",
    "content": "FROM python:3.12-slim@sha256:46cb7cc2877e60fbd5e21a9ae6115c30ace7a077b9f8772da879e4590c18c2e3\n\nWORKDIR /app\n\nCOPY app.py .\n\nRUN chmod +x app.py\n\nCMD [\"python\", \"app.py\"] "
  },
  {
    "path": "app-instrumentation/logging/popular-logging-frameworks/python/app.py",
    "content": "#!/usr/bin/env python3\n\nimport logging\nimport time\n\n# Configure logging\nlogging.basicConfig(\n    level=logging.DEBUG,\n    format= '%(asctime)s - %(filename)s:%(lineno)d - %(levelname)s - %(message)s',\n    handlers=[\n        logging.StreamHandler(),\n    ]\n)\n\nlogger = logging.getLogger(__name__)\n\ndef main():\n    counter = 0\n    \n    logger.info(\"Starting Python basic logging example\")\n    logger.info(\"Demonstrating Python logging module\")\n    \n    # Infinite loop with different log levels\n    while True:\n        counter += 1\n        \n        # Cycle through different log levels\n        log_type = counter % 5\n        \n        if log_type == 0:\n            logger.debug(f\"Basic debug message, counter: {counter}\")\n        elif log_type == 1:\n            logger.info(f\"Information message, counter: {counter}\")\n        elif log_type == 2:\n            logger.warning(f\"Warning message, counter: {counter}\")\n        elif log_type == 3:\n            logger.error(f\"Error message, counter: {counter}\")\n        elif log_type == 4:\n            logger.critical(f\"Critical message, counter: {counter}\")\n        \n        # Wait 1 second before next log\n        time.sleep(1)\n\nif __name__ == \"__main__\":\n    main() "
  },
  {
    "path": "aws-firehose-logs/README.md",
    "content": "# AWS Kinesis Data Firehose to Loki — no AWS account required\n\nDemonstrates `loki.source.awsfirehose`, the HTTP receiver that accepts AWS Kinesis Data Firehose's documented delivery format. **You don't need an AWS account or any AWS SDKs** — Firehose is just an HTTPS POST in a known JSON shape, and this scenario emulates the producer with a small Python container.\n\nThis is the same producer-emulator pattern used by [`syslog/`](../syslog/) and [`gelf-log-ingestion/`](../gelf-log-ingestion/).\n\n## Architecture\n\n- **`alloy`** runs `loki.source.awsfirehose` on port `:9999`, listening at `/awsfirehose/api/v1/push`\n- **`firehose-sender`** (Python) generates synthetic CloudWatch-style log batches every 5 seconds and POSTs them to Alloy in the documented Firehose delivery format (records array with gzip-compressed, base64-encoded data fields)\n- **`loki`** + **`grafana`** for storage and visualization, with the Loki datasource auto-provisioned\n\nThe sender alternates between three log streams:\n1. VPC flow logs on `eni-0abc1234-all` (channel `/aws/vpc/flowlogs`)\n2. VPC flow logs on `eni-0def5678-all` (same channel, different stream)\n3. Lambda invocation logs on `[$LATEST]abc` (channel `/aws/lambda/checkout-service`)\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root\n./run-example.sh aws-firehose-logs\n```\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login)\n- **Alloy UI**: http://localhost:12345 — confirm components healthy, use livedebugging to watch records flow through\n- **Firehose endpoint**: http://localhost:9999/awsfirehose/api/v1/push (POSTable from your laptop)\n- **Loki API**: http://localhost:3100\n\n## Trying it out\n\nWithin ~10 seconds of bring-up, the sender starts producing batches. 
In Grafana Explore on Loki:\n\n```logql\n# All Firehose-delivered logs\n{log_group=~\".+\"}\n\n# Just VPC flow logs\n{log_group=\"/aws/vpc/flowlogs\"}\n\n# A specific ENI\n{log_group=\"/aws/vpc/flowlogs\", log_stream=\"eni-0abc1234-all\"}\n\n# Lambda invocations\n{log_group=\"/aws/lambda/checkout-service\"}\n\n# Just the data records (vs control messages)\n{msg_type=\"DATA_MESSAGE\"}\n```\n\nThe promoted labels `log_group`, `log_stream`, and `msg_type` come from the CloudWatch envelope — `loki.source.awsfirehose` automatically attaches `__aws_cw_log_group`, `__aws_cw_log_stream`, and `__aws_cw_msg_type` discovery labels when the records contain a CloudWatch subscription filter envelope; this scenario's `loki.relabel` rules, passed to the source through its `relabel_rules` argument, promote them.\n\n## Send your own records\n\nThe receiver is just an HTTP endpoint. From your laptop:\n\n```bash\ncurl -X POST http://localhost:9999/awsfirehose/api/v1/push \\\n  -H 'Content-Type: application/json' \\\n  -d '{\n    \"requestId\": \"test-1\",\n    \"timestamp\": 1234567890,\n    \"records\": [\n      {\"data\": \"'$(printf '{\"messageType\":\"DATA_MESSAGE\",\"logGroup\":\"/manual\",\"logStream\":\"laptop\",\"logEvents\":[{\"id\":\"x\",\"timestamp\":1234567890000,\"message\":\"hi from curl\"}]}' | gzip | base64 | tr -d '\\n')'\"}\n    ]\n  }'\n```\n\nThis adds a one-off entry visible at `{log_group=\"/manual\"}`. The `tr -d '\\n'` strips the line wrapping that GNU `base64` adds by default, which would otherwise put raw newlines inside the JSON string.\n\n## Differences from real Firehose\n\nThis scenario emulates the wire format. A real Firehose delivery stream has a few additional concerns the demo doesn't cover:\n\n- **Authentication**: real Firehose includes an `X-Amz-Firehose-Access-Key` header that the receiver validates. `loki.source.awsfirehose` supports this via the `access_key` argument; we leave it disabled in the demo for ease of trying it from curl. In production, **always** set an access key.\n- **TLS**: real Firehose requires HTTPS. Add `tls { cert_file = ..., key_file = ... 
}` to the Alloy `http` block in production.\n- **Retry semantics**: real Firehose retries on 5xx and partial successes. The Python sender here just logs failures and moves on.\n- **Custom labels via header**: real Firehose can set `X-Amz-Firehose-Common-Attributes` (label names prefixed `lbl_`). Try adding this to your own producer to see additional discovery labels appear.\n\n## Stopping\n\n```bash\ndocker compose down -v\n```\n"
  },
  {
    "path": "aws-firehose-logs/config.alloy",
    "content": "// AWS Kinesis Data Firehose → Loki, no AWS account required.\n//\n// `loki.source.awsfirehose` is just an HTTP endpoint that accepts\n// Firehose's documented delivery format (a `records` array of base64\n// blobs). A small Python sender container in this scenario fakes the\n// producer side, posting CloudWatch-style log batches every few\n// seconds. The component auto-detects the CloudWatch envelope and\n// attaches the `__aws_cw_*` discovery labels we relabel below.\n\nlivedebugging { enabled = true }\n\n// CloudWatch envelope discovery labels are exposed by\n// `loki.source.awsfirehose` only via its `relabel_rules` argument\n// (same pattern as `loki.source.journal`). They are NOT attached to\n// outgoing entries by default — running them through a standalone\n// `loki.relabel` after the source would see no `__aws_cw_*` labels.\nloki.relabel \"firehose\" {\n\tforward_to = []\n\n\trule {\n\t\tsource_labels = [\"__aws_cw_log_group\"]\n\t\ttarget_label  = \"log_group\"\n\t}\n\trule {\n\t\tsource_labels = [\"__aws_cw_log_stream\"]\n\t\ttarget_label  = \"log_stream\"\n\t}\n\trule {\n\t\tsource_labels = [\"__aws_cw_msg_type\"]\n\t\ttarget_label  = \"msg_type\"\n\t}\n}\n\nloki.source.awsfirehose \"fake\" {\n\thttp {\n\t\tlisten_address = \"0.0.0.0\"\n\t\tlisten_port    = 9999\n\t}\n\trelabel_rules = loki.relabel.firehose.rules\n\tforward_to    = [loki.write.local.receiver]\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "aws-firehose-logs/docker-compose.yml",
    "content": "services:\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100/tcp\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - \"12345:12345\"\n      - \"9999:9999\"\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  firehose-sender:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./firehose_sender.py:/firehose_sender.py:ro\n    environment:\n      - ALLOY_FIREHOSE_URL=http://alloy:9999/awsfirehose/api/v1/push\n      - INTERVAL_SECONDS=5\n      - EVENTS_PER_BATCH=8\n    depends_on:\n      - alloy\n    command: [\"python3\", \"-u\", \"/firehose_sender.py\"]\n    restart: unless-stopped\n"
  },
  {
    "path": "aws-firehose-logs/firehose_sender.py",
    "content": "\"\"\"Fake AWS Kinesis Firehose producer for the aws-firehose-logs scenario.\n\nGenerates synthetic VPC-flow-style log batches, wraps them in the\nCloudWatch logs subscription envelope (so Alloy attaches the\n`__aws_cw_*` discovery labels), then posts them to Alloy's\n`loki.source.awsfirehose` HTTP endpoint in the documented Firehose\ndelivery format.\n\nNo AWS account or SDK required — this is just an HTTP client.\n\"\"\"\n\nimport base64\nimport gzip\nimport json\nimport os\nimport random\nimport sys\nimport time\nimport uuid\nfrom datetime import datetime\nfrom urllib import request as urlrequest\n\nENDPOINT = os.environ.get(\n    \"ALLOY_FIREHOSE_URL\",\n    \"http://alloy:9999/awsfirehose/api/v1/push\",\n)\nINTERVAL = float(os.environ.get(\"INTERVAL_SECONDS\", \"5\"))\nEVENTS_PER_BATCH = int(os.environ.get(\"EVENTS_PER_BATCH\", \"8\"))\n\nLOG_GROUPS = [\n    (\"/aws/vpc/flowlogs\", \"eni-0abc1234-all\"),\n    (\"/aws/vpc/flowlogs\", \"eni-0def5678-all\"),\n    (\"/aws/lambda/checkout-service\", \"2026/04/28/[$LATEST]abc\"),\n]\n\nACTIONS = [\"ACCEPT\", \"REJECT\"]\n\n\ndef vpc_flow_line() -> str:\n    src = f\"10.0.{random.randint(0,255)}.{random.randint(1,254)}\"\n    dst = f\"10.0.{random.randint(0,255)}.{random.randint(1,254)}\"\n    bytes_ = random.randint(40, 65000)\n    pkts = random.randint(1, 50)\n    action = random.choices(ACTIONS, weights=[9, 1])[0]\n    now = int(time.time())\n    return f\"2 123456789012 eni-0abc1234 {src} {dst} 12345 443 6 {pkts} {bytes_} {now-30} {now} {action} OK\"\n\n\ndef lambda_log_line() -> str:\n    levels = [\"INFO\", \"INFO\", \"INFO\", \"WARN\", \"ERROR\"]\n    level = random.choice(levels)\n    request_id = str(uuid.uuid4())\n    return f\"{datetime.utcnow().isoformat()}Z {level} RequestId: {request_id} processing checkout\"\n\n\ndef cloudwatch_envelope(log_group: str, log_stream: str, line_fn) -> dict:\n    \"\"\"Build a CloudWatch logs subscription delivery envelope.\n\n    See: 
https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/SubscriptionFilters.html\n    \"\"\"\n    return {\n        \"messageType\": \"DATA_MESSAGE\",\n        \"owner\": \"123456789012\",\n        \"logGroup\": log_group,\n        \"logStream\": log_stream,\n        \"subscriptionFilters\": [\"AlloyDemo\"],\n        \"logEvents\": [\n            {\n                \"id\": str(uuid.uuid4()),\n                \"timestamp\": int(time.time() * 1000),\n                \"message\": line_fn(),\n            }\n            for _ in range(EVENTS_PER_BATCH)\n        ],\n    }\n\n\ndef encode_record(envelope: dict) -> dict:\n    \"\"\"CloudWatch subscription delivery is gzip-compressed JSON, then\n    base64-encoded inside the Firehose record `data` field. See:\n    https://docs.aws.amazon.com/firehose/latest/dev/httpdeliveryrequestresponse.html\n    \"\"\"\n    raw = json.dumps(envelope).encode()\n    compressed = gzip.compress(raw)\n    return {\"data\": base64.b64encode(compressed).decode()}\n\n\ndef send_batch() -> None:\n    log_group, log_stream = random.choice(LOG_GROUPS)\n    line_fn = lambda_log_line if \"lambda\" in log_group else vpc_flow_line\n    envelope = cloudwatch_envelope(log_group, log_stream, line_fn)\n\n    body = {\n        \"requestId\": str(uuid.uuid4()),\n        \"timestamp\": int(time.time() * 1000),\n        \"records\": [encode_record(envelope)],\n    }\n    req = urlrequest.Request(\n        ENDPOINT,\n        data=json.dumps(body).encode(),\n        headers={\n            \"Content-Type\": \"application/json\",\n            \"X-Amz-Firehose-Request-Id\": body[\"requestId\"],\n        },\n    )\n    try:\n        with urlrequest.urlopen(req, timeout=5) as resp:\n            print(f\"POST {log_group}/{log_stream}: {resp.status}\", flush=True)\n    except Exception as e:\n        print(f\"POST {log_group}/{log_stream}: FAILED {e}\", flush=True)\n\n\ndef main() -> int:\n    # Wait briefly so Alloy's HTTP listener is up before the first POST.\n    
time.sleep(3)\n    while True:\n        send_batch()\n        time.sleep(INTERVAL)\n\n\nif __name__ == \"__main__\":\n    sys.exit(main() or 0)\n"
  },
  {
    "path": "aws-firehose-logs/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n    - from: 2020-05-15\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 5m\n"
  },
  {
    "path": "blackbox-probing/README.md",
    "content": "# Blackbox Probing\n\nThis scenario demonstrates **synthetic monitoring** and **HTTP endpoint probing** using Grafana Alloy's `prometheus.exporter.blackbox` component.\n\n## Overview\n\nBlackbox probing (also known as synthetic monitoring) tests the availability and responsiveness of services from an external perspective. Instead of instrumenting applications to export metrics, the blackbox exporter actively probes endpoints and reports whether they are reachable, how long they take to respond, and other HTTP-level details.\n\nThis scenario probes two targets:\n- **nginx** — a simple web server running on port 80\n- **prometheus** — the Prometheus server running on port 9090\n\n## Architecture\n\n```\nAlloy (blackbox exporter) --probes--> nginx:80\n                          --probes--> prometheus:9090\n                          --writes--> Prometheus (remote write)\nGrafana --queries--> Prometheus\n```\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root\n./run-example.sh blackbox-probing\n```\n\n## Accessing the Stack\n\n| Service    | URL                        |\n|------------|----------------------------|\n| Grafana    | http://localhost:3000       |\n| Alloy UI   | http://localhost:12345      |\n| Prometheus | http://localhost:9090       |\n| nginx      | http://localhost:8080       |\n\n## Key Metrics\n\nOnce running, you can query these metrics in Grafana or Prometheus:\n\n- `probe_success` — 1 if the probe succeeded, 0 if it failed\n- `probe_duration_seconds` — total time the probe took\n- `probe_http_status_code` — HTTP status code returned by the target\n- `probe_http_duration_seconds` — duration of each phase of the HTTP request (resolve, connect, tls, processing, transfer)\n\n## Stopping\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "blackbox-probing/config.alloy",
    "content": "// --- Remote Write to Prometheus ---\nprometheus.remote_write \"remote\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n\n// --- Blackbox Exporter Configuration ---\nprometheus.exporter.blackbox \"default\" {\n\tconfig = \"{ modules: { http_2xx: { prober: http, timeout: 5s } } }\"\n\n\ttarget {\n\t\tname    = \"nginx\"\n\t\taddress = \"http://nginx:80\"\n\t\tmodule  = \"http_2xx\"\n\t}\n\n\ttarget {\n\t\tname    = \"prometheus\"\n\t\taddress = \"http://prometheus:9090\"\n\t\tmodule  = \"http_2xx\"\n\t}\n}\n\n// --- Blackbox Scrape Configuration ---\nprometheus.scrape \"blackbox_targets\" {\n\tscrape_interval = \"15s\"\n\ttargets         = prometheus.exporter.blackbox.default.targets\n\tforward_to      = [prometheus.remote_write.remote.receiver]\n}\n\n// --- Enable Live Debugging ---\nlivedebugging {\n\tenabled = true\n}\n"
  },
  {
    "path": "blackbox-probing/docker-compose.coda.yml",
    "content": "services:\n  nginx:\n    image: nginx:latest@sha256:1881968aff6f7cdcc4b888c00a11f4ce241ad7ec957e0cb4a9e19e93a3ff87ea\n    ports:\n      - 8080:80/tcp\n"
  },
  {
    "path": "blackbox-probing/docker-compose.yml",
    "content": "\nservices:\n\n  nginx:\n    image: nginx:latest@sha256:1881968aff6f7cdcc4b888c00a11f4ce241ad7ec957e0cb4a9e19e93a3ff87ea\n    ports:\n      - 8080:80/tcp\n\n  prometheus:\n     image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n     command:\n       - --web.enable-remote-write-receiver\n       - --config.file=/etc/prometheus/prometheus.yml\n     ports:\n      - 9090:9090/tcp\n     volumes:\n        - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Prometheus\n           type: prometheus\n           orgId: 1\n           url: http://prometheus:9090\n           basicAuth: false\n           isDefault: true\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n"
  },
  {
    "path": "blackbox-probing/prom-config.yaml",
    "content": "# Minimal Prometheus configuration\nglobal:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "cloudwatch-metrics/README.md",
    "content": "# AWS CloudWatch metrics — no AWS account required\n\nDemonstrates `prometheus.exporter.cloudwatch`, Alloy's built-in wrapper around [YACE](https://github.com/nerdswords/yet-another-cloudwatch-exporter). **No real AWS account or live infrastructure needed** — [LocalStack](https://localstack.cloud/) emulates the CloudWatch and STS APIs locally, and a small Python seeder container plants synthetic `EC2/CPUUtilization` data points every 30 s.\n\nThis is the same offline-reproducibility pattern used by [`aws-firehose-logs/`](../aws-firehose-logs/).\n\n## Architecture\n\n```\nmetric-seeder (Python)\n  └── put_metric_data → LocalStack CloudWatch (:4566)\n                              ↑\n                        Alloy prometheus.exporter.cloudwatch\n                              ↓\n                        prometheus.scrape → prometheus.remote_write\n                              ↓\n                        Prometheus (:9090)\n                              ↑\n                        Grafana (:3000)\n```\n\n- **`localstack`** — emulates `cloudwatch` + `sts` APIs; no AWS credentials required\n- **`metric-seeder`** — pushes `CPUUtilization` (random 5–85 %) for `i-1234567890abcdef0` every 30 s\n- **`alloy`** — runs `prometheus.exporter.cloudwatch` pointed at LocalStack via `AWS_ENDPOINT_URL`; scrapes every 60 s and remote-writes to Prometheus\n- **`prometheus`** — stores and serves metrics\n- **`grafana`** — visualises with Prometheus datasource auto-provisioned\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root\n./run-example.sh cloudwatch-metrics\n```\n\nLocalStack and the metric-seeder start first; Alloy waits for LocalStack to be healthy before scraping.\n\n## Accessing\n\n| Service | URL |\n|---|---|\n| **Grafana** | http://localhost:3000 (no login) |\n| **Prometheus** | http://localhost:9090 |\n| **Alloy UI** | http://localhost:12345 |\n| **LocalStack** | http://localhost:4566/_localstack/health |\n\n## Trying 
it out\n\nWithin ~90 s of bring-up (LocalStack ready → seeder plants first points → Alloy scrapes → Prometheus ingests), metrics appear in Prometheus.\n\nOpen **Grafana → Explore → Prometheus** and run:\n\n```promql\n# CPU utilisation for the seeded EC2 instance\naws_ec2_cpuutilization_average\n\n# Maximum CPU in the last 5 m\naws_ec2_cpuutilization_maximum\n\n# All CloudWatch-sourced metrics\n{job=\"cloudwatch/localstack/ec2_cpu\"}\n```\n\nOr query Prometheus directly:\n\n```bash\ncurl -sG 'http://localhost:9090/api/v1/query' \\\n  --data-urlencode 'query=aws_ec2_cpuutilization_average' | jq .\n```\n\nIn the **Alloy UI** (http://localhost:12345), navigate to **Graph** to see the pipeline:\n`prometheus.exporter.cloudwatch.localstack` → `prometheus.scrape.cloudwatch` → `prometheus.remote_write.local`\n\nUse **livedebugging** on `prometheus.scrape.cloudwatch` to watch metrics flow through in real time.\n\n## Adapting for real AWS\n\nTo point this scenario at real CloudWatch instead of LocalStack:\n\n1. Remove the `localstack` and `metric-seeder` services from `docker-compose.yml`\n2. Remove the `AWS_ENDPOINT_URL` environment variable from the `alloy` service\n3. Set real credentials:\n   ```yaml\n   environment:\n     - AWS_ACCESS_KEY_ID=<your-key>\n     - AWS_SECRET_ACCESS_KEY=<your-secret>\n     - AWS_DEFAULT_REGION=us-east-1\n   ```\n4. Update the `dimensions` in `config.alloy` to match a real `InstanceId` in your account\n\nThe `config.alloy` static job configuration and Alloy pipeline are identical for both LocalStack and real AWS.\n"
  },
  {
    "path": "cloudwatch-metrics/config.alloy",
    "content": "// AWS CloudWatch metrics → Prometheus — no AWS account required.\n//\n// Uses LocalStack to emulate CloudWatch locally. A companion `metric-seeder`\n// container pushes synthetic EC2/CPUUtilization data points every 30 s so\n// Alloy has real data to scrape immediately on start-up.\n//\n// `prometheus.exporter.cloudwatch` wraps YACE and honours AWS SDK v2 endpoint\n// overrides; we point it at LocalStack via AWS_ENDPOINT_URL in docker-compose.\n\nlivedebugging { enabled = true }\n\n// Static job: no live EC2 discovery needed — we target the exact InstanceId\n// that the metric-seeder plants in LocalStack CloudWatch.\nprometheus.exporter.cloudwatch \"localstack\" {\n\tsts_region = \"us-east-1\"\n\n\tstatic \"ec2_cpu\" {\n\t\tregions   = [\"us-east-1\"]\n\t\tnamespace = \"AWS/EC2\"\n\n\t\tdimensions = {\n\t\t\t\"InstanceId\" = \"i-1234567890abcdef0\",\n\t\t}\n\n\t\tmetric {\n\t\t\tname       = \"CPUUtilization\"\n\t\t\tstatistics = [\"Average\", \"Maximum\"]\n\t\t\tperiod     = \"1m\"\n\t\t}\n\t}\n}\n\n// Scrape the exporter every 60 s — CloudWatch data points are coarse-grained\n// so there is no benefit in scraping more frequently.\nprometheus.scrape \"cloudwatch\" {\n\ttargets         = prometheus.exporter.cloudwatch.localstack.targets\n\tforward_to      = [prometheus.remote_write.local.receiver]\n\tscrape_interval = \"60s\"\n}\n\n// Remote-write to the local Prometheus instance.\nprometheus.remote_write \"local\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "cloudwatch-metrics/docker-compose.yml",
    "content": "services:\n\n  # LocalStack emulates the CloudWatch + STS APIs locally.\n  # No real AWS account or credentials needed.\n  localstack:\n    image: localstack/localstack:${LOCALSTACK_VERSION:-4.4.0}\n    ports:\n      - \"4566:4566\"\n    environment:\n      - SERVICES=cloudwatch,sts\n      - DEFAULT_REGION=us-east-1\n    healthcheck:\n      test: [\"CMD\", \"curl\", \"-sf\", \"http://localhost:4566/_localstack/health\"]\n      interval: 5s\n      timeout: 5s\n      retries: 15\n\n  # Pushes synthetic EC2/CPUUtilization data into LocalStack every 30 s.\n  metric-seeder:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./seed-metrics.py:/seed-metrics.py:ro\n    environment:\n      - AWS_ACCESS_KEY_ID=test\n      - AWS_SECRET_ACCESS_KEY=test\n      - AWS_DEFAULT_REGION=us-east-1\n      - AWS_ENDPOINT_URL=http://localstack:4566\n      - INTERVAL_SECONDS=30\n    command: >\n      sh -c \"pip install boto3 --quiet && python -u /seed-metrics.py\"\n    depends_on:\n      localstack:\n        condition: service_healthy\n    restart: unless-stopped\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: 
false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - \"12345:12345\"\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    environment:\n      # Point AWS SDK v2 at LocalStack instead of real AWS endpoints.\n      - AWS_ACCESS_KEY_ID=test\n      - AWS_SECRET_ACCESS_KEY=test\n      - AWS_DEFAULT_REGION=us-east-1\n      - AWS_ENDPOINT_URL=http://localstack:4566\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      localstack:\n        condition: service_healthy\n      prometheus:\n        condition: service_started\n"
  },
  {
    "path": "cloudwatch-metrics/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "cloudwatch-metrics/seed-metrics.py",
    "content": "\"\"\"\nCloudWatch metric seeder for LocalStack.\n\nPushes synthetic EC2 CPUUtilization data points into LocalStack every\nINTERVAL_SECONDS so that prometheus.exporter.cloudwatch has something\nto scrape immediately without a real AWS account.\n\"\"\"\nimport os\nimport random\nimport time\n\nimport boto3\nfrom botocore.config import Config\n\nENDPOINT    = os.getenv(\"AWS_ENDPOINT_URL\", \"http://localstack:4566\")\nREGION      = os.getenv(\"AWS_DEFAULT_REGION\", \"us-east-1\")\nINTERVAL    = int(os.getenv(\"INTERVAL_SECONDS\", \"30\"))\nINSTANCE_ID = \"i-1234567890abcdef0\"\n\ncw = boto3.client(\n    \"cloudwatch\",\n    endpoint_url=ENDPOINT,\n    region_name=REGION,\n    aws_access_key_id=os.getenv(\"AWS_ACCESS_KEY_ID\", \"test\"),\n    aws_secret_access_key=os.getenv(\"AWS_SECRET_ACCESS_KEY\", \"test\"),\n    config=Config(retries={\"max_attempts\": 5}),\n)\n\nprint(f\"Seeder started — pushing to {ENDPOINT} every {INTERVAL}s\", flush=True)\n\nwhile True:\n    cpu = round(random.uniform(5.0, 85.0), 2)\n    cw.put_metric_data(\n        Namespace=\"AWS/EC2\",\n        MetricData=[\n            {\n                \"MetricName\": \"CPUUtilization\",\n                \"Dimensions\": [{\"Name\": \"InstanceId\", \"Value\": INSTANCE_ID}],\n                \"Value\": cpu,\n                \"Unit\": \"Percent\",\n            }\n        ],\n    )\n    print(f\"  → CPUUtilization={cpu}%  instance={INSTANCE_ID}\", flush=True)\n    time.sleep(INTERVAL)\n"
  },
  {
    "path": "coda",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\n# On Coda VMs the repo lives at /opt/alloy-scenarios and this script is\n# symlinked from /usr/local/bin/coda. For local dev use the script's own\n# directory (works when invoked directly, not via symlink).\nif [[ -d /opt/alloy-scenarios ]]; then\n  REPO_DIR=\"/opt/alloy-scenarios\"\nelse\n  REPO_DIR=\"$(cd \"$(dirname \"$0\")\" && pwd)\"\nfi\nENV_FILE=\"${REPO_DIR}/image-versions.env\"\nSCENARIO_FILE=\"/etc/coda/scenario\"\n\nusage() {\n  cat <<EOF\nUsage: coda <command> [scenario]\n\nCommands:\n  start  [scenario]   Start app containers for a scenario\n  stop   [scenario]   Stop app containers for a scenario\n  status [scenario]   Show container status for a scenario\n  list                List all available scenarios\n\nIf no scenario is given, reads from ${SCENARIO_FILE}.\nEOF\n  exit 1\n}\n\nresolve_scenario() {\n  local scenario=\"${1:-}\"\n  if [[ -z \"$scenario\" ]]; then\n    if [[ -f \"$SCENARIO_FILE\" ]]; then\n      scenario=\"$(cat \"$SCENARIO_FILE\")\"\n    else\n      echo \"Error: no scenario specified and ${SCENARIO_FILE} not found\" >&2\n      exit 1\n    fi\n  fi\n  echo \"$scenario\"\n}\n\ncompose_args() {\n  local scenario=\"$1\"\n  local dir=\"${REPO_DIR}/${scenario}\"\n  local compose_file=\"${dir}/docker-compose.coda.yml\"\n\n  if [[ ! 
-f \"$compose_file\" ]]; then\n    echo \"Error: ${compose_file} not found\" >&2\n    exit 1\n  fi\n\n  # Sanitize project name: replace / with -\n  local project_name=\"coda-${scenario//\\//-}\"\n\n  echo \"-f ${compose_file} --env-file ${ENV_FILE} -p ${project_name}\"\n}\n\ncmd_start() {\n  local scenario\n  scenario=\"$(resolve_scenario \"${1:-}\")\"\n  local args\n  args=\"$(compose_args \"$scenario\")\"\n  echo \"Starting scenario: ${scenario}\"\n  eval docker compose $args up -d --build\n}\n\ncmd_stop() {\n  local scenario\n  scenario=\"$(resolve_scenario \"${1:-}\")\"\n  local args\n  args=\"$(compose_args \"$scenario\")\"\n  echo \"Stopping scenario: ${scenario}\"\n  eval docker compose $args down\n}\n\ncmd_status() {\n  local scenario\n  scenario=\"$(resolve_scenario \"${1:-}\")\"\n  local args\n  args=\"$(compose_args \"$scenario\")\"\n  eval docker compose $args ps\n}\n\ncmd_list() {\n  echo \"Available scenarios:\"\n  find \"$REPO_DIR\" -name docker-compose.coda.yml 2>/dev/null \\\n    | sed \"s|^${REPO_DIR}/||; s|/docker-compose.coda.yml||\" \\\n    | sort \\\n    | while read -r s; do echo \"  $s\"; done\n}\n\n[[ $# -lt 1 ]] && usage\n\ncommand=\"$1\"\nshift\n\ncase \"$command\" in\n  start)  cmd_start \"$@\" ;;\n  stop)   cmd_stop \"$@\" ;;\n  status) cmd_status \"$@\" ;;\n  list)   cmd_list ;;\n  *)      usage ;;\nesac\n"
  },
  {
    "path": "continuous-profiling/README.md",
    "content": "# Continuous Profiling\n\nThis scenario demonstrates continuous profiling of a Go application using Grafana Alloy's `pyroscope.scrape` and `pyroscope.write` components, with Grafana Pyroscope as the profiling backend.\n\n## Overview\n\nThe example includes:\n- **demo-app** -- A Go application that performs CPU-intensive and memory-intensive work, exposing standard pprof endpoints on port 6060\n- **alloy** -- Grafana Alloy configured to scrape pprof profiles from the demo app and forward them to Pyroscope\n- **pyroscope** -- Grafana Pyroscope for storing and querying profiling data\n- **grafana** -- Grafana with the Pyroscope datasource pre-configured for visualizing profiles\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd continuous-profiling\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n\n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh continuous-profiling\n   ```\n\n4. Access Grafana at http://localhost:3000\n\n## What to Expect\n\nAfter starting the scenario, Alloy will scrape the following profile types from the demo app every 15 seconds:\n\n- **CPU** -- Identifies functions consuming the most CPU time (the `cpuIntensive` goroutine)\n- **Memory (heap)** -- Shows memory allocation patterns (the `memoryIntensive` goroutine allocating 1MB chunks)\n- **Goroutine** -- Displays active goroutines and their stack traces\n- **Mutex** -- Captures mutex contention profiles\n- **Block** -- Captures blocking operation profiles\n\nTo view profiles:\n\n1. Open Grafana at http://localhost:3000\n2. Navigate to **Explore**\n3. Select the **Pyroscope** datasource\n4. Choose a profile type (e.g., `process_cpu`) and the `demo-app` service\n5. 
You should see flame graphs showing where the application spends its time and allocates memory\n\n## Architecture\n\n```\n┌───────────┐     scrape pprof     ┌───────────┐     push profiles     ┌────────────┐\n│  demo-app │◀─────────────────────│   Alloy   │─────────────────────▶│ Pyroscope  │\n│  :6060    │     /debug/pprof/*   │  :12345   │                      │   :4040    │\n└───────────┘                      └───────────┘                      └─────┬──────┘\n                                                                            │\n                                                                            ▼\n                                                                      ┌──────────┐\n                                                                      │ Grafana  │\n                                                                      │  :3000   │\n                                                                      └──────────┘\n```\n\n## Useful Links\n\n- Alloy UI: http://localhost:12345 -- Inspect the Alloy pipeline and component status\n- Grafana: http://localhost:3000 -- Explore profiles via the Pyroscope datasource\n- Pyroscope: http://localhost:4040 -- Direct access to the Pyroscope UI\n- Demo app pprof index: http://localhost:6060/debug/pprof/ -- Raw pprof endpoints\n"
  },
  {
    "path": "continuous-profiling/app/go.mod",
    "content": "module demo\n\ngo 1.23\n"
  },
  {
    "path": "continuous-profiling/app/main.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"net/http\"\n\t_ \"net/http/pprof\"\n\t\"time\"\n)\n\nfunc cpuIntensive() {\n\tfor {\n\t\tsum := 0\n\t\tfor i := 0; i < 1000000; i++ {\n\t\t\tsum += rand.Intn(100)\n\t\t}\n\t\ttime.Sleep(100 * time.Millisecond)\n\t}\n}\n\nfunc memoryIntensive() {\n\tvar data [][]byte\n\tfor {\n\t\tchunk := make([]byte, 1024*1024) // 1MB\n\t\tfor i := range chunk {\n\t\t\tchunk[i] = byte(rand.Intn(256))\n\t\t}\n\t\tdata = append(data, chunk)\n\t\tif len(data) > 50 {\n\t\t\tdata = data[1:]\n\t\t}\n\t\ttime.Sleep(500 * time.Millisecond)\n\t}\n}\n\nfunc main() {\n\tgo cpuIntensive()\n\tgo memoryIntensive()\n\n\tfmt.Println(\"Demo app running on :6060 with pprof endpoints\")\n\thttp.ListenAndServe(\":6060\", nil)\n}\n"
  },
  {
    "path": "continuous-profiling/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\n// Scrape pprof profiles from the demo Go application\npyroscope.scrape \"default\" {\n\ttargets = [\n\t\t{\"__address__\" = \"demo-app:6060\", \"service_name\" = \"demo-app\"},\n\t]\n\n\tscrape_interval = \"15s\"\n\n\tprofiling_config {\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.memory {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\t}\n\n\tforward_to = [pyroscope.write.default.receiver]\n}\n\npyroscope.write \"default\" {\n\tendpoint {\n\t\turl = \"http://pyroscope:4040\"\n\t}\n}\n"
  },
  {
    "path": "continuous-profiling/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    image: golang:1.26@sha256:2981696eed011d747340d7252620932677929cce7d2d539602f56a8d7e9b660b\n    ports:\n      - 6060:6060\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: go run main.go\n"
  },
  {
    "path": "continuous-profiling/docker-compose.yml",
    "content": "\nservices:\n  # Demo Go application with pprof endpoints\n  demo-app:\n    image: golang:1.26@sha256:2981696eed011d747340d7252620932677929cce7d2d539602f56a8d7e9b660b\n    ports:\n      - 6060:6060\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: go run main.go\n\n  # Pyroscope for continuous profiling storage and visualization\n  pyroscope:\n    image: grafana/pyroscope:2.0.1@sha256:704889ae04768d982a0a71935bb054948993ddc3fe80234611d20877ba8be4c9\n    ports:\n      - 4040:4040\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Pyroscope\n          type: grafana-pyroscope-datasource\n          access: proxy\n          orgId: 1\n          url: http://pyroscope:4040\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n    depends_on:\n      - pyroscope\n\n  # Alloy for telemetry pipeline\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - demo-app\n      - pyroscope\n"
  },
  {
    "path": "docker-monitoring/README.md",
    "content": "# Docker Monitoring with Grafana Alloy\n\nThis example demonstrates how to monitor Docker containers using Grafana Alloy. Alloy collects container metrics through its built-in cAdvisor exporter and remote-writes them to Prometheus, and it collects container logs from the Docker socket and pushes them to Loki.\n\n## Prerequisites\n\n- Docker\n- Docker Compose\n- Git\n\n## Running the Demo\n\n### Step 1: Clone the repository\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n\n```bash\ncd alloy-scenarios/docker-monitoring\ndocker compose up -d\n```\n\n> **Note (macOS Docker Desktop):** If Alloy cannot connect to the Docker socket, you may need to change the volume mount in `docker-compose.yml` from `/var/run/docker.sock` to `/var/run/docker.sock.raw`. This is a workaround specific to some versions of Docker Desktop on macOS.\n\n### Step 3: Access Grafana Alloy UI\n\nOpen your browser and go to `http://localhost:12345`.\n\n### Step 4: Access Grafana UI\n\nOpen your browser and go to `http://localhost:3000`.\n"
  },
  {
    "path": "docker-monitoring/config.alloy",
    "content": "// ###############################\n// #### Metrics Configuration ####\n// ###############################\n\n// Host Cadvisor on the Docker socket to expose container metrics.\nprometheus.exporter.cadvisor \"example\" {\n  docker_only = true\n}\n\ndiscovery.relabel \"example\" {\n    targets = prometheus.exporter.cadvisor.example.targets\n\n    rule {\n        target_label = \"job\"\n        replacement  = \"integrations/docker\"\n    }\n\n    rule {\n        target_label = \"instance\"\n        replacement  = constants.hostname\n    }\n}\n\n// Configure a prometheus.scrape component to collect cadvisor metrics.\nprometheus.scrape \"scraper\" {\n  targets    = discovery.relabel.example.output\n  forward_to = [ prometheus.remote_write.demo.receiver ]\n\n\n  scrape_interval = \"10s\"\n}\n\n// Configure a prometheus.remote_write component to send metrics to a Prometheus server.\nprometheus.remote_write \"demo\" {\n  endpoint {\n    url = \"http://prometheus:9090/api/v1/write\"\n  }\n}\n\n// ###############################\n// #### Logging Configuration ####\n// ###############################\n\n// Discover Docker containers and extract metadata.\ndiscovery.docker \"linux\" {\n  host = \"unix:///var/run/docker.sock\"\n}\n\n// Define a relabeling rule to create a service name from the container name.\ndiscovery.relabel \"logs_integrations_docker\" {\n      targets = []\n  \n      rule {\n          source_labels = [\"__meta_docker_container_name\"]\n          regex = \"/(.*)\"\n          target_label = \"container_name\"\n      }\n\n     rule {\n        target_label = \"instance\"\n        replacement  = constants.hostname\n    }\n\n  }\n\n\n// Configure a loki.source.docker component to collect logs from Docker containers.\nloki.source.docker \"default\" {\n  host       = \"unix:///var/run/docker.sock\"\n  targets    = discovery.docker.linux.targets\n  relabel_rules = discovery.relabel.logs_integrations_docker.rules\n  forward_to = 
[loki.process.docker_logs.receiver]\n}\n\n// Process and filter Docker logs before sending to Loki.\n// Example: Drop logs from infrastructure containers.\n// Modify the regex pattern to match container names you want to exclude.\nloki.process \"docker_logs\" {\n  forward_to = [loki.write.local.receiver]\n\n  stage.drop {\n    source     = \"container_name\"\n    expression = \"(alloy|grafana|loki)\"\n  }\n}\n\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "docker-monitoring/docker-compose.yml",
    "content": "version: '3'\nservices:\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n     - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n  grafana:\n   image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n   environment:\n     - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n     - GF_AUTH_ANONYMOUS_ENABLED=true\n     - GF_AUTH_BASIC_ENABLED=false\n   ports:\n     - 3000:3000/tcp\n   entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n   image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n   privileged: true\n   ports:\n     - 12345:12345\n     - 4317:4317\n     - 4318:4318\n   environment:\n      ALLOY_DEPLOY_MODE: docker\n   volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - /proc:/rootproc:ro\n      - /var/run/docker.sock:/var/run/docker.sock\n      - /sys:/sys:ro\n      - /:/rootfs:ro\n      - /dev/disk/:/dev/disk:ro\n      - /var/lib/docker/:/var/lib/docker:ro\n   command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data 
/etc/alloy/config.alloy\n   extra_hosts:\n        - \"host.docker.internal:host-gateway\"\n   devices:\n        - /dev/kmsg"
  },
  {
    "path": "docker-monitoring/grafana/datasources/default.yml",
    "content": "apiVersion: 1\ndatasources:\n- name: Loki\n  type: loki\n  access: proxy\n  url: http://loki:3100\n"
  },
  {
    "path": "docker-monitoring/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\ndistributor:\n  otlp_config:\n    # List of default otlp resource attributes to be picked as index labels\n    # CLI flag: -distributor.otlp.default_resource_attributes_as_index_labels\n      default_resource_attributes_as_index_labels: [service.name service.namespace service.instance.id deployment.environment deployment.environment.name cloud.region cloud.availability_zone k8s.cluster.name k8s.namespace.name k8s.container.name container.name k8s.replicaset.name k8s.deployment.name k8s.statefulset.name k8s.daemonset.name k8s.cronjob.name k8s.job.name]\n\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true"
  },
  {
    "path": "elasticsearch-monitoring/README.md",
    "content": "# Elasticsearch Monitoring with Grafana Alloy\n\nThis scenario demonstrates how to monitor an Elasticsearch instance using Grafana Alloy's built-in `prometheus.exporter.elasticsearch` component.\n\n## Architecture\n\n- **Elasticsearch** - The monitored Elasticsearch instance (single-node, security disabled)\n- **Grafana Alloy** - Collects Elasticsearch metrics via `prometheus.exporter.elasticsearch` and remote writes them to Prometheus\n- **Prometheus** - Stores the scraped metrics\n- **Grafana** - Visualizes Elasticsearch metrics (auto-provisioned with Prometheus datasource)\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root using centralized image versions\n./run-example.sh elasticsearch-monitoring\n```\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login required)\n- **Alloy UI**: http://localhost:12345\n- **Prometheus**: http://localhost:9090\n- **Elasticsearch**: http://localhost:9200\n\n## Key Metrics\n\nOnce running, you can query Elasticsearch metrics in Grafana or Prometheus. Some useful metrics include:\n\n- `elasticsearch_cluster_health_status` - Cluster health (green/yellow/red)\n- `elasticsearch_cluster_health_number_of_nodes` - Number of nodes in the cluster\n- `elasticsearch_indices_docs_total` - Total number of documents\n- `elasticsearch_indices_store_size_bytes` - Total store size\n- `elasticsearch_jvm_memory_used_bytes` - JVM memory usage\n- `elasticsearch_process_cpu_percent` - CPU usage\n- `elasticsearch_breakers_tripped` - Circuit breaker trip count\n\nMetrics are scraped every 30s by default — adjust `scrape_interval` in `config.alloy` if you need finer or coarser resolution.\n\n## Stopping\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "elasticsearch-monitoring/config.alloy",
    "content": "// Elasticsearch Monitoring with Grafana Alloy\n// This configuration scrapes Elasticsearch metrics using the built-in prometheus.exporter.elasticsearch component\n// and remote writes them to Prometheus.\n\nlivedebugging {\n\tenabled = true\n}\n\nprometheus.exporter.elasticsearch \"default\" {\n\taddress = \"http://elasticsearch:9200\"\n}\n\nprometheus.scrape \"elasticsearch\" {\n\ttargets         = prometheus.exporter.elasticsearch.default.targets\n\tforward_to      = [prometheus.remote_write.default.receiver]\n\tscrape_interval = \"30s\"\n}\n\nprometheus.remote_write \"default\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "elasticsearch-monitoring/docker-compose.coda.yml",
    "content": "services:\n  elasticsearch:\n    image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0@sha256:2f602552550869fb29b6fd5848c5118d3ef3a2e1d5d45802e3ab9088cb2de8e2\n    environment:\n      - discovery.type=single-node\n      - xpack.security.enabled=false\n      - ES_JAVA_OPTS=-Xms512m -Xmx512m\n    ports:\n      - \"9200:9200\"\n"
  },
  {
    "path": "elasticsearch-monitoring/docker-compose.yml",
    "content": "services:\n  elasticsearch:\n    image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0@sha256:2f602552550869fb29b6fd5848c5118d3ef3a2e1d5d45802e3ab9088cb2de8e2\n    environment:\n      - discovery.type=single-node\n      - xpack.security.enabled=false\n      - ES_JAVA_OPTS=-Xms512m -Xmx512m\n    ports:\n      - \"9200:9200\"\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - elasticsearch\n      - prometheus\n"
  },
  {
    "path": "elasticsearch-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "faro-frontend-observability/README.md",
    "content": "# Faro Frontend Observability\n\nThis scenario demonstrates collecting frontend web telemetry using Grafana Alloy's `faro.receiver` component and the [Grafana Faro Web SDK](https://github.com/grafana/faro-web-sdk).\n\nThe Faro Web SDK runs in the browser and captures logs, errors, events, and web vitals, then sends them to Alloy's Faro receiver endpoint. Alloy forwards the collected telemetry to Loki for storage and querying.\n\n## Architecture\n\n```\nBrowser (Faro Web SDK) --> Alloy (faro.receiver :12347) --> Loki (:3100)\n                                                                |\n                                                           Grafana (:3000)\n```\n\n## Getting Started\n\n1. Start all services:\n\n```bash\ndocker compose up -d\n```\n\n2. Open the demo web page at [http://localhost:8080](http://localhost:8080).\n\n3. Click the buttons to generate telemetry:\n   - **Send Log** -- pushes an info-level log message\n   - **Throw Error** -- catches and reports a JavaScript error\n   - **Send Event** -- sends a custom event with metadata\n   - **Unhandled Error** -- triggers an uncaught exception (automatically captured by Faro)\n\n4. 
View the collected telemetry in Grafana:\n   - Open [http://localhost:3000](http://localhost:3000)\n   - Go to **Explore** and select the **Loki** datasource\n   - Query with `{service_name=\"faro-demo\"}` to see all frontend telemetry\n\n## Services\n\n| Service | URL | Description |\n|---------|-----|-------------|\n| Web (nginx) | [http://localhost:8080](http://localhost:8080) | Demo frontend page with Faro Web SDK |\n| Alloy | [http://localhost:12345](http://localhost:12345) | Alloy UI for pipeline debugging |\n| Alloy Faro Receiver | `http://localhost:12347/collect` | Faro SDK collection endpoint |\n| Loki | [http://localhost:3100](http://localhost:3100) | Log aggregation backend |\n| Grafana | [http://localhost:3000](http://localhost:3000) | Visualization and querying |\n\n## Alloy Pipeline\n\nThe `config.alloy` pipeline is straightforward:\n\n1. **`faro.receiver`** -- listens on port 12347 for Faro Web SDK payloads with CORS enabled for all origins\n2. **`loki.write`** -- forwards the received logs to Loki\n\n## Cleanup\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "faro-frontend-observability/app/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n    <meta charset=\"UTF-8\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    <title>Faro Frontend Observability Demo</title>\n    <script src=\"https://unpkg.com/@grafana/faro-web-sdk@latest/dist/bundle/faro-web-sdk.iife.js\"></script>\n    <style>\n        body { font-family: sans-serif; max-width: 600px; margin: 40px auto; padding: 0 20px; }\n        button { padding: 10px 20px; margin: 5px; cursor: pointer; font-size: 14px; }\n        .error { background: #ff4444; color: white; border: none; border-radius: 4px; }\n        .log { background: #4488ff; color: white; border: none; border-radius: 4px; }\n        .event { background: #44bb44; color: white; border: none; border-radius: 4px; }\n        #output { margin-top: 20px; padding: 10px; background: #f0f0f0; border-radius: 4px; min-height: 100px; font-family: monospace; font-size: 12px; }\n    </style>\n</head>\n<body>\n    <h1>Faro Frontend Observability Demo</h1>\n    <p>Click the buttons below to generate frontend telemetry. 
Check Grafana Loki for the collected data.</p>\n\n    <button class=\"log\" onclick=\"sendLog()\">Send Log</button>\n    <button class=\"error\" onclick=\"throwError()\">Throw Error</button>\n    <button class=\"event\" onclick=\"sendEvent()\">Send Event</button>\n    <button class=\"error\" onclick=\"unhandledError()\">Unhandled Error</button>\n\n    <div id=\"output\">Telemetry output will appear here...</div>\n\n    <script>\n        var faro = window.GrafanaFaroWebSdk.initializeFaro({\n            url: 'http://localhost:12347/collect',\n            app: {\n                name: 'faro-demo',\n                version: '1.0.0',\n                environment: 'development',\n            },\n        });\n\n        var output = document.getElementById('output');\n        function log(msg) {\n            output.innerHTML = new Date().toISOString() + ' - ' + msg + '<br>' + output.innerHTML;\n        }\n\n        function sendLog() {\n            faro.api.pushLog(['User clicked the log button'], { level: 'info' });\n            log('Sent log to Faro');\n        }\n\n        function throwError() {\n            try {\n                throw new Error('Demo error from button click');\n            } catch (e) {\n                faro.api.pushError(e);\n                log('Sent error to Faro: ' + e.message);\n            }\n        }\n\n        function sendEvent() {\n            faro.api.pushEvent('button_click', { button: 'event', timestamp: Date.now().toString() });\n            log('Sent event to Faro');\n        }\n\n        function unhandledError() {\n            log('Throwing unhandled error...');\n            setTimeout(function() { undefinedFunction(); }, 100);\n        }\n    </script>\n</body>\n</html>\n"
  },
  {
    "path": "faro-frontend-observability/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\n// Receive frontend telemetry from the Faro Web SDK\nfaro.receiver \"default\" {\n\tserver {\n\t\tlisten_address = \"0.0.0.0\"\n\t\tlisten_port    = 12347\n\n\t\tcors_allowed_origins = [\"*\"]\n\t}\n\n\toutput {\n\t\tlogs = [loki.write.local.receiver]\n\t}\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "faro-frontend-observability/docker-compose.coda.yml",
    "content": "services:\n  web:\n    image: nginx:latest@sha256:1881968aff6f7cdcc4b888c00a11f4ce241ad7ec957e0cb4a9e19e93a3ff87ea\n    ports:\n      - 8080:80\n    volumes:\n      - ./app:/usr/share/nginx/html:ro\n"
  },
  {
    "path": "faro-frontend-observability/docker-compose.yml",
    "content": "services:\n  # Nginx web server serving the demo frontend page\n  web:\n    image: nginx:latest@sha256:1881968aff6f7cdcc4b888c00a11f4ce241ad7ec957e0cb4a9e19e93a3ff87ea\n    ports:\n      - 8080:80\n    volumes:\n      - ./app:/usr/share/nginx/html:ro\n\n  # Alloy telemetry pipeline — receives Faro Web SDK telemetry and forwards logs to Loki\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 12347:12347\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data --stability.level=experimental /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  # Loki for log aggregation\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n    depends_on:\n      - loki\n"
  },
  {
    "path": "faro-frontend-observability/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 2h\n"
  },
  {
    "path": "game-of-tracing/AGENTS.md",
    "content": "# Game of Tracing — Agent Guide\n\n> Canonical guide for any AI coding agent working inside this scenario. Tool-agnostic (Cursor, Codex, Cline, Aider, Claude Code). Claude-specific dispatch lives in `CLAUDE.md`.\n\n## What this scenario is\n\n**Game of Tracing** (titled *War of Kingdoms* in the UI) is a distributed-tracing tutorial game in the `alloy-scenarios` repository. It is substantially more elaborate than other scenarios in the repo: 10 Python/Flask services, two kingdoms competing over 8 territories, an algorithmic AI opponent, and the full LGMT stack (Loki, Grafana, Metrics/Prometheus, Tempo) sitting behind Grafana Alloy.\n\nThe **headline feature** is **span-link-driven game replay**: every player and AI action stores its `trace_id`/`span_id` in SQLite; the next action creates an OpenTelemetry `trace.Link` to the previous one, producing a causal chain of traces that can be replayed from Tempo. See `SPAN_LINKS.md` for the full spec and `README.md` for the player-facing tutorial narrative.\n\n## Architecture at a glance\n\n```\n Players ──► war-map (8080) ──┐\n                              │\n AI Opponent (8081) ──────────┤──► 8 Location Services (5001-5008)\n                              │       southern-capital, northern-capital,\n                              │       village-1 … village-6\n                              │\n All services ──OTLP──► Alloy (4317 gRPC / 4318 HTTP) ──► Tempo (3200)\n                                                      ├─► Loki  (3100)\n                                                      └─► Prom  (9090)\n                                                          │\n Grafana (3000) ──datasources──► Tempo (default), Loki, Prometheus\n```\n\nAll services push OTLP to Alloy; Alloy fans out by signal (traces→Tempo, logs→Loki, metrics→Prometheus). 
Grafana is auto-provisioned with all three datasources plus traces↔logs↔metrics correlation.\n\n## Services and ports\n\n| Service | Port(s) | Build context | Image version env | Purpose |\n|---|---|---|---|---|\n| `loki` | 3100 | — | `GRAFANA_LOKI_VERSION` (default 3.6.7) | Log storage |\n| `prometheus` | 9090 | — | `PROMETHEUS_VERSION` (default v3.10.0) | Metrics storage + OTLP receiver |\n| `tempo` | 3200 | — | `GRAFANA_TEMPO_VERSION` (default 2.10.1) | Trace storage + metrics generator |\n| `grafana` | 3000 | — | `GRAFANA_VERSION` (default 12.4.0) | Visualization (anonymous admin) |\n| `alloy` | 12345, 4317, 4318 | — | `GRAFANA_ALLOY_VERSION` (default v1.14.0) | Telemetry pipeline |\n| `southern-capital` | 5001 | `./app` | — | Capital location service |\n| `northern-capital` | 5002 | `./app` | — | Capital location service |\n| `village-1` … `village-6` | 5003-5008 | `./app` | — | Village location services |\n| `war-map` | 8080 | `./war_map` | — | Game UI + span-link broker |\n| `ai-opponent` | 8081 | `./ai_opponent` | — | Algorithmic AI opponent |\n\nImage versions are centralized at `../image-versions.env` (repo root) — edit that file, not the compose files (they use `${VAR:-default}` syntax).\n\n## Submodules (each has its own CLAUDE.md)\n\n- **`app/`** — the 8 location Flask services. See [`app/CLAUDE.md`](app/CLAUDE.md).\n- **`ai_opponent/`** — the algorithmic strategic AI (not LLM). See [`ai_opponent/CLAUDE.md`](ai_opponent/CLAUDE.md).\n- **`war_map/`** — the Flask UI and the owner of span-link reconstruction logic. See [`war_map/CLAUDE.md`](war_map/CLAUDE.md).\n\n## Shared state\n\nOne Docker volume, `game-data`, mounted at `/data`. 
**Two SQLite databases live under it, with different owners — do not confuse them:**\n\n| File | Owner | Mode | Purpose |\n|---|---|---|---|\n| `game_state.db` | All 8 location services (shared) | WAL | Canonical game state: resources, armies, faction per location |\n| `game_sessions.db` | `war_map/` only | default | `game_actions` table: per-action `trace_id`, `span_id`, `action_sequence`, `game_session_id` — drives span linking |\n\nOverriding `DATABASE_FILE` (game_state) or `GAME_SESSIONS_DB` (game_sessions) env vars on `war_map` is supported.\n\n### Extra tables added for multi-map support\n\n`game_state.db` also holds:\n\n- **`game_config`** — key/value store; the `active_map_id` row is authoritative at runtime. `war_map`'s `/select_map` route writes this; every location service reads it on boot and `/reload`.\n- **`faction_economy`** — `(faction, corpses)`. Holds the White Walkers' corpse pool on the WWA map. Populated by the post-battle hook in `LocationServer.receive_army` and by the passive corpse tick at the WW fortress. Consumed by `LocationServer.create_army` when the faction's currency is `corpses`.\n- **`wall_hold`** — `(map_id, faction, ticks, last_update)`. Written by `war_map`'s `_wall_tick_thread`. Non-zero rows mean that faction currently holds every wall keep on that map.\n\n`game_sessions.db` has a `map_id` column added to the `game_actions` table so replay queries can filter by map. 
Fresh installs seed `map_id=NULL` for any legacy rows; an additive `ALTER TABLE` migration runs on first boot after the upgrade.\n\n## Maps\n\n`app/game_config.py` defines a `MAPS` dict with two entries:\n\n| Map id | Players | Factions | Win | Notable rules |\n|---|---|---|---|---|\n| `war_of_kingdoms` (default) | 2 | `southern`, `northern`, `neutral` | Capture enemy capital | Classic — 30 resources per army, 20 resource/collect at capitals, village passive +10/15 s |\n| `white_walkers_attack` | 1 (player is `nights_watch`) | `nights_watch`, `white_walkers`, `barbarian`, `neutral` | Hold every `wall` keep for 5 × 30 s ticks | `wall` settlement type doubles defenders; WW spends 5 corpses per army (no resources); barbarian villages grow +1 army every 30 s; WW fortress passively +1 corpse every 15 s |\n\nEach map also defines a **slot assignments** dict (`slot_1` → logical location id) so the 8 physical containers can serve either map. See \"Slot identity\" below.\n\n### Slot identity\n\nEach location container has a fixed `SLOT_ID` env var (`slot_1` … `slot_8`). On boot, the container:\n\n1. Reads the shared `active_map_id` from `game_state.db`'s `game_config` table.\n2. Looks up `MAPS[active_map_id][\"slot_assignments\"][SLOT_ID]` → its logical `location_id`.\n3. Loads config from `MAPS[active_map_id][\"locations\"][location_id]`.\n\nThe container's **SERVICE_NAME** (used by Grafana dashboards) stays stable (`southern-capital`, `village-1`, etc.) regardless of the map — the *logical* location id is published as the `location.id` span attribute, not the service name.\n\nRuntime map switching: `war_map/select_map` writes a new `active_map_id`, POSTs `/reset` to any one container to wipe the `locations` table, then POSTs `/reload` to every container so they rebind in place without a restart.\n\n## Two Alloy configurations\n\n### Default — River (HCL)\n```bash\ncd game-of-tracing && docker compose up -d\n```\nUses `config.alloy`. 
Alloy runs with `run /etc/alloy/config.alloy`.\n\n### Alternate — OTel Collector YAML\n```bash\ncd game-of-tracing && docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n```\nUses `config-otel.yaml`. Alloy runs with its OTel Engine mode: `otel --config=/etc/alloy/config-otel.yaml`. The pipeline is functionally identical; this variant demonstrates Alloy's ability to accept OTel Collector syntax.\n\n## OpenTelemetry patterns you must respect\n\nEvery service has its own `telemetry.py` exposing a `GameTelemetry` class that wires up all three signals.\n\n- **Traces** — OTLP gRPC → `alloy:4317`, `BatchSpanProcessor(max_export_batch_size=1)`. The batch size of 1 is **intentional** for demo timing; do not tune it.\n- **Logs** — OTLP HTTP → `alloy:4318/v1/logs`, `BatchLogRecordProcessor(max_queue_size=30, max_export_batch_size=5)`.\n- **Metrics** — OTLP HTTP → `alloy:4318/v1/metrics`, `PeriodicExportingMetricReader(export_interval_millis=10000)`, `TraceBasedExemplarFilter` (so metric exemplars link to trace IDs).\n\n### Context propagation is manual\n\nIncoming requests extract W3C trace context from headers; outgoing requests inject it:\n\n```python\n# Incoming (every route handler):\nctx = extract(request.headers)\nwith tracer.start_as_current_span(\"name\", context=ctx, ...) as span:\n\n# Outgoing (canonical helper at app/location_server.py:327-352):\ninject(headers)\nrequests.post(url, headers=headers, ...)\n```\n\n### Background threads MUST capture context explicitly\n\nPython threads do not inherit OpenTelemetry context. 
The scenario's canonical pattern is to capture before spawning and attach inside the thread:\n\n```python\n# app/location_server.py:209-271 (_continue_army_movement) — canonical example:\nctx = get_current()\n\ndef move():\n    token = attach(ctx)\n    try:\n        with self.tracer.start_as_current_span(\"army_movement\", ...):\n            ...\n    finally:\n        detach(token)\n\nThread(target=move).start()\n```\n\nThe same pattern appears in `_transfer_resources_along_path` at `app/location_server.py:273-325`. If a background span shows up with a missing or different `trace_id`, the `get_current()` / `attach` / `detach` pair is the first thing to check.\n\n## Span links — the headline feature\n\nSpan links are the mechanism that turns a sequence of discrete player actions into a replayable narrative. See `SPAN_LINKS.md` for the full design.\n\n**Flow:**\n1. Player selects a faction → `war_map/app.py` creates a `game_session_id` (UUID).\n2. Every action handler (`/api/collect_resources`, `/api/create_army`, `/api/move_army`) does:\n   - Looks up the previous action for this session via `get_previous_action_context()` at `war_map/app.py:130-170`. That function reads `trace_id` and `span_id` from the `game_actions` SQLite table and rebuilds a `trace.SpanContext(..., is_remote=True, trace_flags=TraceFlags.SAMPLED)`.\n   - Wraps the context in a link via `create_span_link_from_context()` at `war_map/app.py:172-189`, attaching `link.type=\"game_sequence\"`, `link.relation=\"follows\"`, `game.sequence=\"true\"`.\n   - Starts its own action span with that link, then calls `store_game_action()` to record its own `trace_id`/`span_id` for the next action to link back to.\n3. The AI opponent uses the same primitive with a different link type — `link.type=\"ai_decision_trigger\"` — to link its decision span to the action execution span it spawns (see `ai_opponent/ai_server.py`).\n4. 
The replay UI queries Tempo:\n   - `GET /api/v2/search/tag/game.session.id/values` to enumerate sessions.\n   - `GET /api/search?q={game.session.id=\"<id>\"}` to pull every trace in a session.\n   - SQLite `game_actions` is the fallback if Tempo is unavailable.\n\n## Custom metrics reference\n\n### From `app/telemetry.py`\n| Metric | Type | Attributes | Notes |\n|---|---|---|---|\n| `game.resources` | observable gauge | `location`, `location_type` | Current resource pool per location |\n| `game.army_size` | observable gauge | `location`, `location_type`, `faction` | Current army strength |\n| `game.battles` | counter | `attacker_faction`, `defender_faction`, `result`, `location` | `result ∈ {attacker_victory, defender_victory, stalemate, reinforcement}` |\n| `game.resource_transfer_cooldown` | observable gauge | `location` | Seconds remaining |\n| `game.location_control` | observable gauge | `location`, `location_type`, `faction` | `northern=1, southern=2, neutral=0, unknown=-1` |\n\n### From `ai_opponent/telemetry.py`\n| Metric | Type | Attributes |\n|---|---|---|\n| `ai.decisions` | counter | `action_type`, `phase`, `reason` |\n| `ai.plans_created` | counter | `goal` |\n| `ai.plans_abandoned` | counter | `reason` |\n| `ai.decision_cycle_duration_seconds` | histogram | `phase` |\n| `ai.territory_count` | observable gauge | `faction` |\n| `ai.total_army` | observable gauge | `faction` |\n\n### Span attributes used by the provisioned Grafana dashboard\nPreserve these when adding new spans — the dashboard's TraceQL filters depend on them:\n- `span.resource.movement = true`\n- `span.battle.occurred = true`\n- `span.player.action = true`\n\n## Common tasks\n\n```bash\n# Start everything\ncd game-of-tracing && docker compose up -d\n\n# Stop (preserves volume)\ndocker compose down\n\n# Stop and wipe game state\ndocker compose down -v\n\n# Rebuild only one service after code change\ndocker compose up -d --build war-map\n\n# Switch to the OTel Engine variant\ndocker 
compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n\n# Tail a trace end-to-end\n# 1. Game UI:      http://localhost:8080\n# 2. Grafana:      http://localhost:3000 (anonymous admin)\n# 3. Alloy debug:  http://localhost:12345/debug/livedebugging\n# 4. Tempo API:    http://localhost:3200\n```\n\n## Gotchas\n\n- **Hyphens vs underscores.** Service names are hyphenated (`southern-capital`, set via `SERVICE_NAME` resource attribute); location IDs in game_config.py and DB rows are underscored (`southern_capital`). Code that bridges them uses `location_id.replace('_', '-')`. Do not cross them.\n- **Two compose files — `docker-compose.yml` and `docker-compose.coda.yml`.** The coda variant redefines the same 10 app-layer services already defined in the main compose file, for use with the `coda` CLI. When editing app services, update both.\n- **Image versions.** Live in `../image-versions.env` (repo root). Compose files use `${VAR:-default}` — edit the env file, not the compose.\n- **Grafana is auto-provisioned** via `grafana/datasources/defaults.yml`. Tempo is the default datasource; service map, traces-to-logs (Loki `trace_id` label), traces-to-metrics, and exemplars are pre-wired. Do not add datasources via UI — edit the YAML.\n- **Tempo metrics generator is enabled** in `tempo-config.yaml` with processors `service-graphs`, `span-metrics`, `local-blocks`, writing to `prometheus:9090/api/v1/write`. Ingester `max_block_duration: 5m` and 720h compactor retention are demo-tuned, not production values.\n- **`grafana-traces-app` plugin** is installed via `GF_INSTALL_PLUGINS` at container start. If Grafana is slow on first boot, that is why.\n- **`war-map` strips `X-Frame-Options`** in an `@app.after_request` hook (`war_map/app.py:191-194`) so the UI can be embedded in Grafana iframes. 
Intentional — do not remove.\n\n## Keep docs current\n\n**Any change to this scenario must land in the same work unit as a doc update.** Stale line-number anchors, removed symbols, or new services that nobody documents are treated as regressions, not cleanup tasks.\n\nFiles that must be checked whenever the scenario changes:\n- `game-of-tracing/AGENTS.md` (this file)\n- `game-of-tracing/CLAUDE.md`\n- `game-of-tracing/app/CLAUDE.md`\n- `game-of-tracing/ai_opponent/CLAUDE.md`\n- `game-of-tracing/war_map/CLAUDE.md`\n- `.claude/agents/game-of-tracing-expert.md` (cheat-sheet references)\n\nTriggers that require a doc update: new service, renamed function, new/changed span attribute, new env var, added/removed metric, port change, dependency bump, new action type in the span-link chain, change to any cited line-number anchor.\n\nThe Claude sub-agent at `.claude/agents/game-of-tracing-expert.md` owns this responsibility end-to-end for Claude Code sessions. For non-Claude agents: before returning a response that involved a code edit, grep the six files above for any outdated references and update them.\n\n## Verification\n\nAfter any meaningful change, run through this sequence:\n\n1. **Smoke the scenario.** `cd game-of-tracing && docker compose up -d`; wait ~20s for all 10 services to be healthy (`docker compose ps` — all should be `(healthy)` or `Up`).\n2. **Confirm Alloy ingest.** Open `http://localhost:12345/debug/livedebugging`. Select the `otelcol.receiver.otlp.default` component and confirm non-zero signal counts for traces/logs/metrics.\n3. **Trigger a player action.** Open `http://localhost:8080`, pick a faction, collect resources, create an army, move it to a neutral village.\n4. **Inspect the resulting trace.** Grafana at `http://localhost:3000` → Explore → Tempo → Search by `game.session.id` tag. 
Verify:\n   - Parent player-action span in `war-map`.\n   - Child CLIENT span with propagated trace context.\n   - SERVER span in the target location (`village-X` etc.).\n   - Background `army_movement` span sharing the same `trace_id` (confirms `get_current()`/`attach` worked).\n   - A span link back to the previous action span (the headline feature).\n5. **Dashboard check.** Open the provisioned *War of Kingdoms* dashboard; TraceQL filters like `{span.resource.movement = true}` should return traces.\n6. **Shutdown.** `docker compose down` (add `-v` to wipe volumes).\n\n## Cross-references\n\n- Full span-link design: [`SPAN_LINKS.md`](SPAN_LINKS.md)\n- Player-facing tutorial: [`README.md`](README.md)\n- Generic scenario conventions: [`../CLAUDE.md`](../CLAUDE.md)\n- Submodule guides: [`app/CLAUDE.md`](app/CLAUDE.md), [`ai_opponent/CLAUDE.md`](ai_opponent/CLAUDE.md), [`war_map/CLAUDE.md`](war_map/CLAUDE.md)\n"
  },
  {
    "path": "game-of-tracing/CLAUDE.md",
    "content": "# CLAUDE.md — Game of Tracing (Claude Code)\n\n> Claude-specific workflow for this scenario. For architecture, services, OpenTelemetry patterns, span-link mechanics, and gotchas, **read [`./AGENTS.md`](AGENTS.md) first**. This file only covers what's different when the agent is Claude Code.\n\n## Start here\n\n1. Read `./AGENTS.md` for the scenario overview — including the **Maps** and **Slot identity** sections.\n2. Read the submodule `CLAUDE.md` matching the area you are touching: [`app/CLAUDE.md`](app/CLAUDE.md), [`ai_opponent/CLAUDE.md`](ai_opponent/CLAUDE.md), [`war_map/CLAUDE.md`](war_map/CLAUDE.md).\n3. If the task involves span links, trace replay, cross-service context propagation, or AI decision logic — delegate to the sub-agent below.\n\n### Two maps, one stack\n\nThe scenario ships **two maps** selected via an in-UI picker at game start: `war_of_kingdoms` (default 2-player) and `white_walkers_attack` (single-player Night's Watch vs AI White Walkers with `wall` keeps, corpse economy, and a 5-tick hold-to-win condition). Both reuse the same 8 location containers — each container has a constant `SLOT_ID` env and picks up its logical identity from `MAPS[active_map_id][\"slot_assignments\"][SLOT_ID]` in `app/game_config.py`. Changing maps writes a new `active_map_id` to the shared `game_config` table and POSTs `/reload` to every slot.\n\n## Sub-agent dispatch\n\nA specialized sub-agent lives at [`../.claude/agents/game-of-tracing-expert.md`](../.claude/agents/game-of-tracing-expert.md). 
Use it (via `Task` tool, `subagent_type: game-of-tracing-expert`) for any non-trivial question about:\n\n- Reconstructing or debugging span contexts / span links\n- Cross-service or cross-thread OpenTelemetry context propagation\n- The `StrategicAI` priority cascade, game phases, or AI metric instrumentation\n- Tempo TraceQL queries used by the replay UI\n- Why a trace is orphaned, missing, or appears duplicated in Grafana\n\nThe sub-agent is read-only (no Write/Edit tools) — it reports; the parent agent does the writes. It **also owns keeping the docs in sync with the code** — see \"Keep docs current\" below.\n\n## Tool preferences\n\n- **Use `Read`, not `cat`**, for the large files in this scenario. Use `offset` / `limit` to target line ranges rather than reading the whole file:\n  - `app/location_server.py` (~52 KB, ~1200 lines)\n  - `ai_opponent/ai_server.py` (~46 KB)\n  - `war_map/app.py` (~64 KB)\n  - `war_map/templates/map.html` (~50 KB)\n  - `war_map/templates/replay_session.html` (~28 KB)\n  - `SPAN_LINKS.md` (~17 KB)\n- **Use `Grep`, not `grep | head`** for pattern search across the scenario.\n- For the Alloy pipeline debug UI (`http://localhost:12345`), the stack has to be running — either ask the user to `docker compose up -d` or check `docker compose ps` first.\n\n## Read-before-edit checklist\n\nBefore editing any service, open these files to ground yourself:\n\n| Change area | Open first |\n|---|---|\n| Location server behavior | `app/telemetry.py`, relevant route handler in `app/location_server.py`, `app/game_config.py`, the service block in `docker-compose.yml` |\n| AI decision logic | `ai_opponent/telemetry.py`, `ai_opponent/ai_server.py`, `ai_opponent/README.md` |\n| UI, sessions, or replay | `war_map/telemetry.py`, `war_map/app.py` (especially `:130-189` for span-link plumbing), relevant template under `war_map/templates/` |\n| Telemetry pipeline | `config.alloy` (default) or `config-otel.yaml` (OTel variant), `tempo-config.yaml`, 
`loki-config.yaml`, `prom-config.yaml` |\n| Datasources / dashboards | `grafana/datasources/defaults.yml`, `grafana/dashboards/*.json` |\n| Image versions | `../image-versions.env` |\n\n## Keep docs current\n\n**Whenever a change to this scenario ships, the matching docs must ship in the same change.** The sub-agent (`game-of-tracing-expert`) enforces this during its work; Claude Code in the main loop is responsible whenever the sub-agent is not invoked.\n\nTriggers that require a doc update in the same commit:\n\n- New service, renamed function, relocated symbol (line-number anchors shift)\n- New, removed, or renamed span attribute — especially the ones that feed the Grafana dashboard TraceQL (`span.resource.movement`, `span.battle.occurred`, `span.player.action`)\n- New or removed env var\n- New or removed metric\n- Port change\n- Dependency version bump (update `image-versions.env` *and* any docs that quote a version)\n- New action type in the span-link chain (both `war_map/app.py` handler and `replay_session.html` renderer)\n\nFiles to sweep on every scenario change:\n\n1. `game-of-tracing/AGENTS.md`\n2. `game-of-tracing/CLAUDE.md` (this file)\n3. `game-of-tracing/app/CLAUDE.md`\n4. `game-of-tracing/ai_opponent/CLAUDE.md`\n5. `game-of-tracing/war_map/CLAUDE.md`\n6. `.claude/agents/game-of-tracing-expert.md`\n\nStale line-number anchors are treated as regressions, not cleanup tasks. If a cited `file:line` range no longer resolves to the referenced symbol, fix it.\n\n## Relationship to the repo root\n\n- [`../CLAUDE.md`](../CLAUDE.md) covers the generic multi-scenario conventions (run commands, scenario directory layout, Alloy pipeline shape).\n- This file overrides nothing; it extends the root with the patterns that are unique to this scenario (manual context propagation, background-thread context capture, span-link-driven replay, AI instrumentation).\n"
  },
  {
    "path": "game-of-tracing/README.md",
    "content": "---\ntitle: A Game of Traces\nmenuTitle: A Game of Traces\ndescription: A grand strategy game with distributed tracing\nweight: 600\nkillercoda:\n  title: A Game of Traces\n  description: A grand strategy game with distributed tracing\n  details:\n      intro:\n         foreground: docker-compose-update.sh\n  backend:\n    backend:\n    imageid: ubuntu\n---\n\n\n<!-- INTERACTIVE page intro.md START -->\n# War of Kingdoms: A Distributed Tracing Tutorial Game\n\n<!-- INTERACTIVE ignore START -->\n\n<div align=\"center\">\n<img src=\"https://grafana.com/media/docs/alloy/game-of-tracing.jpeg\" alt=\"Game of Tracing\" width=\"200\"/>\n</div>\n\n<!-- INTERACTIVE ignore END -->\n\nThis educational game demonstrates distributed tracing concepts through an interactive strategy game built with OpenTelemetry and Grafana Alloy. Players learn about trace sampling, service graphs, and observability while competing for territory control.\n\n## Educational Goals\n\nThis game teaches several key concepts in distributed tracing:\n\n1. **Distributed System Architecture**\n   - Multiple microservices (locations) communicating via HTTP\n   - Shared state management\n   - Event-driven updates\n   - Real-time data propagation\n\n2. **OpenTelemetry Concepts**\n   - Trace context propagation\n   - Span creation and attributes\n   - Service naming and resource attributes\n   - Manual instrumentation techniques\n\n3. **Observability Patterns**\n   - Trace sampling strategies\n   - Error tracking and monitoring\n   - Performance measurement\n   - Service dependencies visualization\n\n## Game Overview\n\nOpen the scenario at `http://localhost:8080` and you land on a **map picker**. Two maps ship today:\n\n### War of Kingdoms (default, 2-player)\n\nTwo rival kingdoms — Southern and Northern — race to capture the enemy capital. 
Players:\n\n- Collect resources from their territories\n- Build armies (30 resources per unit) to expand their influence\n- Capture neutral villages (6 of them)\n- Send resources back to their capital\n- Launch strategic attacks on enemy territories\n\n**Win condition:** capture the enemy capital.\n\n### White Walkers Attack (single-player)\n\nThe Long Night has come. The human plays the **Night's Watch** (player faction); the AI opponent plays the **White Walkers**. A new **Barbarian** faction controls two villages on the flanks — passive, slowly accruing army units, good raid targets.\n\nNew mechanics:\n\n- **Wall settlements** run across the middle of the map. Defenders count **2×** when a wall is attacked, making them hard to dislodge.\n- **Corpse economy.** White Walkers spend **corpses** (not resources) to raise new armies at their fortress. Corpses come from winning battles (every unit killed on either side becomes a corpse) plus a slow passive tick at the fortress itself. Cost: 5 corpses per unit.\n- **Barbarians** never attack. They accrue +1 army every 30 s — easy farm for White Walkers, but they also harass unguarded Night's Watch supply lines.\n\n**Win condition:** hold *every* wall settlement continuously for **5 ticks** (150 s, since the tick is 30 s). 
Any wall changing hands resets the counter.\n\nBoth maps share the same 8 location containers — the active map lives in `game_state.db`, and the `/reload` endpoint on each service rebinds the slot's identity when the player switches maps via the picker.\n\nEach action in the game generates traces that can be analyzed in Grafana Tempo, demonstrating how distributed tracing works in a real application.\n\n## Technical Components\n\nThe application consists of:\n\n- **Location Servers**: Python Flask microservices representing different map locations\n- **War Map UI**: Web interface for game interaction\n- **AI Opponent**: Intelligent computer player for single-player mode\n- **Telemetry Pipeline**:\n  - OpenTelemetry SDK for instrumentation\n  - `pyroscope-otel` bridge for linking traces to CPU profiles\n  - Grafana Alloy for trace/log/metric/profile processing\n  - Tempo for trace storage\n  - Prometheus for metrics\n  - Loki for logs\n  - Pyroscope for continuous profiling\n  - Grafana for visualization\n\n<!-- INTERACTIVE page intro.md END -->\n\n<!-- INTERACTIVE page step1.md START -->\n\n## Running the Demo\n\n1. Clone the repository:\n   ```bash\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example:\n   ```bash\n   cd game-of-tracing\n   ```\n\n3. Run using Docker Compose:\n   ```bash\n   docker compose up -d\n   ```\n\n4. Access the components:\n   - Game UI: [http://localhost:8080](http://localhost:8080)\n   - Grafana: [http://localhost:3000](http://localhost:3000)\n   - Prometheus: [http://localhost:9090](http://localhost:9090)\n   - Pyroscope: [http://localhost:4040](http://localhost:4040)\n   - Alloy Debug: [http://localhost:12345/debug/livedebugging](http://localhost:12345/debug/livedebugging)\n\n5. 
Multiplayer Access:\n   - The game supports multiple players simultaneously\n   - Players can join using:\n     - `http://localhost:8080` from the same machine\n     - `http://<host-ip>:8080` from other machines on the network\n   - Each player can choose either the Southern or Northern faction\n   - The game prevents multiple players from selecting the same faction\n\n6. Single-Player Mode:\n   - Toggle \"Enable AI Opponent\" in the game interface\n   - The AI will automatically control the faction not chosen by the player\n   - The AI provides a balanced challenge with adaptive strategies\n   - For two-player games, keep the AI toggle disabled\n\n<!-- INTERACTIVE page step1.md END -->\n\n<!-- INTERACTIVE page step2.md START -->\n\n## Setting Up the Dashboard\n\n1. Open Grafana at http://localhost:3000 (anonymous admin auth is enabled, no login required).\n\n2. The **War of Kingdoms** dashboard is auto-provisioned at startup — no manual import needed. Find it under Dashboards → Browse.\n\n3. Data sources (Prometheus, Loki, Tempo, **Pyroscope**) are auto-provisioned too. The Tempo datasource is pre-wired to Loki (traces-to-logs), Prometheus (traces-to-metrics), and Pyroscope (traces-to-profiles), so every span in Explore gets a \"View profile\" link.\n\n4. The dashboard provides:\n   - Real-time army and resource metrics\n   - Battle analytics\n   - Territory control visualization\n   - Service dependency mapping\n   - Trace analytics for game events\n\n### Viewing Profiles\n\nWith every player action the app emits CPU pprof samples via the `pyroscope-otel` bridge. Each span carries a `pyroscope.profile.id` attribute that Grafana uses to jump directly from a span to its flamegraph.\n\n- Explore → **Pyroscope** datasource → pick a service (e.g. 
`war-map`) → flamegraph renders.\n- Explore → **Tempo** → open a recent trace → right-click a span → **View Profile**.\n\n> **OTel-engine variant note**: when running the alternate pipeline via `docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d`, Alloy's OTel-engine mode has no native Pyroscope receiver. The Python services still profile themselves, but the default profile endpoint (`http://alloy:9999`) won't exist. Override with `PYROSCOPE_SERVER_ADDRESS=http://pyroscope:4040` in the environment to push profiles straight to Pyroscope.\n\n<!-- INTERACTIVE page step2.md END -->\n\n<!-- INTERACTIVE page step3.md START -->\n\n## Learning Through Play\n\n### 1. Trace Context Propagation\nWatch how actions propagate through the system:\n- Resource collection triggers spans across services\n- Army movements create trace chains\n- Battle events generate nested spans\n\n### 2. Service Graph Analysis\nLearn how services interact:\n- Village-to-capital resource flows\n- Army movement paths\n- Battle resolution chains\n\n## Observability Features\n\n### 1. Resource Movement Tracing\n```console\n{span.resource.movement = true}\n```\nTrack resource transfers between locations with detailed timing and amounts.\n\n### 2. Battle Analysis\n```console\n{span.battle.occurred = true}\n```\nAnalyze combat events, outcomes, and participating forces.\n\n### 3. Player Actions\n```console\n{span.player.action = true}\n```\nMonitor player interactions and their impact on the game state.\n\n<!-- INTERACTIVE page step3.md END -->\n\n<!-- INTERACTIVE page step4.md START -->\n\n## Architecture Deep Dive\n\n### Trace Flow Example: Army Movement\n\n1. Player initiates move (UI span)\n2. Source location processes request (source span)\n3. Movement calculation (path span)\n4. Target location receives army (target span)\n5. Battle resolution if needed (battle span)\n6. 
State updates propagate (update spans)\n\nEach step generates spans with relevant attributes, demonstrating trace context propagation in a distributed system.\n\n## Educational Use\n\nThis project is designed for educational purposes to teach:\n- Distributed systems concepts\n- Observability practices\n- Microservice architecture\n- Real-time data flow\n- System instrumentation\n\n<!-- INTERACTIVE page step4.md END -->\n\n<!-- INTERACTIVE page finish.md START -->\n\n## Contributing\n\nWe welcome contributions! Please see our [contribution guidelines](CONTRIBUTING.md) for details.\n\n## License\n\nThis project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.\n\n## Disclaimer\n\nThis is an educational project focused on teaching distributed tracing concepts. Any resemblance to existing games or properties is coincidental and falls under fair use for educational purposes.\n\n## Further Resources\n\n- [OpenTelemetry Documentation](https://opentelemetry.io/docs/)\n- [Grafana Alloy Documentation](https://grafana.com/docs/alloy/latest/)\n- [Distributed Tracing Guide](https://opentelemetry.io/docs/concepts/observability-primer/#distributed-traces) \n\n<!-- INTERACTIVE page finish.md END -->"
  },
  {
    "path": "game-of-tracing/SPAN_LINKS.md",
    "content": "# Span Links Implementation in Game of Tracing\n\nThis document explains how span links are implemented in the Game of Tracing game to enable game replay functionality.\n\n## What Are Span Links?\n\nSpan links allow you to create relationships between spans that aren't in a direct parent-child hierarchy. Unlike parent-child relationships (which are synchronous and hierarchical), links are more flexible and can connect spans across different traces or time periods.\n\n## Implementation Overview\n\n### Game Session Tracking\n\nEach player gets a unique `game_session_id` when they select a faction. This ID is used to track all their actions throughout the game:\n\n```python\n# Generated when player selects faction\nsession['game_session_id'] = str(uuid.uuid4())\nsession['action_sequence'] = 0\n```\n\n### Action Storage\n\nEvery significant game action is stored in a SQLite database with its trace information:\n\n```sql\nCREATE TABLE game_actions (\n    id INTEGER PRIMARY KEY AUTOINCREMENT,\n    game_session_id TEXT NOT NULL,\n    action_sequence INTEGER NOT NULL,\n    action_type TEXT NOT NULL,\n    player_name TEXT,\n    faction TEXT,\n    trace_id TEXT NOT NULL,\n    span_id TEXT NOT NULL,\n    location_id TEXT,\n    target_location_id TEXT,\n    timestamp INTEGER NOT NULL,\n    game_state_after TEXT\n)\n```\n\n### Span Link Creation\n\nEach new action creates a span link to the previous action in the sequence:\n\n```python\n# Get previous action's span context\nprevious_span_context = get_previous_action_context(game_session_id, current_sequence)\n\n# Create span link using official OpenTelemetry API\nif previous_span_context:\n    link = trace.Link(\n        previous_span_context,\n        attributes={\n            \"link.type\": \"game_sequence\",\n            \"link.relation\": \"follows\",\n            \"game.sequence\": \"true\"\n        }\n    )\n    links.append(link)\n\n# Create new span with links\nwith tracer.start_as_current_span(\n    
\"move_army\",\n    kind=SpanKind.SERVER,\n    links=links,  # Links to previous actions\n    attributes={\n        \"game.session.id\": game_session_id,\n        \"game.action.type\": \"move_army\",\n        \"game.action.sequence\": current_sequence + 1\n    }\n) as span:\n    # ... action logic ...\n```\n\n## Supported Actions\n\nThe following game actions create span links:\n\n1. **collect_resources** - Collecting resources at a location\n2. **create_army** - Creating armies at capitals\n3. **move_army** - Moving armies between locations\n4. **all_out_attack** - Launching all-out attacks\n\n## Battle Mechanics\n\nThe game uses simple but effective battle calculations:\n\n### Combat Rules\n\n1. **Same Faction**: Reinforcement\n   - Armies combine: `final_army = attacking_army + defending_army`\n   - Used for friendly army movements and reinforcements\n\n2. **Different Factions**: Combat\n   - **Attacker Victory**: `remaining_army = attacking_army - defending_army`\n   - **Defender Victory**: `remaining_army = defending_army - attacking_army`\n   - **Stalemate**: `remaining_army = 0` (equal armies destroy each other)\n\n### All-Out Attack Special Rules\n\n- All-out attacks automatically collect armies from friendly villages along the path\n- This simulates gathering reinforcements during the march to enemy territory\n- Example: 5 armies + 2 village armies = 7 armies continuing to target\n\n### Battle Calculation Code\n\n```python\ndef _handle_battle(self, attacking_army: int, attacking_faction: str, \n                  defending_army: int, defending_faction: str) -> tuple[str, int, str]:\n    # Same faction = reinforcement\n    if attacking_faction == defending_faction:\n        return \"reinforcement\", attacking_army + defending_army, attacking_faction\n    \n    # Actual combat\n    if attacking_army > defending_army:\n        remaining = attacking_army - defending_army\n        return \"attacker_victory\", remaining, attacking_faction\n    elif defending_army 
> attacking_army:\n        remaining = defending_army - attacking_army\n        return \"defender_victory\", remaining, defending_faction\n    else:\n        return \"stalemate\", 0, defending_faction\n```\n\n## Game Restart Functionality\n\nThe restart system ensures complete game state reset:\n\n### What Gets Reset\n\n1. **Game State Variables**\n   - `GAME_OVER`, `WINNER`, `VICTORY_MESSAGE` flags\n   - Global game state in war map\n\n2. **Span Links Database**\n   - All game action records cleared\n   - Fresh start for span link chains\n\n3. **Faction Assignments**\n   - Player faction selections cleared\n   - All factions become available\n\n4. **AI Opponent**\n   - AI automatically deactivated\n   - Prevents ghost AI actions\n\n5. **Location Database**\n   - All locations reset to initial state\n   - Resources, armies, and factions restored\n\n### Restart Process\n\n```python\ndef reset_game_data():\n    # Reset local game state\n    reset_game_state()\n    \n    # Deactivate AI\n    requests.post(f\"{AI_SERVICE_URL}/deactivate\")\n    \n    # Clear faction assignments\n    release_all_factions()\n    \n    # Clear span links database\n    cursor.execute(\"DELETE FROM game_actions\")\n    \n    # Reset location database\n    make_api_request('southern_capital', 'reset', method='POST')\n```\n\n### Verification\n\nUse the debug endpoint to verify complete reset:\n\n```bash\ncurl http://localhost:8080/api/debug/restart_verification\n```\n\nExpected response:\n```json\n{\n  \"success\": true,\n  \"all_systems_reset\": true,\n  \"details\": {\n    \"game_state_reset\": true,\n    \"span_links_cleared\": true,\n    \"faction_assignments_cleared\": true,\n    \"ai_deactivated\": true,\n    \"database_reset\": true\n  }\n}\n```\n\n## Game Replay Chain\n\nWith span links, you can trace the complete game narrative:\n\n```\nGame Start → Collect Resources → Create Army → Move Army → Battle → Victory\n     ↑              ↑               ↑           ↑         ↑        ↑\n  
[trace_1]     [trace_2]       [trace_3]   [trace_4] [trace_5] [trace_6]\n                    ↑               ↑           ↑         ↑        ↑\n               [links to]      [links to]  [links to] [links to] [links to]\n               trace_1         trace_2     trace_3    trace_4   trace_5\n```\n\n## Game Replay Through Tempo\n\n### TraceQL Queries for Replay\n\n#### 1. Find All Game Sessions\n```traceql\n{game.session.id!=\"\"}\n```\n\n#### 2. Get Specific Game Session\n```traceql\n{game.session.id=\"abc-123-def\"}\n```\n\n#### 3. Find Actions with Span Links\n```traceql\n{link.type=\"game_sequence\"}\n```\n\n#### 4. Find Game Actions by Type\n```traceql\n{game.action.type=\"move_army\"}\n```\n\n#### 5. Find Actions by Player\n```traceql\n{player.name=\"Alice\" && game.session.id!=\"\"}\n```\n\n#### 6. Find Battle Outcomes\n```traceql\n{span.battle.occurred=true}\n```\n\n### Tempo API Integration\n\nThe replay system uses Tempo's HTTP API:\n\n```python\n# 1. Search for game sessions\nGET /api/search?q={game.session.id!=\"\"}\n\n# 2. Get specific session traces  \nGET /api/search?q={game.session.id=\"session-id\"}\n\n# 3. Get full trace details\nGET /api/traces/{trace-id}\n\n# 4. 
Extract span links from trace data\nfor span in trace['batches'][0]['spans']:\n    for ref in span.get('references', []):\n        if ref.get('refType') == 'FOLLOWS_FROM':\n            # This is a span link\n            linked_span_id = ref.get('spanID')\n```\n\n### Replay Engine Architecture\n\n```python\nclass GameReplayEngine:\n    def find_game_sessions(self) -> List[str]:\n        \"\"\"Query Tempo for all game sessions\"\"\"\n        \n    def get_session_traces(self, session_id: str) -> List[Dict]:\n        \"\"\"Get all traces for a specific session\"\"\"\n        \n    def extract_game_actions(self, traces: List[Dict]) -> List[GameAction]:\n        \"\"\"Parse traces into game actions\"\"\"\n        \n    def verify_span_links(self, actions: List[GameAction]) -> None:\n        \"\"\"Verify span link chain integrity\"\"\"\n        \n    def replay_session_step_by_step(self, session: GameSession) -> None:\n        \"\"\"Replay game session action by action\"\"\"\n```\n\n### Web UI Replay\n\nThe game includes web endpoints for replay:\n\n- `GET /api/replay/sessions` - List available game sessions\n- `GET /api/replay/session/{id}` - Get detailed replay data\n- `GET /replay` - Replay dashboard page\n- `GET /replay/{session-id}` - Specific session replay\n\n### Replay Data Structure\n\n```json\n{\n  \"session_id\": \"abc-123-def\",\n  \"player_name\": \"Alice\",\n  \"faction\": \"southern\",\n  \"actions\": [\n    {\n      \"sequence\": 1,\n      \"action_type\": \"collect_resources\",\n      \"trace_id\": \"trace-1\",\n      \"span_id\": \"span-1\",\n      \"span_links\": [],  // First action has no links\n      \"timestamp\": \"2024-01-01T10:00:00Z\",\n      \"location_id\": \"southern_capital\"\n    },\n    {\n      \"sequence\": 2,\n      \"action_type\": \"create_army\", \n      \"trace_id\": \"trace-2\",\n      \"span_id\": \"span-2\",\n      \"span_links\": [\"span-1\"],  // Links to previous action\n      \"timestamp\": \"2024-01-01T10:01:00Z\",\n      
\"location_id\": \"southern_capital\"\n    }\n  ],\n  \"span_link_chain\": [\n    {\"sequence\": 1, \"valid_chain\": true, \"note\": \"First action\"},\n    {\"sequence\": 2, \"valid_chain\": true, \"note\": \"Correctly links to action 1\"}\n  ]\n}\n```\n\n## Querying Span Links\n\n### In Grafana Tempo\n\nSearch for traces with game session information:\n```\n{game.session.id!=\"\"}\n```\n\nFind spans with links:\n```\n{link.type=\"game_sequence\"}\n```\n\n### Trace Attributes\n\nEach span includes these attributes for game replay:\n- `game.session.id` - Unique session identifier\n- `game.action.type` - Type of action (move_army, create_army, etc.)\n- `game.action.sequence` - Sequence number in the game\n- `link.type` - Type of link (game_sequence)\n- `link.relation` - Relationship (follows)\n\n## Testing\n\nRun the test script to verify span links are working:\n\n```bash\ncd game-of-tracing\npython debug_span_links.py\n```\n\nThis will:\n1. Select a faction\n2. Perform a sequence of actions\n3. Each action will link to the previous one\n4. Provide instructions for viewing the links in Grafana\n5. Test battle calculation mechanics\n6. Verify restart functionality\n\nTest the replay functionality:\n\n```bash\ncd game-of-tracing\npython war_map/replay.py\n```\n\n## Educational Value\n\nSpan links demonstrate:\n- **Cross-trace relationships** - Actions in different traces can be related\n- **Historical context** - Each action knows what came before it\n- **Game narrative** - Complete story of how the game unfolded\n- **Advanced OpenTelemetry** - Real-world use of span links feature\n- **Tempo integration** - How to query and reconstruct trace relationships\n\n## Game Replay Benefits\n\n1. **Debugging** - Understand what led to game outcomes\n2. **Analytics** - Analyze player behavior patterns\n3. **Education** - Show distributed tracing concepts in action\n4. **Auditing** - Verify game logic and fairness\n5. 
**Entertainment** - Watch epic games unfold step by step\n\n## Future Enhancements\n\nPotential additions:\n- AI action links to player actions that triggered them\n- Battle outcome links to the actions that led to the battle\n- Resource transfer chains across multiple locations\n- Victory condition traces showing the sequence that led to game end\n- Interactive replay UI with game map visualization\n- Export replay data for external analysis \n\n## Troubleshooting Replay Functionality\n\n### Tempo API Query Strategy\n\nThe replay system uses a **two-step approach** to work reliably with Tempo:\n\n#### **Step 1: Discover Game Sessions**\nUses Tempo's tag values API to find all available game session IDs:\n```bash\nGET /api/v2/search/tag/game.session.id/values?start=<timestamp>&end=<timestamp>&limit=50\n```\n\nThis returns all unique values for the `game.session.id` tag, giving us a list of available sessions.\n\n#### **Step 2: Query Each Session**\nFor each discovered session ID, queries for its traces:\n```bash\nGET /api/search?q={game.session.id=\"specific-session-id\"}&limit=100\n```\n\nThis approach avoids complex TraceQL queries that might fail with 400 errors.\n\n### Common Issues and Solutions\n\n#### 1. Tempo Query Errors (400 Bad Request)\n\n**Problem**: Getting 400 errors when querying Tempo with complex TraceQL\n\n**Solutions**:\n- **New approach**: Use tag values API first, then simple session-specific queries\n- **Fallback**: System automatically falls back to local SQLite database\n- **Logging**: Enhanced logging shows exactly which queries are being attempted\n\n#### 2. Missing Span Attributes\n\n**Problem**: Custom span attributes like `game.session.id` may not be indexed in Tempo\n\n**Solutions**:\n- **Attribute verification**: Check that spans are being created with correct attributes\n- **Hybrid approach**: Local database stores action sequence as backup\n- **Index configuration**: Ensure Tempo is configured to index custom attributes\n\n#### 3. 
Time Range Issues\n\n**Problem**: Discovery queries over very wide time windows are slow and prone to timeouts, and timestamps that are not expressed in Unix seconds cause compatibility issues with Tempo's API\n\n**Solutions**:\n- **4-hour window**: System now uses 4-hour time windows for discovery\n- **Unix timestamps**: Uses seconds-based timestamps for better compatibility\n- **Configurable ranges**: Time ranges can be adjusted based on game session length\n\n### Data Source Fallbacks\n\nThe replay system has multiple data sources in order of preference:\n\n1. **`tempo_tag_values`** - Primary approach using tag values API\n2. **`tempo_search_only`** - Basic span data from search results only  \n3. **`local_db_fallback`** - SQLite database as final fallback\n\n### Debug Tools\n\n#### 1. Replay Debug Script\n```bash\ncd game-of-tracing\npython debug_replay.py\n```\n\nThis comprehensive script tests:\n- Tempo connection and version\n- Basic TraceQL query functionality  \n- Game-specific attribute queries\n- Replay API endpoints\n- Local database fallback\n\n#### 2. Manual Tempo Queries\n\nTest Tempo directly using curl:\n\n```bash\n# Basic connectivity\ncurl http://localhost:3200/ready\n\n# Simple trace search\ncurl \"http://localhost:3200/api/search?q={span.name!=\\\"\\\"}&limit=5\"\n\n# Game-specific search\ncurl \"http://localhost:3200/api/search?q={span.name=\\\"collect_resources\\\"}&limit=10\"\n```\n\n#### 3. Replay API Testing\n\n```bash\n# Get available sessions\ncurl http://localhost:8080/api/replay/sessions\n\n# Get specific session\ncurl http://localhost:8080/api/replay/session/your-session-id\n\n# Check local database health\ncurl http://localhost:8080/api/debug/health\n```\n\n### Replay System Architecture\n\nThe improved replay system uses a **hybrid approach**:\n\n#### 1. Primary Data Source: Tempo\n- Queries Tempo using multiple TraceQL approaches\n- Extracts complete span information including links\n- Provides full distributed tracing context\n\n#### 2. 
Fallback Data Source: Local SQLite\n- Stores essential game action metadata\n- Always available even if Tempo queries fail\n- Enables replay functionality regardless of Tempo state\n\n#### 3. Query Strategy\n```python\n# Multiple query attempts with increasing specificity\nqueries = [\n    '{span.name=\"collect_resources\" || span.name=\"create_army\" || span.name=\"move_army\" || span.name=\"all_out_attack\"}',\n    '{resource.service.name=\"war_map\"}', \n    '{game.action.type!=\"\"}',\n    '{span.name!=\"\"}'  # Fallback to any spans\n]\n```\n\n### Performance Optimizations\n\n#### 1. Time Window Optimization\n- **Before**: 24-hour windows with nanosecond precision\n- **After**: 1-hour windows with Unix second precision\n- **Result**: Faster queries, reduced timeout errors\n\n#### 2. Query Prioritization\n- Try specific game queries first\n- Fall back to broader queries if needed\n- Use local database if all Tempo queries fail\n\n#### 3. Response Caching\n- Session metadata cached in local database\n- Reduces repeated Tempo queries\n- Improves UI responsiveness\n\n### Access After Game Reset\n\nThe replay page is now accessible from the faction selection screen:\n\n**Location**: [http://localhost:8080](http://localhost:8080) → \"View Game Replays\" button\n\n**Benefits**:\n- No need to be in an active game session\n- Available immediately after game reset\n- Persistent access to historical game data\n\n### Expected Response Format\n\n#### Successful Tempo Response\n```json\n{\n  \"success\": true,\n  \"sessions\": [\n    {\n      \"session_id\": \"abc-123-def\",\n      \"player_name\": \"Alice\", \n      \"faction\": \"southern\",\n      \"start_time\": 1234567890000000000,\n      \"action_count\": 5,\n      \"last_action\": \"move_army\"\n    }\n  ],\n  \"query_method\": \"tempo\",\n  \"total_sessions\": 1\n}\n```\n\n#### Fallback Local Database Response\n```json\n{\n  \"success\": true,\n  \"sessions\": [...],\n  \"query_method\": \"local_db_fallback\",\n  
\"warning\": \"Tempo query failed: connection timeout\"\n}\n```\n\n### Tempo Configuration Requirements\n\nFor optimal replay functionality, ensure Tempo is configured with:\n\n```yaml\n# tempo-config.yaml\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\nstream_over_http_enabled: true\n```\n\nAnd in docker-compose.yml:\n```yaml\nenvironment:\n  - TEMPO_URL=http://tempo:3200\n```\n\n### TraceQL Query Examples\n\nBased on the [Tempo API documentation](https://grafana.com/docs/tempo/latest/api_docs/), these queries should work:\n\n#### Basic Queries\n```traceql\n# Find any spans with duration\n{duration>1ms}\n\n# Find spans by name\n{span.name=\"collect_resources\"}\n\n# Find spans by service\n{resource.service.name=\"war_map\"}\n```\n\n#### Game-Specific Queries\n```traceql\n# Find game actions (if attributes are indexed)\n{game.action.type!=\"\"}\n\n# Find player actions (if attributes are indexed)  \n{player.name!=\"\"}\n\n# Combine conditions\n{span.name=\"move_army\" && player.faction=\"southern\"}\n```\n\n### Integration with Grafana\n\nOnce the replay data is accessible, you can:\n\n1. **View in Grafana Tempo**: Search for game session traces directly\n2. **Create dashboards**: Visualize game progression over time\n3. **Set up alerts**: Monitor for specific game events\n4. **Analyze patterns**: Study player behavior across multiple games "
  },
  {
    "path": "game-of-tracing/ai_opponent/CLAUDE.md",
    "content": "# ai_opponent/ — Strategic AI Decision Engine\n\n> Algorithmic opponent (not LLM-based) that plays the faction not chosen by a human player. This doc is read by any AI coding agent. For scenario-wide context read [`../AGENTS.md`](../AGENTS.md) first.\n\n## Purpose\n\n`ai-opponent` is a Flask service on port **8081** that takes control of a faction and makes strategic decisions on a recurring loop. It is activated by `war_map` via `POST /activate` with JSON body `{\"faction\": ..., \"map_id\": ...}` — on the WoK map the player toggles it on manually; on WWA it auto-activates as `white_walkers` the moment the player picks the map.\n\nTwo AI variants dispatch off the `faction` field at activation time:\n\n- **`StrategicAI`** — classic WoK opponent (southern / northern). 6-step priority cascade: capital defense → zero-risk captures → resource transfers → plan execution → plan creation → fallback.\n- **`WhiteWalkerAI(StrategicAI)`** — single-player WWA opponent. Different cascade: defend fortress → capture unowned wall → reinforce weakest wall (non-capital neighbours preferred; capital is a fallback when no other source has spare army, since `move_army` empties the source) → raid barbarian village (for corpses) → raise army from corpses at the fortress (only requires the capital to still belong to the AI; no minimum garrison) → idle. 
Reads its corpse pool via `GET /faction_economy?faction=white_walkers` on any location service; spends 5 corpses per army unit instead of 30 resources.\n\nCommon to both variants, the AI:\n\n- Fetches the state of all 8 locations.\n- Runs a priority cascade of checks to decide the next action (defend, capture, transfer, plan, fallback).\n- Executes the action via the same HTTP API the player uses (against the location services on 5001-5008).\n- Emits fully-linked traces so the replay UI can narrate the AI's reasoning alongside the human player's.\n- Adapts its loop cadence (2-15 s) to the current game phase.\n\n**This is deterministic code, not an LLM.** No `anthropic`, `openai`, or other model SDKs are imported.\n\n## File map\n\n| File | Size | Purpose |\n|---|---|---|\n| `ai_server.py` | ~46 KB | Main decision engine: `StrategicAI`, `PhaseDetector`, `Planner`, `MapAnalyzer`, Flask routes, decision loop. |\n| `telemetry.py` | ~7.7 KB | `AITelemetry` class for `ai-opponent` — traces, logs, AI-specific metrics, plus Pyroscope profiling with OTel span-profile linkage. |\n| `README.md` | ~2.6 KB | Feature doc. |\n| `Dockerfile` | small | `python:3.11-slim`, `pip install -r requirements.txt`, sets `FLASK_APP=ai_server.py` and `IN_DOCKER=1`, runs `flask run --host=0.0.0.0 --port=8081`. |\n| `requirements.txt` | small | Flask 3.1.3, requests 2.33.1, OpenTelemetry SDK/API + exporters, `pyroscope-io` + `pyroscope-otel` for profiling. |\n\n## Decision model\n\n### Priority cascade — `StrategicAI.decide()`\n\nExecuted every cycle; returns the first non-null action:\n\n1. **Capital defense.** If the capital is under threat (enemy army adjacent with path-army-estimate exceeding capital garrison), react: build army, pull army back, or preempt.\n2. **Zero-risk captures.** Grab any neutral village reachable with overwhelming numerical advantage.\n3. **Resource transfers.** Move resources from villages to the capital when the capital is running low.\n4. **Plan execution.** If a multi-step plan is active and valid, advance to the next step.\n5. 
**Plan creation.** Propose a new plan targeting the most valuable enemy territory.\n6. **Fallback.** Collect resources at the capital.\n\n### Phase detection — `PhaseDetector.detect()` at `ai_server.py:195-212`\n\nFive phases drive cadence and aggressiveness:\n\n| Phase | Condition | Cadence (seconds) |\n|---|---|---|\n| `READY_TO_ATTACK` | `total_army >= 8` | 3-8 |\n| `DESPERATE` | `my_count <= 1` | 2-5 |\n| `DEFENSIVE` | `my_count < enemy_count` | medium |\n| `DOMINATING` | `my_count > enemy_count + 1` | 5-15 |\n| `BALANCED` | everything else | 5-15 |\n\nCadence is set by `StrategicAI.get_pause_time()`; faster in crisis, slower in stability.\n\n### Supporting classes\n\n- **`MapAnalyzer`** (`ai_server.py:64-135`) — precomputes BFS distances between all location pairs at startup. Used by `path_army_estimate()` to sum enemy armies along shortest path to a target — enabling threat assessment.\n- **`Planner`** (`ai_server.py:216+`) — multi-step goal sequences like `[create_army, create_army, create_army, move_army(target)]`. Validated every cycle via `Planner.validate()`; abandoned if preconditions break (e.g., capital lost, source location flipped).\n- **`GameMemory`** — tracks territory-loss history, failed attacks, enemy push directions; used by `territory_lost_recently()` etc. 
at `ai_server.py:180-191` to adjust reactive behavior.\n\n## Custom metrics\n\n| Metric | Type | Attributes | Emitter |\n|---|---|---|---|\n| `ai.decisions` | counter | `action_type`, `phase`, `reason` | `decide()` / `execute_strategic_action()` |\n| `ai.plans_created` | counter | `goal` | `Planner.set_plan` |\n| `ai.plans_abandoned` | counter | `reason` | `StrategicAI.decide()` via `telemetry.record_plan_abandoned`, when `Planner.validate()` drops the active plan |\n| `ai.decision_cycle_duration_seconds` | histogram | `phase` | Each decision cycle |\n| `ai.territory_count` | observable gauge | `faction` | Callback into live state |\n| `ai.total_army` | observable gauge | `faction` | Callback into live state |\n\n## Span events\n\nSignificant state transitions are emitted as events on the active decision span (rather than as standalone spans):\n\n- `phase_transition` — with `previous_phase`, `new_phase`, `territory_count`, `total_army` attributes\n- `territory_change` — with `territories_gained` / `territories_lost` (stringified lists) and `current_count`\n- `plan_abandoned` — with `previous_goal` and `reason`\n- `threat_detected` — with `threat_location`, `threat_army`, `capital_army`, `armies_needed`\n\nLocations: `ai_server.py:299-327`.\n\n## Span links unique to `ai_opponent/`\n\nThe AI opponent instruments its own causal chain **inside a single decision cycle**:\n\n- `ai_decision_cycle` span (SpanKind.INTERNAL) wraps the whole cycle.\n- `ai_decision` span (child, INTERNAL) captures the cascade evaluation and chosen action.\n- `execute_ai_action` span (INTERNAL) is the action execution — it starts with a `Link` back to the `ai_decision` span's context, with `link.type=\"ai_decision_trigger\"`. This allows the replay UI to jump from the executed action back to the reasoning that produced it.\n\nThe linking logic lives around `ai_server.py:888-901`. 
The AI does **not** participate in the cross-session `game_sequence` chain that `war_map` builds — that is player-only.\n\n## Environment\n\n| Var | Default | Purpose |\n|---|---|---|\n| `PORT` | `8081` | Flask listen port |\n| `IN_DOCKER` | unset | When set, location URLs resolve via container DNS (`southern-capital:5001`) instead of `localhost:5001` |\n\nTelemetry endpoints are hard-coded in `telemetry.py` to `alloy:4317` (gRPC traces) and `alloy:4318` (HTTP logs + metrics). The service resource is registered with `SERVICE_NAME=\"ai-opponent\"`.\n\n## Activation flow\n\n1. `war_map` calls `POST http://ai-opponent:8081/activate` with a JSON body such as `{\"faction\": \"northern\", \"map_id\": \"war_of_kingdoms\"}`.\n2. The handler constructs the AI variant selected by `faction` (`StrategicAI(faction, map_id)`, or `WhiteWalkerAI` for `white_walkers`) and starts `ai_decision_loop()` in a daemon thread.\n3. The loop runs until `/deactivate` is called or the game is marked over.\n4. Each cycle captures a span, logs, and increments the appropriate metrics.\n\n## Common edits\n\n**Tune aggressiveness.**\nAdjust thresholds in `PhaseDetector.detect()` at `ai_server.py:195-212`, or the cadence ranges in `get_pause_time()`.\n\n**Change the priority cascade.**\nEdit `StrategicAI.decide()`. Each priority is its own helper (`_check_capital_defense`, `_find_zero_risk_captures`, `_do_resource_transfers`, `_execute_plan_step`, `_create_new_plan`, `_fallback`). Reorder by reshuffling the cascade.\n\n**Add a new AI metric.**\nMirror the observable-gauge pattern in `telemetry.py` and wire a callback that reads from `StrategicAI` live state (via a registered state accessor, same pattern as `app/telemetry.py`).\n\n**Add a new span event.**\nCall `span.add_event(\"event_name\", attributes={...})` inside the decision span. Keep the existing four event names stable — they feed replay UI rendering.\n\n## Keep this doc current\n\nPer the sub-agent rule, any change to the priority cascade, phase thresholds, metric set, env vars, or the line-number anchors above must land in the same work unit. 
Before returning a response that touched `ai_opponent/`, grep this file for references to anything you changed.\n\n## Cross-references\n\n- [`../AGENTS.md`](../AGENTS.md) — scenario-wide architecture and patterns\n- [`../app/CLAUDE.md`](../app/CLAUDE.md) — the location-server HTTP API this AI calls\n- [`../war_map/CLAUDE.md`](../war_map/CLAUDE.md) — the orchestrator that activates/deactivates this service\n- [`../SPAN_LINKS.md`](../SPAN_LINKS.md) — span-link design, including the `ai_decision_trigger` link type\n"
  },
  {
    "path": "game-of-tracing/ai_opponent/Dockerfile",
    "content": "FROM python:3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install --no-cache-dir -r requirements.txt\n\nCOPY . .\n\nENV FLASK_APP=ai_server.py\nENV FLASK_DEBUG=0\nENV IN_DOCKER=1\n\nEXPOSE 8081\n\nCMD [\"flask\", \"run\", \"--host=0.0.0.0\", \"--port=8081\"] "
  },
  {
    "path": "game-of-tracing/ai_opponent/README.md",
    "content": "# AI Opponent for War of Kingdoms\n\nThis Flask-based AI service provides an intelligent opponent for single-player games in the War of Kingdoms distributed tracing tutorial.\n\n## Features\n\n### Adaptive Strategy\nThe AI adapts its strategy to a game phase derived from the current board state (territory counts and total army), not from elapsed time. Phases are checked in this order:\n- **Ready to attack**: the AI's total army is 8 or more\n- **Desperate**: the AI holds at most one territory\n- **Defensive**: the AI holds fewer territories than the enemy\n- **Dominating**: the AI holds at least two more territories than the enemy\n- **Balanced**: everything else\n\n### Natural Behavior\n- Takes 15-45 second pauses between actions to simulate human thinking time\n- Uses weighted random decisions to avoid predictable patterns\n- Reacts to player threats by reinforcing endangered locations\n- Manages resources by transferring them from villages to capitals\n\n### Decision Making\nThe AI analyzes the game state to make intelligent decisions:\n1. **Threat Analysis**: Identifies enemy armies near its territories\n2. **Expansion Targets**: Finds neutral villages and weak enemy locations\n3. **Resource Management**: Collects resources and creates armies when needed\n4. **Strategic Movement**: Reinforces threatened locations and attacks vulnerable targets\n\n### OpenTelemetry Integration\nAll AI actions are fully instrumented with OpenTelemetry:\n- Traces show decision-making process\n- Spans include game phase, threats, and chosen actions\n- Integrates with the game's distributed tracing pipeline\n\n## API Endpoints\n\n- `POST /activate` - Activate the AI for a specific faction\n- `POST /deactivate` - Deactivate the AI\n- `GET /status` - Get current AI status\n- `GET /health` - Health check endpoint\n\n## How It Works\n\n1. When activated, the AI starts a background thread that runs the decision loop\n2. 
Every 15-45 seconds, it:\n   - Fetches the current game state from all locations\n   - Analyzes threats and opportunities\n   - Makes a weighted random decision based on the game phase\n   - Executes the chosen action via location server APIs\n3. The AI automatically stops when it detects game over\n\n## Configuration\n\nThe AI difficulty is set to \"normal\" and provides a balanced challenge. Decision weights can be adjusted in the `DECISION_WEIGHTS` dictionary to make the AI more aggressive or defensive.\n\n## Usage\n\nThe AI is integrated with the War Map UI:\n1. Players can toggle \"Enable AI Opponent\" in the game interface\n2. The AI automatically takes control of the faction not chosen by the player\n3. For two-player games, keep the AI toggle off\n\n## Observability\n\nMonitor AI behavior through:\n- **Traces**: View AI decision-making and action execution\n- **Logs**: Track AI state changes and decisions\n- **Service Map**: See AI interactions with location servers "
  },
  {
    "path": "game-of-tracing/ai_opponent/ai_server.py",
    "content": "import os\nimport time\nimport random\nimport requests\nimport threading\nimport atexit\nfrom collections import deque\nfrom flask import Flask, jsonify, request\nfrom telemetry import AITelemetry\nfrom opentelemetry import trace, baggage\nfrom opentelemetry.trace import SpanKind, Link\nfrom opentelemetry.propagate import inject\nfrom datetime import datetime, timedelta\nfrom enum import Enum\n\napp = Flask(__name__)\n\n# Initialize telemetry\ntelemetry = AITelemetry()\nlogger = telemetry.get_logger()\ntracer = telemetry.get_tracer()\natexit.register(telemetry.shutdown)\n\n# ─── Constants ─────────────────────────────────────────────────────────────────\n\n# Per-map adjacency lists. Keep keys in sync with\n# game-of-tracing/app/game_config.py's MAPS[*][\"locations\"][*][\"connections\"].\nMAP_GRAPHS_BY_MAP = {\n    \"war_of_kingdoms\": {\n        \"southern_capital\": [\"village_1\", \"village_3\"],\n        \"northern_capital\": [\"village_2\", \"village_6\"],\n        \"village_1\": [\"southern_capital\", \"village_2\", \"village_4\"],\n        \"village_2\": [\"northern_capital\", \"village_1\", \"village_5\"],\n        \"village_3\": [\"southern_capital\", \"village_5\", \"village_6\"],\n        \"village_4\": [\"village_1\", \"village_5\"],\n        \"village_5\": [\"village_2\", \"village_3\", \"village_4\", \"village_6\"],\n        \"village_6\": [\"northern_capital\", \"village_3\", \"village_5\"],\n    },\n    \"white_walkers_attack\": {\n        \"nights_watch_fortress\": [\n            \"wall_west\", \"wall_center_west\", \"wall_center_east\", \"wall_east\",\n        ],\n        \"white_walker_fortress\": [\n            \"wall_west\", \"wall_center_west\", \"wall_center_east\", \"wall_east\",\n        ],\n        \"wall_west\": [\n            \"nights_watch_fortress\", \"white_walker_fortress\",\n            \"wall_center_west\", \"barbarian_village_west\",\n        ],\n        \"wall_center_west\": [\n            
\"nights_watch_fortress\", \"white_walker_fortress\",\n            \"wall_west\", \"wall_center_east\",\n        ],\n        \"wall_center_east\": [\n            \"nights_watch_fortress\", \"white_walker_fortress\",\n            \"wall_center_west\", \"wall_east\",\n        ],\n        \"wall_east\": [\n            \"nights_watch_fortress\", \"white_walker_fortress\",\n            \"wall_center_east\", \"barbarian_village_east\",\n        ],\n        \"barbarian_village_west\": [\"wall_west\"],\n        \"barbarian_village_east\": [\"wall_east\"],\n    },\n}\n\n# Per-map capital mapping (faction -> location_id of that faction's capital).\nCAPITALS_BY_MAP = {\n    \"war_of_kingdoms\": {\n        \"southern\": \"southern_capital\",\n        \"northern\": \"northern_capital\",\n    },\n    \"white_walkers_attack\": {\n        \"nights_watch\": \"nights_watch_fortress\",\n        \"white_walkers\": \"white_walker_fortress\",\n    },\n}\n\n# Per-map location type lookup (capital / village / wall).\nLOCATION_TYPES_BY_MAP = {\n    \"war_of_kingdoms\": {\n        \"southern_capital\": \"capital\", \"northern_capital\": \"capital\",\n        \"village_1\": \"village\", \"village_2\": \"village\", \"village_3\": \"village\",\n        \"village_4\": \"village\", \"village_5\": \"village\", \"village_6\": \"village\",\n    },\n    \"white_walkers_attack\": {\n        \"nights_watch_fortress\": \"capital\",\n        \"white_walker_fortress\": \"capital\",\n        \"wall_west\": \"wall\", \"wall_center_west\": \"wall\",\n        \"wall_center_east\": \"wall\", \"wall_east\": \"wall\",\n        \"barbarian_village_west\": \"village\",\n        \"barbarian_village_east\": \"village\",\n    },\n}\n\n# Per-map location faction (static initial ownership — what the AI reasons\n# about for walls-are-neutral / barbarian-villages-are-barbarian etc.).\nINITIAL_FACTIONS_BY_MAP = {\n    \"war_of_kingdoms\": {\n        \"southern_capital\": \"southern\", \"northern_capital\": 
\"northern\",\n        \"village_1\": \"neutral\", \"village_2\": \"neutral\", \"village_3\": \"neutral\",\n        \"village_4\": \"neutral\", \"village_5\": \"neutral\", \"village_6\": \"neutral\",\n    },\n    \"white_walkers_attack\": {\n        \"nights_watch_fortress\": \"nights_watch\",\n        \"white_walker_fortress\": \"white_walkers\",\n        \"wall_west\": \"neutral\", \"wall_center_west\": \"neutral\",\n        \"wall_center_east\": \"neutral\", \"wall_east\": \"neutral\",\n        \"barbarian_village_west\": \"barbarian\",\n        \"barbarian_village_east\": \"barbarian\",\n    },\n}\n\n# Per-map army cost per faction. Matches app/game_config.py's rules.army_cost.\nARMY_COST_BY_MAP = {\n    \"war_of_kingdoms\": {\"default\": 30},\n    \"white_walkers_attack\": {\"default\": 30, \"white_walkers\": 5},\n}\n\n# Backward-compat alias: legacy code that references MAP_GRAPH still sees WoK.\nMAP_GRAPH = MAP_GRAPHS_BY_MAP[\"war_of_kingdoms\"]\n\nARMY_COST = 30\nVILLAGE_INCOME_PER_MIN = 40  # ~10 resources every 15s\nRESOURCE_TRANSFER_THRESHOLD = 30\n\n# Single port table keyed by location id (same ports are shared across maps\n# because a slot's port is fixed and each map just renames the slot).\nLOCATION_PORTS = {\n    \"southern_capital\": 5001,\n    \"northern_capital\": 5002,\n    \"village_1\": 5003,\n    \"village_2\": 5004,\n    \"village_3\": 5005,\n    \"village_4\": 5006,\n    \"village_5\": 5007,\n    \"village_6\": 5008,\n    # White Walkers Attack aliases (same physical slot → same port).\n    \"nights_watch_fortress\": 5001,\n    \"white_walker_fortress\": 5002,\n    \"wall_west\": 5003,\n    \"wall_center_west\": 5004,\n    \"wall_center_east\": 5005,\n    \"wall_east\": 5006,\n    \"barbarian_village_west\": 5007,\n    \"barbarian_village_east\": 5008,\n}\n\n# Container hostname per logical location id (resolves HTTP URLs in docker).\nCONTAINER_FOR_LOCATION_ID = {\n    # WoK ids are their own container names.\n    \"southern_capital\": 
\"southern-capital\",\n    \"northern_capital\": \"northern-capital\",\n    \"village_1\": \"village-1\",\n    \"village_2\": \"village-2\",\n    \"village_3\": \"village-3\",\n    \"village_4\": \"village-4\",\n    \"village_5\": \"village-5\",\n    \"village_6\": \"village-6\",\n    # WWA ids share containers with their slot peer.\n    \"nights_watch_fortress\": \"southern-capital\",\n    \"white_walker_fortress\": \"northern-capital\",\n    \"wall_west\": \"village-1\",\n    \"wall_center_west\": \"village-2\",\n    \"wall_center_east\": \"village-3\",\n    \"wall_east\": \"village-4\",\n    \"barbarian_village_west\": \"village-5\",\n    \"barbarian_village_east\": \"village-6\",\n}\n\n\ndef get_map_graph(map_id):\n    return MAP_GRAPHS_BY_MAP.get(map_id, MAP_GRAPH)\n\n\ndef get_capitals(map_id):\n    return CAPITALS_BY_MAP.get(map_id, CAPITALS_BY_MAP[\"war_of_kingdoms\"])\n\n\ndef get_location_types(map_id):\n    return LOCATION_TYPES_BY_MAP.get(map_id, LOCATION_TYPES_BY_MAP[\"war_of_kingdoms\"])\n\n\ndef get_initial_factions(map_id):\n    return INITIAL_FACTIONS_BY_MAP.get(map_id, INITIAL_FACTIONS_BY_MAP[\"war_of_kingdoms\"])\n\n\ndef get_army_cost_for(map_id, faction):\n    costs = ARMY_COST_BY_MAP.get(map_id, ARMY_COST_BY_MAP[\"war_of_kingdoms\"])\n    return costs.get(faction, costs[\"default\"])\n\n# ─── Game Phase ────────────────────────────────────────────────────────────────\n\nclass GamePhase(Enum):\n    DESPERATE = \"desperate\"\n    DEFENSIVE = \"defensive\"\n    BALANCED = \"balanced\"\n    DOMINATING = \"dominating\"\n    READY_TO_ATTACK = \"ready_to_attack\"\n\n# ─── Map Analyzer ──────────────────────────────────────────────────────────────\n\nclass MapAnalyzer:\n    \"\"\"Precomputed map analysis: BFS distances, strategic values, path army estimation.\"\"\"\n\n    def __init__(self, graph=None, capitals=None):\n        # ``graph`` defaults to WoK to preserve legacy behaviour; new callers\n        # pass the active map's adjacency list. 
``capitals`` is the map's\n        # faction→capital dict (needed for the strategic-value heuristic).\n        self.graph = graph if graph is not None else MAP_GRAPH\n        self.capitals = capitals if capitals is not None else CAPITALS_BY_MAP[\"war_of_kingdoms\"]\n        self.distances = self._compute_all_distances()\n        self.strategic_values = self._compute_strategic_values()\n\n    def _bfs_distances(self, start):\n        \"\"\"BFS from start node, returns dict {node: distance}.\"\"\"\n        visited = {start: 0}\n        queue = deque([start])\n        while queue:\n            node = queue.popleft()\n            for neighbor in self.graph[node]:\n                if neighbor not in visited:\n                    visited[neighbor] = visited[node] + 1\n                    queue.append(neighbor)\n        return visited\n\n    def _compute_all_distances(self):\n        \"\"\"Precompute all-pairs BFS distances.\"\"\"\n        return {loc: self._bfs_distances(loc) for loc in self.graph}\n\n    def _compute_strategic_values(self):\n        \"\"\"Score each location by connectivity + centrality.\n\n        High connectivity or short distance to either capital = valuable.\n        Works identically across maps because it reads capitals from the\n        per-map mapping rather than hardcoding WoK's capital names.\n        \"\"\"\n        values = {}\n        capital_ids = list(self.capitals.values())\n        for loc in self.graph:\n            connections = len(self.graph[loc])\n            if capital_ids:\n                avg_capital_dist = sum(\n                    self.distances[loc].get(cap, 99) for cap in capital_ids\n                ) / float(len(capital_ids))\n            else:\n                avg_capital_dist = 99\n            values[loc] = connections + (4.0 / max(avg_capital_dist, 1))\n        return values\n\n    def distance(self, a, b):\n        return self.distances[a].get(b, 99)\n\n    def neighbors(self, loc):\n        return self.graph.get(loc, 
[])\n\n    def path_army_estimate(self, game_state, from_loc, to_loc, my_faction):\n        \"\"\"Estimate total enemy army along BFS shortest path from from_loc to to_loc.\"\"\"\n        parent = {from_loc: None}\n        queue = deque([from_loc])\n        while queue:\n            node = queue.popleft()\n            if node == to_loc:\n                break\n            for neighbor in self.graph[node]:\n                if neighbor not in parent:\n                    parent[neighbor] = node\n                    queue.append(neighbor)\n\n        if to_loc not in parent:\n            return 999  # unreachable\n\n        # Walk path and sum enemy armies (excluding from_loc)\n        path = []\n        node = to_loc\n        while node is not None:\n            path.append(node)\n            node = parent[node]\n        path.reverse()\n\n        enemy_army = 0\n        for loc in path[1:]:  # skip from_loc\n            loc_data = game_state.get(loc, {})\n            if loc_data.get('faction') != my_faction:\n                enemy_army += loc_data.get('army', 0)\n        return enemy_army\n\n# ─── Game Memory ───────────────────────────────────────────────────────────────\n\nclass GameMemory:\n    \"\"\"Tracks territory changes, failed attacks, and enemy push direction.\"\"\"\n\n    def __init__(self):\n        self.territory_history = []  # list of (timestamp, my_territories set)\n        self.failed_attacks = {}     # {target_loc: last_failure_time}\n        self.enemy_push_direction = None\n        self.last_enemy_territories = set()\n\n    def update(self, game_state, my_faction):\n        now = time.time()\n        my_territories = set()\n        enemy_territories = set()\n\n        for loc_id, data in game_state.items():\n            if data.get('faction') == my_faction:\n                my_territories.add(loc_id)\n            elif data.get('faction') not in (my_faction, 'neutral'):\n                enemy_territories.add(loc_id)\n\n        
self.territory_history.append((now, my_territories.copy()))\n        if len(self.territory_history) > 20:\n            self.territory_history = self.territory_history[-20:]\n\n        # Detect enemy push direction: new enemy territory closest to our capital\n        new_enemy = enemy_territories - self.last_enemy_territories\n        if new_enemy:\n            self.enemy_push_direction = list(new_enemy)[0]\n        self.last_enemy_territories = enemy_territories\n\n        return my_territories, enemy_territories\n\n    def record_failed_attack(self, target):\n        self.failed_attacks[target] = time.time()\n\n    def recently_failed(self, target, cooldown=60):\n        last = self.failed_attacks.get(target)\n        if last is None:\n            return False\n        return (time.time() - last) < cooldown\n\n    def territory_lost_recently(self, seconds=30):\n        \"\"\"Check if we lost territory in the last N seconds.\"\"\"\n        if len(self.territory_history) < 2:\n            return False\n        now = time.time()\n        current = self.territory_history[-1][1]\n        for ts, territories in reversed(self.territory_history[:-1]):\n            if now - ts > seconds:\n                break\n            if len(territories) > len(current):\n                return True\n        return False\n\n# ─── Phase Detector ────────────────────────────────────────────────────────────\n\nclass PhaseDetector:\n    \"\"\"State-based phase detection using territory count and total army.\"\"\"\n\n    @staticmethod\n    def detect(my_territories, enemy_territories, total_army):\n        my_count = len(my_territories)\n        enemy_count = len(enemy_territories)\n\n        if total_army >= 8:\n            return GamePhase.READY_TO_ATTACK\n        if my_count <= 1:\n            return GamePhase.DESPERATE\n        elif my_count < enemy_count:\n            return GamePhase.DEFENSIVE\n        elif my_count > enemy_count + 1:\n            return GamePhase.DOMINATING\n        
else:\n            return GamePhase.BALANCED\n\n# ─── Planner ───────────────────────────────────────────────────────────────────\n\nclass Planner:\n    \"\"\"Multi-step goal planning: sequences like [create_army x3, move_army(target)].\"\"\"\n\n    def __init__(self):\n        self.steps = []\n        self.goal = None\n\n    @property\n    def active(self):\n        return len(self.steps) > 0\n\n    def set_plan(self, goal, steps):\n        self.goal = goal\n        self.steps = list(steps)\n\n    def next_step(self):\n        if self.steps:\n            return self.steps[0]\n        return None\n\n    def advance(self):\n        if self.steps:\n            self.steps.pop(0)\n\n    def abandon(self, reason=\"\"):\n        self.steps = []\n        self.goal = None\n\n    def validate(self, game_state, my_faction, my_capital):\n        \"\"\"Check if the current plan is still valid. Abandon if not.\"\"\"\n        if not self.active:\n            return\n\n        step = self.steps[0]\n        action = step.get(\"action\")\n\n        if action == \"create_army\":\n            cap_data = game_state.get(my_capital, {})\n            if cap_data.get('faction') != my_faction:\n                self.abandon(\"lost capital\")\n        elif action == \"move_army\":\n            from_loc = step.get(\"from\")\n            loc_data = game_state.get(from_loc, {})\n            if loc_data.get('faction') != my_faction or loc_data.get('army', 0) == 0:\n                self.abandon(\"lost staging location or no army\")\n        elif action == \"all_out_attack\":\n            cap_data = game_state.get(my_capital, {})\n            if cap_data.get('faction') != my_faction or cap_data.get('army', 0) < 3:\n                self.abandon(\"insufficient army for all-out attack\")\n\n# ─── Strategic AI ──────────────────────────────────────────────────────────────\n\nclass StrategicAI:\n    \"\"\"Main decision engine with priority cascade.\"\"\"\n\n    def __init__(self, faction, 
map_id=\"war_of_kingdoms\"):\n        self.faction = faction\n        self.map_id = map_id\n        capitals = get_capitals(map_id)\n        self.my_capital = capitals.get(faction)\n        enemies = [cap for fac, cap in capitals.items() if fac != faction]\n        self.enemy_capital = enemies[0] if enemies else None\n        self.map = MapAnalyzer(graph=get_map_graph(map_id), capitals=capitals)\n        self.memory = GameMemory()\n        self.planner = Planner()\n        self.phase = GamePhase.BALANCED\n        self.my_territories = set()\n        self.enemy_territories = set()\n        self.total_army = 0\n        self._previous_phase = None\n        self._previous_territories = set()\n        self._last_evaluated = []\n        # Army cost for this faction on this map.\n        self.army_cost = get_army_cost_for(map_id, faction)\n\n    def decide(self, game_state):\n        \"\"\"Run the priority cascade and return an action dict or None.\"\"\"\n        # Update memory and phase\n        self.my_territories, self.enemy_territories = self.memory.update(game_state, self.faction)\n        self.total_army = sum(\n            data.get('army', 0) for loc, data in game_state.items()\n            if data.get('faction') == self.faction\n        )\n        self.phase = PhaseDetector.detect(self.my_territories, self.enemy_territories, self.total_army)\n\n        # Span events: phase transition\n        span = trace.get_current_span()\n        if self._previous_phase is not None and self.phase != self._previous_phase:\n            span.add_event(\"phase_transition\", attributes={\n                \"previous_phase\": self._previous_phase.value,\n                \"new_phase\": self.phase.value,\n                \"territory_count\": len(self.my_territories),\n                \"total_army\": self.total_army,\n            })\n        self._previous_phase = self.phase\n\n        # Span events: territory change\n        current_territory_set = set(self.my_territories)\n        
gained = current_territory_set - self._previous_territories\n        lost = self._previous_territories - current_territory_set\n        if gained or lost:\n            span.add_event(\"territory_change\", attributes={\n                \"territories_gained\": str(list(gained)),\n                \"territories_lost\": str(list(lost)),\n                \"current_count\": len(current_territory_set),\n            })\n        self._previous_territories = current_territory_set\n\n        # Validate active plan (track if it gets abandoned)\n        had_plan = self.planner.active\n        previous_goal = self.planner.goal\n        self.planner.validate(game_state, self.faction, self.my_capital)\n        if had_plan and not self.planner.active:\n            span.add_event(\"plan_abandoned\", attributes={\n                \"previous_goal\": previous_goal or \"unknown\",\n                \"reason\": \"validation_failed\",\n            })\n            telemetry.record_plan_abandoned(\"validation_failed\")\n\n        # Priority cascade with alternatives tracking\n        evaluated = []\n\n        action = self._check_capital_defense(game_state)\n        if action:\n            evaluated.append(f\"capital_defense: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"capital_defense: skipped\")\n\n        action = self._find_zero_risk_captures(game_state)\n        if action:\n            evaluated.append(f\"zero_risk_capture: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"zero_risk_capture: skipped\")\n\n        action = self._do_resource_transfers(game_state)\n        if action:\n            evaluated.append(f\"resource_transfer: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"resource_transfer: 
skipped\")\n\n        action = self._execute_plan_step(game_state)\n        if action:\n            evaluated.append(f\"execute_plan: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"execute_plan: skipped\")\n\n        action = self._create_new_plan(game_state)\n        if action:\n            evaluated.append(f\"create_plan: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"create_plan: skipped\")\n\n        evaluated.append(\"fallback: TRIGGERED\")\n        self._last_evaluated = evaluated\n        return self._fallback(game_state)\n\n    # ── Priority 1: Capital Defense ────────────────────────────────────────────\n\n    def _check_capital_defense(self, game_state):\n        \"\"\"If enemies adjacent to capital, create armies or reinforce.\"\"\"\n        cap_data = game_state.get(self.my_capital, {})\n        if not cap_data or cap_data.get('faction') != self.faction:\n            return None\n\n        my_army = cap_data.get('army', 0)\n        neighbors = self.map.neighbors(self.my_capital)\n        max_threat = 0\n        threat_loc = None\n\n        for n in neighbors:\n            n_data = game_state.get(n, {})\n            if n_data.get('faction') not in (self.faction, 'neutral') and n_data.get('army', 0) > 0:\n                if n_data['army'] > max_threat:\n                    max_threat = n_data['army']\n                    threat_loc = n\n\n        if max_threat == 0:\n            return None\n\n        needed = max_threat + 2\n        trace.get_current_span().add_event(\"threat_detected\", attributes={\n            \"threat_location\": threat_loc,\n            \"threat_army\": max_threat,\n            \"capital_army\": my_army,\n            \"armies_needed\": needed,\n        })\n        if my_army < needed:\n            if cap_data.get('resources', 0) >= ARMY_COST:\n 
               armies_to_create = min(\n                    needed - my_army,\n                    cap_data['resources'] // ARMY_COST\n                )\n                return {\n                    \"action\": \"create_army\",\n                    \"location\": self.my_capital,\n                    \"count\": max(1, armies_to_create),\n                    \"reason\": f\"capital_defense against {max_threat} at {threat_loc}\"\n                }\n            return self._reinforce_capital(game_state)\n\n        return None\n\n    def _reinforce_capital(self, game_state):\n        \"\"\"Move friendly armies within 2 hops toward capital.\"\"\"\n        best_source = None\n        best_army = 0\n        for loc in MAP_GRAPH:\n            if loc == self.my_capital:\n                continue\n            loc_data = game_state.get(loc, {})\n            if loc_data.get('faction') == self.faction and loc_data.get('army', 0) > 0:\n                dist = self.map.distance(loc, self.my_capital)\n                if dist <= 2 and loc_data['army'] > best_army:\n                    best_army = loc_data['army']\n                    best_source = loc\n\n        if best_source:\n            target = self._step_toward(best_source, self.my_capital)\n            if target:\n                return {\n                    \"action\": \"move_army\",\n                    \"from\": best_source,\n                    \"to\": target,\n                    \"reason\": f\"reinforce capital from {best_source}\"\n                }\n        return None\n\n    def _step_toward(self, from_loc, toward_loc):\n        \"\"\"Return the neighbor of from_loc that is closest to toward_loc.\"\"\"\n        # Must consult the *active map's* adjacency, not the global\n        # ``MAP_GRAPH`` (which is hard-coded to WoK). On WWA the from_loc is\n        # e.g. 
``white_walker_fortress`` — absent from the WoK graph and\n        # raises ``KeyError`` mid-cascade, leaving the AI stuck.\n        best = None\n        best_dist = 99\n        for n in self.map.graph[from_loc]:\n            d = self.map.distance(n, toward_loc)\n            if d < best_dist:\n                best_dist = d\n                best = n\n        return best\n\n    # ── Priority 2: Zero-Risk Captures ─────────────────────────────────────────\n\n    def _find_zero_risk_captures(self, game_state):\n        \"\"\"Capture locations where our army > target army + 1, sorted by strategic value.\"\"\"\n        candidates = []\n        for loc in MAP_GRAPH:\n            loc_data = game_state.get(loc, {})\n            if loc_data.get('faction') == self.faction:\n                continue\n            target_army = loc_data.get('army', 0)\n\n            for neighbor in MAP_GRAPH[loc]:\n                n_data = game_state.get(neighbor, {})\n                if n_data.get('faction') == self.faction and n_data.get('army', 0) > target_army + 1:\n                    # Don't attack from capital if it would leave it defenseless\n                    if neighbor == self.my_capital:\n                        cap_threatened = False\n                        for cap_n in MAP_GRAPH[self.my_capital]:\n                            cn_data = game_state.get(cap_n, {})\n                            if cn_data.get('faction') not in (self.faction, 'neutral') and cn_data.get('army', 0) > 0:\n                                cap_threatened = True\n                                break\n                        if cap_threatened:\n                            continue\n\n                    if self.memory.recently_failed(loc):\n                        continue\n\n                    candidates.append({\n                        \"target\": loc,\n                        \"from\": neighbor,\n                        \"our_army\": n_data['army'],\n                        \"their_army\": target_army,\n 
                       \"strategic_value\": self.map.strategic_values.get(loc, 0),\n                        \"is_neutral\": loc_data.get('faction') == 'neutral',\n                    })\n\n        if not candidates:\n            return None\n\n        candidates.sort(key=lambda c: (-c['is_neutral'], -c['strategic_value']))\n        best = candidates[0]\n        return {\n            \"action\": \"move_army\",\n            \"from\": best[\"from\"],\n            \"to\": best[\"target\"],\n            \"reason\": f\"zero_risk_capture {best['target']} (our {best['our_army']} vs {best['their_army']})\"\n        }\n\n    # ── Priority 3: Resource Transfers ─────────────────────────────────────────\n\n    def _do_resource_transfers(self, game_state):\n        \"\"\"Transfer resources from ALL villages above threshold to capital, every cycle.\"\"\"\n        transfer_targets = []\n        for loc in MAP_GRAPH:\n            if loc == self.my_capital:\n                continue\n            loc_data = game_state.get(loc, {})\n            if (loc_data.get('faction') == self.faction and\n                'village' in loc and\n                loc_data.get('resources', 0) > RESOURCE_TRANSFER_THRESHOLD):\n                transfer_targets.append(loc)\n\n        if not transfer_targets:\n            return None\n\n        return {\n            \"action\": \"resource_transfer\",\n            \"locations\": transfer_targets,\n            \"reason\": f\"transfer resources from {len(transfer_targets)} villages\"\n        }\n\n    # ── Priority 4: Execute Active Plan Step ───────────────────────────────────\n\n    def _execute_plan_step(self, game_state):\n        \"\"\"Execute next step of active plan.\"\"\"\n        if not self.planner.active:\n            return None\n\n        step = self.planner.next_step()\n        if not step:\n            return None\n\n        action = step.get(\"action\")\n\n        if action == \"create_army\":\n            cap_data = 
game_state.get(self.my_capital, {})\n            if cap_data.get('resources', 0) >= ARMY_COST:\n                self.planner.advance()\n                return {\n                    \"action\": \"create_army\",\n                    \"location\": self.my_capital,\n                    \"count\": 1,\n                    \"reason\": f\"plan step: {self.planner.goal}\"\n                }\n            else:\n                return {\n                    \"action\": \"collect_resources\",\n                    \"location\": self.my_capital,\n                    \"reason\": \"waiting for resources for plan\"\n                }\n\n        elif action == \"move_army\":\n            from_loc = step.get(\"from\")\n            to_loc = step.get(\"to\")\n            loc_data = game_state.get(from_loc, {})\n            if loc_data.get('faction') == self.faction and loc_data.get('army', 0) > 0:\n                self.planner.advance()\n                return {\n                    \"action\": \"move_army\",\n                    \"from\": from_loc,\n                    \"to\": to_loc,\n                    \"reason\": f\"plan step: {self.planner.goal}\"\n                }\n            else:\n                reason = \"can't execute move step\"\n                self.planner.abandon(reason)\n                trace.get_current_span().add_event(\"plan_abandoned\", attributes={\n                    \"reason\": reason,\n                })\n                telemetry.record_plan_abandoned(reason)\n                return None\n\n        elif action == \"all_out_attack\":\n            self.planner.advance()\n            return {\n                \"action\": \"all_out_attack\",\n                \"location\": self.my_capital,\n                \"reason\": f\"plan step: {self.planner.goal}\"\n            }\n\n        self.planner.advance()\n        return None\n\n    # ── Priority 5: Create New Plan ────────────────────────────────────────────\n\n    def _create_new_plan(self, game_state):\n        
\"\"\"Create a new plan based on current phase.\"\"\"\n        # Sub-priority: if total army < 3, always build armies first\n        if self.total_army < 3:\n            armies_needed = 3 - self.total_army\n            steps = [{\"action\": \"create_army\"} for _ in range(armies_needed)]\n            goal = f\"build {armies_needed} armies\"\n            self.planner.set_plan(goal, steps)\n            trace.get_current_span().add_event(\"plan_created\", attributes={\n                \"goal\": goal, \"step_count\": len(steps),\n            })\n            telemetry.record_plan_created(goal)\n            return self._execute_plan_step(game_state)\n\n        # Sub-priority: capturable targets exist -> plan capture\n        capture_plan = self._plan_capture(game_state)\n        if capture_plan:\n            return capture_plan\n\n        # Sub-priority: READY_TO_ATTACK + feasible all-out\n        if self.phase == GamePhase.READY_TO_ATTACK:\n            attack_plan = self._plan_all_out_attack(game_state)\n            if attack_plan:\n                return attack_plan\n\n        # Sub-priority: DESPERATE -> emergency build\n        if self.phase == GamePhase.DESPERATE:\n            cap_data = game_state.get(self.my_capital, {})\n            if cap_data.get('resources', 0) >= ARMY_COST:\n                goal = \"emergency army build\"\n                steps = [{\"action\": \"create_army\"}]\n                self.planner.set_plan(goal, steps)\n                trace.get_current_span().add_event(\"plan_created\", attributes={\n                    \"goal\": goal, \"step_count\": len(steps),\n                })\n                telemetry.record_plan_created(goal)\n                return self._execute_plan_step(game_state)\n\n        # Sub-priority: concentrate isolated armies\n        concentrate = self._concentrate_forces(game_state)\n        if concentrate:\n            return concentrate\n\n        return None\n\n    def _plan_capture(self, game_state):\n        \"\"\"Plan 
a capture: build N armies then move toward target.\"\"\"\n        targets = self._find_capturable_targets(game_state)\n        if not targets:\n            return None\n\n        target = targets[0]\n        target_loc = target[\"target\"]\n        target_army = game_state.get(target_loc, {}).get('army', 0)\n        needed_army = target_army + 3\n\n        steps = []\n\n        # Build armies if needed\n        armies_to_build = max(0, needed_army - self.total_army)\n        for _ in range(min(armies_to_build, 5)):  # cap at 5 to avoid over-planning\n            steps.append({\"action\": \"create_army\"})\n\n        # Move one hop from capital toward target\n        next_hop = self._step_toward(self.my_capital, target_loc)\n        if next_hop:\n            steps.append({\"action\": \"move_army\", \"from\": self.my_capital, \"to\": next_hop})\n\n        if steps:\n            goal = f\"capture {target_loc}\"\n            self.planner.set_plan(goal, steps)\n            trace.get_current_span().add_event(\"plan_created\", attributes={\n                \"goal\": goal, \"step_count\": len(steps),\n            })\n            telemetry.record_plan_created(goal)\n            return self._execute_plan_step(game_state)\n\n        return None\n\n    def _find_capturable_targets(self, game_state):\n        \"\"\"Find targets we could capture, prioritizing low-defense neutrals for income.\"\"\"\n        targets = []\n        for loc in MAP_GRAPH:\n            loc_data = game_state.get(loc, {})\n            if loc_data.get('faction') == self.faction:\n                continue\n            if self.memory.recently_failed(loc):\n                continue\n\n            target_army = loc_data.get('army', 0)\n            is_neutral = loc_data.get('faction') == 'neutral'\n            strat_value = self.map.strategic_values.get(loc, 0)\n\n            # Find best staging location (closest of our territories)\n            best_staging = None\n            best_staging_dist = 99\n         
   for our_loc in self.my_territories:\n                dist = self.map.distance(our_loc, loc)\n                if dist < best_staging_dist:\n                    best_staging_dist = dist\n                    best_staging = our_loc\n\n            path_enemy = self.map.path_army_estimate(\n                game_state, best_staging, loc, self.faction\n            ) if best_staging else 999\n\n            targets.append({\n                \"target\": loc,\n                \"staging\": best_staging,\n                \"target_army\": target_army,\n                \"path_enemy\": path_enemy,\n                \"is_neutral\": is_neutral,\n                \"strategic_value\": strat_value,\n                \"distance\": best_staging_dist,\n            })\n\n        # Sort: neutrals first, then by lowest defense, then by strategic value\n        targets.sort(key=lambda t: (\n            not t['is_neutral'],\n            t['target_army'],\n            -t['strategic_value'],\n        ))\n\n        return targets\n\n    def _plan_all_out_attack(self, game_state):\n        \"\"\"Plan an all-out attack if feasible (expected remaining army > 2).\"\"\"\n        path_enemy = self.map.path_army_estimate(\n            game_state, self.my_capital, self.enemy_capital, self.faction\n        )\n        expected_remaining = self.total_army - path_enemy\n        if expected_remaining > 2:\n            goal = \"all-out attack on enemy capital\"\n            steps = [{\"action\": \"all_out_attack\"}]\n            self.planner.set_plan(goal, steps)\n            trace.get_current_span().add_event(\"plan_created\", attributes={\n                \"goal\": goal, \"step_count\": len(steps),\n            })\n            telemetry.record_plan_created(goal)\n            return self._execute_plan_step(game_state)\n        return None\n\n    def _concentrate_forces(self, game_state):\n        \"\"\"Move isolated friendly armies toward threats or strategic hub (V5).\"\"\"\n        target_loc = 
self.memory.enemy_push_direction or \"village_5\"\n\n        for loc in MAP_GRAPH:\n            if loc == self.my_capital:\n                continue\n            loc_data = game_state.get(loc, {})\n            if loc_data.get('faction') == self.faction and loc_data.get('army', 0) > 0:\n                # Check if this army is isolated (no enemy neighbors)\n                has_enemy_neighbor = False\n                for n in MAP_GRAPH[loc]:\n                    n_data = game_state.get(n, {})\n                    if n_data.get('faction') not in (self.faction, 'neutral'):\n                        has_enemy_neighbor = True\n                        break\n\n                if not has_enemy_neighbor:\n                    next_hop = self._step_toward(loc, target_loc)\n                    if next_hop and next_hop != loc:\n                        n_data = game_state.get(next_hop, {})\n                        if n_data.get('faction') == self.faction or n_data.get('army', 0) < loc_data['army']:\n                            return {\n                                \"action\": \"move_army\",\n                                \"from\": loc,\n                                \"to\": next_hop,\n                                \"reason\": f\"concentrate forces from {loc} toward {target_loc}\"\n                            }\n        return None\n\n    # ── Priority 6: Fallback ───────────────────────────────────────────────────\n\n    def _fallback(self, game_state):\n        \"\"\"Collect resources at capital.\"\"\"\n        return {\n            \"action\": \"collect_resources\",\n            \"location\": self.my_capital,\n            \"reason\": \"fallback: collect resources\"\n        }\n\n    # ── Adaptive Timing ────────────────────────────────────────────────────────\n\n    def get_pause_time(self):\n        \"\"\"Adaptive loop timing based on phase.\"\"\"\n        if self.phase == GamePhase.DESPERATE or self.memory.territory_lost_recently():\n            return 
random.randint(2, 5)\n        elif self.phase == GamePhase.READY_TO_ATTACK:\n            return random.randint(3, 8)\n        else:\n            return random.randint(5, 15)\n\n\n# ─── White Walkers AI ─────────────────────────────────────────────────────────\n\nclass WhiteWalkerAI(StrategicAI):\n    \"\"\"Single-player opponent on the White Walkers Attack map.\n\n    Economy: corpses, not resources. Corpses come from winning battles and\n    passive generation at the fortress. Army units cost\n    ``ARMY_COST_BY_MAP[\"white_walkers_attack\"][\"white_walkers\"]`` corpses.\n\n    Priority cascade (replaces ``StrategicAI.decide``):\n\n      1. Defend the fortress when enemies are adjacent and the garrison is\n         outnumbered.\n      2. Capture any wall that the White Walkers do not already control,\n         preferring the wall that needs the fewest attacking troops to beat\n         its 2× defender multiplier.\n      3. Reinforce the weakest White Walker-held wall.\n      4. Raid a barbarian village using the closest White Walker garrison\n         that strictly outnumbers its defenders, preferring the village with\n         the largest troop margin — a clean harvest for corpses.\n      5. If corpses are at or above the army cost and the fortress is still\n         White Walker-held (no garrison required), raise a new undead unit.\n      6. 
No-op fallback (corpse stream keeps flowing via the passive tick).\n    \"\"\"\n\n    def decide(self, game_state):\n        self.my_territories, self.enemy_territories = self.memory.update(\n            game_state, self.faction\n        )\n        self.total_army = sum(\n            data.get('army', 0) for loc, data in game_state.items()\n            if data.get('faction') == self.faction\n        )\n        self.phase = PhaseDetector.detect(\n            self.my_territories, self.enemy_territories, self.total_army\n        )\n\n        span = trace.get_current_span()\n        span.set_attribute(\"ai.variant\", \"white_walkers\")\n        span.set_attribute(\"game.map.id\", self.map_id)\n\n        corpses = fetch_faction_corpses(self.faction)\n        span.set_attribute(\"ai.corpse_pool\", corpses)\n\n        evaluated = []\n\n        action = self._defend_fortress(game_state)\n        if action:\n            evaluated.append(f\"defend_fortress: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"defend_fortress: skipped\")\n\n        action = self._capture_unowned_wall(game_state)\n        if action:\n            evaluated.append(f\"capture_wall: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"capture_wall: skipped\")\n\n        action = self._reinforce_weakest_wall(game_state)\n        if action:\n            evaluated.append(f\"reinforce_wall: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"reinforce_wall: skipped\")\n\n        action = self._raid_barbarian(game_state)\n        if action:\n            evaluated.append(f\"raid_barbarian: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        
evaluated.append(\"raid_barbarian: skipped\")\n\n        action = self._raise_army_from_corpses(game_state, corpses)\n        if action:\n            evaluated.append(f\"raise_army: TRIGGERED ({action.get('reason', '')})\")\n            self._last_evaluated = evaluated\n            return action\n        evaluated.append(\"raise_army: skipped\")\n\n        self._last_evaluated = evaluated\n        return self._passive_fallback()\n\n    # ── Cascade helpers ───────────────────────────────────────────────────────\n\n    def _defend_fortress(self, game_state):\n        cap_data = game_state.get(self.my_capital, {})\n        if not cap_data or cap_data.get('faction') != self.faction:\n            return None\n\n        garrison = cap_data.get('army', 0)\n        max_threat = 0\n        threat_loc = None\n        for n in self.map.neighbors(self.my_capital):\n            n_data = game_state.get(n, {})\n            n_faction = n_data.get('faction')\n            if n_faction and n_faction != self.faction and n_faction != 'barbarian':\n                if n_data.get('army', 0) > max_threat:\n                    max_threat = n_data['army']\n                    threat_loc = n\n        if max_threat == 0 or max_threat <= garrison:\n            return None\n\n        # Pull back from the strongest adjacent wall we own (if any).\n        best_source = None\n        best_army = 0\n        for wall in self._walls():\n            w_data = game_state.get(wall, {})\n            if w_data.get('faction') == self.faction and w_data.get('army', 0) > best_army:\n                best_source = wall\n                best_army = w_data['army']\n        if best_source:\n            return {\n                \"action\": \"move_army\",\n                \"from\": best_source,\n                \"to\": self.my_capital,\n                \"reason\": f\"defend fortress vs {threat_loc} ({max_threat} army)\",\n            }\n        return None\n\n    def _capture_unowned_wall(self, game_state):\n       
 best = None\n        best_cost = float(\"inf\")\n        for wall in self._walls():\n            w_data = game_state.get(wall, {})\n            if w_data.get('faction') == self.faction:\n                continue\n            defender = w_data.get('army', 0)\n            # Wall multiplier = 2 — must exceed 2 * defender to take it.\n            needed = 2 * defender + 1\n            source, source_army = self._nearest_source_with_army(game_state, wall, needed)\n            if source is None:\n                continue\n            total_cost = needed\n            if total_cost < best_cost:\n                best_cost = total_cost\n                best = (source, wall, defender)\n        if best is None:\n            return None\n        source, wall, defender = best\n        return {\n            \"action\": \"move_army\",\n            \"from\": source,\n            \"to\": self._step_toward(source, wall),\n            \"reason\": f\"capture {wall} (defender {defender}, needed {best_cost})\",\n        }\n\n    def _reinforce_weakest_wall(self, game_state):\n        mine = [\n            (w, game_state.get(w, {}).get('army', 0))\n            for w in self._walls()\n            if game_state.get(w, {}).get('faction') == self.faction\n        ]\n        if not mine:\n            return None\n        weakest, weakest_army = min(mine, key=lambda item: item[1])\n\n        # Prefer non-capital neighbours so corpse-driven army production at\n        # the capital isn't drained on every tick. 
Capital is a fallback\n        # below — without it the AI gets stuck post-capture, since\n        # ``move_army`` moves *all* army, leaving walls at 0 and capital as\n        # the only source.\n        capital_neighbour = None\n        for n in self.map.neighbors(weakest):\n            n_data = game_state.get(n, {})\n            if n_data.get('faction') != self.faction:\n                continue\n            n_army = n_data.get('army', 0)\n            if n_army <= 1:\n                continue\n            if n == self.my_capital:\n                capital_neighbour = (n, n_army)\n                continue\n            return {\n                \"action\": \"move_army\",\n                \"from\": n,\n                \"to\": weakest,\n                \"reason\": f\"reinforce {weakest} from {n}\",\n            }\n\n        # Capital fallback. Only fire if (a) the capital has more than the\n        # weakest wall (otherwise it's not really reinforcing) and (b) the\n        # capital has enough to spare — leaving 0 garrison is fine because\n        # ``_raise_army_from_corpses`` no longer requires a non-zero\n        # garrison to wrap a fresh unit around.\n        if capital_neighbour is not None:\n            cap_loc, cap_army = capital_neighbour\n            if cap_army > weakest_army + 1:\n                return {\n                    \"action\": \"move_army\",\n                    \"from\": cap_loc,\n                    \"to\": weakest,\n                    \"reason\": f\"reinforce {weakest} from capital ({cap_army} → wall {weakest_army})\",\n                }\n        return None\n\n    def _raid_barbarian(self, game_state):\n        targets = [\n            loc for loc, t in get_location_types(self.map_id).items()\n            if t == \"village\"\n            and get_initial_factions(self.map_id).get(loc) == \"barbarian\"\n            and game_state.get(loc, {}).get('faction') == \"barbarian\"\n        ]\n        if not targets:\n            return None\n\n      
  best = None\n        best_margin = -1\n        for target in targets:\n            defender = game_state.get(target, {}).get('army', 0)\n            source, source_army = self._nearest_source_with_army(\n                game_state, target, defender + 1\n            )\n            if source is None:\n                continue\n            margin = source_army - defender\n            if margin > best_margin:\n                best_margin = margin\n                best = (source, target, defender)\n        if best is None:\n            return None\n        source, target, defender = best\n        return {\n            \"action\": \"move_army\",\n            \"from\": source,\n            \"to\": self._step_toward(source, target),\n            \"reason\": f\"raid {target} (defender {defender}) for corpses\",\n        }\n\n    def _raise_army_from_corpses(self, game_state, corpses):\n        # Capital must still belong to us — if NW captured it the AI has\n        # soft-lost. The earlier `army >= 1` gate has been dropped: it\n        # blocked the AI's primary economic loop after every capital→wall\n        # reinforcement (move_army drains the source to 0), leaving the AI\n        # idle until corpses overflowed.\n        cap_data = game_state.get(self.my_capital, {})\n        if cap_data.get('faction') != self.faction:\n            return None\n        if corpses < self.army_cost:\n            return None\n        return {\n            \"action\": \"create_army\",\n            \"location\": self.my_capital,\n            \"count\": 1,\n            \"reason\": f\"raise undead ({corpses} corpses, cost {self.army_cost})\",\n        }\n\n    def _passive_fallback(self):\n        # No-op for White Walkers: the passive corpse tick handles \"idle\".\n        return {\n            \"action\": \"noop\",\n            \"reason\": \"passive: corpses accumulate at fortress\",\n        }\n\n    # ── Utility ───────────────────────────────────────────────────────────────\n\n    def 
_walls(self):\n        types = get_location_types(self.map_id)\n        return [loc for loc, t in types.items() if t == \"wall\"]\n\n    def _nearest_source_with_army(self, game_state, target, needed):\n        \"\"\"Return the (location_id, army) of the closest friendly node with\n        at least ``needed`` troops, or ``(None, 0)`` if nothing qualifies.\n        \"\"\"\n        best = (None, 0)\n        best_dist = float(\"inf\")\n        for loc, data in game_state.items():\n            if data.get('faction') != self.faction:\n                continue\n            if data.get('army', 0) < needed:\n                continue\n            dist = self.map.distance(loc, target)\n            if dist < best_dist:\n                best = (loc, data.get('army', 0))\n                best_dist = dist\n        return best\n\n\n# ─── AI State ──────────────────────────────────────────────────────────────────\n\nclass AIState:\n    def __init__(self):\n        self.faction = None\n        self.map_id = \"war_of_kingdoms\"\n        self.active = False\n        self.last_action_time = None\n        self.game_start_time = None\n        self.strategic_ai = None\n        self.decision_thread = None\n        self.stop_flag = threading.Event()\n\nai_state = AIState()\n\n# ─── Preserved Helpers ─────────────────────────────────────────────────────────\n\ndef get_location_url(location_id):\n    \"\"\"Get the URL for a location's API.\n\n    Container hostnames in docker-compose are the stable WoK names\n    (``southern-capital``, ``village-1`` …). 
On WWA the *logical* location id\n    differs (``wall_west`` → still lives on container ``village-1``), so we\n    look up the container via ``CONTAINER_FOR_LOCATION_ID`` rather than\n    naively hyphenating the location id.\n    \"\"\"\n    if os.environ.get('IN_DOCKER'):\n        host = CONTAINER_FOR_LOCATION_ID.get(location_id, location_id.replace('_', '-'))\n    else:\n        host = 'localhost'\n\n    port = LOCATION_PORTS[location_id]\n    return f\"http://{host}:{port}\"\n\n\ndef fetch_faction_corpses(faction):\n    \"\"\"Query any location service for the faction's corpse pool. Returns 0 on error.\"\"\"\n    # Use slot_1 (southern-capital container); any container is fine since\n    # the DB is shared.\n    try:\n        if os.environ.get('IN_DOCKER'):\n            base = \"http://southern-capital:5001\"\n        else:\n            base = \"http://localhost:5001\"\n        resp = requests.get(f\"{base}/faction_economy\", params={\"faction\": faction}, timeout=2)\n        resp.raise_for_status()\n        return int(resp.json().get(\"corpses\", 0))\n    except Exception:\n        return 0\n\ndef make_api_request(location_id, endpoint, method='GET', data=None):\n    \"\"\"Make an API request to a location server with trace context\"\"\"\n    url = f\"{get_location_url(location_id)}/{endpoint}\"\n    headers = {\"Content-Type\": \"application/json\"}\n\n    with tracer.start_as_current_span(\n        \"ai_api_request\",\n        kind=SpanKind.CLIENT,\n        attributes={\n            \"location.id\": location_id,\n            \"location.endpoint\": endpoint,\n            \"http.method\": method\n        }\n    ) as span:\n        inject(headers)  # Inject trace context\n\n        try:\n            if method == 'GET':\n                response = requests.get(url, headers=headers)\n            else:  # POST\n                response = requests.post(url, json=data, headers=headers)\n\n            span.set_attribute(\"http.status_code\", response.status_code)\n    
        response.raise_for_status()\n            result = response.json()\n\n            if not result.get(\"success\", True):\n                span.set_status(trace.StatusCode.ERROR, result.get(\"message\", \"Unknown error\"))\n\n            return result\n        except requests.RequestException as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            logger.error(\"API request failed\", extra={\"error\": str(e)})\n            return {\"error\": str(e)}\n\ndef get_game_state(parent_ctx):\n    \"\"\"Get the current state of every location on the currently active map.\"\"\"\n    # Which set of location ids belongs to this AI's map? Fall back to\n    # WoK's 8 ids if AI isn't initialised yet.\n    if ai_state.strategic_ai is not None:\n        location_ids = list(get_map_graph(ai_state.strategic_ai.map_id).keys())\n    else:\n        location_ids = list(MAP_GRAPH.keys())\n\n    with tracer.start_as_current_span(\n        \"get_game_state\",\n        kind=SpanKind.INTERNAL,\n        context=parent_ctx,\n        attributes={\"location_count\": len(location_ids)}\n    ) as span:\n        game_state = {}\n        error_count = 0\n\n        for location_id in location_ids:\n            data = make_api_request(location_id, '')\n            if 'error' not in data:\n                game_state[location_id] = data\n            else:\n                error_count += 1\n                span.add_event(\n                    \"location_fetch_error\",\n                    attributes={\n                        \"location\": location_id,\n                        \"error\": str(data.get('error', 'Unknown error'))\n                    }\n                )\n\n        span.set_attribute(\"locations_retrieved\", len(game_state))\n        span.set_attribute(\"errors\", error_count)\n\n        if error_count > 0:\n            span.set_status(trace.StatusCode.ERROR, f\"Failed to fetch {error_count} locations\")\n\n        return 
game_state\n\n# ─── Action Executor ───────────────────────────────────────────────────────────\n\ndef execute_strategic_action(action, game_state, parent_ctx, decision_link=None):\n    \"\"\"Execute an action returned by StrategicAI.decide().\"\"\"\n    if not action:\n        return\n\n    action_type = action.get(\"action\")\n    reason = action.get(\"reason\", \"\")\n\n    links = []\n    if decision_link:\n        links = [Link(decision_link, attributes={\"link.type\": \"ai_decision_trigger\"})]\n\n    with tracer.start_as_current_span(\n        \"execute_ai_action\",\n        kind=SpanKind.INTERNAL,\n        context=parent_ctx,\n        links=links,\n        attributes={\n            \"action_type\": action_type,\n            \"reason\": reason,\n        }\n    ) as span:\n        try:\n            if action_type == \"create_army\":\n                location = action.get(\"location\", ai_state.strategic_ai.my_capital)\n                count = action.get(\"count\", 1)\n                armies_created = 0\n                for i in range(count):\n                    result = make_api_request(location, 'create_army', method='POST')\n                    if result.get('success'):\n                        armies_created += 1\n                        logger.info(\"AI created army\", extra={\"army_number\": armies_created, \"total_requested\": count, \"reason\": reason})\n                    else:\n                        logger.warning(\"Failed to create army\", extra={\"message\": result.get('message', 'unknown')})\n                        break\n                    if i < count - 1:\n                        time.sleep(0.5)\n                span.set_attribute(\"armies_created\", armies_created)\n                span.set_attribute(\"armies_requested\", count)\n\n            elif action_type == \"move_army\":\n                from_loc = action[\"from\"]\n                to_loc = action[\"to\"]\n                result = make_api_request(\n                    from_loc,\n 
                   'move_army',\n                    method='POST',\n                    data={\"target_location\": to_loc}\n                )\n                success = result.get('success', False)\n                span.set_attribute(\"from_location\", from_loc)\n                span.set_attribute(\"target_location\", to_loc)\n                span.set_attribute(\"move_success\", success)\n                logger.info(\"AI move army\", extra={\"from_location\": from_loc, \"to_location\": to_loc, \"reason\": reason, \"success\": success})\n                if not success:\n                    ai_state.strategic_ai.memory.record_failed_attack(to_loc)\n\n            elif action_type == \"all_out_attack\":\n                location = action.get(\"location\", ai_state.strategic_ai.my_capital)\n                result = make_api_request(location, 'all_out_attack', method='POST')\n                span.set_attribute(\"all_out_attack\", True)\n                logger.info(\"AI all-out attack\", extra={\"location\": location, \"reason\": reason})\n\n            elif action_type == \"collect_resources\":\n                location = action.get(\"location\", ai_state.strategic_ai.my_capital)\n                result = make_api_request(location, 'collect_resources', method='POST')\n                logger.info(\"AI collected resources\", extra={\"location\": location, \"reason\": reason})\n\n            elif action_type == \"resource_transfer\":\n                locations = action.get(\"locations\", [])\n                for loc in locations:\n                    result = make_api_request(loc, 'send_resources_to_capital', method='POST')\n                    logger.info(\"AI transferred resources\", extra={\"from_location\": loc})\n                span.set_attribute(\"transfers_count\", len(locations))\n\n            elif action_type == \"noop\":\n                # WhiteWalkerAI uses ``noop`` as a quiet-tick fallback when\n                # corpses are accruing but no actionable move 
exists. Still\n                # emit a span so replay shows the AI was awake but chose not\n                # to act.\n                span.set_attribute(\"ai.cycle.idle\", True)\n                logger.debug(\"AI idle cycle\", extra={\"reason\": reason})\n\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            logger.error(\"Error executing AI action\", extra={\"error\": str(e), \"action_type\": action_type})\n\n# ─── Decision Loop ─────────────────────────────────────────────────────────────\n\ndef ai_decision_loop():\n    \"\"\"Main AI decision loop that runs in a separate thread\"\"\"\n    logger.info(\"AI decision loop started\", extra={\"faction\": ai_state.faction})\n\n    decision_count = 0\n\n    while ai_state.active and not ai_state.stop_flag.is_set():\n        decision_count += 1\n\n        with tracer.start_as_current_span(\n            \"ai_decision_cycle\",\n            kind=SpanKind.INTERNAL,\n            attributes={\n                \"faction\": ai_state.faction,\n                \"game_phase\": ai_state.strategic_ai.phase.value if ai_state.strategic_ai else \"unknown\",\n                \"cycle_number\": decision_count,\n                \"cycle_start\": datetime.now().isoformat(),\n                \"session_start\": ai_state.game_start_time.isoformat() if ai_state.game_start_time else None\n            }\n        ) as cycle_span:\n            parent_ctx = baggage.set_baggage(\"context\", \"parent\")\n            cycle_start_time = time.time()\n            try:\n                # Get current game state\n                game_state = get_game_state(parent_ctx)\n                my_capital = ai_state.strategic_ai.my_capital\n\n                # Check if game is over\n                if my_capital not in game_state or game_state[my_capital].get('faction') != ai_state.faction:\n                    logger.info(\"AI detected game over\", extra={\"faction\": 
ai_state.faction, \"cycle_number\": decision_count})\n                    cycle_span.set_attribute(\"game_over_detected\", True)\n                    cycle_span.set_attribute(\"final_cycle\", True)\n                    ai_state.active = False\n                    break\n\n                # Make decision using StrategicAI\n                decision_context = None\n                with tracer.start_as_current_span(\n                    \"ai_decision\",\n                    kind=SpanKind.INTERNAL,\n                    context=parent_ctx,\n                    attributes={\"game_phase\": ai_state.strategic_ai.phase.value}\n                ) as decision_span:\n                    action = ai_state.strategic_ai.decide(game_state)\n                    decision_context = decision_span.get_span_context()\n\n                    if action:\n                        decision_span.set_attribute(\"chosen_action\", action.get(\"action\", \"none\"))\n                        decision_span.set_attribute(\"reason\", action.get(\"reason\", \"\"))\n\n                    # Strategic context on spans\n                    decision_span.set_attribute(\"my_territories\", str(list(ai_state.strategic_ai.my_territories)))\n                    decision_span.set_attribute(\"enemy_territories\", str(list(ai_state.strategic_ai.enemy_territories)))\n                    decision_span.set_attribute(\"total_army\", ai_state.strategic_ai.total_army)\n                    decision_span.set_attribute(\"game_phase\", ai_state.strategic_ai.phase.value)\n                    decision_span.set_attribute(\"priorities_evaluated\", str(ai_state.strategic_ai._last_evaluated))\n\n                if action:\n                    action_type = action.get(\"action\", \"none\")\n                    telemetry.record_decision(action_type, ai_state.strategic_ai.phase.value)\n                    execute_strategic_action(action, game_state, parent_ctx, decision_link=decision_context)\n                    
ai_state.last_action_time = datetime.now()\n                    cycle_span.set_attribute(\"action_executed\", True)\n                    cycle_span.set_attribute(\"action_type\", action_type)\n                else:\n                    cycle_span.set_attribute(\"no_action_taken\", True)\n\n                cycle_span.set_attribute(\"cycle_complete\", True)\n\n                # Session metrics\n                if ai_state.game_start_time:\n                    elapsed_time = (datetime.now() - ai_state.game_start_time).total_seconds()\n                    cycle_span.set_attribute(\"session_elapsed_seconds\", elapsed_time)\n\n                # Record cycle duration\n                telemetry.record_cycle_duration(time.time() - cycle_start_time)\n\n                # Adaptive pause\n                pause_time = ai_state.strategic_ai.get_pause_time()\n                cycle_span.set_attribute(\"pause_duration_seconds\", pause_time)\n                logger.info(\"AI waiting\", extra={\"pause_seconds\": pause_time, \"phase\": ai_state.strategic_ai.phase.value})\n\n                if ai_state.stop_flag.wait(pause_time):\n                    cycle_span.set_attribute(\"interrupted\", True)\n                    break\n\n                if not ai_state.active:\n                    cycle_span.set_attribute(\"ai_deactivated\", True)\n                    break\n\n            except Exception as e:\n                cycle_span.record_exception(e)\n                cycle_span.set_status(trace.StatusCode.ERROR, str(e))\n                logger.error(\"Error in AI decision cycle\", extra={\"error\": str(e), \"cycle_number\": decision_count})\n                time.sleep(5)\n\n# ─── Flask Endpoints ───────────────────────────────────────────────────────────\n\n@app.route('/activate', methods=['POST'])\ndef activate_ai():\n    \"\"\"Activate the AI for a specific faction on a specific map.\n\n    Accepts ``{\"faction\": ..., \"map_id\": ...}``. 
Defaults to\n    War of Kingdoms when ``map_id`` is omitted (backward compat).\n    Dispatches to ``WhiteWalkerAI`` when the requested faction is\n    ``white_walkers``; otherwise uses the classic ``StrategicAI``.\n    \"\"\"\n    data = request.get_json() or {}\n    faction = data.get('faction')\n    map_id = data.get('map_id', 'war_of_kingdoms')\n\n    valid_factions = set()\n    for m in CAPITALS_BY_MAP.values():\n        valid_factions.update(m.keys())\n    if faction not in valid_factions:\n        return jsonify({\"success\": False, \"message\": \"Invalid faction\"}), 400\n\n    if map_id not in MAP_GRAPHS_BY_MAP:\n        return jsonify({\"success\": False, \"message\": f\"Unknown map_id: {map_id}\"}), 400\n\n    if faction not in get_capitals(map_id):\n        return jsonify({\n            \"success\": False,\n            \"message\": f\"Faction {faction} is not valid on map {map_id}\"\n        }), 400\n\n    if ai_state.active:\n        return jsonify({\"success\": False, \"message\": \"AI already active\"}), 400\n\n    ai_state.faction = faction\n    ai_state.map_id = map_id\n    ai_state.active = True\n    ai_state.game_start_time = datetime.now()\n    ai_state.stop_flag.clear()\n\n    if faction == \"white_walkers\":\n        ai_state.strategic_ai = WhiteWalkerAI(faction, map_id=map_id)\n    else:\n        ai_state.strategic_ai = StrategicAI(faction, map_id=map_id)\n\n    # Register state callback for observable gauges\n    telemetry.set_state_callback(lambda: {\n        \"territory_count\": len(ai_state.strategic_ai.my_territories),\n        \"total_army\": ai_state.strategic_ai.total_army,\n        \"faction\": ai_state.faction or \"unknown\",\n    } if ai_state.strategic_ai else None)\n\n    # Corpse-pool gauge: only meaningful for White Walkers. 
For other AIs\n    # the callback returns None so the gauge stays unobserved.\n    def _corpse_cb():\n        if ai_state.faction == \"white_walkers\":\n            return (\"white_walkers\", fetch_faction_corpses(\"white_walkers\"))\n        return None\n    telemetry.set_corpse_callback(_corpse_cb)\n\n    # Start AI decision thread\n    ai_state.decision_thread = threading.Thread(target=ai_decision_loop, daemon=True)\n    ai_state.decision_thread.start()\n\n    logger.info(\n        \"AI activated\",\n        extra={\"faction\": faction, \"map_id\": map_id, \"variant\": type(ai_state.strategic_ai).__name__},\n    )\n    return jsonify({\n        \"success\": True,\n        \"message\": f\"AI activated for {faction} faction on {map_id}\",\n        \"map_id\": map_id,\n        \"variant\": type(ai_state.strategic_ai).__name__,\n    })\n\n@app.route('/deactivate', methods=['POST'])\ndef deactivate_ai():\n    \"\"\"Deactivate the AI\"\"\"\n    if not ai_state.active:\n        return jsonify({\"success\": False, \"message\": \"AI not active\"}), 400\n\n    ai_state.active = False\n    ai_state.stop_flag.set()\n\n    # Wait for thread to stop (with timeout)\n    if ai_state.decision_thread:\n        ai_state.decision_thread.join(timeout=5)\n\n    logger.info(\"AI deactivated\", extra={\"faction\": ai_state.faction})\n    return jsonify({\"success\": True, \"message\": \"AI deactivated\"})\n\n@app.route('/status', methods=['GET'])\ndef ai_status():\n    \"\"\"Get current AI status\"\"\"\n    return jsonify({\n        \"active\": ai_state.active,\n        \"faction\": ai_state.faction,\n        \"last_action\": ai_state.last_action_time.isoformat() if ai_state.last_action_time else None,\n        \"game_phase\": ai_state.strategic_ai.phase.value if ai_state.active and ai_state.strategic_ai else None\n    })\n\n@app.route('/health', methods=['GET'])\ndef health_check():\n    \"\"\"Health check endpoint\"\"\"\n    return jsonify({\"status\": \"healthy\"})\n\nif __name__ == 
'__main__':\n    port = int(os.environ.get('PORT', 8081))\n    app.run(host='0.0.0.0', port=port, debug=False)\n"
  },
  {
    "path": "game-of-tracing/ai_opponent/requirements.txt",
    "content": "flask==3.1.3\nrequests==2.33.1\nopentelemetry-api==1.41.1\nopentelemetry-sdk==1.41.1\nopentelemetry-exporter-otlp==1.41.1\npyroscope-io==1.0.6\npyroscope-otel==1.0.0\n"
  },
  {
    "path": "game-of-tracing/ai_opponent/telemetry.py",
    "content": "import os\n\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry import trace\n\n# Logging setup\nimport logging\nfrom opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry._logs import set_logger_provider\n\n# Metrics setup\nfrom opentelemetry import metrics\nfrom opentelemetry.sdk.metrics import MeterProvider\nfrom opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter\nfrom opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader\nfrom opentelemetry.sdk.metrics import TraceBasedExemplarFilter\nfrom opentelemetry.metrics import CallbackOptions, Observation\nfrom typing import Iterable\n\n# Profiling setup (Pyroscope v2 + OTel span-profile linking)\nimport pyroscope\nfrom pyroscope.otel import PyroscopeSpanProcessor\n\nclass AITelemetry:\n    def __init__(self, service_name=\"ai-opponent\", logging_endpoint=\"http://alloy:4318\", tracing_endpoint=\"http://alloy:4317\", metrics_endpoint=\"http://alloy:4318\"):\n        self.service_name = service_name\n        self.logging_endpoint = logging_endpoint\n        self.tracing_endpoint = tracing_endpoint\n        self.metrics_endpoint = metrics_endpoint\n        self._state_callback = None\n        self.resource = Resource.create(attributes={\n            SERVICE_NAME: service_name,\n            \"ai.difficulty\": \"normal\",\n            \"ai.version\": \"1.0\"\n        })\n\n        self._setup_logging()\n        self._setup_tracing()\n        self._setup_metrics()\n        self._setup_profiling()\n        \n    def _setup_logging(self):\n        \"\"\"Configure 
OpenTelemetry logging\"\"\"\n        self.logger_provider = LoggerProvider(resource=self.resource)\n        set_logger_provider(self.logger_provider)\n        \n        log_exporter = OTLPLogExporter(\n            endpoint=f\"{self.logging_endpoint}/v1/logs\"\n        )\n        \n        self.logger_provider.add_log_record_processor(\n            BatchLogRecordProcessor(\n                exporter=log_exporter,\n                max_queue_size=30,\n                max_export_batch_size=5\n            )\n        )\n        \n        # Setup root logger\n        handler = LoggingHandler(\n            level=logging.NOTSET,\n            logger_provider=self.logger_provider\n        )\n        logging.getLogger().addHandler(handler)\n        logging.getLogger().setLevel(logging.INFO)\n        \n        self.logger = logging.getLogger(self.service_name)\n    \n    def _setup_tracing(self):\n        \"\"\"Configure OpenTelemetry tracing\"\"\"\n        trace.set_tracer_provider(TracerProvider(resource=self.resource))\n        \n        otlp_exporter = OTLPSpanExporter(\n            endpoint=f\"{self.tracing_endpoint}/v1/traces\",\n            insecure=True\n        )\n        \n        span_processor = BatchSpanProcessor(\n            span_exporter=otlp_exporter,\n            max_export_batch_size=1\n        )\n        \n        trace.get_tracer_provider().add_span_processor(span_processor)\n        self.tracer = trace.get_tracer(__name__)\n\n    def _setup_profiling(self):\n        \"\"\"Configure Pyroscope profiling + OTel span-profile linkage.\"\"\"\n        pyroscope.configure(\n            application_name=self.service_name,\n            server_address=os.getenv(\"PYROSCOPE_SERVER_ADDRESS\", \"http://alloy:9999\"),\n            tags={\"service_name\": self.service_name},\n            oncpu=True,\n            gil_only=True,\n        )\n        trace.get_tracer_provider().add_span_processor(PyroscopeSpanProcessor())\n\n    def _setup_metrics(self):\n        
\"\"\"Configure OpenTelemetry metrics\"\"\"\n        self.metric_exporter = OTLPMetricExporter(\n            endpoint=f\"{self.metrics_endpoint}/v1/metrics\"\n        )\n\n        self.metric_reader = PeriodicExportingMetricReader(\n            self.metric_exporter,\n            export_interval_millis=10000\n        )\n\n        self.meter_provider = MeterProvider(\n            metric_readers=[self.metric_reader],\n            resource=self.resource,\n            exemplar_filter=TraceBasedExemplarFilter()\n        )\n        metrics.set_meter_provider(self.meter_provider)\n\n        self.meter = metrics.get_meter(__name__)\n\n        # Counters\n        self._decisions_counter = self.meter.create_counter(\n            name=\"ai.decisions\",\n            description=\"Number of AI decisions made\",\n            unit=\"1\"\n        )\n        self._plans_created_counter = self.meter.create_counter(\n            name=\"ai.plans_created\",\n            description=\"Number of plans created\",\n            unit=\"1\"\n        )\n        self._plans_abandoned_counter = self.meter.create_counter(\n            name=\"ai.plans_abandoned\",\n            description=\"Number of plans abandoned\",\n            unit=\"1\"\n        )\n\n        # Histogram\n        self._cycle_duration_histogram = self.meter.create_histogram(\n            name=\"ai.decision_cycle_duration_seconds\",\n            description=\"Duration of AI decision cycles\",\n            unit=\"s\"\n        )\n\n        # Observable gauges\n        self.meter.create_observable_gauge(\n            name=\"ai.territory_count\",\n            description=\"Number of territories controlled by faction\",\n            callbacks=[self._observe_territory_count],\n            unit=\"1\"\n        )\n        self.meter.create_observable_gauge(\n            name=\"ai.total_army\",\n            description=\"Total army size for faction\",\n            callbacks=[self._observe_total_army],\n            unit=\"1\"\n        
)\n\n        # White Walkers Attack metrics (additive; only populate when the\n        # relevant callback is wired).\n        self._walls_captured_counter = self.meter.create_counter(\n            name=\"ai.walls_captured\",\n            description=\"Number of wall keeps captured by this AI variant\",\n            unit=\"1\",\n        )\n        self._corpse_callback = None\n        self.meter.create_observable_gauge(\n            name=\"ai.corpse_pool\",\n            description=\"White Walker corpse pool (cost pool for raising armies)\",\n            callbacks=[self._observe_corpse_pool],\n            unit=\"1\",\n        )\n\n    def _observe_territory_count(self, options: CallbackOptions) -> Iterable[Observation]:\n        \"\"\"Callback for territory count observable gauge\"\"\"\n        if self._state_callback:\n            try:\n                state = self._state_callback()\n                if state:\n                    yield Observation(\n                        value=state[\"territory_count\"],\n                        attributes={\"faction\": state[\"faction\"]}\n                    )\n            except Exception:\n                pass\n\n    def _observe_total_army(self, options: CallbackOptions) -> Iterable[Observation]:\n        \"\"\"Callback for total army observable gauge\"\"\"\n        if self._state_callback:\n            try:\n                state = self._state_callback()\n                if state:\n                    yield Observation(\n                        value=state[\"total_army\"],\n                        attributes={\"faction\": state[\"faction\"]}\n                    )\n            except Exception:\n                pass\n\n    def set_state_callback(self, fn):\n        \"\"\"Register a callback that returns current AI state for observable gauges\"\"\"\n        self._state_callback = fn\n\n    def set_corpse_callback(self, fn):\n        \"\"\"Register a callback that returns ``(faction, corpses)`` for the\n        
``ai.corpse_pool`` gauge. ``fn`` should return ``None`` when the\n        current AI variant does not use the corpse economy.\n        \"\"\"\n        self._corpse_callback = fn\n\n    def _observe_corpse_pool(self, options: CallbackOptions) -> Iterable[Observation]:\n        if not self._corpse_callback:\n            return\n        try:\n            result = self._corpse_callback()\n            if not result:\n                return\n            faction, corpses = result\n            yield Observation(value=int(corpses), attributes={\"faction\": faction})\n        except Exception:\n            pass\n\n    def record_wall_captured(self, wall_id, source):\n        \"\"\"Increment the walls-captured counter. ``source`` is the AI variant name.\"\"\"\n        self._walls_captured_counter.add(\n            1, {\"wall_id\": wall_id, \"variant\": source}\n        )\n\n    def record_decision(self, action_type, phase):\n        \"\"\"Record an AI decision metric\"\"\"\n        self._decisions_counter.add(1, {\"action_type\": action_type, \"phase\": phase})\n\n    def record_plan_created(self, goal):\n        \"\"\"Record a plan creation metric\"\"\"\n        self._plans_created_counter.add(1, {\"goal\": goal})\n\n    def record_plan_abandoned(self, reason):\n        \"\"\"Record a plan abandonment metric\"\"\"\n        self._plans_abandoned_counter.add(1, {\"reason\": reason})\n\n    def record_cycle_duration(self, seconds):\n        \"\"\"Record decision cycle duration\"\"\"\n        self._cycle_duration_histogram.record(seconds)\n\n    def collect_metrics(self):\n        \"\"\"Force collection and export of all metrics\"\"\"\n        try:\n            self.metric_reader.collect()\n            self.meter_provider.force_flush()\n        except Exception:\n            pass\n\n    def get_tracer(self):\n        \"\"\"Get the configured tracer\"\"\"\n        return self.tracer\n\n    def get_logger(self):\n        \"\"\"Get the configured logger\"\"\"\n        return 
self.logger\n\n    def shutdown(self):\n        \"\"\"Flush and shutdown all telemetry providers.\"\"\"\n        try:\n            trace.get_tracer_provider().shutdown()\n        except Exception:\n            pass\n        try:\n            self.meter_provider.shutdown()\n        except Exception:\n            pass\n        try:\n            self.logger_provider.shutdown()\n        except Exception:\n            pass"
  },
  {
    "path": "game-of-tracing/app/CLAUDE.md",
    "content": "# app/ — Location Servers\n\n> 8 Flask microservices representing map territories in the *War of Kingdoms* game. This doc is read by any AI coding agent (Claude, Cursor, Codex, Cline). For scenario-wide context read [`../AGENTS.md`](../AGENTS.md) first.\n\n## Purpose\n\nAll 8 locations run the same codebase. A container's **slot** (set via `SLOT_ID` env var, `slot_1` … `slot_8`) is fixed at build time; the **logical identity** it serves (`southern_capital`, `wall_west`, `barbarian_village_east`, …) is resolved at boot and on `/reload` from the active map in `game_state.db`. Each location:\n\n- Owns a row in the shared `game_state.db` (resources, army, faction).\n- Exposes an HTTP API for collecting resources, creating armies, moving armies, and launching attacks.\n- Instruments every route with OpenTelemetry traces, logs, and five custom game metrics.\n- Runs passive resource generation for villages (every 15 s) and handles cooldowns for capitals.\n- On the White Walkers Attack map, also runs: passive barbarian army growth (every 30 s at barbarian villages), passive corpse generation (every 15 s at the White Walker fortress), passive resource generation at the Night's Watch capital (+5 every 10 s — WWA has no friendly villages, so this replaces the click-only economy), and the wall multiplier (defenders count 2× at `wall`-type locations).\n\nPorts 5001-5008:\n\n| Location ID | Service name | Port | Type |\n|---|---|---|---|\n| `southern_capital` | `southern-capital` | 5001 | capital |\n| `northern_capital` | `northern-capital` | 5002 | capital |\n| `village_1` | `village-1` | 5003 | village |\n| `village_2` | `village-2` | 5004 | village |\n| `village_3` | `village-3` | 5005 | village |\n| `village_4` | `village-4` | 5006 | village |\n| `village_5` | `village-5` | 5007 | village |\n| `village_6` | `village-6` | 5008 | village |\n\nService names (hyphenated) match the `SERVICE_NAME` resource attribute used in traces. 
Location IDs (underscored) are what DB rows and `game_config.py` use. Bridge for the War of Kingdoms IDs in the table above: `location_id.replace('_', '-')`. On White Walkers Attack the logical ID no longer matches the container name (e.g. `wall_west` is served by the `village-1` container), so look the container up instead of hyphenating the ID (the AI opponent does this via `CONTAINER_FOR_LOCATION_ID`).\n\n## File map\n\n| File | Size | Purpose |\n|---|---|---|\n| `game_config.py` | ~3 KB | `LOCATIONS` dict: coordinates, connections, initial resources/army/faction, passive-rate, costs. |\n| `telemetry.py` | ~11 KB | `GameTelemetry` class — traces, logs, metrics (5 observable gauges + 1 counter for game state), plus Pyroscope profiling with OTel span-profile linkage. |\n| `location_server.py` | ~52 KB (~1200 lines) | `LocationServer` class — Flask app, routes, DB access, pathfinding, battle resolution, background-thread movement. |\n| `run_game.py` | — | CLI to run all 8 services as separate local processes (non-Docker). |\n| `Dockerfile` | small | `python:3.11-slim`, `pip install -r requirements.txt`, runs `python location_server.py`. |\n| `requirements.txt` | small | Flask 3.1.3, requests 2.33.1, OpenTelemetry SDK/API + OTLP gRPC/HTTP exporters, `pyroscope-io` + `pyroscope-otel` for profiling. 
|\n\n## Routes\n\n| Method | Path | Handler span name | Purpose |\n|---|---|---|---|\n| `GET` | `/` | `get_location_info` | Location state + optional cooldown |\n| `POST` | `/collect_resources` | `collect_resources` | Capital-only; 5 s cooldown; +20 resources |\n| `POST` | `/create_army` | `create_army` | Capital-only; costs 30 resources → +1 army unit |\n| `POST` | `/move_army` | `move_army_request` | Move army to adjacent location; spawns background movement thread |\n| `POST` | `/all_out_attack` | `all_out_attack` | Capital-to-capital attack via `_find_path(target, ATTACK)` |\n| `POST` | `/receive_army` | `receive_army` | Target of `_continue_army_movement`; resolves battle via `_handle_battle` |\n| `POST` | `/receive_resources` | `receive_resources` | Target of `_transfer_resources_along_path` |\n| `GET` | `/health` | — | Docker health check; returns `{\"status\":\"ok\"}` |\n| `POST` | `/send_resources_to_capital` | — | Village → friendly capital resource forwarding (used by AI) |\n| `POST` | `/reload` | — | Re-read `active_map_id` + rebind slot identity in place (war_map calls this after `/select_map`) |\n| `GET` | `/faction_economy?faction=...` | — | Read a faction's corpse pool (AI uses it) |\n\n## Key algorithms\n\n### Dijkstra pathfinding — `_find_path()` at `location_server.py:128-182`\n\nFaction-aware edge weights:\n\n| Mode | Friendly | Neutral | Enemy |\n|---|---|---|---|\n| `PathType.RESOURCE` | 1 | 2 | ∞ (unreachable) |\n| `PathType.ATTACK` | 1 | 2 | 3 |\n\nResource routing only returns a path if the source is a capital of a known faction. 
Attack routing allows crossing enemy terrain at a cost.\n\n### Battle resolution — `_handle_battle()` at `location_server.py:184-207`\n\n| Case | Outcome | New army | New faction |\n|---|---|---|---|\n| Same faction | `reinforcement` | `attacking + defending` | defender's |\n| `attacking > defending` | `attacker_victory` | `attacking - defending` | attacker's |\n| `defending > attacking` | `defender_victory` | `defending - attacking` | defender's |\n| equal | `stalemate` | `0` | defender's (territory held by default) |\n\nEvery outcome calls `telemetry.record_battle(attacker_faction, defender_faction, result)`, which increments the `game.battles` counter and force-flushes metrics.\n\n### Atomic state updates — `_update_location_state()`\n\nForces metric collection at `location_server.py:124` on important changes (`faction`, `resources`, or `army` mutated), so the dashboard reflects state within ~1 s of the mutating request instead of waiting for the 10 s `PeriodicExportingMetricReader` cycle.\n\n## OpenTelemetry patterns specific to `app/`\n\n### HTTP clients go through one helper\n\n`_make_request_with_trace()` at `location_server.py:327-352` is the only place outbound HTTP happens. It wraps every call in a CLIENT span, sets `http.url` and `http.status_code` attributes, and calls `inject(headers)` to propagate W3C trace context downstream. 
If you add a new outbound call, use this helper — do not call `requests.post` directly.\n\n### Background threads capture context explicitly\n\nTwo methods spawn background threads for delayed operations:\n\n- `_continue_army_movement()` at `location_server.py:209-271` — 5 s delay before the army arrives at the next location.\n- `_transfer_resources_along_path()` at `location_server.py:273-325` — 5 s delay before the resources arrive.\n\nBoth follow the canonical pattern:\n\n```python\nctx = get_current()              # capture before Thread().start()\n\ndef work():\n    token = attach(ctx)          # re-attach inside the thread\n    try:\n        with tracer.start_as_current_span(\"...\"):\n            ...                  # span now belongs to the captured trace\n    finally:\n        detach(token)\n\nThread(target=work).start()\n```\n\nIf you add a new background thread, replicate this pattern. Python threads will **not** inherit OTel context on their own — the span will be orphaned with a fresh trace_id.\n\n### Span attributes that feed the Grafana dashboard\n\nPreserve these when adding or modifying spans (the provisioned dashboard's TraceQL filters depend on them):\n\n- `span.resource.movement = true` — any resource transfer span\n- `span.battle.occurred = true` — any span that triggers `_handle_battle`\n- `span.player.action = true` — any span caused by a human player action\n\n## Custom metrics — `telemetry.py`\n\nSee `AGENTS.md` for the full cross-service metrics table. 
`app/`-specific:\n\n| Metric | Type | Callback location in `telemetry.py` |\n|---|---|---|\n| `game.resources` | observable gauge | `_observe_resources` at `:176-193` |\n| `game.army_size` | observable gauge | `_observe_army_size` at `:195-213` |\n| `game.battles` | counter | `record_battle` at `:274-290` |\n| `game.resource_transfer_cooldown` | observable gauge | `_observe_resource_cooldown` at `:215-233` |\n| `game.location_control` | observable gauge | `_observe_location_control` at `:235-260` (values: `northern=1`, `southern=2`, `neutral=0`, unknown=`-1`) |\n\nThe gauge callbacks read from live server state via `_get_location_state()`, which the `LocationServer` registers on the telemetry instance at construction time.\n\n## New mechanics (White Walkers Attack)\n\nAll defined in `app/game_config.py`'s `MAPS[\"white_walkers_attack\"][\"rules\"]`. All behave as no-ops on `war_of_kingdoms`.\n\n- **Wall defender multiplier** — `_handle_battle` accepts a `location_type` argument and scales `defending_army` by `rules[\"wall_multiplier\"]` (2.0 on WWA, 1.0 on WoK) when the location type is `wall`. Remaining defender count is converted back to physical units after the fight.\n- **Corpse economy** — when the battle winner is `white_walkers`, the post-battle hook in `receive_army` calls `self._add_corpses(attacking + defending - remaining, \"white_walkers\")`. `create_army` reads `get_army_currency(map_id, faction)` and, for `currency == \"corpses\"`, atomically decrements via `_spend_corpses` instead of touching `resources`. The corpse pool lives in `faction_economy` (persistent) so a `/reload` doesn't wipe it.\n- **Barbarian passive growth** — `_start_barbarian_growth(interval_s)` runs when `faction == \"barbarian\"`; adds +1 army every `rules[\"barbarian_army_growth_interval_s\"]` (30 s). 
Guards each iteration against identity changes via `/reload`.\n- **Captured-camp resource generation** — `_start_passive_generation()` is launched for *every* `type == \"village\"` slot at boot (including barbarian Free Folk camps). The per-iteration `faction != \"barbarian\"` guard keeps it a no-op while the camp is still barbarian, then it starts producing the standard village amount the moment the player captures it. Without this fallthrough, captured camps stayed unproductive because the thread was never started on barbarian slots.\n- **White Walker passive corpses** — `_start_white_walker_corpse_tick(interval_s)` runs at the WW fortress, +1 corpse every `rules[\"white_walker_passive_corpse_interval_s\"]` (15 s).\n- **Night's Watch passive resources** — `_start_nights_watch_capital_resource_tick(interval_s, amount)` runs at Castle Black on WWA (`faction == \"nights_watch\"`, `loc_type == \"capital\"`), adding `rules[\"nights_watch_capital_passive_amount\"]` resources every `rules[\"nights_watch_capital_passive_interval_s\"]` (5 per 10 s). Manual `/collect_resources` (+20, 5 s cooldown) still works alongside.\n\n## DB additions (live in `game_state.db`)\n\n- **`game_config`** — `(key, value)` key/value store. The `active_map_id` row is authoritative; containers re-read it on boot and on `/reload`.\n- **`faction_economy`** — `(faction, corpses)`. Updated by `_add_corpses` / `_spend_corpses`. 
Read by the AI via `/faction_economy?faction=white_walkers`.\n\n## Environment\n\n| Var | Default | Purpose |\n|---|---|---|\n| `SLOT_ID` | — (required, `slot_1` … `slot_8`) | Fixed physical slot this container occupies |\n| `LOCATION_ID` | — (legacy; no longer authoritative) | Kept for backward-compat with `run_game.py` local dev |\n| `PORT` | derived from `LOCATION_ID` | HTTP listen port |\n| `IN_DOCKER` | unset | When set, location URLs resolve via container DNS (`village-2:5004`) instead of `localhost:5004` |\n| `DATABASE_FILE` | `/data/game_state.db` (Docker) / `./game_state.db` (local) | SQLite WAL-mode DB |\n\n## Common edits\n\n**Add a new location.**\n1. Add an entry to the target map's `locations` dict under `MAPS` in `game_config.py` (connections list, initial resources/army/faction, port) and point a slot at it in that map's `slot_assignments` (extend `SLOT_IDS` if all eight slots are already assigned); the module-level `LOCATIONS` is only a backward-compat alias for the War of Kingdoms locations.\n2. Add a `village-N` service in both `docker-compose.yml` and `docker-compose.coda.yml`.\n3. Add to the `LOCATION_PORTS` dict in `war_map/app.py` and `ai_opponent/ai_server.py`.\n4. Update the services-and-ports table in `../AGENTS.md` and the location table at the top of this file.\n\n**Add a new metric.**\n1. Add an observable gauge (or counter) in `telemetry.py` next to the existing ones.\n2. If it reads from location state, register a callback that calls `self._get_location_state(...)`.\n3. Add a row to the metrics table in this doc and in `../AGENTS.md`.\n\n**Add a new route.**\n1. Wrap the handler in `tracer.start_as_current_span(..., context=extract(request.headers), ...)`.\n2. Add `\"span.player.action\": True` (if triggered by a player) so the dashboard picks it up.\n3. If the route spawns a background thread, follow the `get_current()` / `attach` / `detach` pattern from `:209-271`.\n\n## Keep this doc current\n\nPer the sub-agent rule, any change to routes, metrics, span attributes, env vars, or the line-number anchors above must land in the same work unit. 
Before returning a response that touched `app/`, grep this file for references to anything you changed.\n\n## Cross-references\n\n- [`../AGENTS.md`](../AGENTS.md) — scenario-wide architecture and patterns\n- [`../war_map/CLAUDE.md`](../war_map/CLAUDE.md) — the consumer of this service's HTTP API on behalf of the player\n- [`../ai_opponent/CLAUDE.md`](../ai_opponent/CLAUDE.md) — the other consumer of this API (autonomous)\n- [`../SPAN_LINKS.md`](../SPAN_LINKS.md) — how action spans chain across services\n"
  },
  {
    "path": "game-of-tracing/app/Dockerfile",
    "content": "FROM python:3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install --no-cache-dir -r requirements.txt\n\nCOPY . .\n\nENV FLASK_APP=location_server.py\n\n# Default to running the main server script\nCMD [\"python\", \"run_game.py\"] "
  },
  {
    "path": "game-of-tracing/app/game_config.py",
    "content": "\"\"\"Game configuration for all maps in the game-of-tracing scenario.\n\nEach entry in ``MAPS`` describes a playable map. A map has:\n\n- ``display_name`` / ``description`` — surfaced by the map picker UI.\n- ``single_player`` + ``player_faction`` / ``ai_faction`` — the map picker uses\n  these to skip faction selection and auto-activate the AI when appropriate.\n- ``factions`` — the valid faction strings for this map.\n- ``slot_assignments`` — maps the fixed container slot ids (``slot_1`` …\n  ``slot_8``) to the logical location id that slot serves on this map. The 8\n  location containers carry only their ``SLOT_ID`` — their in-game identity\n  is resolved at boot (and on ``/reload``) via this table.\n- ``locations`` — per-location config (name, type, faction, connections,\n  initial resources/army, port).\n- ``rules`` — map-wide game rules (army costs and currency per faction, wall\n  multiplier, tick interval, hold-to-win ticks, passive growth intervals).\n\nThe active map id is stored at runtime in the shared ``game_state.db`` in the\n``game_config`` key-value table (written by ``war_map`` on ``/select_map``).\nBoth ``location_server`` and ``war_map`` read it to resolve per-service state.\n\"\"\"\n\nfrom __future__ import annotations\n\nDATABASE_FILE = \"game_state.db\"\nDEFAULT_MAP_ID = \"war_of_kingdoms\"\n\n# Each of the 8 location containers has a fixed SLOT_ID env var\n# (slot_1 .. slot_8). Its in-game identity is resolved through the active\n# map's slot_assignments table, so the same container can serve \"village_1\" on\n# War of Kingdoms and \"wall_west\" on White Walkers Attack.\nSLOT_IDS = tuple(f\"slot_{i}\" for i in range(1, 9))\n\n\nMAPS = {\n    \"war_of_kingdoms\": {\n        \"display_name\": \"War of Kingdoms\",\n        \"description\": (\n            \"Northern and Southern kingdoms clash for dominance. 
\"\n            \"Capture the enemy capital to win.\"\n        ),\n        \"single_player\": False,\n        \"factions\": [\"northern\", \"southern\"],\n        \"slot_assignments\": {\n            \"slot_1\": \"southern_capital\",\n            \"slot_2\": \"northern_capital\",\n            \"slot_3\": \"village_1\",\n            \"slot_4\": \"village_2\",\n            \"slot_5\": \"village_3\",\n            \"slot_6\": \"village_4\",\n            \"slot_7\": \"village_5\",\n            \"slot_8\": \"village_6\",\n        },\n        \"locations\": {\n            \"southern_capital\": {\n                \"name\": \"Southern Capital\",\n                \"type\": \"capital\",\n                \"faction\": \"southern\",\n                \"connections\": [\"village_1\", \"village_3\"],\n                \"initial_resources\": 100,\n                \"initial_army\": 1,\n                \"port\": 5001,\n            },\n            \"northern_capital\": {\n                \"name\": \"Northern Capital\",\n                \"type\": \"capital\",\n                \"faction\": \"northern\",\n                \"connections\": [\"village_2\", \"village_6\"],\n                \"initial_resources\": 100,\n                \"initial_army\": 1,\n                \"port\": 5002,\n            },\n            \"village_1\": {\n                \"name\": \"Village 1\",\n                \"type\": \"village\",\n                \"faction\": \"neutral\",\n                \"connections\": [\"southern_capital\", \"village_2\", \"village_4\"],\n                \"initial_resources\": 50,\n                \"initial_army\": 2,\n                \"port\": 5003,\n            },\n            \"village_2\": {\n                \"name\": \"Village 2\",\n                \"type\": \"village\",\n                \"faction\": \"neutral\",\n                \"connections\": [\"northern_capital\", \"village_1\", \"village_5\"],\n                \"initial_resources\": 50,\n                \"initial_army\": 3,\n     
           \"port\": 5004,\n            },\n            \"village_3\": {\n                \"name\": \"Village 3\",\n                \"type\": \"village\",\n                \"faction\": \"neutral\",\n                \"connections\": [\"southern_capital\", \"village_5\", \"village_6\"],\n                \"initial_resources\": 50,\n                \"initial_army\": 2,\n                \"port\": 5005,\n            },\n            \"village_4\": {\n                \"name\": \"Village 4\",\n                \"type\": \"village\",\n                \"faction\": \"neutral\",\n                \"connections\": [\"village_1\", \"village_5\"],\n                \"initial_resources\": 50,\n                \"initial_army\": 1,\n                \"port\": 5006,\n            },\n            \"village_5\": {\n                \"name\": \"Village 5\",\n                \"type\": \"village\",\n                \"faction\": \"neutral\",\n                \"connections\": [\"village_2\", \"village_3\", \"village_4\", \"village_6\"],\n                \"initial_resources\": 50,\n                \"initial_army\": 4,\n                \"port\": 5007,\n            },\n            \"village_6\": {\n                \"name\": \"Village 6\",\n                \"type\": \"village\",\n                \"faction\": \"neutral\",\n                \"connections\": [\"northern_capital\", \"village_3\", \"village_5\"],\n                \"initial_resources\": 50,\n                \"initial_army\": 2,\n                \"port\": 5008,\n            },\n        },\n        \"rules\": {\n            \"resource_generation\": {\"capital\": 20, \"village\": 10},\n            \"army_cost\": {\"default\": 30},\n            \"army_currency\": {\"default\": \"resources\"},\n            \"wall_multiplier\": 1.0,\n            \"barbarian_army_growth_interval_s\": 0,\n            \"white_walker_passive_corpse_interval_s\": 0,\n            \"tick_interval_s\": 0,\n            \"win_hold_ticks\": 0,\n        },\n    },\n    
\"white_walkers_attack\": {\n        \"display_name\": \"White Walkers Attack\",\n        \"description\": (\n            \"The Long Night has come. As the Night's Watch, hold every Wall \"\n            \"keep for 5 ticks (150 s) before the White Walkers do. Single-player.\"\n        ),\n        \"single_player\": True,\n        \"player_faction\": \"nights_watch\",\n        \"ai_faction\": \"white_walkers\",\n        \"factions\": [\"nights_watch\", \"white_walkers\", \"barbarian\"],\n        \"slot_assignments\": {\n            \"slot_1\": \"nights_watch_fortress\",\n            \"slot_2\": \"white_walker_fortress\",\n            \"slot_3\": \"wall_west\",\n            \"slot_4\": \"wall_center_west\",\n            \"slot_5\": \"wall_center_east\",\n            \"slot_6\": \"wall_east\",\n            \"slot_7\": \"barbarian_village_west\",\n            \"slot_8\": \"barbarian_village_east\",\n        },\n        \"locations\": {\n            \"nights_watch_fortress\": {\n                \"name\": \"Castle Black\",\n                \"type\": \"capital\",\n                \"faction\": \"nights_watch\",\n                \"connections\": [\n                    \"wall_west\",\n                    \"wall_center_west\",\n                    \"wall_center_east\",\n                    \"wall_east\",\n                ],\n                \"initial_resources\": 150,\n                \"initial_army\": 3,\n                \"port\": 5001,\n            },\n            \"white_walker_fortress\": {\n                \"name\": \"The Lands of Always Winter\",\n                \"type\": \"capital\",\n                \"faction\": \"white_walkers\",\n                \"connections\": [\n                    \"wall_west\",\n                    \"wall_center_west\",\n                    \"wall_center_east\",\n                    \"wall_east\",\n                ],\n                # White Walkers spend corpses, not resources. 
Keep the column\n                # populated so the DB row shape stays uniform; the create_army\n                # handler reads currency from the map rules.\n                \"initial_resources\": 0,\n                \"initial_army\": 2,\n                \"port\": 5002,\n            },\n            \"wall_west\": {\n                \"name\": \"Westwatch\",\n                \"type\": \"wall\",\n                \"faction\": \"neutral\",\n                \"connections\": [\n                    \"nights_watch_fortress\",\n                    \"white_walker_fortress\",\n                    \"wall_center_west\",\n                    \"barbarian_village_west\",\n                ],\n                \"initial_resources\": 0,\n                \"initial_army\": 1,\n                \"port\": 5003,\n            },\n            \"wall_center_west\": {\n                \"name\": \"Queensgate\",\n                \"type\": \"wall\",\n                \"faction\": \"neutral\",\n                \"connections\": [\n                    \"nights_watch_fortress\",\n                    \"white_walker_fortress\",\n                    \"wall_west\",\n                    \"wall_center_east\",\n                ],\n                \"initial_resources\": 0,\n                \"initial_army\": 1,\n                \"port\": 5004,\n            },\n            \"wall_center_east\": {\n                \"name\": \"Deep Lake\",\n                \"type\": \"wall\",\n                \"faction\": \"neutral\",\n                \"connections\": [\n                    \"nights_watch_fortress\",\n                    \"white_walker_fortress\",\n                    \"wall_center_west\",\n                    \"wall_east\",\n                ],\n                \"initial_resources\": 0,\n                \"initial_army\": 1,\n                \"port\": 5005,\n            },\n            \"wall_east\": {\n                \"name\": \"Eastwatch-by-the-Sea\",\n                \"type\": \"wall\",\n                
\"faction\": \"neutral\",\n                \"connections\": [\n                    \"nights_watch_fortress\",\n                    \"white_walker_fortress\",\n                    \"wall_center_east\",\n                    \"barbarian_village_east\",\n                ],\n                \"initial_resources\": 0,\n                \"initial_army\": 1,\n                \"port\": 5006,\n            },\n            \"barbarian_village_west\": {\n                \"name\": \"Free Folk Camp (West)\",\n                \"type\": \"village\",\n                \"faction\": \"barbarian\",\n                \"connections\": [\"wall_west\"],\n                \"initial_resources\": 0,\n                \"initial_army\": 2,\n                \"port\": 5007,\n            },\n            \"barbarian_village_east\": {\n                \"name\": \"Free Folk Camp (East)\",\n                \"type\": \"village\",\n                \"faction\": \"barbarian\",\n                \"connections\": [\"wall_east\"],\n                \"initial_resources\": 0,\n                \"initial_army\": 2,\n                \"port\": 5008,\n            },\n        },\n        \"rules\": {\n            # Night's Watch capital collects resources on the classic schedule.\n            # White Walker fortress ignores resource_generation (uses corpses).\n            \"resource_generation\": {\"capital\": 20, \"village\": 10},\n            \"army_cost\": {\n                \"default\": 30,\n                \"white_walkers\": 5,\n            },\n            \"army_currency\": {\n                \"default\": \"resources\",\n                \"white_walkers\": \"corpses\",\n            },\n            \"wall_multiplier\": 2.0,\n            \"barbarian_army_growth_interval_s\": 30,\n            \"white_walker_passive_corpse_interval_s\": 15,\n            # WWA gives the Night's Watch no friendly villages, so its only\n            # income source is /collect_resources at Castle Black. 
Add a slow\n            # passive trickle so the resource HUD ticks up without click-spam.\n            # Keep it well below the click rate (+20 per 5 s) — passive should\n            # supplement, not replace, active play.\n            \"nights_watch_capital_passive_amount\": 5,\n            \"nights_watch_capital_passive_interval_s\": 10,\n            \"tick_interval_s\": 30,\n            \"win_hold_ticks\": 5,\n        },\n    },\n}\n\n# Backward-compat exports: unchanged shape for callers that don't know about\n# maps yet. These always reflect the War of Kingdoms defaults.\nLOCATIONS = MAPS[DEFAULT_MAP_ID][\"locations\"]\nRESOURCE_GENERATION = MAPS[DEFAULT_MAP_ID][\"rules\"][\"resource_generation\"]\nCOSTS = {\"create_army\": MAPS[DEFAULT_MAP_ID][\"rules\"][\"army_cost\"][\"default\"]}\n\n\ndef get_map(map_id):\n    \"\"\"Return the full map-config dict for ``map_id``.\"\"\"\n    if map_id not in MAPS:\n        raise KeyError(f\"Unknown map_id: {map_id}\")\n    return MAPS[map_id]\n\n\ndef resolve_slot(map_id, slot_id):\n    \"\"\"Return the location_id the given slot serves on the given map.\"\"\"\n    return MAPS[map_id][\"slot_assignments\"][slot_id]\n\n\ndef get_location_config(map_id, location_id):\n    \"\"\"Return the per-location config dict for (map_id, location_id).\"\"\"\n    return MAPS[map_id][\"locations\"][location_id]\n\n\ndef get_rules(map_id):\n    \"\"\"Return the ``rules`` dict for ``map_id``.\"\"\"\n    return MAPS[map_id][\"rules\"]\n\n\ndef get_army_cost(map_id, faction):\n    \"\"\"Return the army-creation cost for ``faction`` on ``map_id``.\"\"\"\n    costs = MAPS[map_id][\"rules\"][\"army_cost\"]\n    return costs.get(faction, costs[\"default\"])\n\n\ndef get_army_currency(map_id, faction):\n    \"\"\"Return ``\"resources\"`` or ``\"corpses\"`` for ``faction`` on ``map_id``.\"\"\"\n    currencies = MAPS[map_id][\"rules\"][\"army_currency\"]\n    return currencies.get(faction, currencies[\"default\"])\n\n\ndef locations_by_type(map_id, 
type_name):\n    \"\"\"Return the list of location_ids on ``map_id`` of the given ``type_name``.\"\"\"\n    return [\n        lid\n        for lid, cfg in MAPS[map_id][\"locations\"].items()\n        if cfg[\"type\"] == type_name\n    ]\n"
  },
  {
    "path": "game-of-tracing/app/location_server.py",
    "content": "\"\"\"Location server implementation.\n\nEach of the 8 location containers has a constant ``SLOT_ID`` env var\n(``slot_1`` … ``slot_8``). The in-game identity a slot serves (e.g.\n``southern_capital`` in War of Kingdoms, ``wall_west`` in White Walkers\nAttack) is resolved at boot and on ``/reload`` via the active map stored\nin the shared ``game_config`` key-value table. See ``game_config.MAPS``.\n\nThe per-container SERVICE_NAME (used by Grafana dashboards) stays stable\nregardless of map — it's derived from ``LOCATION_NAME`` env / slot id, not\nfrom the logical location id.\n\"\"\"\nimport os, sqlite3, requests, random, time, threading, atexit\nfrom threading import Thread, Lock\nfrom datetime import datetime, timedelta\nfrom flask import Flask, jsonify, request\nfrom game_config import (\n    MAPS,\n    COSTS,\n    DATABASE_FILE,\n    DEFAULT_MAP_ID,\n    LOCATIONS,\n    RESOURCE_GENERATION,\n    SLOT_IDS,\n    get_army_cost,\n    get_army_currency,\n    get_location_config,\n    get_map,\n    get_rules,\n    resolve_slot,\n)\nfrom telemetry import GameTelemetry\nfrom opentelemetry.propagate import extract, inject\nfrom opentelemetry import trace\nfrom opentelemetry.trace import SpanKind\nfrom opentelemetry.context import get_current, attach, detach\nfrom enum import Enum\nfrom typing import Optional, List, Tuple, Dict\n\nclass PathType(Enum):\n    RESOURCE = 'resource'\n    ATTACK = 'attack'\n\nclass LocationServer:\n    def __init__(self, slot_or_location=None):\n        # Accept either a slot id (new, preferred) or a legacy location id\n        # (for backward compat with local dev scripts). 
Falls back to env.\n        raw = slot_or_location or os.environ.get('SLOT_ID')\n        if raw in SLOT_IDS:\n            self.slot_id = raw\n        elif raw in MAPS[DEFAULT_MAP_ID][\"locations\"]:\n            # Legacy: caller passed a War of Kingdoms location id; resolve to\n            # its slot via the reverse map.\n            inverse = {v: k for k, v in MAPS[DEFAULT_MAP_ID][\"slot_assignments\"].items()}\n            self.slot_id = inverse[raw]\n        else:\n            raise ValueError(\n                f\"Cannot determine SLOT_ID from {raw!r}; expected one of {SLOT_IDS} \"\n                f\"or a War of Kingdoms location id.\"\n            )\n\n        self.app = Flask(__name__)\n        self.last_resource_collection = {}\n        self.resource_cooldown = {}\n        self.lock = Lock()\n\n        # SERVICE_NAME must stay stable across map switches so Grafana\n        # dashboards keep their series. Prefer the explicit LOCATION_NAME env\n        # (matches container name in docker-compose); else synthesise from the\n        # slot id.\n        service_name = os.environ.get('LOCATION_NAME') or self.slot_id.replace('_', '-')\n        self.telemetry = GameTelemetry(service_name=service_name)\n        self.logger = self.telemetry.get_logger()\n        self.tracer = self.telemetry.get_tracer()\n\n        # Give telemetry access to location state\n        self.telemetry._get_location_state = self._get_location_state\n        # And access to faction-scoped economy (for the corpse gauge).\n        self.telemetry._get_corpse_count = self._get_corpses\n\n        self.db_path = os.environ.get('DATABASE_FILE', DATABASE_FILE)\n\n        # Populated by _load_identity().\n        self.map_id = DEFAULT_MAP_ID\n        self.location_id = None\n        self.location_info = None\n        self._passive_thread_started = False\n        self._barbarian_thread_started = False\n        self._corpse_thread_started = False\n        self._nw_capital_thread_started = False\n\n      
  self._initialize_database()\n        self._load_identity()\n        self.setup_routes()\n\n        atexit.register(self.telemetry.shutdown)\n\n    # ----------------------------------------------------------------\n    # Map / slot identity resolution\n    # ----------------------------------------------------------------\n\n    def _current_locations(self) -> Dict:\n        \"\"\"Return the active map's ``location_id → config`` dict.\"\"\"\n        return MAPS[self.map_id][\"locations\"]\n\n    def _current_rules(self) -> Dict:\n        return MAPS[self.map_id][\"rules\"]\n\n    def _read_active_map_id(self) -> str:\n        conn = self._get_db_connection()\n        try:\n            row = conn.execute(\n                \"SELECT value FROM game_config WHERE key = 'active_map_id'\"\n            ).fetchone()\n        finally:\n            conn.close()\n        return row['value'] if row else DEFAULT_MAP_ID\n\n    def _load_identity(self):\n        \"\"\"Resolve slot → (map, location_id, config); seed this slot's row.\"\"\"\n        self.map_id = self._read_active_map_id()\n        self.location_id = resolve_slot(self.map_id, self.slot_id)\n        self.location_info = get_location_config(self.map_id, self.location_id)\n\n        # Publish live identity to the telemetry instance so the observable\n        # gauges report the currently-served id, not whatever id was derived\n        # from the container's SERVICE_NAME at boot.\n        self.telemetry._location_id = self.location_id\n        self.telemetry._location_type = self.location_info[\"type\"]\n\n        # Seed this slot's row in the locations table if missing. 
Idempotent:\n        # INSERT OR IGNORE handles the case where war_map already re-seeded.\n        conn = self._get_db_connection()\n        try:\n            conn.execute(\n                \"INSERT OR IGNORE INTO locations (id, resources, army, faction) VALUES (?, ?, ?, ?)\",\n                (\n                    self.location_id,\n                    self.location_info[\"initial_resources\"],\n                    self.location_info[\"initial_army\"],\n                    self.location_info[\"faction\"],\n                ),\n            )\n            conn.commit()\n        finally:\n            conn.close()\n\n        self._start_passive_threads_if_needed()\n\n        self.logger.info(\n            f\"Identity loaded: slot={self.slot_id} map={self.map_id} \"\n            f\"location_id={self.location_id} type={self.location_info['type']} \"\n            f\"faction={self.location_info['faction']}\"\n        )\n\n    def _start_passive_threads_if_needed(self):\n        \"\"\"Kick off whichever passive loop matches this slot's identity.\n\n        Threads are started at most once per process lifetime. If a slot's\n        identity changes through ``/reload``, the *old* thread keeps running\n        but becomes a no-op because it guards each iteration against the\n        current location type/faction.\n        \"\"\"\n        loc_type = self.location_info[\"type\"]\n        faction = self.location_info[\"faction\"]\n        rules = self._current_rules()\n\n        # Launch the village resource thread for *every* village, including\n        # barbarian-faction slots (Free Folk camps). The thread guards each\n        # iteration on ``faction != \"barbarian\"``, so it stays a no-op while\n        # the camp is still barbarian and starts producing for the player\n        # the moment they capture it. 
Without this fallthrough, captured\n        # camps stay unproductive because the thread was never started.\n        if loc_type == \"village\" and not self._passive_thread_started:\n            self._start_passive_generation()\n            self._passive_thread_started = True\n\n        if faction == \"barbarian\" and not self._barbarian_thread_started:\n            interval = rules.get(\"barbarian_army_growth_interval_s\", 0) or 0\n            if interval > 0:\n                self._start_barbarian_growth(interval)\n                self._barbarian_thread_started = True\n\n        if (\n            loc_type == \"capital\"\n            and faction == \"white_walkers\"\n            and not self._corpse_thread_started\n        ):\n            interval = rules.get(\"white_walker_passive_corpse_interval_s\", 0) or 0\n            if interval > 0:\n                self._start_white_walker_corpse_tick(interval)\n                self._corpse_thread_started = True\n\n        if (\n            loc_type == \"capital\"\n            and faction == \"nights_watch\"\n            and not self._nw_capital_thread_started\n        ):\n            interval = rules.get(\"nights_watch_capital_passive_interval_s\", 0) or 0\n            amount = rules.get(\"nights_watch_capital_passive_amount\", 0) or 0\n            if interval > 0 and amount > 0:\n                self._start_nights_watch_capital_resource_tick(interval, amount)\n                self._nw_capital_thread_started = True\n\n    # ----------------------------------------------------------------\n    # Corpse economy (faction-scoped; lives in faction_economy table)\n    # ----------------------------------------------------------------\n\n    def _get_corpses(self, faction: str = \"white_walkers\") -> int:\n        conn = self._get_db_connection()\n        try:\n            row = conn.execute(\n                \"SELECT corpses FROM faction_economy WHERE faction = ?\", (faction,)\n            ).fetchone()\n        finally:\n       
     conn.close()\n        return int(row['corpses']) if row else 0\n\n    def _add_corpses(self, delta: int, faction: str = \"white_walkers\"):\n        if delta <= 0:\n            return\n        conn = self._get_db_connection()\n        try:\n            conn.execute(\n                \"INSERT INTO faction_economy (faction, corpses) VALUES (?, ?) \"\n                \"ON CONFLICT(faction) DO UPDATE SET corpses = corpses + excluded.corpses\",\n                (faction, delta),\n            )\n            conn.commit()\n        finally:\n            conn.close()\n\n    def _spend_corpses(self, amount: int, faction: str = \"white_walkers\") -> bool:\n        \"\"\"Atomically decrement ``faction``'s corpse pool. Returns True on success.\"\"\"\n        conn = self._get_db_connection()\n        try:\n            cursor = conn.execute(\n                \"UPDATE faction_economy SET corpses = corpses - ? \"\n                \"WHERE faction = ? AND corpses >= ?\",\n                (amount, faction, amount),\n            )\n            conn.commit()\n            return cursor.rowcount > 0\n        finally:\n            conn.close()\n\n    def _find_capital(self, faction: str) -> Optional[str]:\n        \"\"\"Return the location_id of the capital with the given faction in the active map, by static config.\"\"\"\n        for loc_id, cfg in self._current_locations().items():\n            if cfg[\"type\"] == \"capital\" and cfg[\"faction\"] == faction:\n                return loc_id\n        return None\n\n    def _find_enemy_capital(self, faction: str) -> Optional[str]:\n        \"\"\"Return the location_id of a capital not belonging to ``faction`` (and not barbarian), by static config.\"\"\"\n        for loc_id, cfg in self._current_locations().items():\n            if cfg[\"type\"] == \"capital\" and cfg[\"faction\"] not in (faction, \"barbarian\"):\n                return loc_id\n        return None\n\n    def _get_db_connection(self):\n        # ``timeout`` applies before 
the first PRAGMA runs, so concurrent\n        # boot of all 8 containers doesn't race on ``PRAGMA journal_mode=WAL``\n        # (which briefly acquires an exclusive lock to switch modes).\n        conn = sqlite3.connect(self.db_path, timeout=15)\n        conn.execute(\"PRAGMA busy_timeout=5000\")\n        conn.execute(\"PRAGMA journal_mode=WAL\")\n        conn.row_factory = sqlite3.Row\n        return conn\n\n    def _initialize_database(self):\n        conn = self._get_db_connection()\n        cursor = conn.cursor()\n\n        # Canonical per-location state.\n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS locations (\n            id TEXT PRIMARY KEY,\n            resources INTEGER NOT NULL,\n            army INTEGER NOT NULL,\n            faction TEXT NOT NULL\n        )\n        ''')\n\n        # Key/value game-wide config; holds active_map_id (authoritative at\n        # runtime; overrides whatever the process started with).\n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS game_config (\n            key TEXT PRIMARY KEY,\n            value TEXT NOT NULL\n        )\n        ''')\n        cursor.execute(\n            \"INSERT OR IGNORE INTO game_config (key, value) VALUES ('active_map_id', ?)\",\n            (DEFAULT_MAP_ID,),\n        )\n\n        # Faction-scoped economy (White Walkers' corpse pool today; room for\n        # additional faction-level currencies later).\n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS faction_economy (\n            faction TEXT PRIMARY KEY,\n            corpses INTEGER NOT NULL DEFAULT 0\n        )\n        ''')\n\n        conn.commit()\n        conn.close()\n\n    def _get_location_state(self, location_id):\n        conn = self._get_db_connection()\n        cursor = conn.cursor()\n        \n        cursor.execute(\"SELECT * FROM locations WHERE id = ?\", (location_id,))\n        row = cursor.fetchone()\n        \n        state = None\n        if row:\n            state = {\n           
     \"resources\": row['resources'],\n                \"army\": row['army'],\n                \"faction\": row['faction']\n            }\n        conn.close()\n        return state\n\n    def _update_location_state(self, location_id, resources=None, army=None, faction=None):\n        set_clauses = []\n        params = []\n        \n        if resources is not None:\n            set_clauses.append(\"resources = ?\")\n            params.append(resources)\n        if army is not None:\n            set_clauses.append(\"army = ?\")\n            params.append(army)\n        if faction is not None:\n            set_clauses.append(\"faction = ?\")\n            params.append(faction)\n        \n        if not set_clauses:\n            return False\n        \n        params.append(location_id)\n        \n        conn = self._get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute(\n            f\"UPDATE locations SET {', '.join(set_clauses)} WHERE id = ?\",\n            params\n        )\n        conn.commit()\n        conn.close()\n\n        # Force metric collection on important state changes\n        if faction is not None or resources is not None or army is not None:\n            self.telemetry.collect_metrics()\n            \n        return True\n\n    def _find_path(self, target: str, path_type: PathType) -> Optional[List[str]]:\n        \"\"\"Unified pathfinding for both resources and armies on the active map.\"\"\"\n        locations = self._current_locations()\n        location_state = self._get_location_state(self.location_id)\n        faction = location_state[\"faction\"]\n\n        # Resource routing only makes sense for factions that have a resource\n        # economy. 
``barbarian`` and ``white_walkers`` don't send resources.\n        resource_factions = {\"southern\", \"northern\", \"nights_watch\"}\n        if path_type == PathType.RESOURCE and faction not in resource_factions:\n            return None\n\n        distances = {loc: float('infinity') for loc in locations.keys()}\n        distances[self.location_id] = 0\n        previous = {loc: None for loc in locations.keys()}\n        unvisited = set(locations.keys())\n\n        def get_weight(loc_id: str) -> float:\n            state = self._get_location_state(loc_id)\n            loc_faction = state[\"faction\"] if state else \"neutral\"\n\n            if path_type == PathType.RESOURCE:\n                if loc_faction == faction:\n                    return 1\n                elif loc_faction == \"neutral\":\n                    return 2\n                return float('infinity')\n            else:  # PathType.ATTACK\n                if loc_faction == faction:\n                    return 1\n                elif loc_faction == \"neutral\":\n                    return 2\n                return 3\n\n        while unvisited:\n            current = min(unvisited, key=lambda loc: distances[loc])\n            if current == target:\n                break\n\n            unvisited.remove(current)\n            for neighbor in locations[current][\"connections\"]:\n                if neighbor in unvisited:\n                    weight = get_weight(neighbor)\n                    distance = distances[current] + weight\n\n                    if distance < distances[neighbor]:\n                        distances[neighbor] = distance\n                        previous[neighbor] = current\n\n        if previous[target] is None:\n            return None\n\n        path = []\n        current = target\n        while current is not None:\n            path.append(current)\n            current = previous[current]\n\n        return list(reversed(path))\n\n    def _handle_battle(self, attacking_army: int, 
attacking_faction: str,\n                      defending_army: int, defending_faction: str,\n                      location_type: Optional[str] = None) -> tuple[str, int, str]:\n        \"\"\"Handle battle between armies and return ``(result, remaining_army, new_faction)``.\n\n        ``location_type`` lets the active map's rules modify the fight. For\n        ``wall`` settlements on a map with ``wall_multiplier`` > 1 the defender's\n        effective strength is scaled up — the physical garrison plays harder to\n        dislodge, but the ``remaining_army`` reported back is converted back to\n        physical units so DB rows stay honest.\n        \"\"\"\n        # Same faction = reinforcement. Multiplier never applies.\n        if attacking_faction == defending_faction:\n            self.logger.info(f\"Reinforcement battle between {attacking_faction} armies\")\n            self.telemetry.record_battle(attacking_faction, defending_faction, \"reinforcement\")\n            return \"reinforcement\", attacking_army + defending_army, attacking_faction\n\n        multiplier = 1.0\n        if location_type == \"wall\":\n            multiplier = float(self._current_rules().get(\"wall_multiplier\", 1.0) or 1.0)\n        effective_defender = int(defending_army * multiplier)\n\n        if attacking_army > effective_defender:\n            remaining = attacking_army - effective_defender\n            self.logger.info(\n                f\"Attacker victory: {attacking_army} vs {defending_army} \"\n                f\"(effective {effective_defender}, mult {multiplier}) -> {remaining}\"\n            )\n            self.telemetry.record_battle(attacking_faction, defending_faction, \"attacker_victory\")\n            return \"attacker_victory\", remaining, attacking_faction\n        elif effective_defender > attacking_army:\n            # Convert defender's surviving *effective* strength back to physical.\n            effective_remaining = effective_defender - attacking_army\n           
 remaining = max(1, int(effective_remaining / multiplier)) if multiplier > 0 else effective_remaining\n            self.logger.info(\n                f\"Defender victory: {defending_army} vs {attacking_army} \"\n                f\"(effective {effective_defender}, mult {multiplier}) -> {remaining}\"\n            )\n            self.telemetry.record_battle(attacking_faction, defending_faction, \"defender_victory\")\n            return \"defender_victory\", remaining, defending_faction\n        else:\n            self.logger.info(\n                f\"Stalemate: {attacking_army} vs {defending_army} \"\n                f\"(effective {effective_defender}, mult {multiplier})\"\n            )\n            self.telemetry.record_battle(attacking_faction, defending_faction, \"stalemate\")\n            return \"stalemate\", 0, defending_faction\n\n    def _continue_army_movement(self, army_size: int, faction: str, current_loc: str,\n                              next_loc: str, remaining_path: List[str], is_attack_move: bool = False) -> Dict:\n        \"\"\"Continue army movement to next location.\"\"\"\n        # Capture the full context before spawning the thread\n        ctx = get_current()\n\n        def move():\n            token = attach(ctx)\n            try:\n                time.sleep(5)  # Wait 5 seconds before moving\n\n                with self.tracer.start_as_current_span(\n                        \"army_movement\",\n                        kind=SpanKind.SERVER,\n                        attributes={\n                            \"source_location\": current_loc,\n                            \"target_location\": next_loc,\n                            \"army_size\": army_size,\n                            \"is_attack_move\": is_attack_move\n                        }\n                    ) as movement_span:\n                        target_url = f\"{self.get_location_url(next_loc)}/receive_army\"\n                        self.logger.info(f\"Moving army from 
{current_loc} to {next_loc}\")\n                        \n                        result = self._make_request_with_trace(\n                            'post',\n                            target_url,\n                            {\n                                \"army_size\": army_size,\n                                \"faction\": faction,\n                                \"source_location\": current_loc,\n                                \"remaining_path\": remaining_path,\n                                \"is_attack_move\": is_attack_move\n                            },\n                            span_name=\"http_request.move_army\"\n                        )\n                        \n                        if not result.get(\"success\", False):\n                            movement_span.set_status(trace.StatusCode.ERROR, \"Army movement failed\")\n                            movement_span.set_attribute(\"error\", result.get(\"message\", \"Unknown error\"))\n                            self.logger.error(f\"Army movement failed: {result.get('message', 'Unknown error')}\")\n                        else:\n                            # Force metric collection after successful army movement\n                            self.telemetry.collect_metrics()\n                \n            except Exception as e:\n                self.logger.error(f\"Failed to move army to {next_loc}: {str(e)}\")\n                raise\n            finally:\n                detach(token)\n\n        # Start movement in background thread\n        Thread(target=move).start()\n        \n        # Force metric collection at the start of movement\n        self.telemetry.collect_metrics()\n        \n        # Return immediate response indicating movement has started\n        return {\n            \"success\": True,\n            \"message\": f\"Army movement started from {current_loc} to {next_loc}\",\n            \"is_attack_move\": is_attack_move\n        }\n\n    def 
_transfer_resources_along_path(self, resources: int, path: List[str]) -> bool:\n        \"\"\"Transfer resources along a path with delays.\"\"\"\n        if not path or len(path) < 2:\n            return False\n            \n        # Capture the full context before spawning the thread\n        ctx = get_current()\n\n        def transfer():\n            current_loc = path[0]\n            next_loc = path[1]\n\n            token = attach(ctx)\n            try:\n                time.sleep(5)  # Wait before starting transfer\n\n                with self.tracer.start_as_current_span(\n                    \"resource_movement\",\n                    kind=SpanKind.SERVER,\n                    attributes={\n                        \"source_location\": current_loc,\n                        \"target_location\": next_loc,\n                        \"resources_amount\": resources\n                    }\n                ) as movement_span:\n                    target_url = f\"{self.get_location_url(next_loc)}/receive_resources\"\n                    result = self._make_request_with_trace(\n                        'post',\n                        target_url,\n                        {\n                            \"resources\": resources,\n                            \"source_location\": current_loc,\n                            \"remaining_path\": path[1:],\n                            \"faction\": self._get_location_state(self.location_id)[\"faction\"]\n                        },\n                        span_name=\"http_request.transfer_resources\"\n                    )\n\n                    if result.get(\"success\", False):\n                        current_loc_resources = self._get_location_state(current_loc)['resources']\n                        self._update_location_state(current_loc, resources=current_loc_resources - resources)\n                        # Force metric collection after successful resource transfer\n                        self.telemetry.collect_metrics()\n 
                   else:\n                        movement_span.set_status(trace.StatusCode.ERROR, \"Resource transfer failed\")\n\n            except Exception as e:\n                self.logger.error(f\"Failed to send resources to {next_loc} from {current_loc}: {str(e)}\")\n            finally:\n                detach(token)\n\n        Thread(target=transfer).start()\n        return True\n\n    def _make_request_with_trace(self, method: str, url: str, json_data: Optional[Dict] = None, span_name: str = \"http_request\") -> Dict:\n        \"\"\"Make HTTP request with trace context propagated in headers.\"\"\"\n        headers = {\"Content-Type\": \"application/json\"}\n\n        with self.tracer.start_as_current_span(\n            span_name,\n            kind=SpanKind.CLIENT,\n            attributes={\"http.url\": url}\n        ) as request_span:\n            inject(headers)  # This will now inject the current request_span's context\n            \n            try:\n                if method.lower() == 'get':\n                    response = requests.get(url, headers=headers)\n                elif method.lower() == 'post':\n                    response = requests.post(url, json=json_data, headers=headers)\n                else:\n                    raise ValueError(f\"Unsupported method: {method}\")\n                \n                request_span.set_attribute(\"http.status_code\", response.status_code)\n                response.raise_for_status()\n                return response.json()\n            except requests.RequestException as e:\n                request_span.set_status(trace.StatusCode.ERROR, str(e))\n                self.logger.error(f\"Request failed: {str(e)}\")\n                raise\n\n    def _can_collect_resources(self) -> tuple[bool, Optional[str], Optional[int]]:\n        \"\"\"Check if location can collect resources.\n        Returns:\n            tuple: (can_collect, message, cooldown_seconds)\n        \"\"\"\n        with self.lock:\n            
if self.location_info[\"type\"] != \"capital\":\n                return False, \"Only capitals can manually collect resources\", None\n            \n            now = datetime.now()\n            \n            # Check resource sending cooldown\n            if self.location_id in self.resource_cooldown:\n                cooldown_end = self.resource_cooldown[self.location_id]\n                if now < cooldown_end:\n                    remaining = (cooldown_end - now).seconds\n                    return False, f\"Resource generation on cooldown for {remaining} seconds\", remaining\n            \n            # Check collection cooldown\n            last_time = self.last_resource_collection.get(self.location_id, datetime.min)\n            wait_time = timedelta(seconds=5)\n            \n            if now - last_time < wait_time:\n                remaining = wait_time - (now - last_time)\n                return False, f\"Must wait {remaining.seconds} seconds to collect resources\", remaining.seconds\n            \n            return True, None, None\n\n    def _start_resource_cooldown(self):\n        with self.lock:\n            self.resource_cooldown[self.location_id] = datetime.now() + timedelta(seconds=5)\n\n    def get_location_url(self, location_id):\n        \"\"\"Return the HTTP base URL for reaching another location service.\n\n        Uses the active map's port assignment; falls back to WoK's port for a\n        legacy id if the location isn't on the current map (shouldn't happen\n        during a coherent game but guards against transition races).\n        \"\"\"\n        locations = self._current_locations()\n        if location_id in locations:\n            port = locations[location_id][\"port\"]\n        else:\n            port = MAPS[DEFAULT_MAP_ID][\"locations\"][location_id][\"port\"]\n        if os.environ.get('IN_DOCKER') or os.environ.get('LOCATION_ID'):\n            docker_service_name = self._container_for(location_id)\n            return 
f\"http://{docker_service_name}:{port}\"\n        return f\"http://localhost:{port}\"\n\n    def _container_for(self, location_id: str) -> str:\n        \"\"\"Return the stable container hostname for another location id.\n\n        Containers are named after their *slot* (slot_1 → southern-capital in\n        docker-compose, which is slot_1's stable identity). We reverse-look up\n        the slot that currently serves ``location_id`` on the active map, then\n        translate that slot back to its container hostname using the WoK\n        default slot assignments (which match docker-compose service names).\n        \"\"\"\n        active = MAPS[self.map_id][\"slot_assignments\"]\n        wok = MAPS[DEFAULT_MAP_ID][\"slot_assignments\"]\n        for slot, active_loc in active.items():\n            if active_loc == location_id:\n                return wok[slot].replace('_', '-')\n        # Unknown id — best-effort: use the hyphenated form.\n        return location_id.replace('_', '-')\n\n    def _start_passive_generation(self):\n        def generate_resources():\n            while True:\n                time.sleep(15)\n                # Static identity guards against /reload moving this slot off\n                # of a village type entirely.\n                if self.location_info[\"type\"] != \"village\":\n                    continue\n                # Live-DB guard: gate on the *current* faction, not the\n                # boot-time identity, so a captured Free Folk camp starts\n                # producing for the new owner the moment its row flips. 
The\n                # static ``self.location_info[\"faction\"]`` is set at boot\n                # from MAPS config and never updates on battle.\n                location_state = self._get_location_state(self.location_id)\n                if location_state is None:\n                    continue\n                if location_state[\"faction\"] == \"barbarian\":\n                    continue\n                amount = self._current_rules()[\"resource_generation\"][\"village\"]\n                with self.tracer.start_as_current_span(\n                    \"passive_resource_generation\",\n                    attributes={\n                        \"location.id\": self.location_id,\n                        \"resources_gained\": amount,\n                        \"game.map.id\": self.map_id,\n                        \"owner.faction\": location_state[\"faction\"],\n                    }\n                ):\n                    new_resources = location_state[\"resources\"] + amount\n                    self._update_location_state(self.location_id, resources=new_resources)\n                    self.telemetry.collect_metrics()\n\n        Thread(target=generate_resources, daemon=True).start()\n\n    def _start_barbarian_growth(self, interval_s: int):\n        \"\"\"Barbarian villages grow +1 army every ``interval_s`` seconds.\n\n        Barbarians never initiate combat; they exist to pressure the map and\n        feed the White Walker corpse economy. 
The thread self-gates against\n        identity changes so it becomes a no-op if /reload moves this slot off\n        a barbarian role.\n        \"\"\"\n        def grow():\n            while True:\n                time.sleep(interval_s)\n                if self.location_info[\"faction\"] != \"barbarian\":\n                    continue\n                with self.tracer.start_as_current_span(\n                    \"barbarian_passive_growth\",\n                    attributes={\n                        \"location.id\": self.location_id,\n                        \"game.map.id\": self.map_id,\n                        \"army_gained\": 1,\n                    }\n                ):\n                    state = self._get_location_state(self.location_id)\n                    if state is None:\n                        continue\n                    # Only grow while still barbarian-controlled.\n                    if state[\"faction\"] != \"barbarian\":\n                        continue\n                    self._update_location_state(self.location_id, army=state[\"army\"] + 1)\n                    self.telemetry.collect_metrics()\n\n        Thread(target=grow, daemon=True).start()\n\n    def _start_nights_watch_capital_resource_tick(self, interval_s: int, amount: int):\n        \"\"\"Passive resource generation at the Night's Watch capital (WWA only).\n\n        WWA gives the player no friendly villages, so /collect_resources at\n        Castle Black is the only income source — leading to click-spam UX. 
A\n        slow passive tick supplements that without removing the incentive to\n        actively collect (manual is +20 per 5 s; passive is +amount per\n        interval_s, configured well below that).\n        \"\"\"\n        def tick():\n            while True:\n                time.sleep(interval_s)\n                if (self.location_info[\"faction\"] != \"nights_watch\"\n                    or self.location_info[\"type\"] != \"capital\"):\n                    continue\n                with self.tracer.start_as_current_span(\n                    \"nights_watch_passive_resource\",\n                    attributes={\n                        \"location.id\": self.location_id,\n                        \"game.map.id\": self.map_id,\n                        \"resources_gained\": amount,\n                    }\n                ):\n                    state = self._get_location_state(self.location_id)\n                    if state is None:\n                        continue\n                    if state[\"faction\"] != \"nights_watch\":\n                        continue\n                    self._update_location_state(\n                        self.location_id, resources=state[\"resources\"] + amount\n                    )\n                    self.telemetry.collect_metrics()\n\n        Thread(target=tick, daemon=True).start()\n\n    def _start_white_walker_corpse_tick(self, interval_s: int):\n        \"\"\"Passive corpse generation at the White Walker fortress.\n\n        Simulates the undead slowly rising — keeps the WW economy nonzero even\n        when no battles are happening. 
Corpses accrue to the faction pool.\n        \"\"\"\n        def tick():\n            while True:\n                time.sleep(interval_s)\n                if self.location_info[\"faction\"] != \"white_walkers\" or self.location_info[\"type\"] != \"capital\":\n                    continue\n                with self.tracer.start_as_current_span(\n                    \"white_walker_corpse_tick\",\n                    attributes={\n                        \"location.id\": self.location_id,\n                        \"game.map.id\": self.map_id,\n                        \"game.corpses.harvested\": 1,\n                        \"corpse.source\": \"passive\",\n                    }\n                ):\n                    self._add_corpses(1, \"white_walkers\")\n                    self.telemetry.collect_metrics()\n\n        Thread(target=tick, daemon=True).start()\n\n    def reset_database(self):\n        \"\"\"Reset every location row + the corpse pool to the active map's initial state.\"\"\"\n        conn = self._get_db_connection()\n        cursor = conn.cursor()\n\n        cursor.execute(\"DELETE FROM locations\")\n\n        for loc_id, loc_info in self._current_locations().items():\n            cursor.execute(\n                \"INSERT INTO locations VALUES (?, ?, ?, ?)\",\n                (\n                    loc_id,\n                    loc_info[\"initial_resources\"],\n                    loc_info[\"initial_army\"],\n                    loc_info[\"faction\"],\n                ),\n            )\n\n        cursor.execute(\"DELETE FROM faction_economy\")\n\n        conn.commit()\n        conn.close()\n        self.logger.info(f\"Database reset to initial state for map {self.map_id}\")\n\n    def setup_routes(self):\n        @self.app.route('/', methods=['GET'])\n        def info():\n            context = extract(request.headers)\n            with self.tracer.start_as_current_span(\n                \"get_location_info\",\n                context=context,\n            
    kind=SpanKind.SERVER,\n                attributes={\n                    \"location.id\": self.location_id,\n                    \"location.name\": self.location_info[\"name\"],\n                    \"location.type\": self.location_info[\"type\"]\n                }\n            ):\n                location_state = self._get_location_state(self.location_id)\n\n                cooldown_info = None\n                with self.lock:\n                    now = datetime.now()\n                    last_time = self.last_resource_collection.get(self.location_id, datetime.min)\n                    wait_time = timedelta(seconds=15 if self.location_info[\"type\"] == \"village\" else 5)\n\n                    if now - last_time < wait_time:\n                        remaining = wait_time - (now - last_time)\n                        cooldown_info = remaining.seconds\n\n                return jsonify({\n                    \"location_id\": self.location_id,\n                    \"name\": self.location_info[\"name\"],\n                    \"faction\": location_state[\"faction\"],\n                    \"connections\": self.location_info[\"connections\"],\n                    \"resources\": location_state[\"resources\"],\n                    \"army\": location_state[\"army\"],\n                    \"resource_cooldown\": cooldown_info\n                })\n\n        @self.app.route('/health', methods=['GET'])\n        def health():\n            return jsonify({\"status\": \"ok\"})\n\n        @self.app.route('/collect_resources', methods=['POST'])\n        def collect_resources():\n            \"\"\"Collect resources from a location\"\"\"\n            # Extract trace context from request headers\n            context = extract(request.headers)\n            \n            with self.tracer.start_as_current_span(\n                \"collect_resources\",\n                context=context,\n                kind=SpanKind.SERVER,\n                attributes={\n                    
\"location_name\": self.location_info[\"name\"],\n                    \"location_type\": self.location_info[\"type\"]\n                }\n            ) as span:\n                can_collect, message, cooldown_seconds = self._can_collect_resources()\n                if not can_collect:\n                    span.set_status(trace.StatusCode.ERROR, message)\n                    span.set_attribute(\"cooldown_seconds\", cooldown_seconds or 0)\n                    return jsonify({\n                        \"success\": False,\n                        \"message\": message,\n                        \"cooldown\": True,\n                        \"cooldown_seconds\": cooldown_seconds\n                    }), 200  # Return 200 for cooldown, as it's an expected state\n                \n                location_type = self.location_info[\"type\"]\n                resources_gained = self._current_rules()[\"resource_generation\"].get(location_type, 0)\n\n                location_state = self._get_location_state(self.location_id)\n                new_resources = location_state[\"resources\"] + resources_gained\n                self._update_location_state(self.location_id, resources=new_resources)\n                \n                span.set_attribute(\"resources_gained\", resources_gained)\n                span.set_attribute(\"new_resources_total\", new_resources)\n                \n                with self.lock:\n                    self.last_resource_collection[self.location_id] = datetime.now()\n                \n                # Force metric collection after resource update\n                self.telemetry.collect_metrics()\n                \n                return jsonify({\n                    \"success\": True,\n                    \"message\": f\"Collected {resources_gained} resources\",\n                    \"current_resources\": new_resources,\n                    \"cooldown\": False\n                })\n        \n        @self.app.route('/create_army', methods=['POST'])\n  
      def create_army():\n            # Extract trace context from request headers\n            context = extract(request.headers)\n\n            with self.tracer.start_as_current_span(\n                \"create_army\",\n                context=context,\n                kind=SpanKind.SERVER,\n                attributes={\n                    \"location_name\": self.location_info[\"name\"],\n                    \"location_type\": self.location_info[\"type\"],\n                    \"game.map.id\": self.map_id,\n                }\n            ) as span:\n                if self.location_info[\"type\"] != \"capital\":\n                    span.set_status(trace.StatusCode.ERROR, \"Only capitals can create armies\")\n                    return jsonify({\n                        \"success\": False,\n                        \"message\": \"Only capitals can create armies\"\n                    }), 403\n\n                location_state = self._get_location_state(self.location_id)\n                current_resources = location_state[\"resources\"]\n                current_army = location_state[\"army\"]\n                faction = location_state[\"faction\"]\n                currency = get_army_currency(self.map_id, faction)\n                cost = get_army_cost(self.map_id, faction)\n\n                span.set_attribute(\"current_resources\", current_resources)\n                span.set_attribute(\"current_army\", current_army)\n                span.set_attribute(\"army_cost\", cost)\n                span.set_attribute(\"army_currency\", currency)\n                span.set_attribute(\"faction\", faction)\n\n                if currency == \"corpses\":\n                    # White Walkers spend corpses from the faction pool, not\n                    # resources from the location.\n                    if not self._spend_corpses(cost, faction):\n                        available = self._get_corpses(faction)\n                        span.set_status(trace.StatusCode.ERROR, 
\"Insufficient corpses\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": f\"Not enough corpses. Need {cost}, have {available}\"\n                        }), 400\n                    new_resources = current_resources\n                    new_army = current_army + 1\n                    self._update_location_state(self.location_id, army=new_army)\n                    span.set_attribute(\"game.corpses.spent\", cost)\n                    span.set_attribute(\"corpses_remaining\", self._get_corpses(faction))\n                else:\n                    if current_resources < cost:\n                        span.set_status(trace.StatusCode.ERROR, \"Insufficient resources\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": f\"Not enough resources. Need {cost}, have {current_resources}\"\n                        }), 400\n\n                    new_resources = current_resources - cost\n                    new_army = current_army + 1\n\n                    self._update_location_state(\n                        self.location_id,\n                        resources=new_resources,\n                        army=new_army\n                    )\n\n                span.set_attribute(\"new_resources\", new_resources)\n                span.set_attribute(\"new_army\", new_army)\n\n                self.telemetry.collect_metrics()\n\n                return jsonify({\n                    \"success\": True,\n                    \"message\": \"Army created\",\n                    \"current_army\": new_army,\n                    \"current_resources\": new_resources,\n                    \"currency\": currency,\n                })\n        \n        @self.app.route('/move_army', methods=['POST'])\n        def move_army():\n            # Extract trace context from request headers\n            context = extract(request.headers)\n    
        \n            with self.tracer.start_as_current_span(\n                \"move_army_request\",\n                context=context,\n                kind=SpanKind.SERVER,\n                attributes={\n                    \"location_name\": self.location_info[\"name\"],\n                    \"location_type\": self.location_info[\"type\"]\n                }\n            ) as move_span:\n                data = request.get_json()\n                if not data or 'target_location' not in data:\n                    move_span.set_status(trace.StatusCode.ERROR, \"Target location not specified\")\n                    return jsonify({\"success\": False, \"message\": \"Target location not specified\"}), 400\n                \n                target_location = data['target_location']\n                remaining_path = data.get('remaining_path', [])\n                is_attack_move = data.get('is_attack_move', False)\n                \n                move_span.set_attribute(\"target_location\", target_location)\n                move_span.set_attribute(\"is_attack_move\", is_attack_move)\n                \n                if target_location not in self.location_info[\"connections\"]:\n                    move_span.set_status(trace.StatusCode.ERROR, f\"Cannot move to {target_location}\")\n                    return jsonify({\n                        \"success\": False,\n                        \"message\": f\"Cannot move to {target_location}. 
Not connected to {self.location_id}\"\n                    }), 400\n                \n                location_state = self._get_location_state(self.location_id)\n                if location_state[\"army\"] <= 0:\n                    move_span.set_status(trace.StatusCode.ERROR, \"No army to move\")\n                    return jsonify({\n                        \"success\": False,\n                        \"message\": \"No army to move\"\n                    }), 400\n                \n                try:\n                    army_size = location_state[\"army\"]\n                    current_faction = location_state[\"faction\"]\n                    \n                    move_span.set_attribute(\"army_size\", army_size)\n                    move_span.set_attribute(\"faction\", current_faction)\n                    \n                    # Update the source location's army to 0\n                    self._update_location_state(self.location_id, army=0)\n                    \n                    # Force metric collection after army leaves the location\n                    self.telemetry.collect_metrics()\n                    \n                    result = self._continue_army_movement(\n                        army_size,\n                        current_faction,\n                        self.location_id,\n                        target_location,\n                        remaining_path,\n                        is_attack_move\n                    )\n                    \n                    if not result.get(\"success\", True):\n                        move_span.set_status(trace.StatusCode.ERROR, result.get(\"message\", \"Unknown error\"))\n                    \n                    return jsonify(result)\n                except Exception as e:\n                    move_span.record_exception(e)\n                    move_span.set_status(trace.StatusCode.ERROR, str(e))\n                    return jsonify({\n                        \"success\": False,\n                        
\"message\": f\"Failed to move army: {str(e)}\"\n                    }), 500\n        \n        @self.app.route('/all_out_attack', methods=['POST'])\n        def all_out_attack():\n            \"\"\"Launch an all-out attack from a capital to the enemy capital\"\"\"\n            context = extract(request.headers)\n            \n            with self.tracer.start_as_current_span(\n                \"all_out_attack\",\n                context=context,\n                kind=SpanKind.SERVER,\n                attributes={\n                    \"location_name\": self.location_info[\"name\"],\n                    \"location_type\": self.location_info[\"type\"]\n                }\n            ) as attack_span:\n                try:\n                    if self.location_info[\"type\"] != \"capital\":\n                        attack_span.set_status(trace.StatusCode.ERROR, \"Only capitals can launch all-out attacks\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"Only capitals can launch all-out attacks\"\n                        }), 403\n                    \n                    location_state = self._get_location_state(self.location_id)\n                    army_size = location_state[\"army\"]\n                    faction = location_state[\"faction\"]\n                    \n                    if army_size <= 0:\n                        attack_span.set_status(trace.StatusCode.ERROR, \"No army available for attack\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"No army available for attack\"\n                        }), 400\n                    \n                    # Determine enemy capital based on the active map's config.\n                    target_capital = self._find_enemy_capital(faction)\n                    if not target_capital:\n                        
attack_span.set_status(trace.StatusCode.ERROR, \"No enemy capital on this map\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"No enemy capital to attack on this map\"\n                        }), 400\n                    attack_span.set_attribute(\"target_capital\", target_capital)\n                    \n                    attack_path = self._find_path(target_capital, PathType.ATTACK)\n                    \n                    if not attack_path:\n                        attack_span.set_status(trace.StatusCode.ERROR, \"No valid path to enemy capital\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"No valid path to enemy capital\"\n                        }), 400\n                    \n                    attack_span.set_attribute(\"attack_path\", str(attack_path))\n                    attack_span.set_attribute(\"initial_army_size\", army_size)\n                    \n                    # Set army to 0 before starting the attack\n                    self._update_location_state(self.location_id, army=0)\n                    \n                    if len(attack_path) > 1:\n                        next_loc = attack_path[1]\n                        result = self._continue_army_movement(\n                            army_size,\n                            faction,\n                            self.location_id,\n                            next_loc,\n                            attack_path[1:],\n                            is_attack_move=True\n                        )\n                        \n                        if not result.get(\"success\", False):\n                            # If movement fails, restore the army\n                            self._update_location_state(self.location_id, army=army_size)\n                            attack_span.set_status(trace.StatusCode.ERROR, \"Failed 
to start attack\")\n                            return jsonify({\n                                \"success\": False,\n                                \"message\": f\"Failed to start attack: {result.get('message', 'Unknown error')}\"\n                            }), 400\n                        \n                        return jsonify({\n                            \"success\": True,\n                            \"message\": f\"All-out attack started with {army_size} troops\",\n                            \"path\": attack_path,\n                            \"army_size\": army_size\n                        })\n                    \n                    return jsonify({\n                        \"success\": False,\n                        \"message\": \"Invalid attack path\"\n                    }), 400\n                    \n                except Exception as e:\n                    attack_span.record_exception(e)\n                    attack_span.set_status(trace.StatusCode.ERROR, str(e))\n                    raise\n        \n        @self.app.route('/receive_army', methods=['POST'])\n        def receive_army():\n            try:\n                data = request.get_json()\n                self.logger.info(f\"Received army at {self.location_id}: {data}\")\n                \n                if not data or 'army_size' not in data or 'faction' not in data:\n                    return jsonify({\"success\": False, \"message\": \"Invalid army data\"}), 400\n                \n                context = extract(request.headers)\n                \n                with self.tracer.start_as_current_span(\n                    \"receive_army\",\n                    context=context,\n                    kind=SpanKind.SERVER,\n                    attributes={\n                        \"location_name\": self.location_info[\"name\"],\n                        \"location_type\": self.location_info[\"type\"]\n                    }\n                ) as battle_span:\n                    
attacking_army = data['army_size']\n                    attacking_faction = data['faction']\n                    source_location = data.get('source_location', 'unknown')\n                    remaining_path = data.get('remaining_path', [])\n                    is_attack_move = data.get('is_attack_move', False)\n                    \n                    location_state = self._get_location_state(self.location_id)\n                    defending_army = location_state[\"army\"]\n                    defending_faction = location_state[\"faction\"]\n                    \n                    battle_span.set_attribute(\"source_location\", source_location)\n                    battle_span.set_attribute(\"attacking_army\", attacking_army)\n                    battle_span.set_attribute(\"defending_army\", defending_army)\n                    battle_span.set_attribute(\"remaining_path\", str(remaining_path))\n                    battle_span.set_attribute(\"is_attack_move\", is_attack_move)\n\n                    self.logger.info(f\"Received army at {self.location_id}: {data}\")\n                    self.logger.info(f\"Remaining path: {remaining_path}, is_attack_move: {is_attack_move}\")\n                    \n                    if attacking_faction == defending_faction:\n                        # For all-out attacks, combine armies with friendly villages\n                        if is_attack_move and self.location_info[\"type\"] == \"village\":\n                            # Add village's army to the attacking force\n                            attacking_army += defending_army\n                            # Set village's army to 0\n                            self._update_location_state(self.location_id, army=0)\n                            battle_span.set_attribute(\"combined_army_size\", attacking_army)\n                            self.logger.info(f\"Combined armies at {self.location_id}: {attacking_army} (village army was {defending_army})\")\n                        \n      
                  # Continue movement if there's a path remaining\n                        if is_attack_move and remaining_path:\n                            next_location = remaining_path[0]\n                            new_remaining_path = remaining_path[1:] if len(remaining_path) > 1 else []\n                            self.logger.info(f\"Continuing attack from {self.location_id} to {next_location}, new path: {new_remaining_path}\")\n                            \n                            result = self._continue_army_movement(\n                                attacking_army,  # Use the potentially increased army size\n                                attacking_faction,\n                                self.location_id,\n                                next_location,\n                                new_remaining_path,\n                                is_attack_move\n                            )\n                            battle_span.set_attribute(\"result\", \"friendly_passage\")\n                            self.logger.info(f\"Friendly passage result: {result}\")\n                            # Force metric collection after friendly passage\n                            self.telemetry.collect_metrics()\n                            return jsonify(result)\n                        elif not is_attack_move:\n                            # Normal army movement - combine armies\n                            new_army = defending_army + attacking_army\n                            self._update_location_state(self.location_id, army=new_army)\n                            battle_span.set_attribute(\"result\", \"armies_combined\")\n                            self.logger.info(f\"Armies combined at {self.location_info['name']}: {new_army}\")\n                            # Force metric collection after combining armies\n                            self.telemetry.collect_metrics()\n                            return jsonify({\n                                \"success\": 
True,\n                                \"message\": f\"Armies combined at {self.location_info['name']}\",\n                                \"current_army\": new_army,\n                                \"faction\": defending_faction\n                            })\n                        else:\n                            # All-out attack reached friendly location with no remaining path\n                            # This shouldn't normally happen, but handle it gracefully\n                            if self.location_info[\"type\"] == \"capital\":\n                                # If it's our own capital, stop here\n                                self._update_location_state(self.location_id, army=attacking_army)\n                                battle_span.set_attribute(\"result\", \"returned_to_capital\")\n                                self.logger.warning(f\"All-out attack returned to own capital with {attacking_army} troops\")\n                            else:\n                                # For villages, the army should already be zeroed out above\n                                battle_span.set_attribute(\"result\", \"attack_ended_at_village\")\n                                self.logger.warning(f\"All-out attack ended at friendly village {self.location_id}\")\n                            \n                            self.telemetry.collect_metrics()\n                            return jsonify({\n                                \"success\": True,\n                                \"message\": f\"Army movement ended at {self.location_info['name']}\",\n                                \"current_army\": self._get_location_state(self.location_id)[\"army\"],\n                                \"faction\": defending_faction\n                            })\n                    \n                    battle_result, remaining_army, new_faction = self._handle_battle(\n                        attacking_army,\n                        attacking_faction,\n                
        defending_army,\n                        defending_faction,\n                        location_type=self.location_info[\"type\"],\n                    )\n\n                    # Corpse harvesting: the White Walkers reap from any battle\n                    # they win (either as attacker or defender). Corpses equal\n                    # the total physical units that died on both sides.\n                    if new_faction == \"white_walkers\":\n                        dead = max(0, attacking_army + defending_army - remaining_army)\n                        if dead > 0:\n                            self._add_corpses(dead, \"white_walkers\")\n                            battle_span.set_attribute(\"game.corpses.harvested\", dead)\n                            battle_span.set_attribute(\"corpse.source\", \"battle\")\n\n                    self._update_location_state(\n                        self.location_id,\n                        army=remaining_army,\n                        faction=new_faction\n                    )\n\n                    battle_span.set_attribute(\"result\", battle_result)\n                    battle_span.set_attribute(\"remaining_army\", remaining_army)\n                    battle_span.set_attribute(\"game.map.id\", self.map_id)\n                    if self.location_info[\"type\"] == \"wall\":\n                        battle_span.set_attribute(\"game.wall.held\", new_faction != \"neutral\")\n                        battle_span.set_attribute(\"span.wall.battle\", True)\n                    \n                    if battle_result == \"attacker_victory\" and is_attack_move and remaining_path:\n                        self.logger.info(f\"Continuing army movement at {self.location_id}: {remaining_army}\")\n                        self.logger.info(f\"Battle victory - continuing to {remaining_path[0]}, path: {remaining_path[1:]}\")\n                        result = self._continue_army_movement(\n                            remaining_army,\n          
                  attacking_faction,\n                            self.location_id,\n                            remaining_path[0],\n                            remaining_path[1:] if len(remaining_path) > 1 else [],\n                            is_attack_move\n                        )\n                        return jsonify(result)\n                    \n                    if battle_result != \"attacker_victory\":\n                        self.logger.warning(f\"Battle result: {battle_result}\")\n                        battle_span.add_event(\"battle_result\", attributes={\n                            \"outcome\": battle_result,\n                            \"attacker_faction\": attacking_faction,\n                            \"defender_faction\": defending_faction,\n                            \"remaining_army\": remaining_army,\n                        })\n                    \n                    # Force metric collection after battle resolution\n                    self.telemetry.collect_metrics()\n                    \n                    return jsonify({\n                        \"success\": battle_result == \"attacker_victory\",\n                        \"message\": f\"Battle at {self.location_info['name']}: {battle_result}\",\n                        \"current_army\": remaining_army,\n                        \"faction\": new_faction\n                    })\n                    \n            except Exception as e:\n                self.logger.error(f\"Error in receive_army: {str(e)}\")\n                return jsonify({\"success\": False, \"message\": f\"Error: {str(e)}\"}), 500\n        \n        @self.app.route('/reset', methods=['POST'])\n        def reset():\n            self.reset_database()\n            return jsonify({\"success\": True, \"message\": \"Game state reset to initial values\"})\n\n        @self.app.route('/reload', methods=['POST'])\n        def reload_identity():\n            \"\"\"Re-read the active map from the DB and rebind this slot's 
identity.\n\n            Called by ``war_map`` after ``/select_map``. The slot's port + the\n            telemetry service name do not change — only the logical\n            ``location_id``, ``name``, ``type``, ``faction``, connections, and\n            rules-scoped behaviour.\n            \"\"\"\n            self._load_identity()\n            return jsonify({\n                \"success\": True,\n                \"slot_id\": self.slot_id,\n                \"map_id\": self.map_id,\n                \"location_id\": self.location_id,\n                \"faction\": self.location_info[\"faction\"],\n                \"type\": self.location_info[\"type\"],\n            })\n\n        @self.app.route('/faction_economy', methods=['GET'])\n        def faction_economy():\n            \"\"\"Expose the corpse pool for a faction (used by the AI).\"\"\"\n            faction = request.args.get('faction', 'white_walkers')\n            return jsonify({\n                \"faction\": faction,\n                \"corpses\": self._get_corpses(faction),\n            })\n        \n        @self.app.route('/send_resources_to_capital', methods=['POST'])\n        def send_resources_to_capital():\n            # Extract trace context from request headers\n            context = extract(request.headers)\n            \n            with self.tracer.start_as_current_span(\n                \"send_resources_to_capital\",\n                context=context,  # Use the extracted context\n                kind=SpanKind.SERVER,\n                attributes={\n                    \"location_name\": self.location_info[\"name\"],\n                    \"location_type\": self.location_info[\"type\"]\n                }\n            ) as span:\n                try:\n                    location_state = self._get_location_state(self.location_id)\n                    current_resources = location_state[\"resources\"]\n                    faction = location_state[\"faction\"]\n                    \n                    
span.set_attribute(\"resources_amount\", current_resources)\n                    span.set_attribute(\"faction\", faction)\n                    \n                    if self.location_info[\"type\"] != \"village\":\n                        span.set_status(trace.StatusCode.ERROR, \"Only villages can send resources\")\n                        self.logger.error(f\"Only villages can send resources to capital\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"Only villages can send resources to capital\"\n                        }), 403\n                    \n                    resource_factions = {\"southern\", \"northern\", \"nights_watch\"}\n                    if faction not in resource_factions:\n                        span.set_status(trace.StatusCode.ERROR, \"Faction has no resource economy\")\n                        self.logger.error(\n                            f\"Faction {faction!r} has no resource economy; cannot send to capital\"\n                        )\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"This faction does not send resources\",\n                        }), 403\n\n                    # Target this faction's capital on the active map.\n                    target_capital = self._find_capital(faction)\n                    if not target_capital:\n                        span.set_status(trace.StatusCode.ERROR, \"No friendly capital on this map\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"No friendly capital to send resources to\"\n                        }), 400\n                    path = self._find_path(target_capital, PathType.RESOURCE)\n                    if not path:\n                        span.set_status(trace.StatusCode.ERROR, \"No valid path to capital\")\n                    
    self.logger.error(f\"No valid path to capital found\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"No valid path to capital found\"\n                        }), 400\n                    \n                    span.set_attribute(\"path_to_capital\", str(path))\n                    \n                    if self._transfer_resources_along_path(current_resources, path):\n                        self._start_resource_cooldown()\n                        self.logger.info(f\"Resources sent to capital via {path}\")\n                        # Force metric collection after initiating resource transfer\n                        self.telemetry.collect_metrics()\n                        return jsonify({\n                            \"success\": True,\n                            \"message\": f\"Sending {current_resources} resources to capital via {' -> '.join(path)}\",\n                            \"path\": path,\n                            \"amount\": current_resources\n                        })\n                    else:\n                        span.set_status(trace.StatusCode.ERROR, \"Failed to start resource transfer\")\n                        self.logger.error(f\"Failed to start resource transfer\")\n                        return jsonify({\n                            \"success\": False,\n                            \"message\": \"Failed to start resource transfer\"\n                        }), 500\n                except Exception as e:\n                    span.record_exception(e)\n                    span.set_status(trace.StatusCode.ERROR, str(e))\n                    self.logger.error(f\"Error in send_resources_to_capital: {str(e)}\")\n                    return jsonify({\n                        \"success\": False,\n                        \"message\": f\"Error: {str(e)}\"\n                    }), 500\n        \n        @self.app.route('/receive_resources', 
methods=['POST'])\n        def receive_resources():\n            data = request.get_json()\n            if not data or 'resources' not in data or 'faction' not in data:\n                return jsonify({\"success\": False, \"message\": \"Invalid resource data\"}), 400\n            \n            context = extract(request.headers)\n            \n            with self.tracer.start_as_current_span(\n                \"receive_resources\",\n                context=context,\n                attributes={\n                    \"location\": self.location_id,\n                    \"location_type\": self.location_info[\"type\"],\n                    \"sending_faction\": data['faction'],\n                    \"receiving_faction\": self._get_location_state(self.location_id)[\"faction\"],\n                    \"resources_amount\": data['resources']\n                }\n            ) as transfer_span:\n                incoming_resources = data['resources']\n                source_location = data.get('source_location', 'unknown')\n                remaining_path = data.get('remaining_path', [])\n                faction = data['faction']\n                \n                transfer_span.set_attribute(\"source_location\", source_location)\n                \n                location_state = self._get_location_state(self.location_id)\n                current_resources = location_state[\"resources\"]\n                current_faction = location_state[\"faction\"]\n                \n                if current_faction != faction:\n                    transfer_span.set_status(trace.Status(trace.StatusCode.ERROR, f\"Resources captured by {current_faction}\"))\n                    self._update_location_state(self.location_id, resources=current_resources + incoming_resources)\n                    # Force metric collection after resource capture\n                    self.telemetry.collect_metrics()\n                    self.logger.error(f\"Resources captured by {current_faction}\")\n                
    return jsonify({\n                        \"success\": False,\n                        \"message\": f\"Resources captured by {current_faction}!\",\n                        \"current_resources\": current_resources + incoming_resources\n                    })\n                \n                new_resources = current_resources + incoming_resources\n                self._update_location_state(self.location_id, resources=new_resources)\n                # Force metric collection after receiving resources\n                self.telemetry.collect_metrics()\n                self.logger.info(f\"Resources updated to {new_resources}\")\n                \n                if len(remaining_path) > 1:\n                    next_loc = remaining_path[1]\n                    \n                    def continue_transfer():\n                        with self._start_movement_trace(\n                            \"resource_movement\",\n                            self.location_id,\n                            next_loc,\n                            resources=incoming_resources\n                        ) as movement_span:\n                            try:\n                                time.sleep(5)\n                                target_url = f\"{self.get_location_url(next_loc)}/receive_resources\"\n                                self.logger.info(f\"Sending resources to {next_loc} with target URL: {target_url}\")\n                                result = self._make_request_with_trace('post', target_url, {\n                                    \"resources\": incoming_resources,\n                                    \"source_location\": self.location_id,\n                                    \"remaining_path\": remaining_path[1:],\n                                    \"faction\": faction\n                                }, span_name=\"http_request.forward_resources\")\n                                \n                                if not result.get(\"success\", False):\n                
                    movement_span.set_status(trace.Status(trace.StatusCode.ERROR, \"Resource transfer failed\"))\n                                \n                                current_state = self._get_location_state(self.location_id)\n                                self._update_location_state(self.location_id, \n                                    resources=current_state[\"resources\"] - incoming_resources)\n                                # Force metric collection after forwarding resources\n                                self.telemetry.collect_metrics()\n                                self.logger.info(f\"Resources updated to {current_state['resources'] - incoming_resources}\")\n                            except Exception as e:\n                                movement_span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))\n                                self.logger.error(f\"Failed to forward resources to {next_loc}: {str(e)}\")\n                    \n                    Thread(target=continue_transfer).start()\n                \n                transfer_span.set_attribute(\"final_resources\", new_resources)\n                if self.location_info[\"type\"] == \"capital\":\n                    transfer_span.set_attribute(\"resources_reached_capital\", True)\n                \n                self.logger.info(f\"Resources received at {self.location_info['name']}\")\n                return jsonify({\n                    \"success\": True,\n                    \"message\": f\"Resources received at {self.location_info['name']}\",\n                    \"current_resources\": new_resources\n                })\n    \n    def run(self):\n        port = self.location_info[\"port\"]\n        self.app.run(host='0.0.0.0', port=port)\n        self.logger.info(f\"Location server running on port {port}\")\n\n\nif __name__ == '__main__':\n    # Docker entrypoint: read SLOT_ID env var, resolve identity from the\n    # shared active_map_id, and serve. 
SERVICE_NAME comes from LOCATION_NAME\n    # (set per-container in docker-compose.yml) or is synthesised from slot.\n    LocationServer().run()"
  },
  {
    "path": "game-of-tracing/app/requirements.txt",
    "content": "flask==3.1.3\nrequests==2.33.1\nopentelemetry-api==1.41.1\nopentelemetry-sdk==1.41.1\nopentelemetry-exporter-otlp==1.41.1\npyroscope-io==1.0.6\npyroscope-otel==1.0.0\n"
  },
  {
    "path": "game-of-tracing/app/run_game.py",
    "content": "import os\nimport sys\nimport json\nimport sqlite3\nimport argparse\nimport multiprocessing\nfrom game_config import LOCATIONS, DATABASE_FILE\nfrom location_server import LocationServer\n\ndef reset_game():\n    \"\"\"Reset the database to initial state\"\"\"\n    db_path = os.environ.get('DATABASE_FILE', DATABASE_FILE)\n    \n    if os.path.exists(db_path):\n        # Connect to database and reset it\n        conn = sqlite3.connect(db_path)\n        cursor = conn.cursor()\n        \n        # Delete all data\n        cursor.execute(\"DELETE FROM locations\")\n        \n        # Reinitialize locations\n        for loc_id, loc_info in LOCATIONS.items():\n            cursor.execute(\n                \"INSERT INTO locations VALUES (?, ?, ?, ?)\",\n                (\n                    loc_id,\n                    loc_info[\"initial_resources\"],\n                    loc_info[\"initial_army\"],\n                    loc_info[\"faction\"]\n                )\n            )\n        \n        conn.commit()\n        conn.close()\n        print(f\"Game reset successfully. Database {db_path} reset to initial state.\")\n    else:\n        print(\"Database not found. 
It will be created when the game starts.\")\n\ndef run_location(location_id):\n    \"\"\"Run a location server in a separate process\"\"\"\n    print(f\"Starting {LOCATIONS[location_id]['name']} (Port: {LOCATIONS[location_id]['port']})\")\n    server = LocationServer(location_id)\n    server.run()\n\ndef run_single_location():\n    \"\"\"Run a single location server based on environment variable\"\"\"\n    location_id = os.environ.get('LOCATION_ID')\n    if not location_id:\n        print(\"Error: LOCATION_ID environment variable not set\")\n        sys.exit(1)\n        \n    if location_id not in LOCATIONS:\n        print(f\"Error: Invalid location_id '{location_id}'\")\n        sys.exit(1)\n        \n    print(f\"Starting {LOCATIONS[location_id]['name']} server (Port: {LOCATIONS[location_id]['port']})\")\n    server = LocationServer(location_id)\n    server.run()\n\ndef show_game_state():\n    \"\"\"Show the current game state from the database\"\"\"\n    db_path = os.environ.get('DATABASE_FILE', DATABASE_FILE)\n    \n    if not os.path.exists(db_path):\n        print(\"Database not found. Starting a new game...\")\n        return\n    \n    try:\n        conn = sqlite3.connect(db_path)\n        conn.row_factory = sqlite3.Row\n        cursor = conn.cursor()\n        \n        cursor.execute(\"SELECT * FROM locations\")\n        rows = cursor.fetchall()\n        \n        if not rows:\n            print(\"No game state found. 
Starting a new game...\")\n            return\n        \n        print(\"Current Game State:\")\n        for row in rows:\n            loc_id = row['id']\n            print(f\"{LOCATIONS[loc_id]['name']} ({loc_id}): Faction={row['faction']}, Army={row['army']}, Resources={row['resources']}\")\n        \n        conn.close()\n    except sqlite3.Error as e:\n        print(f\"Error accessing database: {e}\")\n        print(\"Starting a new game...\")\n\ndef run_game(reset=False):\n    \"\"\"Run all location servers\"\"\"\n    if reset:\n        reset_game()\n    \n    # Check if we're in Docker and should run just one location\n    if os.environ.get('LOCATION_ID'):\n        run_single_location()\n        return\n\n    # Show initial game state\n    show_game_state()\n    \n    # Start each location server in a separate process\n    processes = []\n    for location_id in LOCATIONS:\n        p = multiprocessing.Process(target=run_location, args=(location_id,))\n        p.start()\n        processes.append(p)\n    \n    print(\"\\nAll locations are running!\")\n    print(\"Game Instructions:\")\n    print(\"1. Each location is running a Flask server at its designated port\")\n    print(\"2. Use HTTP requests to interact with locations\")\n    print(\"3. Example commands:\")\n    print(\"   - Get location info: curl http://localhost:[PORT]/\")\n    print(\"   - Collect resources: curl -X POST http://localhost:[PORT]/collect_resources\")\n    print(\"   - Create army: curl -X POST http://localhost:[PORT]/create_army\")\n    print(\"   - Move army: curl -X POST -H \\\"Content-Type: application/json\\\" -d '{\\\"target_location\\\":\\\"village_1\\\"}' http://localhost:[PORT]/move_army\")\n    print(\"   - Reset game: curl -X POST http://localhost:[PORT]/reset\")\n    print(\"4. 
Or use the game client: python game_client.py map\")\n    \n    try:\n        # Wait for processes to complete (they won't unless terminated)\n        for p in processes:\n            p.join()\n    except KeyboardInterrupt:\n        print(\"\\nShutting down all servers...\")\n        for p in processes:\n            p.terminate()\n        print(\"Game ended.\")\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"War of Westeros Game\")\n    parser.add_argument(\"--reset\", action=\"store_true\", help=\"Reset the game state\")\n    args = parser.parse_args()\n    \n    run_game(args.reset) "
  },
  {
    "path": "game-of-tracing/app/telemetry.py",
    "content": "import os\n\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry import trace\n\n# Logging setup\nimport logging\nfrom opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry._logs import set_logger_provider\n\n# Metrics setup\nfrom opentelemetry import metrics\nfrom opentelemetry.sdk.metrics import MeterProvider\nfrom opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter\nfrom opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader\nfrom opentelemetry.sdk.metrics import TraceBasedExemplarFilter\nfrom opentelemetry.metrics import CallbackOptions, Observation\nfrom typing import Iterable\n\n# Profiling setup (Pyroscope v2 + OTel span-profile linking)\nimport pyroscope\nfrom pyroscope.otel import PyroscopeSpanProcessor\n\nclass GameTelemetry:\n    def __init__(self, service_name, logging_endpoint=\"http://alloy:4318\", tracing_endpoint=\"http://alloy:4317\", metrics_endpoint=\"http://alloy:4318\"):\n        self.service_name = service_name\n        self.logging_endpoint = logging_endpoint\n        self.tracing_endpoint = tracing_endpoint\n        self.metrics_endpoint = metrics_endpoint\n        self.resource = Resource.create(attributes={\n            SERVICE_NAME: service_name\n        })\n\n        self._setup_logging()\n        self._setup_tracing()\n        self._setup_metrics()\n        self._setup_profiling()\n        \n    def _setup_logging(self):\n        \"\"\"Configure OpenTelemetry logging\"\"\"\n        self.logger_provider = LoggerProvider(resource=self.resource)\n        
set_logger_provider(self.logger_provider)\n        \n        log_exporter = OTLPLogExporter(\n            endpoint=f\"{self.logging_endpoint}/v1/logs\"\n        )\n        \n        self.logger_provider.add_log_record_processor(\n            BatchLogRecordProcessor(\n                exporter=log_exporter,\n                max_queue_size=30,\n                max_export_batch_size=5\n            )\n        )\n        \n        # Setup root logger\n        handler = LoggingHandler(\n            level=logging.NOTSET,\n            logger_provider=self.logger_provider\n        )\n        logging.getLogger().addHandler(handler)\n        logging.getLogger().setLevel(logging.INFO)\n        \n        self.logger = logging.getLogger(self.service_name)\n    \n    def _setup_tracing(self):\n        \"\"\"Configure OpenTelemetry tracing\"\"\"\n        trace.set_tracer_provider(TracerProvider(resource=self.resource))\n        \n        otlp_exporter = OTLPSpanExporter(\n            endpoint=f\"{self.tracing_endpoint}/v1/traces\",\n            insecure=True\n        )\n        \n        span_processor = BatchSpanProcessor(\n            span_exporter=otlp_exporter,\n            max_export_batch_size=1\n        )\n        \n        trace.get_tracer_provider().add_span_processor(span_processor)\n        self.tracer = trace.get_tracer(__name__)\n\n    def _setup_profiling(self):\n        \"\"\"Configure Pyroscope profiling + OTel span-profile linkage.\n\n        Pyroscope collects CPU samples from this process and pushes pprof to\n        the configured server. 
``PyroscopeSpanProcessor`` attaches the current\n        profile id to every span so the trace view in Grafana can link back\n        to the flamegraph captured while each span was active.\n        \"\"\"\n        pyroscope.configure(\n            application_name=self.service_name,\n            server_address=os.getenv(\"PYROSCOPE_SERVER_ADDRESS\", \"http://alloy:9999\"),\n            tags={\"service_name\": self.service_name},\n            oncpu=True,\n            gil_only=True,\n        )\n        trace.get_tracer_provider().add_span_processor(PyroscopeSpanProcessor())\n\n    def _setup_metrics(self):\n        \"\"\"Configure OpenTelemetry metrics\"\"\"\n        # Create the metrics exporter\n        self.metric_exporter = OTLPMetricExporter(\n            endpoint=f\"{self.metrics_endpoint}/v1/metrics\"\n        )\n\n        # Set up periodic metric reader with manual collection capability\n        self.metric_reader = PeriodicExportingMetricReader(\n            self.metric_exporter,\n            export_interval_millis=10000  # Export every 10 seconds\n        )\n\n        # Create and set meter provider with exemplar support\n        self.meter_provider = MeterProvider(\n            metric_readers=[self.metric_reader],\n            resource=self.resource,\n            exemplar_filter=TraceBasedExemplarFilter()\n        )\n        metrics.set_meter_provider(self.meter_provider)\n\n        # Get meter for creating metrics\n        self.meter = metrics.get_meter(__name__)\n\n        # Create observable gauges for game metrics\n        self._setup_game_gauges()\n\n    def _setup_game_gauges(self):\n        \"\"\"Set up observable gauges for game metrics\"\"\"\n        # Resource gauge\n        self.resource_gauge = self.meter.create_observable_gauge(\n            name=\"game.resources\",\n            description=\"Current resources at location\",\n            callbacks=[self._observe_resources],\n            unit=\"1\"\n        )\n\n        # Army size gauge\n      
  self.army_gauge = self.meter.create_observable_gauge(\n            name=\"game.army_size\",\n            description=\"Current army size at location\",\n            callbacks=[self._observe_army_size],\n            unit=\"1\"\n        )\n\n        # Battle count counter\n        self.battle_counter = self.meter.create_counter(\n            name=\"game.battles\",\n            description=\"Number of battles fought\",\n            unit=\"1\"\n        )\n\n        # Resource transfer gauge\n        self.cooldown_gauge = self.meter.create_observable_gauge(\n            name=\"game.resource_transfer_cooldown\",\n            description=\"Resource transfer cooldown status\",\n            callbacks=[self._observe_resource_cooldown],\n            unit=\"s\"\n        )\n\n        # Location control gauge\n        self.control_gauge = self.meter.create_observable_gauge(\n            name=\"game.location_control\",\n            description=\"Current faction controlling the location\",\n            callbacks=[self._observe_location_control],\n            unit=\"1\"\n        )\n\n        # Log that metrics have been set up\n        self.logger.info(\"Game metrics initialized\")\n\n    # Faction → numeric value for the ``game.location_control`` gauge.\n    # Existing WoK values (0/1/2) preserved for dashboard backward compat;\n    # new factions appended with fresh values.\n    _FACTION_VALUE = {\n        \"neutral\": 0,\n        \"northern\": 1,\n        \"southern\": 2,\n        \"nights_watch\": 3,\n        \"white_walkers\": 4,\n        \"barbarian\": 5,\n    }\n\n    def _active_location_id(self):\n        \"\"\"Return the currently served logical location id.\n\n        ``LocationServer`` sets ``self._location_id`` on the telemetry instance\n        at boot and refreshes it on ``/reload``. 
Fall back to the legacy\n        ``service_name.replace('-', '_')`` pattern for non-slot deployments.\n        \"\"\"\n        return getattr(self, \"_location_id\", None) or self.service_name.replace(\"-\", \"_\")\n\n    def _active_location_type(self):\n        return getattr(self, \"_location_type\", None) or \"village\"\n\n    def _observe_resources(self, options: CallbackOptions) -> Iterable[Observation]:\n        \"\"\"Callback to observe current resources\"\"\"\n        try:\n            location_id = self._active_location_id()\n            if hasattr(self, '_get_location_state'):\n                state = self._get_location_state(location_id)\n                if state:\n                    self.logger.debug(f\"Observing resources for {location_id}: {state['resources']}\")\n                    yield Observation(\n                        value=state[\"resources\"],\n                        attributes={\n                            \"location\": self.service_name,\n                            \"location_type\": self._active_location_type(),\n                        }\n                    )\n        except Exception as e:\n            self.logger.error(f\"Error observing resources: {e}\")\n\n    def _observe_army_size(self, options: CallbackOptions) -> Iterable[Observation]:\n        \"\"\"Callback to observe current army size\"\"\"\n        try:\n            location_id = self._active_location_id()\n            if hasattr(self, '_get_location_state'):\n                state = self._get_location_state(location_id)\n                if state:\n                    self.logger.debug(f\"Observing army size for {location_id}: {state['army']}\")\n                    yield Observation(\n                        value=state[\"army\"],\n                        attributes={\n                            \"location\": self.service_name,\n                            \"location_type\": self._active_location_type(),\n                            \"faction\": state[\"faction\"],\n 
                       }\n                    )\n        except Exception as e:\n            self.logger.error(f\"Error observing army size: {e}\")\n\n    def _observe_resource_cooldown(self, options: CallbackOptions) -> Iterable[Observation]:\n        \"\"\"Callback to observe resource transfer cooldown\"\"\"\n        try:\n            from datetime import datetime\n            location_id = self._active_location_id()\n            if hasattr(self, 'resource_cooldown') and location_id in self.resource_cooldown:\n                cooldown = self.resource_cooldown[location_id]\n                now = datetime.now()\n                if cooldown > now:\n                    cooldown_value = (cooldown - now).total_seconds()\n                    self.logger.debug(f\"Observing cooldown for {location_id}: {cooldown_value}s\")\n                    yield Observation(\n                        value=cooldown_value,\n                        attributes={\"location\": self.service_name}\n                    )\n                else:\n                    yield Observation(value=0, attributes={\"location\": location_id})\n        except Exception as e:\n            self.logger.error(f\"Error observing resource cooldown: {e}\")\n\n    def _observe_location_control(self, options: CallbackOptions) -> Iterable[Observation]:\n        \"\"\"Callback to observe location control status.\"\"\"\n        try:\n            location_id = self._active_location_id()\n            if hasattr(self, '_get_location_state'):\n                state = self._get_location_state(location_id)\n                if state:\n                    faction_value = self._FACTION_VALUE.get(state[\"faction\"], -1)\n                    self.logger.debug(\n                        f\"Observing control for {location_id}: {state['faction']} ({faction_value})\"\n                    )\n                    yield Observation(\n                        value=faction_value,\n                        attributes={\n                        
    \"location\": self.service_name,\n                            \"location_type\": self._active_location_type(),\n                            \"faction\": state[\"faction\"],\n                        }\n                    )\n        except Exception as e:\n            self.logger.error(f\"Error observing location control: {e}\")\n    \n    def get_tracer(self):\n        \"\"\"Get the configured tracer\"\"\"\n        return self.tracer\n    \n    def get_logger(self):\n        \"\"\"Get the configured logger\"\"\"\n        return self.logger\n\n    def get_meter(self):\n        \"\"\"Get the configured meter\"\"\"\n        return self.meter\n    \n    def record_battle(self, attacker_faction: str, defender_faction: str, result: str):\n        \"\"\"Record a battle event and force metrics collection\"\"\"\n        try:\n            self.battle_counter.add(\n                1,\n                {\n                    \"attacker_faction\": attacker_faction,\n                    \"defender_faction\": defender_faction,\n                    \"result\": result,\n                    \"location\": self.service_name\n                }\n            )\n            self.logger.info(f\"Battle recorded: {attacker_faction} vs {defender_faction} - {result}\")\n            # Force collection of all metrics\n            self.collect_metrics()\n        except Exception as e:\n            self.logger.error(f\"Error recording battle: {e}\")\n\n    def collect_metrics(self):\n        \"\"\"Force collection and export of all metrics\"\"\"\n        try:\n            # Collect metrics immediately\n            self.metric_reader.collect()\n            # Force flush to ensure metrics are exported\n            self.meter_provider.force_flush()\n            self.logger.debug(\"Metrics collected and flushed\")\n        except Exception as e:\n            self.logger.error(f\"Error collecting metrics: {e}\")\n\n    def shutdown(self):\n        \"\"\"Flush and shutdown all telemetry 
providers.\"\"\"\n        try:\n            trace.get_tracer_provider().shutdown()\n        except Exception:\n            pass\n        try:\n            self.meter_provider.shutdown()\n        except Exception:\n            pass\n        try:\n            self.logger_provider.shutdown()\n        except Exception:\n            pass\n"
  },
  {
    "path": "game-of-tracing/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for Game of Tracing\n#\n# This is the OTel-native equivalent of the OTLP pipelines (traces, logs, metrics)\n# in config.alloy, for use with the Alloy OTel Engine.\n# Note: the Pyroscope profile receiver defined in config.alloy (port 9999) has no\n# counterpart in this file.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  otlphttp/logs:\n    endpoint: http://loki:3100/otlp\n\n  otlphttp/metrics:\n    endpoint: http://prometheus:9090/api/v1/otlp\n\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [otlp/tempo]\n    logs:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [otlphttp/logs]\n    metrics:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [otlphttp/metrics]\n"
  },
  {
    "path": "game-of-tracing/config.alloy",
    "content": "/*\n * Alloy configuration for the Game of Tracing telemetry pipeline:\n * OTLP traces, logs, and metrics, plus Pyroscope profiles.\n * No sampling is configured here; every received trace is forwarded.\n */\n\n// Receive OpenTelemetry traces, logs, and metrics over OTLP (HTTP and gRPC)\notelcol.receiver.otlp \"default\" {\n  http {}\n  grpc {}\n\n  output {\n    metrics = [otelcol.processor.batch.default.input]\n    logs = [otelcol.processor.batch.default.input]\n    traces = [otelcol.processor.batch.default.input]\n  }\n}\n\n// Batch processor to improve performance\notelcol.processor.batch \"default\" {\n  output {\n    traces = [otelcol.exporter.otlp.tempo.input]\n    logs = [otelcol.exporter.otlphttp.logs.input]\n    metrics = [otelcol.exporter.otlphttp.metrics.input]\n  }\n}\n\n// Send traces to Tempo\notelcol.exporter.otlp \"tempo\" {\n  client {\n    endpoint = \"tempo:4317\"\n    tls {\n      insecure = true\n    }\n  }\n} \n\n// Send logs to Loki over OTLP/HTTP\notelcol.exporter.otlphttp \"logs\" {\n  client {\n    endpoint = \"http://loki:3100/otlp\"\n  }\n\n}\n\n// Send metrics to the Prometheus OTLP endpoint\notelcol.exporter.otlphttp \"metrics\" {\n  client {\n    endpoint = \"http://prometheus:9090/api/v1/otlp\"\n  }\n}\n\n// Receive pprof profiles from the Python services and forward to Pyroscope.\npyroscope.receive_http \"default\" {\n  http {\n    listen_address = \"0.0.0.0\"\n    listen_port    = 9999\n  }\n  forward_to = [pyroscope.write.default.receiver]\n}\n\npyroscope.write \"default\" {\n  endpoint {\n    url = \"http://pyroscope:4040\"\n  }\n}\n\nlivedebugging {\n  enabled = true\n}"
  },
  {
    "path": "game-of-tracing/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the River/HCL config.alloy file.\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n"
  },
  {
    "path": "game-of-tracing/docker-compose.coda.yml",
    "content": "services:\n  # Southern Capital\n  southern-capital:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5001:5001\"\n    environment:\n      - LOCATION_ID=southern_capital\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=southern-capital\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('southern_capital'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5001/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Northern Capital\n  northern-capital:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5002:5002\"\n    environment:\n      - LOCATION_ID=northern_capital\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=northern-capital\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('northern_capital'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5002/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Village 1\n  village-1:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5003:5003\"\n    environment:\n      - LOCATION_ID=village_1\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-1\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('village_1'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", 
\"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5003/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Village 2\n  village-2:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5004:5004\"\n    environment:\n      - LOCATION_ID=village_2\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-2\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('village_2'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5004/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Village 3\n  village-3:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5005:5005\"\n    environment:\n      - LOCATION_ID=village_3\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-3\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('village_3'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5005/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Village 4\n  village-4:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5006:5006\"\n    environment:\n      - LOCATION_ID=village_4\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-4\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = 
LocationServer('village_4'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5006/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Village 5\n  village-5:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5007:5007\"\n    environment:\n      - LOCATION_ID=village_5\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-5\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('village_5'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5007/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Village 6\n  village-6:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5008:5008\"\n    environment:\n      - LOCATION_ID=village_6\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-6\n      - DATABASE_FILE=/data/game_state.db\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"-c\", \"from location_server import LocationServer; server = LocationServer('village_6'); server.run()\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5008/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n\n  # Web-based war map application\n  war-map:\n    build:\n      context: ./war_map\n      dockerfile: Dockerfile\n    ports:\n      - \"8080:8080\"\n    environment:\n      - DATABASE_FILE=/data/game_state.db\n      - LOCATION_NAME=war-map\n      - SECRET_KEY=war_of_westeros_secret_key\n      - IN_DOCKER=1\n      - 
AI_URL=http://ai-opponent:8081\n      - TEMPO_URL=http://tempo:3200\n    volumes:\n      - game-data:/data\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:8080/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      southern-capital:\n        condition: service_healthy\n      northern-capital:\n        condition: service_healthy\n      village-1:\n        condition: service_healthy\n      village-2:\n        condition: service_healthy\n      village-3:\n        condition: service_healthy\n      village-4:\n        condition: service_healthy\n      village-5:\n        condition: service_healthy\n      village-6:\n        condition: service_healthy\n\n  # AI Opponent Service\n  ai-opponent:\n    build:\n      context: ./ai_opponent\n      dockerfile: Dockerfile\n    ports:\n      - \"8081:8081\"\n    environment:\n      - IN_DOCKER=1\n      - LOCATION_NAME=ai-opponent\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:8081/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      southern-capital:\n        condition: service_healthy\n      northern-capital:\n        condition: service_healthy\n      village-1:\n        condition: service_healthy\n      village-2:\n        condition: service_healthy\n      village-3:\n        condition: service_healthy\n      village-4:\n        condition: service_healthy\n      village-5:\n        condition: service_healthy\n      village-6:\n        condition: service_healthy\n\nvolumes:\n  game-data:\n"
  },
  {
    "path": "game-of-tracing/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --enable-feature=exemplar-storage\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for tracing\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp    # tempo\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n    depends_on:\n      - prometheus\n\n  # Pyroscope v2 for continuous profiling\n  pyroscope:\n    image: grafana/pyroscope:${GRAFANA_PYROSCOPE_VERSION:-2.0.1}\n    ports:\n      - \"4040:4040\"\n    command:\n      - \"-config.file=/etc/pyroscope/config.yaml\"\n      - \"-architecture.storage=v1-v2-dual\"\n    volumes:\n      - ./pyroscope-config.yaml:/etc/pyroscope/config.yaml\n      - pyroscope-data:/data\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_SECURITY_ALLOW_EMBEDDING=true\n      - GF_SECURITY_DISABLE_SANITIZE_HTML=true\n      - GF_FEATURE_TOGGLES_ENABLE=dashboardNewLayouts,kubernetesDashboards,provisioning\n      - GF_PATHS_PERMITTED_PROVISIONING_PATHS=grafana/|/etc/grafana/provisioning/dashboards/\n      - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-exploretraces-app/grafana-exploretraces-app-latest.zip;grafana-traces-app\n    volumes:\n      - 
./grafana:/etc/grafana/provisioning\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - prometheus\n      - tempo\n      - pyroscope\n\n  # Alloy for telemetry pipeline and tail sampling\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n      - 4317:4317/tcp    # OTLP gRPC\n      - 4318:4318/tcp    # OTLP HTTP\n      - 9999:9999/tcp    # Pyroscope HTTP receiver\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - pyroscope\n\n  # Game of Kingdoms War Game Services\n\n  # Southern Capital\n  southern-capital:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5001:5001\"\n    environment:\n      - SLOT_ID=slot_1\n      - LOCATION_ID=southern_capital\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=southern-capital\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5001/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Northern Capital\n  northern-capital:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5002:5002\"\n    environment:\n      - SLOT_ID=slot_2\n      - LOCATION_ID=northern_capital\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=northern-capital\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    
healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5002/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Village 1\n  village-1:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5003:5003\"\n    environment:\n      - SLOT_ID=slot_3\n      - LOCATION_ID=village_1\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-1\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5003/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Village 2\n  village-2:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5004:5004\"\n    environment:\n      - SLOT_ID=slot_4\n      - LOCATION_ID=village_2\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-2\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5004/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Village 3\n  village-3:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5005:5005\"\n    environment:\n      - SLOT_ID=slot_5\n      - LOCATION_ID=village_3\n      - FLASK_APP=location_server.py\n      - 
LOCATION_NAME=village-3\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5005/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Village 4\n  village-4:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5006:5006\"\n    environment:\n      - SLOT_ID=slot_6\n      - LOCATION_ID=village_4\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-4\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5006/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Village 5\n  village-5:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5007:5007\"\n    environment:\n      - SLOT_ID=slot_7\n      - LOCATION_ID=village_5\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-5\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5007/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Village 6\n  village-6:\n    
build:\n      context: ./app\n      dockerfile: Dockerfile\n    ports:\n      - \"5008:5008\"\n    environment:\n      - SLOT_ID=slot_8\n      - LOCATION_ID=village_6\n      - FLASK_APP=location_server.py\n      - LOCATION_NAME=village-6\n      - DATABASE_FILE=/data/game_state.db\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n      - IN_DOCKER=1\n    volumes:\n      - game-data:/data\n    command: [\"python\", \"location_server.py\"]\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:5008/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      - alloy\n\n  # Web-based war map application\n  war-map:\n    build:\n      context: ./war_map\n      dockerfile: Dockerfile\n    ports:\n      - \"8080:8080\"\n    environment:\n      - DATABASE_FILE=/data/game_state.db\n      - GAME_SESSIONS_DB=/data/game_sessions.db\n      - LOCATION_NAME=war-map\n      - SECRET_KEY=war_of_westeros_secret_key\n      - IN_DOCKER=1\n      - AI_URL=http://ai-opponent:8081\n      - TEMPO_URL=http://tempo:3200\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n    volumes:\n      - game-data:/data\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:8080/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      southern-capital:\n        condition: service_healthy\n      northern-capital:\n        condition: service_healthy\n      village-1:\n        condition: service_healthy\n      village-2:\n        condition: service_healthy\n      village-3:\n        condition: service_healthy\n      village-4:\n        condition: service_healthy\n      village-5:\n        condition: service_healthy\n      village-6:\n        condition: service_healthy\n      tempo:\n        condition: service_started\n\n  # AI Opponent Service\n  
ai-opponent:\n    build:\n      context: ./ai_opponent\n      dockerfile: Dockerfile\n    ports:\n      - \"8081:8081\"\n    environment:\n      - IN_DOCKER=1\n      - LOCATION_NAME=ai-opponent\n      - PYROSCOPE_SERVER_ADDRESS=http://alloy:9999\n    healthcheck:\n      test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:8081/health')\"]\n      interval: 5s\n      timeout: 3s\n      retries: 5\n      start_period: 10s\n    depends_on:\n      southern-capital:\n        condition: service_healthy\n      northern-capital:\n        condition: service_healthy\n      village-1:\n        condition: service_healthy\n      village-2:\n        condition: service_healthy\n      village-3:\n        condition: service_healthy\n      village-4:\n        condition: service_healthy\n      village-5:\n        condition: service_healthy\n      village-6:\n        condition: service_healthy\n      alloy:\n        condition: service_started\n\nvolumes:\n  game-data:\n  pyroscope-data:\n"
  },
  {
    "path": "game-of-tracing/grafana/dashboards/War of Kingdoms-1747821967780.json",
    "content": "{\n  \"apiVersion\": \"dashboard.grafana.app/v2beta1\",\n  \"kind\": \"Dashboard\",\n  \"metadata\": {\n    \"name\": \"game-dashboard\"\n  },\n  \"spec\": {\n    \"annotations\": [\n      {\n        \"kind\": \"AnnotationQuery\",\n        \"spec\": {\n          \"builtIn\": true,\n          \"enable\": true,\n          \"hide\": true,\n          \"iconColor\": \"rgba(0, 211, 255, 1)\",\n          \"name\": \"Annotations & Alerts\",\n          \"query\": {\n            \"datasource\": {\n              \"name\": \"-- Grafana --\"\n            },\n            \"group\": \"grafana\",\n            \"kind\": \"DataQuery\",\n            \"spec\": {},\n            \"version\": \"v0\"\n          }\n        }\n      }\n    ],\n    \"cursorSync\": \"Off\",\n    \"description\": \"\",\n    \"editable\": true,\n    \"elements\": {\n      \"panel-1\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"filters\": [\n                          {\n                            \"id\": \"f905accb\",\n                            \"operator\": \"=\",\n                            \"scope\": \"span\"\n                          },\n                          {\n                            \"id\": \"service-name\",\n                            \"operator\": \"=\",\n                            \"scope\": \"resource\",\n                            \"tag\": \"service.name\",\n                            \"value\": [\n                              \"war_map\"\n                            ],\n                            \"valueType\": \"string\"\n                          }\n                
        ],\n                        \"limit\": 20,\n                        \"metricsQueryType\": \"range\",\n                        \"queryType\": \"traceqlSearch\",\n                        \"tableType\": \"traces\"\n                      },\n                      \"group\": \"tempo\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 1,\n          \"links\": [],\n          \"title\": \"Player Decisions\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"custom\": {\n                    \"align\": \"auto\",\n                    \"cellOptions\": {\n                      \"type\": \"auto\"\n                    },\n                    \"inspect\": false\n                  },\n                  \"mappings\": [],\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\"\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"cellHeight\": \"sm\",\n                \"footer\": {\n                  \"countRows\": false,\n                  \"fields\": \"\",\n                  \"reducer\": [\n                    \"sum\"\n                  ],\n                  \"show\": false\n                },\n                \"showHeader\": true\n              },\n              \"pluginVersion\": 
\"12.0.0\"\n            },\n            \"group\": \"table\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-10\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"expr\": \"game_location_control_ratio{location=\\\"northern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 10,\n          \"links\": [],\n          \"title\": \"Location Allegiance\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 2,\n                  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n      
                {\n                        \"color\": \"#370a4d\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"super-light-blue\",\n                        \"value\": 1\n                      },\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 2\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"displayMode\": \"gradient\",\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"list\",\n                  \"placement\": \"bottom\",\n                  \"showLegend\": false\n                },\n                \"maxVizHeight\": 300,\n                \"minVizHeight\": 16,\n                \"minVizWidth\": 8,\n                \"namePlacement\": \"auto\",\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showUnfilled\": true,\n                \"sizing\": \"auto\",\n                \"valueMode\": \"color\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"bargauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-11\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      
\"spec\": {\n                        \"direction\": \"backward\",\n                        \"editorMode\": \"code\",\n                        \"expr\": \"{service_name=\\\"northern-capital\\\"} | code_function_name != \\\"_log\\\"\",\n                        \"queryType\": \"range\"\n                      },\n                      \"group\": \"loki\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 11,\n          \"links\": [],\n          \"title\": \"Location History\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {},\n                \"overrides\": []\n              },\n              \"options\": {\n                \"dedupStrategy\": \"none\",\n                \"enableInfiniteScrolling\": false,\n                \"enableLogDetails\": true,\n                \"prettifyLogMessage\": false,\n                \"showCommonLabels\": false,\n                \"showLabels\": false,\n                \"showTime\": false,\n                \"sortOrder\": \"Descending\",\n                \"wrapLogMessage\": false\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"logs\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-12\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n        
              \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"code\",\n                        \"expr\": \"sum(game_army_size_ratio{job=\\\"$villages\\\"}) without (faction)\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 12,\n          \"links\": [],\n          \"title\": \"Current Army\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  },\n                  \"unit\": \"short\"\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"colorMode\": \"background\",\n                \"graphMode\": \"none\",\n                \"justifyMode\": \"auto\",\n                \"orientation\": \"auto\",\n                \"percentChangeColorMode\": \"standard\",\n                
\"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showPercentChange\": false,\n                \"textMode\": \"auto\",\n                \"wideLayout\": true\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"stat\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-13\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"expr\": \"game_resources_ratio{job=\\\"$villages\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 13,\n          \"links\": [],\n          \"title\": \"Current Resources\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            
\"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 200,\n                  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 30\n                      },\n                      {\n                        \"color\": \"#EAB839\",\n                        \"value\": 50\n                      },\n                      {\n                        \"color\": \"dark-green\",\n                        \"value\": 100\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"minVizHeight\": 75,\n                \"minVizWidth\": 75,\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showThresholdLabels\": false,\n                \"showThresholdMarkers\": true,\n                \"sizing\": \"auto\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"gauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-14\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  
\"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"code\",\n                        \"expr\": \"sum(game_location_control_ratio{job=\\\"$villages\\\"}) without (faction)\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 14,\n          \"links\": [],\n          \"title\": \"Location Allegiance\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 2,\n                  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"#370a4d\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"super-light-blue\",\n                        \"value\": 1\n                      },\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 2\n       
               }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"displayMode\": \"gradient\",\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"list\",\n                  \"placement\": \"bottom\",\n                  \"showLegend\": false\n                },\n                \"maxVizHeight\": 300,\n                \"minVizHeight\": 16,\n                \"minVizWidth\": 8,\n                \"namePlacement\": \"auto\",\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showUnfilled\": true,\n                \"sizing\": \"auto\",\n                \"valueMode\": \"color\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"bargauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-15\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"direction\": \"backward\",\n                        \"editorMode\": \"code\",\n                        \"expr\": \"{service_name=\\\"$villages\\\"} | code_function_name !=\\\"_log\\\"\",\n                        \"queryType\": \"range\"\n                      },\n                      \"group\": \"loki\",\n                      \"version\": \"v0\"\n          
          },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 15,\n          \"links\": [],\n          \"title\": \"Location History\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {},\n                \"overrides\": []\n              },\n              \"options\": {\n                \"dedupStrategy\": \"none\",\n                \"enableInfiniteScrolling\": false,\n                \"enableLogDetails\": true,\n                \"prettifyLogMessage\": false,\n                \"showCommonLabels\": false,\n                \"showLabels\": false,\n                \"showTime\": false,\n                \"sortOrder\": \"Descending\",\n                \"wrapLogMessage\": false\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"logs\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-16\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"filters\": [\n                          {\n                            \"id\": \"e020e714\",\n                            \"operator\": \"=\",\n                            \"scope\": \"span\"\n                          }\n                        ],\n                        \"limit\": 20,\n                        \"metricsQueryType\": \"range\",\n       
                 \"queryType\": \"serviceMap\",\n                        \"serviceMapQuery\": \"{}\",\n                        \"tableType\": \"traces\"\n                      },\n                      \"group\": \"tempo\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 16,\n          \"links\": [],\n          \"title\": \"War Map\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {},\n                \"overrides\": []\n              },\n              \"options\": {\n                \"edges\": {},\n                \"layoutAlgorithm\": \"layered\",\n                \"nodes\": {},\n                \"zoomMode\": \"cooperative\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"nodeGraph\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-17\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"filters\": [\n                          {\n                            \"id\": \"9aa2da84\",\n                            \"operator\": \">\",\n                            \"scope\": \"span\",\n                            \"tag\": \"army_size\",\n                            \"value\": [\n                              
\"3\"\n                            ]\n                          }\n                        ],\n                        \"limit\": 20,\n                        \"metricsQueryType\": \"range\",\n                        \"query\": \"{span.army_size>3}\",\n                        \"queryType\": \"traceqlSearch\",\n                        \"tableType\": \"traces\"\n                      },\n                      \"group\": \"tempo\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 17,\n          \"links\": [],\n          \"title\": \"Army Size Greater than 3\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"custom\": {\n                    \"align\": \"auto\",\n                    \"cellOptions\": {\n                      \"type\": \"auto\"\n                    },\n                    \"inspect\": false\n                  },\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"cellHeight\": \"sm\",\n                \"footer\": {\n                  \"countRows\": false,\n                  \"fields\": \"\",\n                  \"reducer\": [\n                    \"sum\"\n                
  ],\n                  \"show\": false\n                },\n                \"showHeader\": true\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"table\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-18\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"expr\": \"game_army_size_ratio{location=\\\"southern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 18,\n          \"links\": [],\n          \"title\": \"Current Army\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 10,\n                
  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 3\n                      },\n                      {\n                        \"color\": \"#EAB839\",\n                        \"value\": 5\n                      },\n                      {\n                        \"color\": \"dark-green\",\n                        \"value\": 15\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"minVizHeight\": 75,\n                \"minVizWidth\": 75,\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showThresholdLabels\": false,\n                \"showThresholdMarkers\": true,\n                \"sizing\": \"auto\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"gauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-19\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n         
               \"editorMode\": \"builder\",\n                        \"exemplar\": true,\n                        \"expr\": \"game_battles_total\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 19,\n          \"links\": [],\n          \"title\": \"Battles\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"palette-classic\"\n                  },\n                  \"custom\": {\n                    \"axisBorderShow\": false,\n                    \"axisCenteredZero\": false,\n                    \"axisColorMode\": \"text\",\n                    \"axisLabel\": \"\",\n                    \"axisPlacement\": \"auto\",\n                    \"barAlignment\": 0,\n                    \"barWidthFactor\": 0.6,\n                    \"drawStyle\": \"line\",\n                    \"fillOpacity\": 17,\n                    \"gradientMode\": \"none\",\n                    \"hideFrom\": {\n                      \"legend\": false,\n                      \"tooltip\": false,\n                      \"viz\": false\n                    },\n                    \"insertNulls\": false,\n                    \"lineInterpolation\": \"linear\",\n                    \"lineStyle\": {\n        
              \"fill\": \"solid\"\n                    },\n                    \"lineWidth\": 1,\n                    \"pointSize\": 1,\n                    \"scaleDistribution\": {\n                      \"type\": \"linear\"\n                    },\n                    \"showPoints\": \"auto\",\n                    \"spanNulls\": false,\n                    \"stacking\": {\n                      \"group\": \"A\",\n                      \"mode\": \"none\"\n                    },\n                    \"thresholdsStyle\": {\n                      \"mode\": \"off\"\n                    }\n                  },\n                  \"mappings\": [],\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\"\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"list\",\n                  \"placement\": \"bottom\",\n                  \"showLegend\": false\n                },\n                \"tooltip\": {\n                  \"hideZeros\": false,\n                  \"mode\": \"single\",\n                  \"sort\": \"none\"\n                }\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"timeseries\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-2\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  
\"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"code\",\n                        \"exemplar\": true,\n                        \"expr\": \"sum by (faction) (game_army_size_ratio)\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 2,\n          \"links\": [],\n          \"title\": \"Current Army by Faction\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"palette-classic\"\n                  },\n                  \"custom\": {\n                    \"axisBorderShow\": false,\n                    \"axisCenteredZero\": false,\n                    \"axisColorMode\": \"text\",\n                    \"axisLabel\": \"\",\n                    \"axisPlacement\": \"auto\",\n                    \"barAlignment\": 0,\n                    \"barWidthFactor\": 0.6,\n                    \"drawStyle\": \"line\",\n                    \"fillOpacity\": 29,\n                    \"gradientMode\": \"none\",\n                    \"hideFrom\": {\n                      
\"legend\": false,\n                      \"tooltip\": false,\n                      \"viz\": false\n                    },\n                    \"insertNulls\": false,\n                    \"lineInterpolation\": \"linear\",\n                    \"lineWidth\": 1,\n                    \"pointSize\": 5,\n                    \"scaleDistribution\": {\n                      \"type\": \"linear\"\n                    },\n                    \"showPoints\": \"auto\",\n                    \"spanNulls\": false,\n                    \"stacking\": {\n                      \"group\": \"A\",\n                      \"mode\": \"none\"\n                    },\n                    \"thresholdsStyle\": {\n                      \"mode\": \"off\"\n                    }\n                  },\n                  \"mappings\": [],\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\"\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"list\",\n                  \"placement\": \"bottom\",\n                  \"showLegend\": true\n                },\n                \"tooltip\": {\n                  \"hideZeros\": false,\n                  \"mode\": \"single\",\n                  \"sort\": \"none\"\n                }\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"timeseries\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-20\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            
\"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"filters\": [\n                          {\n                            \"id\": \"f905accb\",\n                            \"operator\": \"=\",\n                            \"scope\": \"span\"\n                          },\n                          {\n                            \"id\": \"service-name\",\n                            \"operator\": \"=\",\n                            \"scope\": \"resource\",\n                            \"tag\": \"service.name\",\n                            \"value\": [\n                              \"ai-opponent\"\n                            ],\n                            \"valueType\": \"string\"\n                          }\n                        ],\n                        \"limit\": 20,\n                        \"metricsQueryType\": \"range\",\n                        \"queryType\": \"traceqlSearch\",\n                        \"tableType\": \"traces\"\n                      },\n                      \"group\": \"tempo\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 20,\n          \"links\": [],\n          \"title\": \"AI Decisions\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"custom\": {\n                    \"align\": \"auto\",\n                    \"cellOptions\": 
{\n                      \"type\": \"auto\"\n                    },\n                    \"inspect\": false\n                  },\n                  \"mappings\": [],\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\"\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"cellHeight\": \"sm\",\n                \"footer\": {\n                  \"countRows\": false,\n                  \"fields\": \"\",\n                  \"reducer\": [\n                    \"sum\"\n                  ],\n                  \"show\": false\n                },\n                \"showHeader\": true\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"table\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-3\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"code\",\n                        \"exemplar\": true,\n                        \"expr\": \"game_resources_ratio{location_type=\\\"capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        
\"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 3,\n          \"links\": [],\n          \"title\": \"Current Resources (By Captital)\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"palette-classic-by-name\"\n                  },\n                  \"custom\": {\n                    \"axisBorderShow\": false,\n                    \"axisCenteredZero\": false,\n                    \"axisColorMode\": \"text\",\n                    \"axisLabel\": \"\",\n                    \"axisPlacement\": \"auto\",\n                    \"barAlignment\": 0,\n                    \"barWidthFactor\": 0.6,\n                    \"drawStyle\": \"line\",\n                    \"fillOpacity\": 26,\n                    \"gradientMode\": \"none\",\n                    \"hideFrom\": {\n                      \"legend\": false,\n                      \"tooltip\": false,\n                      \"viz\": false\n                    },\n                    \"insertNulls\": false,\n                    \"lineInterpolation\": \"linear\",\n                    \"lineWidth\": 1,\n                    \"pointSize\": 5,\n                    \"scaleDistribution\": {\n                      \"type\": \"linear\"\n                    },\n                    \"showPoints\": \"auto\",\n                    \"spanNulls\": false,\n              
      \"stacking\": {\n                      \"group\": \"A\",\n                      \"mode\": \"none\"\n                    },\n                    \"thresholdsStyle\": {\n                      \"mode\": \"off\"\n                    }\n                  },\n                  \"mappings\": [],\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\"\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"list\",\n                  \"placement\": \"bottom\",\n                  \"showLegend\": true\n                },\n                \"tooltip\": {\n                  \"hideZeros\": false,\n                  \"mode\": \"single\",\n                  \"sort\": \"none\"\n                }\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"timeseries\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-4\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"direction\": \"backward\",\n                        \"editorMode\": \"code\",\n                        \"expr\": \"{service_name=\\\"southern-capital\\\"} | 
code_function_name != \\\"_log\\\"\",\n                        \"queryType\": \"range\"\n                      },\n                      \"group\": \"loki\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 4,\n          \"links\": [],\n          \"title\": \"Location History\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {},\n                \"overrides\": []\n              },\n              \"options\": {\n                \"dedupStrategy\": \"none\",\n                \"enableInfiniteScrolling\": false,\n                \"enableLogDetails\": true,\n                \"prettifyLogMessage\": false,\n                \"showCommonLabels\": false,\n                \"showLabels\": false,\n                \"showTime\": false,\n                \"sortOrder\": \"Descending\",\n                \"wrapLogMessage\": false\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"logs\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-5\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"exemplar\": true,\n                   
     \"expr\": \"game_army_size_ratio{location=\\\"southern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 5,\n          \"links\": [],\n          \"title\": \"Current Army\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"fixedColor\": \"dark-red\",\n                    \"mode\": \"fixed\"\n                  },\n                  \"custom\": {\n                    \"axisBorderShow\": false,\n                    \"axisCenteredZero\": false,\n                    \"axisColorMode\": \"text\",\n                    \"axisLabel\": \"\",\n                    \"axisPlacement\": \"auto\",\n                    \"barAlignment\": 0,\n                    \"barWidthFactor\": 0.6,\n                    \"drawStyle\": \"line\",\n                    \"fillOpacity\": 20,\n                    \"gradientMode\": \"scheme\",\n                    \"hideFrom\": {\n                      \"legend\": false,\n                      \"tooltip\": false,\n                      \"viz\": false\n                    },\n                    \"insertNulls\": false,\n                    \"lineInterpolation\": \"smooth\",\n                    \"lineWidth\": 3,\n                    
\"pointSize\": 5,\n                    \"scaleDistribution\": {\n                      \"type\": \"linear\"\n                    },\n                    \"showPoints\": \"auto\",\n                    \"spanNulls\": false,\n                    \"stacking\": {\n                      \"group\": \"A\",\n                      \"mode\": \"none\"\n                    },\n                    \"thresholdsStyle\": {\n                      \"mode\": \"off\"\n                    }\n                  },\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"green\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 80\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"hidden\",\n                  \"placement\": \"right\",\n                  \"showLegend\": false\n                },\n                \"tooltip\": {\n                  \"hideZeros\": false,\n                  \"mode\": \"single\",\n                  \"sort\": \"none\"\n                }\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"timeseries\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-6\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      
\"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"expr\": \"game_resources_ratio{location=\\\"southern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 6,\n          \"links\": [],\n          \"title\": \"Current Resources\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 200,\n                  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 30\n                      },\n                      {\n                        \"color\": \"#EAB839\",\n                        \"value\": 50\n                      },\n                      {\n                        \"color\": \"dark-green\",\n                        \"value\": 
100\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"minVizHeight\": 75,\n                \"minVizWidth\": 75,\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showThresholdLabels\": false,\n                \"showThresholdMarkers\": true,\n                \"sizing\": \"auto\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"gauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-7\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"expr\": \"game_location_control_ratio{location=\\\"southern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n       
         }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 7,\n          \"links\": [],\n          \"title\": \"Location Allegiance\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 2,\n                  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"#370a4d\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"super-light-blue\",\n                        \"value\": 1\n                      },\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 2\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"displayMode\": \"gradient\",\n                \"legend\": {\n                  \"calcs\": [],\n                  \"displayMode\": \"list\",\n                  \"placement\": \"bottom\",\n                  \"showLegend\": false\n                },\n                \"maxVizHeight\": 300,\n                \"minVizHeight\": 16,\n                \"minVizWidth\": 8,\n                \"namePlacement\": \"auto\",\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showUnfilled\": 
true,\n                \"sizing\": \"auto\",\n                \"valueMode\": \"color\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"bargauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-8\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n                        \"editorMode\": \"builder\",\n                        \"expr\": \"game_army_size_ratio{location=\\\"northern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 8,\n          \"links\": [],\n          \"title\": \"Current Army\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 10,\n                  
\"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 3\n                      },\n                      {\n                        \"color\": \"#EAB839\",\n                        \"value\": 5\n                      },\n                      {\n                        \"color\": \"dark-green\",\n                        \"value\": 15\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n              },\n              \"options\": {\n                \"minVizHeight\": 75,\n                \"minVizWidth\": 75,\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showThresholdLabels\": false,\n                \"showThresholdMarkers\": true,\n                \"sizing\": \"auto\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"gauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      },\n      \"panel-9\": {\n        \"kind\": \"Panel\",\n        \"spec\": {\n          \"data\": {\n            \"kind\": \"QueryGroup\",\n            \"spec\": {\n              \"queries\": [\n                {\n                  \"kind\": \"PanelQuery\",\n                  \"spec\": {\n                    \"hidden\": false,\n                    \"query\": {\n                      \"kind\": \"DataQuery\",\n                      \"spec\": {\n                        \"disableTextWrap\": false,\n            
            \"editorMode\": \"builder\",\n                        \"expr\": \"game_resources_ratio{location=\\\"northern-capital\\\"}\",\n                        \"fullMetaSearch\": false,\n                        \"includeNullMetadata\": true,\n                        \"instant\": false,\n                        \"legendFormat\": \"__auto\",\n                        \"range\": true,\n                        \"useBackend\": false\n                      },\n                      \"group\": \"prometheus\",\n                      \"version\": \"v0\"\n                    },\n                    \"refId\": \"A\"\n                  }\n                }\n              ],\n              \"queryOptions\": {},\n              \"transformations\": []\n            }\n          },\n          \"description\": \"\",\n          \"id\": 9,\n          \"links\": [],\n          \"title\": \"Current Resources\",\n          \"vizConfig\": {\n            \"kind\": \"VizConfig\",\n            \"spec\": {\n              \"fieldConfig\": {\n                \"defaults\": {\n                  \"color\": {\n                    \"mode\": \"thresholds\"\n                  },\n                  \"max\": 200,\n                  \"min\": 0,\n                  \"thresholds\": {\n                    \"mode\": \"absolute\",\n                    \"steps\": [\n                      {\n                        \"color\": \"dark-red\",\n                        \"value\": 0\n                      },\n                      {\n                        \"color\": \"red\",\n                        \"value\": 30\n                      },\n                      {\n                        \"color\": \"#EAB839\",\n                        \"value\": 50\n                      },\n                      {\n                        \"color\": \"dark-green\",\n                        \"value\": 100\n                      }\n                    ]\n                  }\n                },\n                \"overrides\": []\n  
            },\n              \"options\": {\n                \"minVizHeight\": 75,\n                \"minVizWidth\": 75,\n                \"orientation\": \"auto\",\n                \"reduceOptions\": {\n                  \"calcs\": [\n                    \"lastNotNull\"\n                  ],\n                  \"fields\": \"\",\n                  \"values\": false\n                },\n                \"showThresholdLabels\": false,\n                \"showThresholdMarkers\": true,\n                \"sizing\": \"auto\"\n              },\n              \"pluginVersion\": \"12.0.0\"\n            },\n            \"group\": \"gauge\",\n            \"version\": \"12.4.0\"\n          }\n        }\n      }\n    },\n    \"layout\": {\n      \"kind\": \"TabsLayout\",\n      \"spec\": {\n        \"tabs\": [\n          {\n            \"kind\": \"TabsLayoutTab\",\n            \"spec\": {\n              \"layout\": {\n                \"kind\": \"RowsLayout\",\n                \"spec\": {\n                  \"rows\": [\n                    {\n                      \"kind\": \"RowsLayoutRow\",\n                      \"spec\": {\n                        \"collapse\": false,\n                        \"layout\": {\n                          \"kind\": \"GridLayout\",\n                          \"spec\": {\n                            \"items\": [\n                              {\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  \"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-19\"\n                                  },\n                                  \"height\": 7,\n                                  \"width\": 24,\n                                  \"x\": 0,\n                                  \"y\": 0\n                                }\n                              },\n                              
{\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  \"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-1\"\n                                  },\n                                  \"height\": 11,\n                                  \"width\": 24,\n                                  \"x\": 0,\n                                  \"y\": 7\n                                }\n                              },\n                              {\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  \"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-2\"\n                                  },\n                                  \"height\": 6,\n                                  \"width\": 12,\n                                  \"x\": 0,\n                                  \"y\": 18\n                                }\n                              },\n                              {\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  \"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-3\"\n                                  },\n                                  \"height\": 6,\n                                  \"width\": 12,\n                                  \"x\": 12,\n                                  \"y\": 18\n                                }\n                              },\n                              {\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  
\"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-20\"\n                                  },\n                                  \"height\": 11,\n                                  \"width\": 24,\n                                  \"x\": 0,\n                                  \"y\": 24\n                                }\n                              }\n                            ]\n                          }\n                        },\n                        \"title\": \"Current Overview\"\n                      }\n                    },\n                    {\n                      \"kind\": \"RowsLayoutRow\",\n                      \"spec\": {\n                        \"collapse\": false,\n                        \"layout\": {\n                          \"kind\": \"GridLayout\",\n                          \"spec\": {\n                            \"items\": [\n                              {\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  \"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-16\"\n                                  },\n                                  \"height\": 11,\n                                  \"width\": 24,\n                                  \"x\": 0,\n                                  \"y\": 0\n                                }\n                              },\n                              {\n                                \"kind\": \"GridLayoutItem\",\n                                \"spec\": {\n                                  \"element\": {\n                                    \"kind\": \"ElementReference\",\n                                    \"name\": \"panel-17\"\n                                  },\n                                  \"height\": 10,\n         
                         \"width\": 24,\n                                  \"x\": 0,\n                                  \"y\": 11\n                                }\n                              }\n                            ]\n                          }\n                        },\n                        \"title\": \"Trace Analytics\"\n                      }\n                    }\n                  ]\n                }\n              },\n              \"title\": \"War Map\"\n            }\n          },\n          {\n            \"kind\": \"TabsLayoutTab\",\n            \"spec\": {\n              \"layout\": {\n                \"kind\": \"GridLayout\",\n                \"spec\": {\n                  \"items\": [\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-18\"\n                        },\n                        \"height\": 6,\n                        \"width\": 9,\n                        \"x\": 0,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-6\"\n                        },\n                        \"height\": 6,\n                        \"width\": 8,\n                        \"x\": 9,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-7\"\n                        },\n               
         \"height\": 6,\n                        \"width\": 7,\n                        \"x\": 17,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-4\"\n                        },\n                        \"height\": 10,\n                        \"width\": 24,\n                        \"x\": 0,\n                        \"y\": 6\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-5\"\n                        },\n                        \"height\": 6,\n                        \"width\": 24,\n                        \"x\": 0,\n                        \"y\": 16\n                      }\n                    }\n                  ]\n                }\n              },\n              \"title\": \"Southern Kingdom\"\n            }\n          },\n          {\n            \"kind\": \"TabsLayoutTab\",\n            \"spec\": {\n              \"layout\": {\n                \"kind\": \"GridLayout\",\n                \"spec\": {\n                  \"items\": [\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-8\"\n                        },\n                        \"height\": 6,\n                        \"width\": 9,\n                        \"x\": 0,\n                        \"y\": 0\n                      }\n                    },\n               
     {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-9\"\n                        },\n                        \"height\": 6,\n                        \"width\": 8,\n                        \"x\": 9,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-10\"\n                        },\n                        \"height\": 6,\n                        \"width\": 7,\n                        \"x\": 17,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-11\"\n                        },\n                        \"height\": 10,\n                        \"width\": 24,\n                        \"x\": 0,\n                        \"y\": 6\n                      }\n                    }\n                  ]\n                }\n              },\n              \"title\": \"Northern Kingdom\"\n            }\n          },\n          {\n            \"kind\": \"TabsLayoutTab\",\n            \"spec\": {\n              \"layout\": {\n                \"kind\": \"GridLayout\",\n                \"spec\": {\n                  \"items\": [\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n 
                         \"name\": \"panel-12\"\n                        },\n                        \"height\": 6,\n                        \"width\": 9,\n                        \"x\": 0,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-13\"\n                        },\n                        \"height\": 6,\n                        \"width\": 8,\n                        \"x\": 9,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-14\"\n                        },\n                        \"height\": 6,\n                        \"width\": 7,\n                        \"x\": 17,\n                        \"y\": 0\n                      }\n                    },\n                    {\n                      \"kind\": \"GridLayoutItem\",\n                      \"spec\": {\n                        \"element\": {\n                          \"kind\": \"ElementReference\",\n                          \"name\": \"panel-15\"\n                        },\n                        \"height\": 10,\n                        \"width\": 24,\n                        \"x\": 0,\n                        \"y\": 6\n                      }\n                    }\n                  ]\n                }\n              },\n              \"title\": \"Villages\"\n            }\n          }\n        ]\n      }\n    },\n    \"links\": [],\n    \"liveNow\": false,\n    \"preload\": false,\n    \"tags\": [],\n    \"timeSettings\": {\n  
    \"autoRefresh\": \"\",\n      \"autoRefreshIntervals\": [\n        \"5s\",\n        \"10s\",\n        \"30s\",\n        \"1m\",\n        \"5m\",\n        \"15m\",\n        \"30m\",\n        \"1h\",\n        \"2h\",\n        \"1d\"\n      ],\n      \"fiscalYearStartMonth\": 0,\n      \"from\": \"now-30m\",\n      \"hideTimepicker\": false,\n      \"timezone\": \"browser\",\n      \"to\": \"now\"\n    },\n    \"title\": \"Game Dashboard\",\n    \"variables\": [\n      {\n        \"kind\": \"QueryVariable\",\n        \"spec\": {\n          \"hide\": \"dontHide\",\n          \"includeAll\": false,\n          \"label\": \"Villages\",\n          \"multi\": false,\n          \"name\": \"villages\",\n          \"query\": {\n            \"datasource\": {\n              \"name\": \"prometheus\"\n            },\n            \"group\": \"prometheus\",\n            \"kind\": \"DataQuery\",\n            \"spec\": {\n              \"qryType\": 1,\n              \"query\": \"label_values(game_resources_ratio,job)\",\n              \"refId\": \"PrometheusVariableQueryEditor-VariableQuery\"\n            },\n            \"version\": \"v0\"\n          },\n          \"refresh\": \"onDashboardLoad\",\n          \"regex\": \"\",\n          \"skipUrlSync\": false,\n          \"sort\": \"alphabeticalAsc\"\n        }\n      }\n    ]\n  }\n}\n"
  },
  {
    "path": "game-of-tracing/grafana/dashboards/dashboards.yaml",
    "content": "apiVersion: 1\nproviders:\n  - name: 'game-of-tracing'\n    orgId: 1\n    folder: ''\n    type: file\n    disableDeletion: true\n    updateIntervalSeconds: 10\n    allowUiUpdates: false\n    options:\n      path: /etc/grafana/provisioning/dashboards\n      foldersFromFilesStructure: false\n"
  },
  {
    "path": "game-of-tracing/grafana/datasources/defaults.yml",
    "content": "apiVersion: 1\ndatasources:\n- name: prometheus\n  uid: prometheus\n  type: prometheus\n  orgId: 1\n  url: http://prometheus:9090\n  basicAuth: false\n  isDefault: false\n  version: 1\n  editable: false\n  jsonData:\n    exemplarTraceIdDestinations:\n      - datasourceUid: \"tempo\"\n        name: \"trace_id\"\n- name: tempo\n  uid: tempo\n  type: tempo\n  access: proxy\n  orgId: 1\n  url: http://tempo:3200\n  basicAuth: false\n  isDefault: true\n  version: 1\n  editable: false\n  jsonData:\n    serviceMap:\n      datasourceUid: 'prometheus'\n    nodeGraph:\n      enabled: true\n    tracesToLogsV2:\n      datasourceUid: 'loki'\n      filterBySpanID: true\n    tracesToMetrics:\n      datasourceUid: 'prometheus'\n    tracesToProfilesV2:\n      datasourceUid: 'pyroscope'\n      tags:\n        - key: 'service.name'\n          value: 'service_name'\n      profileTypeId: 'process_cpu:cpu:nanoseconds:cpu:nanoseconds'\n- name: loki\n  uid: loki\n  type: loki\n  access: proxy\n  orgId: 1\n  url: http://loki:3100\n  basicAuth: false\n  isDefault: false\n  jsonData:\n    derivedFields:\n      - datasourceUid: \"tempo\"\n        matcherRegex: \"trace_id\"\n        matcherType: \"label\"\n        name: \"trace_id\"\n        targetBlank: true\n        url: \"$${__value.raw}\"\n        urlDisplayLabel: \"\"\n- name: pyroscope\n  uid: pyroscope\n  type: grafana-pyroscope-datasource\n  access: proxy\n  orgId: 1\n  url: http://pyroscope:4040\n  basicAuth: false\n  isDefault: false\n  editable: false\n"
  },
  {
    "path": "game-of-tracing/loki-config.yaml",
    "content": "auth_enabled: false\n\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: debug\n  grpc_server_max_concurrent_streams: 1000\n\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\n\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\n\nlimits_config:\n  metric_aggregation_enabled: true\n\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\n\nruler:\n  alertmanager_url: http://localhost:9093\n\nfrontend:\n  encoding: protobuf\n\n\n\n\n# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration\n# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/\n#\n# Statistics help us better understand how Loki is used, and they show us performance\n# levels for most users. This helps us prioritize features and documentation.\n# For more information on what's sent, look at\n# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go\n# Refer to the buildReport method to see what goes into a report.\n#\n# If you would like to disable reporting, uncomment the following lines:\n#analytics:\n#  reporting_enabled: false"
  },
  {
    "path": "game-of-tracing/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n\notlp:\n  keep_identifying_resource_attributes: true"
  },
  {
    "path": "game-of-tracing/pyroscope-config.yaml",
    "content": "---\n# Minimal Pyroscope v2 config for local single-binary demo.\n# v2 defaults (filesystem backend, v1-v2-dual storage) handle the rest.\n\nserver:\n  http_listen_port: 4040\n\nstorage:\n  backend: filesystem\n  filesystem:\n    dir: /data\n"
  },
  {
    "path": "game-of-tracing/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\n\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search  \n    memcached: \n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h                # maximum duration of a metrics query, increase for local setups\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:                           # this configuration will listen on all ports and protocols that tempo is capable of.\n    jaeger:                            # the receives all come from the OpenTelemetry collector.  more configuration information can\n      protocols:                       # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver\n        thrift_http:                   #\n          endpoint: \"tempo:14268\"      # for a production deployment you should only enable the receivers you need!\n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m               # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally\n\ncompactor:\n  compaction:\n    block_retention: 720h                # overall Tempo trace retention. 
set for demo purposes\n\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local                     # backend configuration to use\n    wal:\n      path: /var/tempo/wal             # where to store the wal locally\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator\n      generate_native_histograms: both\n      "
  },
  {
    "path": "game-of-tracing/war_map/CLAUDE.md",
    "content": "# war_map/ — UI + Span-Link Broker\n\n> Flask web UI on port 8080, game session orchestrator, and **owner of the span-link reconstruction logic that drives game replay**. This doc is read by any AI coding agent. For scenario-wide context read [`../AGENTS.md`](../AGENTS.md) first.\n\n## Purpose\n\n`war-map` is the human-facing surface of the game and the coordination point for everything the player touches:\n\n- Hosts the **map picker** (`/map_picker` + `/select_map`) that lets the user choose between `war_of_kingdoms` and `white_walkers_attack`, then renders the faction selection (or single-player auto-start) for the chosen map.\n- Renders the interactive game map (territory ownership, army sizes, supply routes, wall-hold HUD for WWA).\n- Manages faction selection, sessions, and the human player's identity.\n- Is the **sole writer** of the `game_actions` SQLite table — the record of every action's trace/span IDs that makes span-link replay possible (rows carry a `map_id` column).\n- Activates / deactivates the AI opponent on behalf of the player (auto-activates as `white_walkers` when the chosen map is WWA).\n- Proxies trace-replay queries to Tempo and falls back to local SQLite when Tempo is unavailable.\n- Instruments player actions as `SERVER` spans with `trace.Link`s chaining each action to the previous one in the session.\n- Runs the **wall-hold tick thread** (`_wall_tick_thread`, 30 s cadence) that increments `wall_hold` when one faction owns every wall keep, and declares the WWA winner at 5 consecutive ticks.\n\n## File map\n\n| File | Size | Purpose |\n|---|---|---|\n| `app.py` | ~64 KB | Flask app, session/player management, span-link broker, Tempo proxy for replay, AI activation control. |\n| `telemetry.py` | ~3 KB | `GameTelemetry` — traces + logs (no custom metrics), plus Pyroscope profiling with OTel span-profile linkage. |\n| `templates/index.html` | ~7 KB | Faction selection screen. 
|\n| `templates/map.html` | ~50 KB | Main SVG-based game map with real-time updates. |\n| `templates/layout.html` | ~4 KB | Shared layout chrome. |\n| `templates/replay.html` | ~6 KB | Replay session picker. |\n| `templates/replay_session.html` | ~28 KB | Per-session trace-replay UI — the consumer of the span-link chain. |\n| `static/css/style.css` | — | UI styling. |\n| `Dockerfile` | small | `python:3.11-slim`, runs `python app.py`. |\n| `requirements.txt` | small | Flask 3.1.3, requests 2.33.1, python-dotenv 1.2.2, OpenTelemetry SDK/API + exporters, `pyroscope-io` + `pyroscope-otel` for profiling. |\n\n## The span-link broker (the critical bit)\n\n### Two SQLite databases — do not confuse\n\n| File | Owner | Purpose |\n|---|---|---|\n| `game_state.db` | All 8 location services (WAL mode, shared) | Canonical game state |\n| `game_sessions.db` | `war_map` **only** | `game_actions` table: `(game_session_id, action_sequence, action_type, player_name, faction, trace_id, span_id, location_id, target_location_id, timestamp, game_state_after, map_id)` |\n\n`game_actions` schema is defined in `init_game_session_tracking()` at `app.py:60-96`. It carries a `UNIQUE(game_session_id, action_sequence)` constraint — the sequence is what lets \"next action\" look up \"previous action\" deterministically.\n\n### Storing an action — `store_game_action()` at `app.py:101-128`\n\nCalled at the tail of every action handler. Reads the current max `action_sequence` for the session, inserts a new row with `next_sequence = max + 1`, returns the sequence number. Persists the active `map_id` (defaults to `get_active_map_id()` when callers don't pass one) so the replay UI can render the correct map layout for each session.\n\n### Resolving a session's map — `get_session_map_id()`\n\nUsed by `replay_session_page` to pick the right layout. 
Reads the first non-NULL `map_id` from the session's actions (cheap — sessions don't switch maps mid-play), falls back to the active map, then to `DEFAULT_MAP_ID`. Without this, the replay template renders the WoK layout regardless of which map was actually played.\n\n### Reconstructing a previous span context — `get_previous_action_context()` at `app.py:130-170`\n\nLooks up `(trace_id, span_id)` for `(game_session_id, target_sequence)` in SQLite. Converts the hex strings to integers with `int(result[0], 16)` / `int(result[1], 16)` (this step has bitten agents in the past — the IDs are stored as hex strings, not raw bytes). Constructs a `trace.SpanContext(trace_id=..., span_id=..., is_remote=True, trace_flags=trace.TraceFlags.SAMPLED)` and returns it. The `SAMPLED` flag is required — without it, downstream processors may drop the link.\n\n### Creating a link — `create_span_link_from_context()` at `app.py:172-189`\n\nWraps the reconstructed context in a `trace.Link(span_context, attributes={...})` with:\n\n- `link.type` — caller-supplied (default `\"game_sequence\"`; AI opponent uses `\"ai_decision_trigger\"` in its own code).\n- `link.relation` — always `\"follows\"`.\n- `game.sequence` — always `\"true\"` (enables Tempo tag search).\n\n### Per-action flow inside a player-action handler\n\n```python\nprevious_span_context = get_previous_action_context(game_session_id, current_sequence)\nlinks = [create_span_link_from_context(previous_span_context, \"game_sequence\")] if previous_span_context else []\n\nwith tracer.start_as_current_span(\n    \"move_army\",\n    kind=SpanKind.SERVER,\n    links=links,\n    attributes={\n        \"game.session.id\": game_session_id,\n        \"game.action.sequence\": current_sequence + 1,\n        \"span.player.action\": True,\n        \"player.name\": ...,\n        \"player.faction\": ...,\n    },\n) as span:\n    # ... 
do the work, call location_api_request, etc.\n    store_game_action(\n        game_session_id, \"move_army\", ...,\n        trace_id=format(span.get_span_context().trace_id, '032x'),\n        span_id=format(span.get_span_context().span_id, '016x'),\n        ...\n    )\n```\n\nThe `format(..., '032x')` / `'016x'` pair is the inverse of the `int(..., 16)` step in `get_previous_action_context()` — always keep the two in sync.\n\n## Replay endpoints\n\nThe replay UI (`replay_session.html`) is backed by Tempo. `app.py` serves as the proxy and cleans up the responses.\n\n**Primary (Tempo):**\n- Discover sessions — `GET {TEMPO_URL}/api/v2/search/tag/game.session.id/values`\n- Pull a session's traces — `GET {TEMPO_URL}/api/search?q={game.session.id=\"<id>\"}&limit=100`\n- Pull a specific trace — `GET {TEMPO_URL}/api/traces/<trace_id>`\n\n**Fallback (SQLite):** If Tempo returns an error or is unreachable, read the `game_actions` table directly. Replay renders a reduced view (without span payloads) but the session narrative is preserved.\n\n## Environment\n\n| Var | Default | Purpose |\n|---|---|---|\n| `SECRET_KEY` | `war_of_westeros_secret_key` | Flask session secret |\n| `AI_URL` / `AI_SERVICE_URL` | `http://localhost:8081` | AI opponent base URL. Docker sets `http://ai-opponent:8081` |\n| `DATABASE_FILE` | `../app/game_state.db` | Shared game-state DB (read-write from war_map: it maintains the `war_map`, `game_config`, and `wall_hold` tables here) |\n| `GAME_SESSIONS_DB` | `game_sessions.db` | `game_actions` DB. 
Docker sets `/data/game_sessions.db` |\n| `API_BASE_URL` | `http://localhost` | Base URL for location server calls (host portion only; port comes from `LOCATION_PORTS`) |\n| `TEMPO_URL` | `http://tempo:3200` | Replay-query target |\n| `IN_DOCKER` | unset | Switches location URLs between `localhost:500X` and container DNS |\n\nLocation ports are hard-coded in `LOCATION_PORTS` at `app.py:201-210`; mirror any change here in `app/game_config.py`.\n\n## `X-Frame-Options` stripped — intentional\n\n`@app.after_request` at `app.py:191-194` removes `X-Frame-Options` from every response:\n\n```python\n@app.after_request\ndef remove_frame_options(response):\n    response.headers.pop('X-Frame-Options', None)\n    return response\n```\n\nThis is deliberate — it lets the UI be embedded in Grafana iframes for the replay experience. Grafana's `GF_SECURITY_ALLOW_EMBEDDING=true` is the other half of this configuration. **Do not remove** unless you are also disabling Grafana embedding.\n\n## Common edits\n\n**Add a new action type to the span-link chain.**\n1. Add the Flask handler in `app.py`, following the `move_army` / `create_army` pattern: look up previous context, build link, start a SERVER span with link + attributes, call `store_game_action()` at the tail.\n2. Add a renderer case in `templates/replay_session.html` so the replay UI can visualize the new action.\n3. Update the action-types table in [`../SPAN_LINKS.md`](../SPAN_LINKS.md).\n4. Update this doc and [`../AGENTS.md`](../AGENTS.md) if the new action surfaces new span attributes.\n\n**Tune the replay query.**\nEdit the TraceQL strings in the replay endpoints (`app.py`). The `game.session.id` tag is required — Tempo uses it to group the session's traces.\n\n**Add attributes to every player-action link.**\nEdit `create_span_link_from_context()` at `app.py:172-189`. 
The current three (`link.type`, `link.relation`, `game.sequence`) are load-bearing — the replay UI reads them.\n\n**Change session-tracking schema.**\nEdit `init_game_session_tracking()` at `app.py:60-96`. Because the DB lives on a persistent Docker volume, a schema change requires either `docker compose down -v` before restart **or** a migration script. Flag to the user which one you recommend before changing columns.\n\n## Keep this doc current\n\nPer the sub-agent rule, any change to span-link fields, replay endpoints, env vars, action types, or the line-number anchors above must land in the same work unit. Before returning a response that touched `war_map/`, grep this file for references to anything you changed.\n\nParticularly sensitive references:\n- `app.py:130-170` — `get_previous_action_context`\n- `app.py:172-189` — `create_span_link_from_context`\n- `app.py:60-96` — `init_game_session_tracking`\n- `app.py:101-128` — `store_game_action`\n- `app.py:191-194` — `X-Frame-Options` strip\n- `app.py:201-210` — `LOCATION_PORTS` dict\n\n## Cross-references\n\n- [`../AGENTS.md`](../AGENTS.md) — scenario-wide architecture and patterns\n- [`../SPAN_LINKS.md`](../SPAN_LINKS.md) — full span-link design spec and replay flow\n- [`../app/CLAUDE.md`](../app/CLAUDE.md) — location-server HTTP API this service calls\n- [`../ai_opponent/CLAUDE.md`](../ai_opponent/CLAUDE.md) — AI service this one activates/deactivates\n"
  },
  {
    "path": "game-of-tracing/war_map/Dockerfile",
    "content": "FROM python:3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install --no-cache-dir -r requirements.txt\n\nCOPY . .\n\nENV FLASK_APP=app.py\nENV FLASK_DEBUG=0\nENV IN_DOCKER=1\n\nEXPOSE 8080\n\nCMD [\"flask\", \"run\", \"--host=0.0.0.0\", \"--port=8080\"] "
  },
  {
    "path": "game-of-tracing/war_map/app.py",
    "content": "import os\nimport json\nimport sqlite3\nimport requests\nimport threading\nimport uuid\nimport time\nimport atexit\nfrom flask import Flask, render_template, jsonify, request, redirect, url_for, session\nfrom telemetry import GameTelemetry\nfrom opentelemetry import trace\nfrom opentelemetry.trace import SpanKind\nfrom opentelemetry.propagate import inject\n\napp = Flask(__name__)\napp.secret_key = os.environ.get('SECRET_KEY', 'war_of_westeros_secret_key')\n\n# AI Service configuration\nAI_SERVICE_URL = os.environ.get('AI_URL', 'http://localhost:8081')\n\n# Initialize telemetry\ntelemetry = GameTelemetry(service_name=\"war_map\")\nlogger = telemetry.get_logger()\ntracer = telemetry.get_tracer()\natexit.register(telemetry.shutdown)\n\n# Game session tracking database\nGAME_SESSIONS_DB = os.environ.get('GAME_SESSIONS_DB', 'game_sessions.db')  # Use local file for development\n\n# Game state variables\nGAME_OVER = False\nWINNER = None\nVICTORY_MESSAGE = None\n\n# ----------------------------------------------------------------\n# Maps — in-UI picker metadata.\n# Full per-location config lives in app/game_config.py. This is a compact\n# read-only duplicate of the fields war_map actually needs: layout for the\n# canvas, tick rules for the hold-to-win loop, faction/AI wiring for the\n# picker screen. Keep the map-id strings in sync with app/game_config.py.\n# ----------------------------------------------------------------\nDEFAULT_MAP_ID = \"war_of_kingdoms\"\n\nMAPS_META = {\n    \"war_of_kingdoms\": {\n        \"display_name\": \"War of Kingdoms\",\n        \"description\": (\n            \"Northern and Southern kingdoms clash for dominance. 
\"\n            \"Capture the enemy capital to win.\"\n        ),\n        \"single_player\": False,\n        \"player_faction\": None,\n        \"ai_faction\": None,\n        \"factions\": [\"northern\", \"southern\"],\n        \"tick_interval_s\": 0,\n        \"win_hold_ticks\": 0,\n        \"icon\": \"fa-chess-knight\",\n    },\n    \"white_walkers_attack\": {\n        \"display_name\": \"White Walkers Attack\",\n        \"description\": (\n            \"The Long Night has come. As the Night's Watch, hold every Wall \"\n            \"keep for 5 ticks before the White Walkers do. Single-player.\"\n        ),\n        \"single_player\": True,\n        \"player_faction\": \"nights_watch\",\n        \"ai_faction\": \"white_walkers\",\n        \"factions\": [\"nights_watch\", \"white_walkers\", \"barbarian\"],\n        \"tick_interval_s\": 30,\n        \"win_hold_ticks\": 5,\n        \"icon\": \"fa-icicles\",\n    },\n}\n\n# Map layout — canvas x/y percentages per location. Each map's keys must\n# match the location ids in app/game_config.py's MAPS[map_id][\"locations\"].\nLOCATION_POSITIONS_BY_MAP = {\n    \"war_of_kingdoms\": {\n        \"southern_capital\": {\"x\": 20, \"y\": 70, \"type\": \"capital\", \"name\": \"Southern Capital\"},\n        \"northern_capital\": {\"x\": 80, \"y\": 20, \"type\": \"capital\", \"name\": \"Northern Capital\"},\n        \"village_1\": {\"x\": 35, \"y\": 55, \"type\": \"village\", \"name\": \"Village 1\"},\n        \"village_2\": {\"x\": 65, \"y\": 35, \"type\": \"village\", \"name\": \"Village 2\"},\n        \"village_3\": {\"x\": 30, \"y\": 40, \"type\": \"village\", \"name\": \"Village 3\"},\n        \"village_4\": {\"x\": 45, \"y\": 65, \"type\": \"village\", \"name\": \"Village 4\"},\n        \"village_5\": {\"x\": 50, \"y\": 50, \"type\": \"village\", \"name\": \"Village 5\"},\n        \"village_6\": {\"x\": 70, \"y\": 45, \"type\": \"village\", \"name\": \"Village 6\"},\n    },\n    \"white_walkers_attack\": {\n        
\"nights_watch_fortress\": {\"x\": 50, \"y\": 85, \"type\": \"capital\", \"name\": \"Castle Black\"},\n        \"white_walker_fortress\": {\"x\": 50, \"y\": 15, \"type\": \"capital\", \"name\": \"The Lands of Always Winter\"},\n        \"wall_west\": {\"x\": 20, \"y\": 50, \"type\": \"wall\", \"name\": \"Westwatch\"},\n        \"wall_center_west\": {\"x\": 40, \"y\": 50, \"type\": \"wall\", \"name\": \"Queensgate\"},\n        \"wall_center_east\": {\"x\": 60, \"y\": 50, \"type\": \"wall\", \"name\": \"Deep Lake\"},\n        \"wall_east\": {\"x\": 80, \"y\": 50, \"type\": \"wall\", \"name\": \"Eastwatch-by-the-Sea\"},\n        \"barbarian_village_west\": {\"x\": 10, \"y\": 72, \"type\": \"village\", \"name\": \"Free Folk Camp (West)\"},\n        \"barbarian_village_east\": {\"x\": 90, \"y\": 72, \"type\": \"village\", \"name\": \"Free Folk Camp (East)\"},\n    },\n}\n\nLOCATION_CONNECTIONS_BY_MAP = {\n    \"war_of_kingdoms\": [\n        [\"southern_capital\", \"village_1\"],\n        [\"southern_capital\", \"village_3\"],\n        [\"northern_capital\", \"village_2\"],\n        [\"northern_capital\", \"village_6\"],\n        [\"village_1\", \"village_2\"],\n        [\"village_1\", \"village_4\"],\n        [\"village_2\", \"village_5\"],\n        [\"village_3\", \"village_5\"],\n        [\"village_3\", \"village_6\"],\n        [\"village_4\", \"village_5\"],\n        [\"village_5\", \"village_6\"],\n    ],\n    \"white_walkers_attack\": [\n        [\"nights_watch_fortress\", \"wall_west\"],\n        [\"nights_watch_fortress\", \"wall_center_west\"],\n        [\"nights_watch_fortress\", \"wall_center_east\"],\n        [\"nights_watch_fortress\", \"wall_east\"],\n        [\"white_walker_fortress\", \"wall_west\"],\n        [\"white_walker_fortress\", \"wall_center_west\"],\n        [\"white_walker_fortress\", \"wall_center_east\"],\n        [\"white_walker_fortress\", \"wall_east\"],\n        [\"wall_west\", \"wall_center_west\"],\n        [\"wall_center_west\", 
\"wall_center_east\"],\n        [\"wall_center_east\", \"wall_east\"],\n        [\"wall_west\", \"barbarian_village_west\"],\n        [\"wall_east\", \"barbarian_village_east\"],\n    ],\n}\n\n# Per-map list of wall-type locations for the hold-to-win check.\nWALL_LOCATIONS_BY_MAP = {\n    map_id: [\n        loc_id for loc_id, meta in positions.items()\n        if meta.get(\"type\") == \"wall\"\n    ]\n    for map_id, positions in LOCATION_POSITIONS_BY_MAP.items()\n}\n\n# Kept for legacy call sites that still reference the module-level names.\n# These stay pointing at the WoK defaults — call sites that need per-map\n# behaviour should call _current_positions() / _current_connections() instead.\nLOCATION_POSITIONS = LOCATION_POSITIONS_BY_MAP[DEFAULT_MAP_ID]\nLOCATION_CONNECTIONS = LOCATION_CONNECTIONS_BY_MAP[DEFAULT_MAP_ID]\n\n\ndef _current_positions():\n    \"\"\"Positions for the currently active map (reads active_map_id from DB).\"\"\"\n    return LOCATION_POSITIONS_BY_MAP.get(\n        get_active_map_id(), LOCATION_POSITIONS_BY_MAP[DEFAULT_MAP_ID]\n    )\n\n\ndef _current_connections():\n    \"\"\"Connections for the currently active map.\"\"\"\n    return LOCATION_CONNECTIONS_BY_MAP.get(\n        get_active_map_id(), LOCATION_CONNECTIONS_BY_MAP[DEFAULT_MAP_ID]\n    )\n\n\ndef _current_walls():\n    return WALL_LOCATIONS_BY_MAP.get(get_active_map_id(), [])\n\ndef init_game_session_tracking():\n    \"\"\"Initialize the game session tracking database\"\"\"\n    try:\n        # Ensure the database directory exists if using an absolute path\n        db_dir = os.path.dirname(GAME_SESSIONS_DB)\n        if db_dir and not os.path.exists(db_dir):\n            os.makedirs(db_dir, exist_ok=True)\n        \n        conn = sqlite3.connect(GAME_SESSIONS_DB)\n        cursor = conn.cursor()\n        \n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS game_actions (\n            id INTEGER PRIMARY KEY AUTOINCREMENT,\n            game_session_id TEXT NOT NULL,\n      
      action_sequence INTEGER NOT NULL,\n            action_type TEXT NOT NULL,\n            player_name TEXT,\n            faction TEXT,\n            trace_id TEXT NOT NULL,\n            span_id TEXT NOT NULL,\n            location_id TEXT,\n            target_location_id TEXT,\n            timestamp INTEGER NOT NULL,\n            game_state_after TEXT,\n            map_id TEXT,\n            UNIQUE(game_session_id, action_sequence)\n        )\n        ''')\n\n        # Best-effort migration for existing game_sessions.db files created\n        # before the map_id column existed. SQLite's ALTER TABLE only adds\n        # missing columns; the IGNORE/OperationalError guard keeps a\n        # fresh-install run idempotent.\n        try:\n            cursor.execute(\"ALTER TABLE game_actions ADD COLUMN map_id TEXT\")\n        except sqlite3.OperationalError:\n            pass\n\n        conn.commit()\n        conn.close()\n        logger.info(f\"Game session tracking database initialized: {GAME_SESSIONS_DB}\")\n        \n    except Exception as e:\n        logger.error(f\"Failed to initialize game session tracking database: {e}\")\n        # Don't fail the app startup if database init fails\n        pass\n\n# Initialize the game session tracking database immediately\ninit_game_session_tracking()\n# Tables in game_state.db (game_config, wall_hold, faction_economy) are\n# initialized lazily on first call to _ensure_game_config_tables() — see\n# the in-process startup path later in this module.\n\ndef store_game_action(game_session_id, action_type, player_name, faction,\n                     trace_id, span_id, location_id=None, target_location_id=None,\n                     game_state=None, map_id=None):\n    \"\"\"Store a game action with its trace information.\n\n    ``map_id`` is recorded so the replay page can render the correct map\n    layout (positions/connections) for sessions played on non-default maps.\n    Defaults to the currently active map when not supplied.\n 
   \"\"\"\n    if map_id is None:\n        try:\n            map_id = get_active_map_id()\n        except Exception:\n            map_id = DEFAULT_MAP_ID\n\n    conn = sqlite3.connect(GAME_SESSIONS_DB)\n    cursor = conn.cursor()\n\n    # Get next sequence number\n    cursor.execute(\"SELECT MAX(action_sequence) FROM game_actions WHERE game_session_id = ?\",\n                   (game_session_id,))\n    result = cursor.fetchone()\n    next_sequence = (result[0] or 0) + 1\n\n    # Debug logging\n    logger.info(f\"Storing action: session={game_session_id}, sequence={next_sequence}, action={action_type}, trace_id={trace_id}, span_id={span_id}, map_id={map_id}\")\n\n    cursor.execute('''\n    INSERT INTO game_actions\n    (game_session_id, action_sequence, action_type, player_name, faction,\n     trace_id, span_id, location_id, target_location_id, timestamp, game_state_after, map_id)\n    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n    ''', (game_session_id, next_sequence, action_type, player_name, faction,\n          trace_id, span_id, location_id, target_location_id,\n          int(time.time()), json.dumps(game_state) if game_state else None, map_id))\n\n    conn.commit()\n    conn.close()\n    return next_sequence\n\n\ndef get_session_map_id(session_id):\n    \"\"\"Resolve the map a session was played on.\n\n    Looks at any non-NULL ``map_id`` in the session's actions; falls back to\n    the currently active map for sessions stored before the column was\n    populated. Returns ``DEFAULT_MAP_ID`` as a last resort so the replay\n    template always has a layout to render.\n    \"\"\"\n    try:\n        conn = sqlite3.connect(GAME_SESSIONS_DB)\n        try:\n            row = conn.execute(\n                \"SELECT map_id FROM game_actions \"\n                \"WHERE game_session_id = ? 
AND map_id IS NOT NULL \"\n                \"ORDER BY action_sequence LIMIT 1\",\n                (session_id,),\n            ).fetchone()\n            if row and row[0] in LOCATION_POSITIONS_BY_MAP:\n                return row[0]\n        finally:\n            conn.close()\n    except Exception as e:\n        logger.warning(f\"get_session_map_id failed for {session_id}: {e}\")\n\n    try:\n        active = get_active_map_id()\n        if active in LOCATION_POSITIONS_BY_MAP:\n            return active\n    except Exception:\n        pass\n    return DEFAULT_MAP_ID\n\ndef get_previous_action_context(game_session_id, target_sequence):\n    \"\"\"Get the action's span context for linking by target sequence number\"\"\"\n    conn = sqlite3.connect(GAME_SESSIONS_DB)\n    cursor = conn.cursor()\n    \n    # Debug logging\n    logger.info(f\"Looking for action: session={game_session_id}, target_sequence={target_sequence}\")\n    \n    cursor.execute('''\n    SELECT trace_id, span_id FROM game_actions \n    WHERE game_session_id = ? 
AND action_sequence = ?\n    ''', (game_session_id, target_sequence))\n    \n    result = cursor.fetchone()\n    conn.close()\n    \n    if result:\n        try:\n            # Debug logging\n            logger.info(f\"Found target action: trace_id={result[0]}, span_id={result[1]}\")\n            \n            # Reconstruct the span context from stored trace and span IDs\n            trace_id = int(result[0], 16)\n            span_id = int(result[1], 16)\n            \n            # Create span context with proper trace flags\n            span_context = trace.SpanContext(\n                trace_id=trace_id,\n                span_id=span_id,\n                is_remote=True,\n                trace_flags=trace.TraceFlags.SAMPLED\n            )\n            \n            logger.info(f\"Created span context for linking: trace_id={trace_id:032x}, span_id={span_id:016x}\")\n            return span_context\n        except (ValueError, TypeError) as e:\n            logger.error(f\"Failed to reconstruct span context: {e}\")\n            return None\n    else:\n        logger.info(f\"No action found for sequence {target_sequence}\")\n    return None\n\ndef create_span_link_from_context(span_context, link_type=\"game_sequence\"):\n    \"\"\"Create a span link from a span context using the official API\"\"\"\n    if span_context is None:\n        return None\n    \n    try:\n        link = trace.Link(\n            span_context,\n            attributes={\n                \"link.type\": link_type,\n                \"link.relation\": \"follows\",\n                \"game.sequence\": \"true\"\n            }\n        )\n        return link\n    except Exception as e:\n        logger.error(f\"Failed to create span link: {e}\")\n        return None\n\n@app.after_request\ndef remove_frame_options(response):\n    response.headers.pop('X-Frame-Options', None)\n    return response\n\n# Configuration\nDATABASE_FILE = os.environ.get('DATABASE_FILE', '../app/game_state.db')\nAPI_BASE_URL = 
os.environ.get('API_BASE_URL', 'http://localhost')  # Base URL for API calls\n\n# Location server ports (from game_config.py). These are keyed by the\n# *current-map* location id; when the active map changes, the keys here\n# follow along because both maps assign the same port to the same slot.\nLOCATION_PORTS = {\n    \"southern_capital\": 5001,\n    \"northern_capital\": 5002,\n    \"village_1\": 5003,\n    \"village_2\": 5004,\n    \"village_3\": 5005,\n    \"village_4\": 5006,\n    \"village_5\": 5007,\n    \"village_6\": 5008,\n    # White Walkers Attack mappings (same ports — just aliased).\n    \"nights_watch_fortress\": 5001,\n    \"white_walker_fortress\": 5002,\n    \"wall_west\": 5003,\n    \"wall_center_west\": 5004,\n    \"wall_center_east\": 5005,\n    \"wall_east\": 5006,\n    \"barbarian_village_west\": 5007,\n    \"barbarian_village_east\": 5008,\n}\n\n# Container hostname per logical location id. WWA reuses the same 8 slot\n# containers, so its location ids resolve to the WoK container names. Without\n# this aliasing, ``location_id.replace('_', '-')`` produces hostnames like\n# ``nights-watch-fortress`` that don't exist in the docker network and the\n# /map render returns an empty locations dict (blank map).\nCONTAINER_FOR_LOCATION_ID = {\n    \"southern_capital\": \"southern-capital\",\n    \"northern_capital\": \"northern-capital\",\n    \"village_1\": \"village-1\",\n    \"village_2\": \"village-2\",\n    \"village_3\": \"village-3\",\n    \"village_4\": \"village-4\",\n    \"village_5\": \"village-5\",\n    \"village_6\": \"village-6\",\n    \"nights_watch_fortress\": \"southern-capital\",\n    \"white_walker_fortress\": \"northern-capital\",\n    \"wall_west\": \"village-1\",\n    \"wall_center_west\": \"village-2\",\n    \"wall_center_east\": \"village-3\",\n    \"wall_east\": \"village-4\",\n    \"barbarian_village_west\": \"village-5\",\n    \"barbarian_village_east\": \"village-6\",\n}\n\n# Container hostname (in docker-compose) per slot. 
Stable across maps.\nSLOT_CONTAINER_NAMES = {\n    \"slot_1\": \"southern-capital\",\n    \"slot_2\": \"northern-capital\",\n    \"slot_3\": \"village-1\",\n    \"slot_4\": \"village-2\",\n    \"slot_5\": \"village-3\",\n    \"slot_6\": \"village-4\",\n    \"slot_7\": \"village-5\",\n    \"slot_8\": \"village-6\",\n}\n\n# Port per slot.\nSLOT_PORTS = {\n    \"slot_1\": 5001,\n    \"slot_2\": 5002,\n    \"slot_3\": 5003,\n    \"slot_4\": 5004,\n    \"slot_5\": 5005,\n    \"slot_6\": 5006,\n    \"slot_7\": 5007,\n    \"slot_8\": 5008,\n}\n\n\ndef _container_for_slot(slot_id):\n    \"\"\"Return the docker-compose service name hosting ``slot_id`` (stable).\"\"\"\n    return SLOT_CONTAINER_NAMES.get(slot_id, slot_id.replace('_', '-'))\n\n\ndef _slot_port_pairs():\n    \"\"\"Yield (slot_id, port) tuples for all 8 slots.\"\"\"\n    return list(SLOT_PORTS.items())\n\n# LOCATION_POSITIONS and LOCATION_CONNECTIONS are defined earlier (as the\n# WoK default slices of the LOCATION_*_BY_MAP dicts). Legacy call sites that\n# still reference the unsuffixed names get the WoK layout; new code should\n# go through _current_positions() / _current_connections().\n\n# Game state - track victory conditions (local process cache; also read\n# from wall_hold on map WWA).\n# Note: GAME_OVER/WINNER/VICTORY_MESSAGE already declared near top of file.\n\ndef get_db_connection():\n    \"\"\"Create a connection to the SQLite database\"\"\"\n    conn = sqlite3.connect(DATABASE_FILE)\n    conn.row_factory = sqlite3.Row\n    return conn\n\n\n# ----------------------------------------------------------------\n# Active map + wall-hold state (lives in game_state.db so location\n# servers and war_map agree on the single source of truth).\n# ----------------------------------------------------------------\n\ndef _ensure_game_config_tables():\n    \"\"\"Create game_config, faction_economy, and wall_hold if missing, and\n    migrate the war_map table for single-player maps (adds map_id + drops the\n    
faction UNIQUE constraint so nights_watch can be registered without\n    conflicting with the WoK two-faction model).\n    \"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS game_config (\n            key TEXT PRIMARY KEY,\n            value TEXT NOT NULL\n        )\n        ''')\n        cursor.execute(\n            \"INSERT OR IGNORE INTO game_config (key, value) VALUES ('active_map_id', ?)\",\n            (DEFAULT_MAP_ID,),\n        )\n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS faction_economy (\n            faction TEXT PRIMARY KEY,\n            corpses INTEGER NOT NULL DEFAULT 0\n        )\n        ''')\n        cursor.execute('''\n        CREATE TABLE IF NOT EXISTS wall_hold (\n            map_id TEXT NOT NULL,\n            faction TEXT NOT NULL,\n            ticks INTEGER NOT NULL DEFAULT 0,\n            last_update INTEGER NOT NULL,\n            PRIMARY KEY (map_id, faction)\n        )\n        ''')\n        # war_map table: additive map_id column for session-level bookkeeping.\n        try:\n            cursor.execute(\"ALTER TABLE war_map ADD COLUMN map_id TEXT\")\n        except sqlite3.OperationalError:\n            pass\n        conn.commit()\n        conn.close()\n    except sqlite3.Error as e:\n        logger.error(f\"Failed to ensure game_config tables: {e}\")\n\n\ndef get_active_map_id():\n    \"\"\"Return the currently active map id from game_state.db (cached row).\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute(\"SELECT value FROM game_config WHERE key = 'active_map_id'\")\n        row = cursor.fetchone()\n        conn.close()\n        return row['value'] if row else DEFAULT_MAP_ID\n    except sqlite3.Error:\n        return DEFAULT_MAP_ID\n\n\ndef set_active_map_id(map_id):\n    \"\"\"Persist the active map id. 
Location services pick this up via /reload.\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute(\n            \"INSERT INTO game_config (key, value) VALUES ('active_map_id', ?) \"\n            \"ON CONFLICT(key) DO UPDATE SET value = excluded.value\",\n            (map_id,),\n        )\n        conn.commit()\n        conn.close()\n        return True\n    except sqlite3.Error as e:\n        logger.error(f\"Failed to set active map id: {e}\")\n        return False\n\n\ndef reset_wall_hold(map_id):\n    \"\"\"Zero the wall-hold counter for every faction on ``map_id``.\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute(\"DELETE FROM wall_hold WHERE map_id = ?\", (map_id,))\n        conn.commit()\n        conn.close()\n    except sqlite3.Error as e:\n        logger.error(f\"Failed to reset wall_hold for {map_id}: {e}\")\n\n\ndef bump_wall_hold(map_id, faction, reset_others=True):\n    \"\"\"Increment ``faction``'s tick count on ``map_id``. Optionally reset\n    every other faction back to 0. Returns the new tick count.\n    \"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        now = int(time.time())\n        if reset_others:\n            cursor.execute(\n                \"UPDATE wall_hold SET ticks = 0 WHERE map_id = ? AND faction != ?\",\n                (map_id, faction),\n            )\n        cursor.execute(\n            \"INSERT INTO wall_hold (map_id, faction, ticks, last_update) \"\n            \"VALUES (?, ?, 1, ?) \"\n            \"ON CONFLICT(map_id, faction) DO UPDATE SET \"\n            \"ticks = ticks + 1, last_update = excluded.last_update\",\n            (map_id, faction, now),\n        )\n        cursor.execute(\n            \"SELECT ticks FROM wall_hold WHERE map_id = ? 
AND faction = ?\",\n            (map_id, faction),\n        )\n        row = cursor.fetchone()\n        conn.commit()\n        conn.close()\n        return int(row['ticks']) if row else 0\n    except sqlite3.Error as e:\n        logger.error(f\"Failed to bump wall_hold: {e}\")\n        return 0\n\n\ndef get_wall_hold(map_id):\n    \"\"\"Return {faction: ticks} for the given map.\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute(\"SELECT faction, ticks FROM wall_hold WHERE map_id = ?\", (map_id,))\n        rows = cursor.fetchall()\n        conn.close()\n        return {r['faction']: int(r['ticks']) for r in rows}\n    except sqlite3.Error:\n        return {}\n\n\ndef get_faction_corpses(faction):\n    \"\"\"Read a faction's corpse pool (0 when no row yet).\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        cursor.execute(\"SELECT corpses FROM faction_economy WHERE faction = ?\", (faction,))\n        row = cursor.fetchone()\n        conn.close()\n        return int(row['corpses']) if row else 0\n    except sqlite3.Error:\n        return 0\n\n\ndef check_faction_availability(faction):\n    \"\"\"Check if a faction is already claimed by another player\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        \n        # Check if the war_map table exists\n        cursor.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name='war_map'\")\n        if not cursor.fetchone():\n            # Create the war_map table if it doesn't exist\n            cursor.execute('''\n            CREATE TABLE war_map (\n                id INTEGER PRIMARY KEY AUTOINCREMENT,\n                faction TEXT UNIQUE NOT NULL,\n                player_name TEXT,\n                session_id TEXT UNIQUE\n            )\n            ''')\n            conn.commit()\n        \n        # Check if the faction is already taken\n        cursor.execute(\"SELECT * 
FROM war_map WHERE faction = ?\", (faction,))\n        result = cursor.fetchone()\n        \n        conn.close()\n        logger.info(f\"Faction availability check: {result is None}\")\n        return result is None  # True if available, False if taken\n    except sqlite3.Error as e:\n        logger.error(f\"Database error: {e}\")\n        return False\n\ndef register_faction(faction, player_name, session_id):\n    \"\"\"Register a player's faction choice\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        \n        # Try to insert the new faction record\n        cursor.execute(\n            \"INSERT INTO war_map (faction, player_name, session_id) VALUES (?, ?, ?)\",\n            (faction, player_name, session_id)\n        )\n        conn.commit()\n        conn.close()\n        logger.info(f\"Faction registered: {faction} for {player_name} with session ID {session_id}\")\n        return True\n    except sqlite3.Error as e:\n        logger.error(f\"Database error when registering faction: {e}\")\n        return False\n\ndef get_player_faction(session_id):\n    \"\"\"Get the faction associated with a session ID\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        \n        cursor.execute(\"SELECT faction FROM war_map WHERE session_id = ?\", (session_id,))\n        result = cursor.fetchone()\n        \n        conn.close()\n        logger.info(f\"Player faction retrieved: {result['faction'] if result else None}\")\n        return result['faction'] if result else None\n    except sqlite3.Error as e:\n        logger.error(f\"Database error: {e}\")\n        return None\n\ndef release_faction(session_id):\n    \"\"\"Release a faction when a player logs out or disconnects\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        \n        cursor.execute(\"DELETE FROM war_map WHERE session_id = ?\", (session_id,))\n        conn.commit()\n        
conn.close()\n        logger.info(f\"Faction released for session ID: {session_id}\")\n        return True\n    except sqlite3.Error as e:\n        logger.error(f\"Database error when releasing faction: {e}\")\n        return False\n\ndef release_all_factions():\n    \"\"\"Release all faction assignments - used for game reset\"\"\"\n    try:\n        conn = get_db_connection()\n        cursor = conn.cursor()\n        \n        cursor.execute(\"DELETE FROM war_map\")\n        conn.commit()\n        conn.close()\n        logger.info(\"All factions released\")\n        return True\n    except sqlite3.Error as e:\n        logger.error(f\"Database error when releasing all factions: {e}\")\n        return False\n\ndef get_location_url(location_id):\n    \"\"\"Get the URL for a location's API\"\"\"\n    # In Docker, use container names instead of localhost. WWA location ids\n    # alias the WoK slot containers — see CONTAINER_FOR_LOCATION_ID.\n    if os.environ.get('IN_DOCKER'):\n        host = CONTAINER_FOR_LOCATION_ID.get(\n            location_id, location_id.replace('_', '-')\n        )\n    else:\n        host = 'localhost'\n\n    port = LOCATION_PORTS[location_id]\n    return f\"http://{host}:{port}\"\n\ndef make_api_request(location_id, endpoint, method='GET', data=None):\n    \"\"\"Make an API request to a location server with trace context.\"\"\"\n    url = f\"{get_location_url(location_id)}/{endpoint}\"\n    \n    # Only create spans for important operations, not for status checks\n    important_endpoints = {'move_army', 'all_out_attack', 'send_resources_to_capital', 'receive_army', 'receive_resources', 'collect_resources', 'create_army'}\n    \n    headers = {\"Content-Type\": \"application/json\"}\n    if endpoint in important_endpoints:\n        # Create span only for important operations\n        with tracer.start_as_current_span(\n            \"location_api_request\",\n            kind=SpanKind.CLIENT,\n            attributes={\n                
\"location.id\": location_id,\n                \"location.endpoint\": endpoint,\n                \"http.method\": method\n            }\n        ) as span:\n            inject(headers)  # Inject trace context into headers\n            try:\n                if method == 'GET':\n                    response = requests.get(url, headers=headers)\n                else:  # POST\n                    response = requests.post(url, json=data, headers=headers)\n                \n                span.set_attribute(\"http.status_code\", response.status_code)\n                response.raise_for_status()\n                result = response.json()\n                \n                if not result.get(\"success\", True):\n                    span.set_status(trace.StatusCode.ERROR, result.get(\"message\", \"Unknown error\"))\n                \n                return result\n            except requests.RequestException as e:\n                span.record_exception(e)\n                span.set_status(trace.StatusCode.ERROR, str(e))\n                return {\"error\": str(e)}\n    else:\n        # For status checks and other non-important operations, just make the request without tracing\n        try:\n            if method == 'GET':\n                response = requests.get(url, headers=headers)\n            else:  # POST\n                response = requests.post(url, json=data, headers=headers)\n            response.raise_for_status()\n            return response.json()\n        except requests.RequestException as e:\n            return {\"error\": str(e)}\n\ndef check_game_over(locations_data, map_id=None):\n    \"\"\"Dispatch to the right win-condition check based on the active map.\"\"\"\n    if map_id is None:\n        map_id = get_active_map_id()\n    if map_id == \"white_walkers_attack\":\n        # WWA games end only via hold-the-walls. 
Capital captures do not end\n        # the game (the capital can change hands mid-match).\n        return check_wall_hold_win(locations_data, map_id)\n    return check_capital_capture_win(locations_data)\n\n\ndef check_capital_capture_win(locations_data):\n    \"\"\"Classic WoK win: take the enemy capital.\"\"\"\n    global GAME_OVER, WINNER, VICTORY_MESSAGE\n\n    if locations_data.get('southern_capital', {}).get('faction') == 'northern':\n        GAME_OVER = True\n        WINNER = 'northern'\n        VICTORY_MESSAGE = \"The Northern Kingdom has conquered the Southern Capital! Victory through unity!\"\n        return True\n\n    if locations_data.get('northern_capital', {}).get('faction') == 'southern':\n        GAME_OVER = True\n        WINNER = 'southern'\n        VICTORY_MESSAGE = \"The Southern Kingdom has conquered the Northern Capital! Glory to the South!\"\n        return True\n\n    logger.info(\"Game is not over\")\n    return False\n\n\ndef check_wall_hold_win(locations_data, map_id):\n    \"\"\"White Walkers Attack win: one faction has held every wall for the\n    configured number of ticks. This is a passive check — the tick thread\n    owns incrementing the counter; here we just observe + declare.\n    \"\"\"\n    global GAME_OVER, WINNER, VICTORY_MESSAGE\n\n    threshold = MAPS_META.get(map_id, {}).get(\"win_hold_ticks\", 0)\n    if threshold <= 0:\n        return False\n\n    holds = get_wall_hold(map_id)\n    for faction, ticks in holds.items():\n        if ticks >= threshold:\n            GAME_OVER = True\n            WINNER = faction\n            if faction == \"nights_watch\":\n                VICTORY_MESSAGE = (\n                    \"The Night's Watch held the Wall! The Long Night is broken.\"\n                )\n            elif faction == \"white_walkers\":\n                VICTORY_MESSAGE = (\n                    \"The Wall has fallen. 
The Long Night has come for Westeros.\"\n                )\n            else:\n                VICTORY_MESSAGE = f\"{faction.title()} held every Wall keep for {threshold} ticks.\"\n            return True\n\n    logger.debug(f\"Wall hold check: {holds} (threshold {threshold})\")\n    return False\n\ndef reset_game_state():\n    \"\"\"Reset the game state\"\"\"\n    global GAME_OVER, WINNER, VICTORY_MESSAGE\n    GAME_OVER = False\n    WINNER = None\n    VICTORY_MESSAGE = None\n\ndef reset_game_data():\n    \"\"\"Reset the game completely by resetting each location's state\"\"\"\n    # First, reset our local game state\n    reset_game_state()\n    \n    # Deactivate AI if it's running\n    try:\n        requests.post(f\"{AI_SERVICE_URL}/deactivate\", timeout=5)\n        logger.info(\"AI deactivated during game reset\")\n    except Exception as e:\n        logger.warning(f\"Failed to deactivate AI during reset: {e}\")\n    \n    # Next, reset all faction assignments\n    release_all_factions()\n    \n    # Clear the game session tracking database\n    try:\n        conn = sqlite3.connect(GAME_SESSIONS_DB)\n        cursor = conn.cursor()\n        cursor.execute(\"DELETE FROM game_actions\")\n        conn.commit()\n        conn.close()\n        logger.info(\"Game session tracking database cleared\")\n    except Exception as e:\n        logger.warning(f\"Failed to clear game session database: {e}\")\n    \n    # Finally, reset one location to trigger a database reset\n    # (Since they all share the same database, we only need to reset one)\n    try:\n        make_api_request('southern_capital', 'reset', method='POST')\n        logger.info(\"Game data reset\")\n        return True\n    except Exception as e:\n        logger.error(f\"Error resetting game data: {e}\")\n        return False\n\n@app.route('/health', methods=['GET'])\ndef health():\n    return jsonify({\"status\": \"ok\"})\n\n@app.route('/')\ndef index():\n    \"\"\"Home page. 
Routes through the map picker on first visit; once the\n    user has picked a map the faction-selection view (WoK) or auto-start\n    view (WWA single-player) is served instead.\n    \"\"\"\n    _ensure_game_config_tables()\n\n    # Already in a game with a faction? Go straight to the map.\n    if 'session_id' in session and get_player_faction(session['session_id']):\n        return redirect(url_for('game_map'))\n\n    # No map chosen yet → map picker.\n    if 'map_id' not in session:\n        return redirect(url_for('map_picker'))\n\n    map_id = session['map_id']\n    meta = MAPS_META.get(map_id, MAPS_META[DEFAULT_MAP_ID])\n\n    if meta[\"single_player\"]:\n        # Single-player maps skip the faction cards. A single CTA button posts\n        # back with faction=player_faction.\n        player_faction = meta[\"player_faction\"]\n        player_available = check_faction_availability(player_faction)\n        return render_template(\n            'index.html',\n            map_id=map_id,\n            map_meta=meta,\n            single_player=True,\n            player_faction=player_faction,\n            player_available=player_available,\n            southern_available=False,\n            northern_available=False,\n        )\n\n    # Classic WoK two-faction flow.\n    southern_available = check_faction_availability('southern')\n    northern_available = check_faction_availability('northern')\n    logger.info(f\"Southern available: {southern_available}, Northern available: {northern_available}\")\n\n    return render_template(\n        'index.html',\n        map_id=map_id,\n        map_meta=meta,\n        single_player=False,\n        southern_available=southern_available,\n        northern_available=northern_available,\n    )\n\n\n@app.route('/map_picker')\ndef map_picker():\n    \"\"\"Map selection screen. 
Renders one card per entry in MAPS_META.\"\"\"\n    _ensure_game_config_tables()\n    return render_template('map_picker.html', maps=MAPS_META)\n\n\n@app.route('/select_map', methods=['POST'])\ndef select_map():\n    \"\"\"Persist the chosen map as active + reload every location service.\n\n    Steps:\n      1. Write ``active_map_id`` to game_config.\n      2. Reset the locations table via one location's ``/reset`` (shared DB —\n         one call repopulates the 8 rows from the new map's config).\n      3. POST ``/reload`` to every slot so the in-memory ``location_info`` on\n         each service rebinds without a container restart.\n      4. For single-player maps, auto-register the preset player faction and\n         auto-activate the AI as the preset enemy faction.\n      5. Redirect to the entry UI (map-aware from the session).\n    \"\"\"\n    map_id = request.form.get('map_id') or DEFAULT_MAP_ID\n    if map_id not in MAPS_META:\n        logger.error(f\"Unknown map_id: {map_id}\")\n        return redirect(url_for('map_picker'))\n\n    with tracer.start_as_current_span(\n        \"select_map\",\n        kind=SpanKind.SERVER,\n        attributes={\"game.map.id\": map_id},\n    ) as span:\n        # 1. Persist + wipe any previous wall-hold counters.\n        set_active_map_id(map_id)\n        reset_wall_hold(map_id)\n        # Clear all maps' old counters to avoid stale wins after switching.\n        for mid in MAPS_META:\n            reset_wall_hold(mid)\n\n        # 2. Reset locations rows to match the new map.\n        try:\n            # Any one container will do — the DB is shared. 
Use the first\n            # Docker service name (stable across maps).\n            reset_container = _container_for_slot(\"slot_1\")\n            requests.post(\n                f\"http://{reset_container}:5001/reset\" if os.environ.get('IN_DOCKER')\n                else f\"http://localhost:5001/reset\",\n                timeout=5,\n            )\n        except Exception as e:\n            logger.warning(f\"Failed to reset location rows during map switch: {e}\")\n\n        # 3. Kick every slot to reload identity.\n        for slot_id, port in _slot_port_pairs():\n            try:\n                host = _container_for_slot(slot_id) if os.environ.get('IN_DOCKER') else \"localhost\"\n                requests.post(f\"http://{host}:{port}/reload\", timeout=5)\n            except Exception as e:\n                logger.warning(f\"Failed to /reload {slot_id}: {e}\")\n\n        # 4. Clear faction claims + session data so the new map starts clean.\n        release_all_factions()\n        session.pop('faction', None)\n        session.pop('player_name', None)\n        session.pop('game_session_id', None)\n        session.pop('action_sequence', None)\n        session.pop('session_id', None)\n        session['map_id'] = map_id\n\n        meta = MAPS_META[map_id]\n\n        # 5. Single-player: AI activation is deferred until the player clicks\n        # \"Take the Black\" on the index page (so the player always explicitly\n        # starts the game). 
But we do pre-reset the game-over flags.\n        reset_game_state()\n        span.set_attribute(\"single_player\", meta[\"single_player\"])\n\n    return redirect(url_for('index'))\n\n\n@app.route('/select_faction', methods=['POST'])\ndef select_faction():\n    \"\"\"Process faction selection (WoK two-player or single-player preset).\"\"\"\n    map_id = session.get('map_id', DEFAULT_MAP_ID)\n    meta = MAPS_META.get(map_id, MAPS_META[DEFAULT_MAP_ID])\n\n    faction = request.form.get('faction')\n    player_name = request.form.get('player_name', 'Unknown Player')\n\n    allowed = set(meta.get(\"factions\", []))\n    if not faction or faction not in allowed:\n        return render_template(\n            'index.html',\n            map_id=map_id,\n            map_meta=meta,\n            single_player=meta[\"single_player\"],\n            player_faction=meta.get(\"player_faction\"),\n            southern_available=check_faction_availability('southern'),\n            northern_available=check_faction_availability('northern'),\n            player_available=(\n                check_faction_availability(meta.get(\"player_faction\"))\n                if meta[\"single_player\"] else False\n            ),\n            error=\"Invalid faction selected\",\n        )\n\n    # Check if faction is available\n    if not check_faction_availability(faction):\n        logger.info(f\"Faction {faction} is already taken\")\n        return render_template(\n            'index.html',\n            map_id=map_id,\n            map_meta=meta,\n            single_player=meta[\"single_player\"],\n            player_faction=meta.get(\"player_faction\"),\n            southern_available=check_faction_availability('southern'),\n            northern_available=check_faction_availability('northern'),\n            player_available=False,\n            error=f\"The {faction.replace('_', ' ').title()} faction is already taken\",\n        )\n\n    # Generate a session ID if not present\n    if 'session_id' 
not in session:\n        session['session_id'] = str(uuid.uuid4())\n\n    # Generate a game session ID for span linking\n    if 'game_session_id' not in session:\n        session['game_session_id'] = str(uuid.uuid4())\n        session['action_sequence'] = 0  # Initialize action sequence\n        logger.info(f\"Initialized game session: {session['game_session_id']}\")\n\n    # Register the faction\n    if register_faction(faction, player_name, session['session_id']):\n        session['faction'] = faction\n        session['player_name'] = player_name\n        session['is_ai'] = False  # Human player by default\n        logger.info(f\"Player {player_name} selected faction {faction} on map {map_id}\")\n\n        # On single-player maps, auto-activate the AI as the preset enemy\n        # the moment the human commits to playing.\n        if meta[\"single_player\"] and meta.get(\"ai_faction\"):\n            try:\n                requests.post(\n                    f\"{AI_SERVICE_URL}/activate\",\n                    json={\"faction\": meta[\"ai_faction\"], \"map_id\": map_id},\n                    timeout=5,\n                )\n                logger.info(f\"Auto-activated AI as {meta['ai_faction']} for single-player map {map_id}\")\n            except Exception as e:\n                logger.warning(f\"Auto-activation of AI failed: {e}\")\n\n        return redirect(url_for('game_map'))\n    else:\n        logger.error(f\"Failed to register faction {faction}\")\n        return render_template(\n            'index.html',\n            map_id=map_id,\n            map_meta=meta,\n            single_player=meta[\"single_player\"],\n            player_faction=meta.get(\"player_faction\"),\n            southern_available=check_faction_availability('southern'),\n            northern_available=check_faction_availability('northern'),\n            player_available=False,\n            error=f\"Failed to register {faction.replace('_', ' ').title()} faction\",\n        
)\n\n@app.route('/logout')\ndef logout():\n    \"\"\"Log out and release faction\"\"\"\n    if 'session_id' in session:\n        release_faction(session['session_id'])\n        logger.info(f\"Faction released for session ID: {session['session_id']}\")\n    # Clear the session\n    session.clear()\n    return redirect(url_for('index'))\n\n@app.route('/restart-game')\ndef restart_game():\n    \"\"\"Reset the game and redirect all players to faction selection\"\"\"\n    logger.info(\"Game restart initiated\")\n    \n    # Store current session info for logging\n    current_player = session.get('player_name', 'Unknown')\n    current_faction = session.get('faction', 'Unknown')\n    \n    # Reset the entire game state\n    success = reset_game_data()\n    \n    # Clear current user's session completely\n    session.clear()\n    \n    # Log the restart\n    if success:\n        logger.info(f\"Game successfully restarted by {current_player} ({current_faction})\")\n    else:\n        logger.error(f\"Game restart failed, initiated by {current_player} ({current_faction})\")\n    \n    # Redirect to the home page with a reset status\n    if success:\n        return redirect(url_for('index') + '?reset=success&message=Game has been reset successfully')\n    else:\n        return redirect(url_for('index') + '?reset=failed&message=Game reset failed, please try again')\n\n@app.route('/map')\ndef game_map():\n    \"\"\"Game map page — renders the canvas for the currently active map.\"\"\"\n    # Check if user has selected a faction\n    if 'faction' not in session:\n        return redirect(url_for('index'))\n\n    map_id = session.get('map_id') or get_active_map_id()\n    positions = LOCATION_POSITIONS_BY_MAP.get(map_id, LOCATION_POSITIONS_BY_MAP[DEFAULT_MAP_ID])\n    connections = LOCATION_CONNECTIONS_BY_MAP.get(map_id, LOCATION_CONNECTIONS_BY_MAP[DEFAULT_MAP_ID])\n    meta = MAPS_META.get(map_id, MAPS_META[DEFAULT_MAP_ID])\n\n    faction = session['faction']\n    player_name = 
session.get('player_name', 'Unknown Player')\n\n    # Get all location data for the map (only the ids relevant to this map).\n    locations_data = {}\n    for loc_id in positions.keys():\n        data = make_api_request(loc_id, '')\n        if 'error' not in data:\n            locations_data[loc_id] = {\n                **positions[loc_id],\n                'faction': data['faction'],\n                'resources': data['resources'],\n                'army': data['army'],\n            }\n\n    # Check for game over condition (map-aware).\n    check_game_over(locations_data, map_id=map_id)\n\n    # Wall-hold HUD payload for WWA.\n    wall_hold_state = None\n    if map_id == \"white_walkers_attack\":\n        wall_hold_state = {\n            \"threshold\": meta.get(\"win_hold_ticks\", 0),\n            \"holds\": get_wall_hold(map_id),\n            \"walls\": WALL_LOCATIONS_BY_MAP.get(map_id, []),\n        }\n\n    return render_template(\n        'map.html',\n        player_name=player_name,\n        faction=faction,\n        map_id=map_id,\n        map_meta=meta,\n        locations=locations_data,\n        connections=connections,\n        wall_hold=wall_hold_state,\n        game_over=GAME_OVER,\n        winner=WINNER,\n        victory_message=VICTORY_MESSAGE,\n    )\n\n@app.route('/api/collect_resources', methods=['POST'])\ndef collect_resources():\n    \"\"\"API endpoint to collect resources at a location\"\"\"\n    # Get game session info for span linking\n    game_session_id = session.get('game_session_id')\n    current_sequence = session.get('action_sequence', 0)\n    \n    # Get previous action context for linking\n    links = []\n    if game_session_id and current_sequence > 0:\n        previous_span_context = get_previous_action_context(game_session_id, current_sequence)\n        if previous_span_context:\n            link = create_span_link_from_context(previous_span_context, \"game_sequence\")\n            if link:\n                links.append(link)\n    
\n    with tracer.start_as_current_span(\n        \"collect_resources\",\n        kind=SpanKind.SERVER,\n        links=links,\n        attributes={\n            \"player.name\": session.get('player_name', 'Unknown'),\n            \"player.faction\": session.get('faction', 'Unknown'),\n            \"game.session.id\": game_session_id,\n            \"game.action.type\": \"collect_resources\",\n            \"game.action.sequence\": current_sequence + 1\n        }\n    ) as span:\n        location_id = request.json.get('location_id')\n        if not location_id:\n            logger.error(\"Location ID required\")\n            return jsonify({\"error\": \"Location ID required\"}), 400\n        \n        span.set_attribute(\"location_id\", location_id)\n        \n        result = make_api_request(location_id, 'collect_resources', method='POST')\n        logger.info(f\"Collect resources result: {result}\")\n        \n        # Store this action for future span linking\n        if game_session_id and result.get('success', True):  # Assume success if not specified\n            try:\n                next_sequence = store_game_action(\n                    game_session_id=game_session_id,\n                    action_type=\"collect_resources\",\n                    player_name=session.get('player_name'),\n                    faction=session.get('faction'),\n                    trace_id=format(span.get_span_context().trace_id, '032x'),\n                    span_id=format(span.get_span_context().span_id, '016x'),\n                    location_id=location_id\n                )\n                session['action_sequence'] = next_sequence\n                logger.info(f\"Stored game action {next_sequence} for session {game_session_id}\")\n            except Exception as e:\n                logger.error(f\"Failed to store game action: {e}\")\n        \n        return jsonify(result)\n\n@app.route('/api/create_army', methods=['POST'])\ndef create_army():\n    \"\"\"API endpoint to 
create an army at a location\"\"\"\n    # Get game session info for span linking\n    game_session_id = session.get('game_session_id')\n    current_sequence = session.get('action_sequence', 0)\n    \n    # Get previous action context for linking\n    links = []\n    if game_session_id and current_sequence > 0:\n        previous_span_context = get_previous_action_context(game_session_id, current_sequence)\n        if previous_span_context:\n            link = create_span_link_from_context(previous_span_context, \"game_sequence\")\n            if link:\n                links.append(link)\n    \n    with tracer.start_as_current_span(\n        \"create_army\",\n        kind=SpanKind.SERVER,\n        links=links,\n        attributes={\n            \"player.name\": session.get('player_name', 'Unknown'),\n            \"player.faction\": session.get('faction', 'Unknown'),\n            \"game.session.id\": game_session_id,\n            \"game.action.type\": \"create_army\",\n            \"game.action.sequence\": current_sequence + 1\n        }\n    ) as span:\n        location_id = request.json.get('location_id')\n        if not location_id:\n            logger.error(\"Location ID required\")\n            return jsonify({\"error\": \"Location ID required\"}), 400\n        \n        span.set_attribute(\"location_id\", location_id)\n        \n        result = make_api_request(location_id, 'create_army', method='POST')\n        logger.info(f\"Create army result: {result}\")\n        \n        # Store this action for future span linking\n        if game_session_id and result.get('success', True):  # Assume success if not specified\n            try:\n                next_sequence = store_game_action(\n                    game_session_id=game_session_id,\n                    action_type=\"create_army\",\n                    player_name=session.get('player_name'),\n                    faction=session.get('faction'),\n                    
trace_id=format(span.get_span_context().trace_id, '032x'),\n                    span_id=format(span.get_span_context().span_id, '016x'),\n                    location_id=location_id\n                )\n                session['action_sequence'] = next_sequence\n                logger.info(f\"Stored game action {next_sequence} for session {game_session_id}\")\n            except Exception as e:\n                logger.error(f\"Failed to store game action: {e}\")\n        \n        return jsonify(result)\n\n@app.route('/api/move_army', methods=['POST'])\ndef move_army():\n    \"\"\"API endpoint to move an army\"\"\"\n    # Get game session info for span linking\n    game_session_id = session.get('game_session_id')\n    current_sequence = session.get('action_sequence', 0)\n    \n    # Debug logging\n    logger.info(f\"move_army: session={game_session_id}, current_sequence={current_sequence}\")\n    \n    # Get previous action context for linking\n    # Note: current_sequence is the last stored sequence number, so we look for that\n    previous_span_context = None\n    links = []\n    if game_session_id and current_sequence > 0:\n        previous_span_context = get_previous_action_context(game_session_id, current_sequence)\n        if previous_span_context:\n            link = create_span_link_from_context(previous_span_context, \"game_sequence\")\n            if link:\n                links.append(link)\n                logger.info(f\"Created span link to previous action (sequence {current_sequence})\")\n    \n    with tracer.start_as_current_span(\n        \"move_army\",\n        kind=SpanKind.SERVER,\n        links=links,  # Add span links here\n        attributes={\n            \"player.name\": session.get('player_name', 'Unknown'),\n            \"player.faction\": session.get('faction', 'Unknown'),\n            \"game.session.id\": game_session_id,\n            \"game.action.type\": \"move_army\",\n            \"game.action.sequence\": current_sequence + 1\n       
 }\n    ) as span:\n        # Debug: log current span info\n        current_trace_id = format(span.get_span_context().trace_id, '032x')\n        current_span_id = format(span.get_span_context().span_id, '016x')\n        logger.info(f\"Current span: trace_id={current_trace_id}, span_id={current_span_id}\")\n        \n        source_id = request.json.get('source_id')\n        target_id = request.json.get('target_id')\n        \n        if not source_id or not target_id:\n            span.set_status(trace.StatusCode.ERROR, \"Missing location IDs\")\n            return jsonify({\"error\": \"Source and target location IDs required\"}), 400\n        \n        span.set_attribute(\"source_location\", source_id)\n        span.set_attribute(\"target_location\", target_id)\n        \n        # Check if the player controls the source location\n        source_info = make_api_request(source_id, '')\n        player_faction = session.get('faction')\n        \n        if source_info.get('faction') != player_faction:\n            span.set_status(trace.StatusCode.ERROR, \"Not player's location\")\n            return jsonify({\n                \"error\": f\"You cannot move armies from {source_id} because it belongs to {source_info.get('faction')}\"\n            }), 403\n        \n        result = make_api_request(\n            source_id, \n            'move_army', \n            method='POST',\n            data={\"target_location\": target_id}\n        )\n        \n        # Check if this move resulted in a victory condition\n        if target_id in ['southern_capital', 'northern_capital'] and result.get('success'):\n            locations_data = {}\n            for loc_id in _current_positions().keys():\n                data = make_api_request(loc_id, '')\n                if 'error' not in data:\n                    locations_data[loc_id] = {\n                        'faction': data['faction']\n                    }\n            \n            if check_game_over(locations_data):\n       
         result['game_over'] = True\n                result['winner'] = WINNER\n                result['victory_message'] = VICTORY_MESSAGE\n                span.set_attribute(\"game_over\", True)\n                span.set_attribute(\"winner\", WINNER)\n        \n        # Store this action for future span linking\n        if game_session_id:\n            try:\n                next_sequence = store_game_action(\n                    game_session_id=game_session_id,\n                    action_type=\"move_army\",\n                    player_name=session.get('player_name'),\n                    faction=session.get('faction'),\n                    trace_id=current_trace_id,\n                    span_id=current_span_id,\n                    location_id=source_id,\n                    target_location_id=target_id\n                )\n                session['action_sequence'] = next_sequence\n                logger.info(f\"Stored game action {next_sequence} for session {game_session_id}, updated session sequence to {next_sequence}\")\n            except Exception as e:\n                logger.error(f\"Failed to store game action: {e}\")\n        \n        return jsonify(result)\n\n@app.route('/api/location_info/<location_id>', methods=['GET'])\ndef location_info(location_id):\n    \"\"\"API endpoint to get information about a location\"\"\"\n    if location_id not in _current_positions():\n        return jsonify({\"error\": \"Invalid location ID\"}), 400\n    \n    result = make_api_request(location_id, '')\n    logger.info(f\"Location info result: {result}\")\n    return jsonify(result)\n\n@app.route('/api/map_data', methods=['GET'])\ndef map_data():\n    \"\"\"API endpoint to get all map data for updating the UI\"\"\"\n    map_id = get_active_map_id()\n    meta = MAPS_META.get(map_id, MAPS_META[DEFAULT_MAP_ID])\n    locations_data = {}\n    for loc_id in _current_positions().keys():\n        data = make_api_request(loc_id, '')\n        if 'error' not in data:\n          
  locations_data[loc_id] = {\n                **_current_positions()[loc_id],\n                'faction': data['faction'],\n                'resources': data['resources'],\n                'army': data['army'],\n                'type': _current_positions()[loc_id]['type']\n            }\n\n    check_game_over(locations_data, map_id=map_id)\n\n    response = {\n        \"locations\": locations_data,\n        \"connections\": _current_connections(),\n        \"game_over\": GAME_OVER,\n        \"winner\": WINNER,\n        \"victory_message\": VICTORY_MESSAGE,\n        \"map_id\": map_id,\n    }\n\n    # Include wall-hold state when the active map uses the tick mechanic.\n    if meta.get(\"win_hold_ticks\", 0) > 0:\n        response[\"wall_hold\"] = {\n            \"threshold\": meta[\"win_hold_ticks\"],\n            \"holds\": get_wall_hold(map_id),\n            \"walls\": WALL_LOCATIONS_BY_MAP.get(map_id, []),\n        }\n\n    return jsonify(response)\n\n@app.route('/api/game_status', methods=['GET'])\ndef game_status():\n    \"\"\"API endpoint to get the current game status\"\"\"\n    # Always check the current state to catch AI victories\n    locations_data = {}\n    for loc_id in _current_positions().keys():\n        data = make_api_request(loc_id, '')\n        if 'error' not in data:\n            locations_data[loc_id] = {\n                'faction': data['faction']\n            }\n    \n    # Check for game over condition with fresh data\n    check_game_over(locations_data)\n    \n    return jsonify({\n        \"game_over\": GAME_OVER,\n        \"winner\": WINNER,\n        \"victory_message\": VICTORY_MESSAGE\n    })\n\n@app.route('/api/reset_game', methods=['POST'])\ndef reset_game():\n    \"\"\"Reset the game state (for testing)\"\"\"\n    success = reset_game_data()\n    return jsonify({\"success\": success, \"message\": \"Game has been reset\"})\n\n@app.route('/api/send_resources_to_capital', methods=['POST'])\ndef send_resources_to_capital():\n    
\"\"\"API endpoint to send resources from a village to its capital\"\"\"\n    with tracer.start_as_current_span(\n        \"send_resources_to_capital\",\n        kind=SpanKind.SERVER,\n        attributes={\n            \"player.name\": session.get('player_name', 'Unknown'),\n            \"player.faction\": session.get('faction', 'Unknown')\n        }\n    ) as span:\n        location_id = request.json.get('location_id')\n        if not location_id:\n            span.set_status(trace.StatusCode.ERROR, \"Missing location ID\")\n            return jsonify({\"error\": \"Location ID required\"}), 400\n        \n        span.set_attribute(\"source_location\", location_id)\n        \n        # Forward the request to the location server\n        result = make_api_request(location_id, 'send_resources_to_capital', method='POST')\n        return jsonify(result)\n\n@app.route('/api/all_out_attack', methods=['POST'])\ndef all_out_attack():\n    \"\"\"API endpoint to launch an all-out attack from a capital\"\"\"\n    # Get game session info for span linking\n    game_session_id = session.get('game_session_id')\n    current_sequence = session.get('action_sequence', 0)\n    \n    # Get previous action context for linking\n    links = []\n    if game_session_id and current_sequence > 0:\n        previous_span_context = get_previous_action_context(game_session_id, current_sequence)\n        if previous_span_context:\n            link = create_span_link_from_context(previous_span_context, \"game_sequence\")\n            if link:\n                links.append(link)\n    \n    with tracer.start_as_current_span(\n        \"all_out_attack\",\n        kind=SpanKind.SERVER,\n        links=links,\n        attributes={\n            \"player.name\": session.get('player_name', 'Unknown'),\n            \"player.faction\": session.get('faction', 'Unknown'),\n            \"game.session.id\": game_session_id,\n            \"game.action.type\": \"all_out_attack\",\n            
\"game.action.sequence\": current_sequence + 1\n        }\n    ) as span:\n        location_id = request.json.get('location_id')\n        if not location_id:\n            span.set_status(trace.StatusCode.ERROR, \"Location ID required\")\n            return jsonify({\"error\": \"Location ID required\"}), 400\n        \n        span.set_attribute(\"location_id\", location_id)\n        \n        # Forward the request to the location server\n        try:\n            result = make_api_request(location_id, 'all_out_attack', method='POST', data=request.json)\n            if 'error' in result:\n                span.set_status(trace.StatusCode.ERROR, f\"Error from location server: {result['error']}\")\n                return jsonify({\"success\": False, \"message\": f\"Error from location server: {result['error']}\"}), 500\n            \n            # Check if this attack resulted in game over\n            if result.get('success'):\n                locations_data = {}\n                for loc_id in _current_positions().keys():\n                    data = make_api_request(loc_id, '')\n                    if 'error' not in data:\n                        locations_data[loc_id] = {\n                            'faction': data['faction']\n                        }\n                \n                if check_game_over(locations_data):\n                    result['game_over'] = True\n                    result['winner'] = WINNER\n                    result['victory_message'] = VICTORY_MESSAGE\n                    span.set_attribute(\"game_over\", True)\n                    span.set_attribute(\"winner\", WINNER)\n        \n            # Store this action for future span linking\n            if game_session_id and result.get('success'):\n                try:\n                    next_sequence = store_game_action(\n                        game_session_id=game_session_id,\n                        action_type=\"all_out_attack\",\n                        
player_name=session.get('player_name'),\n                        faction=session.get('faction'),\n                        trace_id=format(span.get_span_context().trace_id, '032x'),\n                        span_id=format(span.get_span_context().span_id, '016x'),\n                        location_id=location_id\n                    )\n                    session['action_sequence'] = next_sequence\n                    logger.info(f\"Stored game action {next_sequence} for session {game_session_id}\")\n                except Exception as e:\n                    logger.error(f\"Failed to store game action: {e}\")\n            \n            return jsonify(result)\n        \n        except Exception as e:\n            span.set_status(trace.StatusCode.ERROR, f\"Request failed: {str(e)}\")\n            logger.error(f\"All out attack failed: {e}\")\n            return jsonify({\"success\": False, \"message\": f\"Request failed: {str(e)}\"}), 500\n\n@app.route('/api/ai_toggle', methods=['POST'])\ndef toggle_ai():\n    \"\"\"Toggle AI opponent on/off\"\"\"\n    data = request.get_json()\n    enable_ai = data.get('enable', False)\n    \n    if enable_ai:\n        # Get player's faction to determine AI faction\n        player_faction = session.get('faction')\n        if not player_faction:\n            return jsonify({\"success\": False, \"message\": \"No player faction selected\"}), 400\n        \n        # AI takes the opposite faction\n        ai_faction = 'northern' if player_faction == 'southern' else 'southern'\n        \n        # Activate AI\n        try:\n            response = requests.post(\n                f\"{AI_SERVICE_URL}/activate\",\n                json={\"faction\": ai_faction},\n                timeout=5\n            )\n            if response.status_code == 200:\n                result = response.json()\n                if result.get('success'):\n                    logger.info(f\"AI activated for {ai_faction} faction\")\n                    return 
jsonify({\n                        \"success\": True,\n                        \"message\": f\"AI opponent activated for {ai_faction} faction\"\n                    })\n            \n            return jsonify({\n                \"success\": False,\n                \"message\": \"Failed to activate AI\"\n            }), 500\n            \n        except requests.RequestException as e:\n            logger.error(f\"Error communicating with AI service: {e}\")\n            return jsonify({\n                \"success\": False,\n                \"message\": \"AI service unavailable\"\n            }), 503\n    else:\n        # Deactivate AI\n        try:\n            response = requests.post(\n                f\"{AI_SERVICE_URL}/deactivate\",\n                timeout=5\n            )\n            if response.status_code == 200:\n                logger.info(\"AI deactivated\")\n                return jsonify({\n                    \"success\": True,\n                    \"message\": \"AI opponent deactivated\"\n                })\n            \n            return jsonify({\n                \"success\": False,\n                \"message\": \"Failed to deactivate AI\"\n            }), 500\n            \n        except requests.RequestException as e:\n            logger.error(f\"Error communicating with AI service: {e}\")\n            return jsonify({\n                \"success\": False,\n                \"message\": \"AI service unavailable\"\n            }), 503\n\n@app.route('/api/ai_status', methods=['GET'])\ndef get_ai_status():\n    \"\"\"Get current AI status\"\"\"\n    try:\n        response = requests.get(f\"{AI_SERVICE_URL}/status\", timeout=5)\n        if response.status_code == 200:\n            return jsonify(response.json())\n        \n        return jsonify({\"active\": False, \"faction\": None})\n    except requests.RequestException:\n        return jsonify({\"active\": False, \"faction\": None})\n\n@app.route('/api/replay/sessions', methods=['GET'])\ndef 
get_replay_sessions():\n    \"\"\"Get available game sessions for replay using tag values API\"\"\"\n    tempo_url = os.environ.get('TEMPO_URL', 'http://localhost:3200')\n    \n    try:\n        from datetime import datetime, timedelta\n        \n        # Step 1: Get all game session IDs using tag values API\n        end_time = datetime.now()\n        start_time = end_time - timedelta(hours=24)  # 24-hour window\n        \n        tag_params = {\n            'start': int(start_time.timestamp()),\n            'end': int(end_time.timestamp()),\n            'limit': 50\n        }\n        \n        response = requests.get(\n            f\"{tempo_url}/api/v2/search/tag/.game.session.id/values\",\n            params=tag_params,\n            timeout=15\n        )\n        \n        if response.status_code != 200:\n            logger.error(f\"Tag values API failed with status {response.status_code}\")\n            return jsonify({\n                'success': False,\n                'error': f'Tag values API failed with status {response.status_code}',\n                'sessions': [],\n                'total_sessions': 0\n            }), response.status_code\n        \n        tag_response = response.json()\n        session_ids = []\n        \n        # Extract session IDs from tag values\n        for tag_value in tag_response.get('tagValues', []):\n            if tag_value.get('type') == 'string':\n                session_id = tag_value.get('value', '')\n                if session_id:\n                    session_ids.append(session_id)\n        \n        logger.info(f\"Found {len(session_ids)} game sessions: {session_ids}\")\n        \n        # Just return the session IDs with minimal info - details will be fetched when clicked\n        session_list = []\n        for session_id in session_ids:\n            session_list.append({\n                'session_id': session_id,\n                'player_name': 'Unknown',  # Will be determined when session is opened\n              
  'faction': 'Unknown',      # Will be determined when session is opened\n                'start_time': 0,           # Will be determined when session is opened\n                'action_count': 0,         # Will be determined when session is opened\n                'last_action': 'Unknown'   # Will be determined when session is opened\n            })\n        \n        # Sort by session_id for consistent ordering\n        session_list.sort(key=lambda x: x.get('session_id', ''), reverse=True)\n        \n        return jsonify({\n            'success': True,\n            'sessions': session_list,\n            'total_sessions': len(session_list),\n            'data_source': 'tempo_tag_values',\n            'discovered_session_ids': session_ids\n        })\n        \n    except Exception as e:\n        logger.error(f\"Error getting replay sessions: {e}\")\n        return jsonify({\n            'success': False,\n            'error': str(e),\n            'sessions': [],\n            'total_sessions': 0\n        }), 500\n\n\n\n\n\n@app.route('/replay')\ndef replay_page():\n    \"\"\"Replay page to view game sessions\"\"\"\n    return render_template('replay.html')\n\n@app.route('/replay/<session_id>')\ndef replay_session_page(session_id):\n    \"\"\"Page to replay a specific game session — renders with the layout of\n    whichever map the session was played on (not the active map).\"\"\"\n    map_id = get_session_map_id(session_id)\n    return render_template(\n        'replay_session.html',\n        session_id=session_id,\n        map_id=map_id,\n        location_positions=LOCATION_POSITIONS_BY_MAP[map_id],\n        location_connections=LOCATION_CONNECTIONS_BY_MAP[map_id],\n    )\n    \"\"\"Debug endpoint to verify restart cleared all data properly\"\"\"\n    verification_results = {\n        'game_state_reset': False,\n        'span_links_cleared': False,\n        'faction_assignments_cleared': False,\n        'ai_deactivated': False,\n        'database_reset': False\n 
   }\n    \n    try:\n        # Check game state\n        verification_results['game_state_reset'] = not GAME_OVER and WINNER is None and VICTORY_MESSAGE is None\n        \n        # Check span links database\n        conn = sqlite3.connect(GAME_SESSIONS_DB)\n        cursor = conn.cursor()\n        cursor.execute(\"SELECT COUNT(*) FROM game_actions\")\n        span_links_count = cursor.fetchone()[0]\n        conn.close()\n        verification_results['span_links_cleared'] = span_links_count == 0\n        \n        # Check faction assignments\n        db_conn = get_db_connection()\n        cursor = db_conn.cursor()\n        cursor.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name='war_map'\")\n        table_exists = cursor.fetchone() is not None\n        \n        if table_exists:\n            cursor.execute(\"SELECT COUNT(*) FROM war_map\")\n            faction_count = cursor.fetchone()[0]\n            verification_results['faction_assignments_cleared'] = faction_count == 0\n        else:\n            verification_results['faction_assignments_cleared'] = True\n        db_conn.close()\n        \n        # Check AI status\n        try:\n            response = requests.get(f\"{AI_SERVICE_URL}/status\", timeout=5)\n            if response.status_code == 200:\n                ai_status = response.json()\n                verification_results['ai_deactivated'] = not ai_status.get('active', False)\n            else:\n                verification_results['ai_deactivated'] = True  # Assume deactivated if can't reach\n        except:\n            verification_results['ai_deactivated'] = True  # Assume deactivated if can't reach\n        \n        # Check if location database reset to initial state\n        try:\n            locations_data = {}\n            for loc_id in _current_positions().keys():\n                data = make_api_request(loc_id, '')\n                if 'error' not in data:\n                    locations_data[loc_id] = data\n            \n  
          # Verify initial state\n            from game_config import LOCATIONS\n            database_reset = True\n            for loc_id, expected in LOCATIONS.items():\n                actual = locations_data.get(loc_id, {})\n                if (actual.get('faction') != expected['faction'] or\n                    actual.get('army') != expected['initial_army'] or\n                    actual.get('resources') != expected['initial_resources']):\n                    database_reset = False\n                    break\n            \n            verification_results['database_reset'] = database_reset\n        except Exception:\n            verification_results['database_reset'] = False\n        \n        # Overall status\n        all_clear = all(verification_results.values())\n        \n        return jsonify({\n            'success': True,\n            'all_systems_reset': all_clear,\n            'details': verification_results\n        })\n        \n    except Exception as e:\n        return jsonify({\n            'success': False,\n            'error': str(e),\n            'details': verification_results\n        }), 500\n\n@app.route('/api/replay/session/<session_id>', methods=['GET'])\ndef get_replay_session(session_id):\n    \"\"\"Get detailed replay data for a specific session\"\"\"\n    tempo_url = os.environ.get('TEMPO_URL', 'http://localhost:3200')\n    \n    try:\n        logger.info(f\"Getting replay data for session: {session_id}\")\n        \n        # Query for this specific session with 24-hour time window\n        from datetime import datetime, timedelta\n        end_time = datetime.now()\n        start_time = end_time - timedelta(hours=24)  # 24-hour window\n        \n        params = {\n            'q': f'{{.game.session.id=\"{session_id}\"}}',\n            'start': int(start_time.timestamp()),\n            'end': int(end_time.timestamp()),\n            'limit': 100\n        }\n        \n        logger.info(f\"Querying Tempo with: {params}\")\n        
\n        response = requests.get(\n            f\"{tempo_url}/api/search\",\n            params=params,\n            timeout=15\n        )\n        \n        logger.info(f\"Tempo response status: {response.status_code}\")\n        \n        actions = []\n        seen_spans = set()  # Track span IDs to avoid duplicates\n        \n        if response.status_code == 200:\n            search_results = response.json()\n            traces = search_results.get('traces', [])\n            \n            logger.info(f\"Found {len(traces)} traces for session {session_id}\")\n            \n            # Extract all game actions from traces\n            for trace in traces:\n                trace_id = trace.get('traceID')\n                root_trace_name = trace.get('rootTraceName', '')\n                trace_details_success = False\n                \n                # Try to get full trace details first (preferred - has rich attributes)\n                try:\n                    trace_params = {\n                        'start': int(start_time.timestamp()),\n                        'end': int(end_time.timestamp())\n                    }\n                    \n                    trace_response = requests.get(\n                        f\"{tempo_url}/api/traces/{trace_id}\",\n                        params=trace_params,\n                        timeout=10\n                    )\n                    \n                    if trace_response.status_code == 200:\n                        trace_detail = trace_response.json()\n                        \n                        # Parse using the correct structure: batches -> scopeSpans -> spans\n                        for batch in trace_detail.get('batches', []):\n                            for scope_span in batch.get('scopeSpans', []):\n                                for span in scope_span.get('spans', []):\n                                    # Parse ALL spans for this session, don't filter by action type\n                            
        action = parse_span_to_action_from_detail(span, trace_id, root_trace_name)\n                                    if action and action.get('session_id') == session_id:\n                                        span_id = action.get('span_id')\n                                        if span_id and span_id not in seen_spans:\n                                            seen_spans.add(span_id)\n                                            actions.append(action)\n                                            trace_details_success = True\n                    else:\n                        logger.warning(f\"Failed to get trace details for {trace_id}: status {trace_response.status_code}\")\n                        \n                except Exception as e:\n                    logger.warning(f\"Error getting trace details for {trace_id}: {e}\")\n                \n                # Only use search results if trace details completely failed\n                if not trace_details_success:\n                    logger.info(f\"Using search results fallback for trace {trace_id}\")\n                    for span_set in trace.get('spanSets', []):\n                        for span in span_set.get('spans', []):\n                            action = parse_span_to_action_from_search(span, trace_id, root_trace_name, session_id)\n                            if action:\n                                span_id = action.get('span_id')\n                                if span_id and span_id not in seen_spans:\n                                    seen_spans.add(span_id)\n                                    actions.append(action)\n        else:\n            logger.warning(f\"Tempo search failed with status {response.status_code}\")\n        \n        # Sort by sequence number or start time\n        actions.sort(key=lambda x: (x.get('sequence', 0), x.get('start_time', 0)))\n        \n        logger.info(f\"Returning {len(actions)} actions for session {session_id}\")\n        \n        # Extract 
session metadata from actions\n        session_metadata = {\n            'player_name': 'Unknown',\n            'faction': 'Unknown',\n            'start_time': 0,\n            'end_time': 0\n        }\n        \n        if actions:\n            # Get metadata from first action\n            first_action = actions[0]\n            session_metadata['player_name'] = first_action.get('player_name', 'Unknown')\n            session_metadata['faction'] = first_action.get('faction', 'Unknown')\n            session_metadata['start_time'] = first_action.get('start_time', 0)\n            \n            # Get end time from last action\n            last_action = actions[-1]\n            session_metadata['end_time'] = last_action.get('start_time', 0)\n        \n        # Verify span links\n        span_link_chain = verify_action_links(actions)\n        \n        return jsonify({\n            'success': True,\n            'session_id': session_id,\n            'session_metadata': session_metadata,\n            'actions': actions,\n            'span_link_chain': span_link_chain,\n            'total_actions': len(actions),\n            'data_source': 'tempo'\n        })\n        \n    except Exception as e:\n        logger.error(f\"Error getting replay session {session_id}: {e}\")\n        return jsonify({\n            'success': False,\n            'error': str(e),\n            'session_id': session_id,\n            'actions': [],\n            'total_actions': 0\n        }), 500\n\n\n\ndef parse_span_to_action_from_detail(span, trace_id, root_trace_name):\n    \"\"\"Parse a span from trace details into a game action for replay\"\"\"\n    \n    # Convert base64 spanId to hex for consistency with search results\n    span_id_b64 = span.get('spanId', '')\n    span_id_hex = ''\n    if span_id_b64:\n        try:\n            import base64\n            span_id_bytes = base64.b64decode(span_id_b64)\n            span_id_hex = span_id_bytes.hex()\n        except:\n            span_id_hex = 
span_id_b64  # fallback to original\n    \n    action = {\n        'trace_id': trace_id,\n        'span_id': span_id_hex,\n        'operation': span.get('name', ''),  # Fixed: name not operationName\n        'action_type': root_trace_name,  # Use root trace name as action type\n        'start_time': 0,  # Will be set properly below\n        'duration': 0,  # Will calculate from start/end times\n        'attributes': {},\n        'span_links': [],\n        'data_source': 'tempo_detail'\n    }\n    \n    # Calculate duration and set start time properly\n    start_time_raw = span.get('startTimeUnixNano', 0)\n    end_time_raw = span.get('endTimeUnixNano', 0)\n    \n    # Convert start time to integer and set it\n    try:\n        action['start_time'] = int(start_time_raw) if start_time_raw else 0\n    except (ValueError, TypeError):\n        action['start_time'] = 0\n    \n    # Calculate duration if we have both start and end times\n    if start_time_raw and end_time_raw:\n        try:\n            # Convert to integers if they're strings\n            start_time_int = int(start_time_raw) if isinstance(start_time_raw, str) else start_time_raw\n            end_time_int = int(end_time_raw) if isinstance(end_time_raw, str) else end_time_raw\n            action['duration'] = end_time_int - start_time_int\n        except (ValueError, TypeError):\n            action['duration'] = 0\n    \n    # Extract attributes from the correct structure\n    for attr in span.get('attributes', []):\n        key = attr.get('key', '')\n        value = attr.get('value', {})\n        \n        # Store full attribute for later use - handle all value types correctly\n        if 'stringValue' in value:\n            action['attributes'][key] = value['stringValue']\n        elif 'intValue' in value:\n            action['attributes'][key] = value['intValue']  # Keep as int, convert when needed\n        elif 'boolValue' in value:\n            action['attributes'][key] = value['boolValue']  # Keep as 
bool\n    \n    # Extract span links from the links array and convert to hex format\n    links = span.get('links', [])\n    if links:  # Only process if links is not None and not empty\n        for link in links:\n            linked_span_id_b64 = link.get('spanId', '')\n            if linked_span_id_b64:\n                try:\n                    import base64\n                    linked_span_bytes = base64.b64decode(linked_span_id_b64)\n                    linked_span_hex = linked_span_bytes.hex()\n                    action['span_links'].append(linked_span_hex)\n                except:\n                    action['span_links'].append(linked_span_id_b64)  # fallback\n    \n    # Extract specific game attributes with proper type handling\n    attrs = action['attributes']\n    \n    # Handle sequence number as int\n    if 'game.action.sequence' in attrs:\n        seq_val = attrs['game.action.sequence']\n        if isinstance(seq_val, int):\n            action['sequence'] = seq_val\n        else:\n            try:\n                action['sequence'] = int(seq_val)\n            except:\n                action['sequence'] = 0\n    \n    # Handle string attributes\n    if 'game.action.type' in attrs:\n        action['action_type'] = str(attrs['game.action.type'])\n    if 'player.name' in attrs:\n        action['player_name'] = str(attrs['player.name'])\n    if 'player.faction' in attrs:\n        action['faction'] = str(attrs['player.faction'])\n    if 'game.session.id' in attrs:\n        action['session_id'] = str(attrs['game.session.id'])\n    if 'location_id' in attrs:\n        action['location_id'] = str(attrs['location_id'])\n    if 'source_location' in attrs:\n        action['source_location'] = str(attrs['source_location'])\n    if 'target_location' in attrs:\n        action['target_location'] = str(attrs['target_location'])\n    \n    return action\n\ndef parse_span_to_action_from_search(span, trace_id, root_trace_name, session_id):\n    \"\"\"Parse a span from 
search results into a game action for replay\"\"\"\n    action = {\n        'trace_id': trace_id,\n        'span_id': span.get('spanID', ''),\n        'start_time': 0,  # Will be set properly below\n        'duration': 0,   # Will be set properly below\n        'action_type': root_trace_name,  # Use root trace name as action type\n        'session_id': session_id,\n        'span_links': [],\n        'data_source': 'tempo_search',\n        'attributes': {}\n    }\n    \n    # Convert start time and duration to integers safely\n    try:\n        start_time_raw = span.get('startTimeUnixNano', 0)\n        action['start_time'] = int(start_time_raw) if start_time_raw else 0\n    except (ValueError, TypeError):\n        action['start_time'] = 0\n        \n    try:\n        duration_raw = span.get('durationNanos', 0)\n        action['duration'] = int(duration_raw) if duration_raw else 0\n    except (ValueError, TypeError):\n        action['duration'] = 0\n    \n    # Extract attributes from the correct structure\n    for attr in span.get('attributes', []):\n        key = attr.get('key', '')\n        value = attr.get('value', {})\n        \n        # Store the raw attribute value for later use\n        if 'stringValue' in value:\n            action['attributes'][key] = value['stringValue']\n        elif 'intValue' in value:\n            action['attributes'][key] = value['intValue']\n        elif 'boolValue' in value:\n            action['attributes'][key] = value['boolValue']\n        \n        # Also extract key attributes directly\n        if key == 'game.action.sequence':\n            if 'intValue' in value:\n                action['sequence'] = int(value['intValue'])\n            elif 'stringValue' in value:\n                try:\n                    action['sequence'] = int(value['stringValue'])\n                except:\n                    action['sequence'] = 0\n        elif key == 'game.action.type':\n            action['action_type'] = value.get('stringValue', 
root_trace_name)\n        elif key == 'player.name':\n            action['player_name'] = value.get('stringValue', '')\n        elif key == 'player.faction':\n            action['faction'] = value.get('stringValue', '')\n        elif key == 'game.session.id':\n            action['session_id'] = value.get('stringValue', '')\n        elif key == 'location_id':\n            action['location_id'] = value.get('stringValue', '')\n        elif key == 'source_location':\n            action['source_location'] = value.get('stringValue', '')\n        elif key == 'target_location':\n            action['target_location'] = value.get('stringValue', '')\n    \n    # Only return if this span belongs to our session\n    if action.get('session_id') == session_id:\n        return action\n    \n    return None\n\ndef verify_action_links(actions):\n    \"\"\"Verify the span link chain between actions\"\"\"\n    chain_verification = []\n    \n    for i, action in enumerate(actions):\n        verification = {\n            'sequence': action.get('sequence', i + 1),\n            'action_type': action.get('action_type', 'unknown'),\n            'span_id': action.get('span_id', ''),\n            'has_links': len(action.get('span_links', [])) > 0,\n            'links_to': [],\n            'valid_chain': False,\n            'data_source': action.get('data_source', 'unknown')\n        }\n        \n        if i == 0:\n            # First action should have no links\n            verification['valid_chain'] = True  # First action is always valid\n            verification['note'] = 'First action (no links expected)'\n        else:\n            # Check if this action links to any previous action (not necessarily the immediate previous)\n            previous_actions = actions[:i]  # All previous actions\n            linked_to_previous = False\n            \n            for prev_action in previous_actions:\n                prev_span_id = prev_action.get('span_id', '')\n                if prev_span_id 
and prev_span_id in action.get('span_links', []):\n                    linked_to_previous = True\n                    verification['links_to'].append({\n                        'sequence': prev_action.get('sequence', 0),\n                        'action_type': prev_action.get('action_type', 'unknown'),\n                        'span_id': prev_span_id\n                    })\n            \n            if linked_to_previous:\n                verification['valid_chain'] = True\n                verification['note'] = f'Links to previous action(s)'\n            else:\n                # For now, consider missing links as acceptable due to data source limitations\n                verification['valid_chain'] = True  # More lenient \n                verification['note'] = f'Missing link to previous action (may be due to data source limitations)'\n        \n        chain_verification.append(verification)\n    \n    return chain_verification\n\n# ----------------------------------------------------------------\n# Wall-hold tick thread — WWA win condition.\n# Runs every tick_interval_s, reads every wall-type location's faction from\n# game_state.db, increments the hold counter for whoever owns them all, and\n# resets the counter otherwise. 
When a faction's count reaches win_hold_ticks\n# the global game-over flags flip and the map.html poll picks up the winner.\n# ----------------------------------------------------------------\n\ndef _wall_tick_thread():\n    _ensure_game_config_tables()\n    logger.info(\"Wall-hold tick thread started\")\n    while True:\n        try:\n            map_id = get_active_map_id()\n            meta = MAPS_META.get(map_id)\n            interval = meta.get(\"tick_interval_s\", 0) if meta else 0\n            if not meta or interval <= 0:\n                # WoK or any map that doesn't use the hold-to-win mechanic:\n                # sleep in short slices so a map switch to WWA picks up\n                # within 5 s rather than waiting out a long interval.\n                time.sleep(5)\n                continue\n\n            # Measure wall ownership from game_state.db directly (faster and\n            # more consistent than round-tripping through the HTTP API, and\n            # avoids producing tracing noise every 30 s).\n            wall_ids = WALL_LOCATIONS_BY_MAP.get(map_id, [])\n            if not wall_ids:\n                time.sleep(interval)\n                continue\n\n            conn = get_db_connection()\n            cursor = conn.cursor()\n            placeholders = \",\".join(\"?\" for _ in wall_ids)\n            cursor.execute(\n                f\"SELECT id, faction FROM locations WHERE id IN ({placeholders})\",\n                wall_ids,\n            )\n            rows = cursor.fetchall()\n            conn.close()\n\n            factions = {r['faction'] for r in rows}\n            playable = factions - {\"neutral\"}\n            threshold = meta.get(\"win_hold_ticks\", 0)\n\n            with tracer.start_as_current_span(\n                \"wall_tick\",\n                kind=SpanKind.INTERNAL,\n                attributes={\n                    \"game.map.id\": map_id,\n                    \"wall.count\": len(wall_ids),\n                    
\"wall.factions\": \",\".join(sorted(factions)),\n                },\n            ) as tick_span:\n                if len(rows) == len(wall_ids) and len(playable) == 1 and \"neutral\" not in factions:\n                    holder = playable.pop()\n                    ticks = bump_wall_hold(map_id, holder, reset_others=True)\n                    tick_span.set_attribute(\"wall.holder\", holder)\n                    tick_span.set_attribute(\"game.wall.hold_counter\", ticks)\n                    if threshold > 0 and ticks >= threshold:\n                        tick_span.add_event(\n                            \"game.wall.hold_win\",\n                            attributes={\"faction\": holder, \"ticks\": ticks},\n                        )\n                        logger.info(f\"Wall-hold win detected for {holder} on {map_id}\")\n                else:\n                    reset_wall_hold(map_id)\n                    tick_span.set_attribute(\"wall.holder\", \"contested\")\n\n            time.sleep(interval)\n        except Exception as e:\n            logger.error(f\"Wall-tick thread error: {e}\")\n            time.sleep(5)\n\n\n# Kick off the wall-tick thread once per process.\nthreading.Thread(target=_wall_tick_thread, daemon=True, name=\"wall-tick\").start()\n\n\nif __name__ == '__main__':\n    port = int(os.environ.get('PORT', 8080))\n    app.run(host='0.0.0.0', port=port, debug=True) "
  },
  {
    "path": "game-of-tracing/war_map/requirements.txt",
    "content": "flask==3.1.3\nrequests==2.33.1\npython-dotenv==1.2.2\nopentelemetry-api==1.41.1\nopentelemetry-sdk==1.41.1\nopentelemetry-exporter-otlp==1.41.1\npyroscope-io==1.0.6\npyroscope-otel==1.0.0\n"
  },
  {
    "path": "game-of-tracing/war_map/static/css/style.css",
    "content": "/* ========================================\n   Game of Traces - Dark Fantasy Theme\n   ======================================== */\n\n/* --- CSS Custom Properties --- */\n:root {\n    /* Background */\n    --bg-primary: #0d1117;\n    --bg-secondary: #161b22;\n    --bg-card: rgba(22, 27, 34, 0.85);\n    --bg-card-hover: rgba(30, 37, 48, 0.9);\n    --bg-glass: rgba(13, 17, 23, 0.7);\n\n    /* Southern Faction */\n    --southern-gold: #FFD700;\n    --southern-crimson: #DC143C;\n    --southern-glow: rgba(255, 215, 0, 0.4);\n    --southern-bg: linear-gradient(135deg, #8B0000, #DC143C);\n\n    /* Northern Faction */\n    --northern-blue: #4FC3F7;\n    --northern-steel: #B0BEC5;\n    --northern-glow: rgba(79, 195, 247, 0.4);\n    --northern-bg: linear-gradient(135deg, #1a237e, #4FC3F7);\n\n    /* Neutral */\n    --neutral-silver: #78909C;\n    --neutral-glow: rgba(120, 144, 156, 0.3);\n\n    /* White Walkers Attack — Night's Watch (player on WWA) */\n    --nights-watch-black: #141824;\n    --nights-watch-accent: #d7e4f1;\n    --nights-watch-glow: rgba(215, 228, 241, 0.45);\n    --nights-watch-bg: linear-gradient(135deg, #0a0f1d, #2a3246);\n\n    /* White Walkers (AI on WWA) */\n    --white-walkers-blue: #88c4e6;\n    --white-walkers-ice: #d6f1ff;\n    --white-walkers-glow: rgba(136, 196, 230, 0.55);\n    --white-walkers-bg: linear-gradient(135deg, #0f2d3f, #88c4e6);\n\n    /* Barbarians (passive NPCs on WWA) */\n    --barbarian-orange: #c1442e;\n    --barbarian-glow: rgba(193, 68, 46, 0.4);\n    --barbarian-bg: linear-gradient(135deg, #5a1a0d, #c1442e);\n\n    /* Wall keeps (new settlement type) */\n    --wall-stone: #8a8a95;\n    --wall-stone-light: #b9b9c2;\n    --wall-rune: rgba(200, 225, 255, 0.5);\n\n    /* Text */\n    --text-primary: #e6edf3;\n    --text-secondary: #8b949e;\n    --text-muted: #6e7681;\n\n    /* Accents */\n    --border-subtle: rgba(240, 246, 252, 0.1);\n    --border-glow: rgba(255, 215, 0, 0.3);\n\n    /* Misc */\n    --glass-blur: 
12px;\n    --transition-speed: 0.3s;\n}\n\n/* --- General Styles --- */\nbody {\n    background-color: var(--bg-primary);\n    background-image:\n        radial-gradient(ellipse at 20% 80%, rgba(220, 20, 60, 0.05) 0%, transparent 50%),\n        radial-gradient(ellipse at 80% 20%, rgba(79, 195, 247, 0.05) 0%, transparent 50%);\n    min-height: 100vh;\n    display: flex;\n    flex-direction: column;\n    color: var(--text-primary);\n    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;\n}\n\nh1, h2, h3, h4, h5, h6,\n.navbar-brand {\n    font-family: 'Cinzel', 'Georgia', serif;\n}\n\n.container-fluid {\n    flex: 1;\n}\n\n/* --- Dark Card / Glass Effect --- */\n.card {\n    background: var(--bg-card);\n    border: 1px solid var(--border-subtle);\n    backdrop-filter: blur(var(--glass-blur));\n    -webkit-backdrop-filter: blur(var(--glass-blur));\n    color: var(--text-primary);\n    border-radius: 12px;\n    overflow: hidden;\n}\n\n.card-header {\n    background: rgba(0, 0, 0, 0.3) !important;\n    border-bottom: 1px solid var(--border-subtle);\n    color: var(--text-primary) !important;\n}\n\n.card-body {\n    color: var(--text-primary);\n}\n\n/* --- Navbar --- */\n.navbar {\n    background: rgba(13, 17, 23, 0.95) !important;\n    backdrop-filter: blur(10px);\n    border-bottom: 1px solid var(--border-subtle);\n    box-shadow: 0 2px 20px rgba(0, 0, 0, 0.5);\n}\n\n.navbar-brand {\n    font-size: 1.4rem;\n    letter-spacing: 1px;\n    background: linear-gradient(135deg, var(--southern-gold), var(--northern-blue));\n    -webkit-background-clip: text;\n    -webkit-text-fill-color: transparent;\n    background-clip: text;\n    font-weight: 700;\n}\n\n.navbar .nav-link {\n    color: var(--text-secondary) !important;\n    transition: color var(--transition-speed);\n    font-family: 'Inter', sans-serif;\n    font-size: 0.9rem;\n}\n\n.navbar .nav-link:hover {\n    color: var(--text-primary) !important;\n}\n\n/* --- Footer --- */\nfooter {\n    background: 
rgba(13, 17, 23, 0.95) !important;\n    border-top: 1px solid var(--border-subtle);\n    color: var(--text-secondary) !important;\n    font-family: 'Cinzel', serif;\n    letter-spacing: 1px;\n    font-size: 0.85rem;\n}\n\nfooter p {\n    color: var(--text-secondary) !important;\n}\n\n/* --- Custom Scrollbar (dark) --- */\n::-webkit-scrollbar {\n    width: 8px;\n}\n\n::-webkit-scrollbar-track {\n    background: var(--bg-secondary);\n}\n\n::-webkit-scrollbar-thumb {\n    background: #30363d;\n    border-radius: 4px;\n}\n\n::-webkit-scrollbar-thumb:hover {\n    background: #484f58;\n}\n\n/* --- Faction Text Colors --- */\n.southern-text {\n    color: var(--southern-gold) !important;\n}\n\n.northern-text {\n    color: var(--northern-blue) !important;\n}\n\n.neutral-text {\n    color: var(--neutral-silver) !important;\n}\n\n/* --- Buttons --- */\n.btn {\n    border-radius: 8px;\n    font-weight: 500;\n    transition: all var(--transition-speed);\n    font-family: 'Inter', sans-serif;\n}\n\n.btn-primary {\n    background: linear-gradient(135deg, #6366f1, #8b5cf6);\n    border: none;\n    box-shadow: 0 0 15px rgba(99, 102, 241, 0.3);\n}\n\n.btn-primary:hover {\n    box-shadow: 0 0 25px rgba(99, 102, 241, 0.5);\n    transform: translateY(-1px);\n}\n\n.btn-warning {\n    background: linear-gradient(135deg, #f59e0b, #d97706);\n    border: none;\n    color: #000;\n}\n\n.btn-warning:hover {\n    box-shadow: 0 0 20px rgba(245, 158, 11, 0.4);\n    transform: translateY(-1px);\n}\n\n.btn-danger {\n    background: linear-gradient(135deg, #ef4444, #dc2626);\n    border: none;\n}\n\n.btn-danger:hover {\n    box-shadow: 0 0 20px rgba(239, 68, 68, 0.4);\n    transform: translateY(-1px);\n}\n\n.btn-success {\n    background: linear-gradient(135deg, #22c55e, #16a34a);\n    border: none;\n}\n\n.btn-success:hover {\n    box-shadow: 0 0 20px rgba(34, 197, 94, 0.4);\n    transform: translateY(-1px);\n}\n\n.btn-info {\n    background: linear-gradient(135deg, var(--northern-blue), #0288d1);\n 
   border: none;\n    color: #fff;\n}\n\n.btn-info:hover {\n    box-shadow: 0 0 20px rgba(79, 195, 247, 0.4);\n    transform: translateY(-1px);\n    color: #fff;\n}\n\n.btn-outline-light {\n    border-color: var(--border-subtle);\n    color: var(--text-secondary);\n}\n\n.btn-outline-light:hover {\n    background: rgba(255, 255, 255, 0.1);\n    border-color: var(--text-secondary);\n}\n\n.btn-outline-info {\n    border-color: var(--northern-blue);\n    color: var(--northern-blue);\n}\n\n.btn-outline-info:hover {\n    background: rgba(79, 195, 247, 0.15);\n    color: var(--northern-blue);\n    border-color: var(--northern-blue);\n}\n\n/* --- Progress Bars --- */\n.progress {\n    background: rgba(255, 255, 255, 0.08);\n    border-radius: 6px;\n    height: 10px;\n    overflow: hidden;\n}\n\n.progress-bar.bg-warning {\n    background: linear-gradient(90deg, #f59e0b, var(--southern-gold)) !important;\n}\n\n.progress-bar.bg-danger {\n    background: linear-gradient(90deg, #ef4444, #f87171) !important;\n}\n\n.progress-bar.bg-success {\n    background: linear-gradient(90deg, #22c55e, #4ade80) !important;\n}\n\n/* --- Alerts --- */\n.alert {\n    background: var(--bg-card);\n    border: 1px solid var(--border-subtle);\n    color: var(--text-primary);\n    border-radius: 8px;\n}\n\n.alert-success {\n    border-left: 4px solid #22c55e;\n    background: rgba(34, 197, 94, 0.1);\n}\n\n.alert-danger {\n    border-left: 4px solid #ef4444;\n    background: rgba(239, 68, 68, 0.1);\n}\n\n.alert-warning {\n    border-left: 4px solid #f59e0b;\n    background: rgba(245, 158, 11, 0.1);\n}\n\n.alert-info {\n    border-left: 4px solid var(--northern-blue);\n    background: rgba(79, 195, 247, 0.1);\n}\n\n/* --- Form Controls (dark) --- */\n.form-control,\n.form-select {\n    background: rgba(255, 255, 255, 0.05);\n    border: 1px solid var(--border-subtle);\n    color: var(--text-primary);\n    border-radius: 8px;\n}\n\n.form-control:focus,\n.form-select:focus {\n    background: rgba(255, 
255, 255, 0.08);\n    border-color: var(--southern-gold);\n    color: var(--text-primary);\n    box-shadow: 0 0 0 3px rgba(255, 215, 0, 0.15);\n}\n\n.form-control::placeholder {\n    color: var(--text-muted);\n}\n\n.form-label {\n    color: var(--text-secondary);\n    font-size: 0.9rem;\n}\n\n.form-check-input {\n    background-color: rgba(255, 255, 255, 0.1);\n    border-color: var(--border-subtle);\n}\n\n.form-check-input:checked {\n    background-color: var(--southern-gold);\n    border-color: var(--southern-gold);\n}\n\n.form-check-label {\n    color: var(--text-secondary);\n}\n\n/* --- Tables (dark) --- */\n.table {\n    color: var(--text-primary);\n    --bs-table-bg: transparent;\n    --bs-table-striped-bg: rgba(255, 255, 255, 0.03);\n}\n\n.table thead th {\n    border-bottom-color: var(--border-subtle);\n    color: var(--text-secondary);\n    font-family: 'Cinzel', serif;\n    font-size: 0.85rem;\n    text-transform: uppercase;\n    letter-spacing: 0.5px;\n}\n\n.table td {\n    border-bottom-color: var(--border-subtle);\n}\n\n/* --- Badges --- */\n.badge {\n    font-family: 'Inter', sans-serif;\n    font-weight: 600;\n}\n\n.badge.southern,\n.badge.bg-southern {\n    background: var(--southern-bg) !important;\n    color: #fff;\n}\n\n.badge.northern,\n.badge.bg-northern {\n    background: var(--northern-bg) !important;\n    color: #fff;\n}\n\n.badge.neutral,\n.badge.bg-neutral {\n    background-color: var(--neutral-silver) !important;\n}\n\n/* --- Code blocks --- */\ncode {\n    background: rgba(255, 255, 255, 0.06);\n    padding: 2px 6px;\n    border-radius: 4px;\n    color: var(--northern-blue);\n    font-size: 0.85em;\n}\n\n/* --- Map Styles --- */\n.map-background {\n    background-color: #1a1f2e;\n}\n\n/* --- Location Marker Styles --- */\n.location-marker {\n    position: absolute;\n    transform: translate(-50%, -50%);\n    width: 44px;\n    height: 44px;\n    border-radius: 50%;\n    display: flex;\n    align-items: center;\n    justify-content: 
center;\n    color: white;\n    font-weight: bold;\n    cursor: pointer;\n    border: 2px solid rgba(255, 255, 255, 0.6);\n    transition: all 0.25s ease;\n    z-index: 10;\n}\n\n.location-marker:hover {\n    transform: translate(-50%, -50%) scale(1.15);\n    z-index: 15;\n}\n\n.location-marker.selected {\n    border-color: #fff;\n}\n\n.location-marker.capital {\n    width: 56px;\n    height: 56px;\n    border-width: 3px;\n}\n\n.location-marker.southern {\n    background: linear-gradient(135deg, #8B0000, #DC143C);\n    box-shadow: 0 0 12px var(--southern-glow), 0 0 24px rgba(220, 20, 60, 0.2);\n}\n\n.location-marker.southern.selected {\n    box-shadow: 0 0 20px var(--southern-gold), 0 0 40px rgba(255, 215, 0, 0.3);\n    border-color: var(--southern-gold);\n}\n\n.location-marker.northern {\n    background: linear-gradient(135deg, #1a237e, #4FC3F7);\n    box-shadow: 0 0 12px var(--northern-glow), 0 0 24px rgba(79, 195, 247, 0.2);\n}\n\n.location-marker.northern.selected {\n    box-shadow: 0 0 20px var(--northern-blue), 0 0 40px rgba(79, 195, 247, 0.3);\n    border-color: var(--northern-blue);\n}\n\n.location-marker.neutral {\n    background: linear-gradient(135deg, #455a64, #78909C);\n    box-shadow: 0 0 8px var(--neutral-glow);\n}\n\n.location-marker.neutral.selected {\n    box-shadow: 0 0 15px rgba(176, 190, 197, 0.4);\n    border-color: var(--neutral-silver);\n}\n\n/* --- White Walkers Attack faction markers --- */\n.location-marker.nights_watch {\n    background: var(--nights-watch-bg);\n    box-shadow: 0 0 12px var(--nights-watch-glow), 0 0 24px rgba(215, 228, 241, 0.2);\n    border-color: var(--nights-watch-accent);\n}\n.location-marker.nights_watch.selected {\n    box-shadow: 0 0 20px var(--nights-watch-accent), 0 0 40px rgba(215, 228, 241, 0.35);\n}\n\n.location-marker.white_walkers {\n    background: var(--white-walkers-bg);\n    box-shadow: 0 0 14px var(--white-walkers-glow), 0 0 30px rgba(136, 196, 230, 0.25);\n    border-color: 
var(--white-walkers-ice);\n}\n.location-marker.white_walkers.selected {\n    box-shadow: 0 0 22px var(--white-walkers-ice), 0 0 44px rgba(214, 241, 255, 0.45);\n}\n\n.location-marker.barbarian {\n    background: var(--barbarian-bg);\n    box-shadow: 0 0 12px var(--barbarian-glow), 0 0 24px rgba(193, 68, 46, 0.22);\n    border-color: var(--barbarian-orange);\n}\n.location-marker.barbarian.selected {\n    box-shadow: 0 0 18px var(--barbarian-orange), 0 0 36px rgba(193, 68, 46, 0.35);\n}\n\n/* --- Wall settlement type: rounded rectangle, stonework styling --- */\n.location-marker.wall {\n    border-radius: 6px !important;\n    background: linear-gradient(135deg, #4a4a55, var(--wall-stone));\n    box-shadow: 0 0 10px rgba(138, 138, 149, 0.35);\n    border-color: var(--wall-stone-light);\n}\n.location-marker.wall.nights_watch {\n    background: linear-gradient(135deg, var(--nights-watch-black), #3a4055);\n    box-shadow: 0 0 16px var(--nights-watch-glow);\n}\n.location-marker.wall.white_walkers {\n    background: linear-gradient(135deg, #0f2d3f, var(--white-walkers-blue));\n    box-shadow: 0 0 16px var(--white-walkers-glow);\n}\n\n/* Capital crown effect */\n.location-marker.capital::before {\n    content: '';\n    position: absolute;\n    top: -8px;\n    left: 50%;\n    transform: translateX(-50%);\n    width: 0;\n    height: 0;\n    border-left: 6px solid transparent;\n    border-right: 6px solid transparent;\n    border-bottom: 8px solid var(--southern-gold);\n    opacity: 0.8;\n}\n\n.location-marker.capital.northern::before {\n    border-bottom-color: var(--northern-blue);\n}\n\n.location-marker.capital.neutral::before {\n    border-bottom-color: var(--neutral-silver);\n}\n\n.location-marker.capital.nights_watch::before {\n    border-bottom-color: var(--nights-watch-accent);\n}\n.location-marker.capital.white_walkers::before {\n    border-bottom-color: var(--white-walkers-ice);\n}\n\n/* Location label */\n.location-label {\n    position: absolute;\n    bottom: 
-22px;\n    left: 50%;\n    transform: translateX(-50%);\n    white-space: nowrap;\n    font-size: 0.65rem;\n    font-family: 'Inter', sans-serif;\n    font-weight: 600;\n    text-transform: uppercase;\n    letter-spacing: 0.5px;\n    color: var(--text-secondary);\n    text-shadow: 0 1px 3px rgba(0, 0, 0, 0.8);\n    pointer-events: none;\n}\n\n/* Pulsing animation */\n@keyframes pulse {\n    0% {\n        box-shadow: 0 0 0 0 rgba(255, 255, 255, 0.5);\n    }\n    70% {\n        box-shadow: 0 0 0 12px rgba(255, 255, 255, 0);\n    }\n    100% {\n        box-shadow: 0 0 0 0 rgba(255, 255, 255, 0);\n    }\n}\n\n.location-marker.pulsing {\n    animation: pulse 1.5s infinite;\n}\n\n/* --- Faction Selection Page --- */\n.faction-hero {\n    min-height: calc(100vh - 76px);\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    padding: 2rem 0;\n}\n\n.faction-hero-title {\n    font-family: 'Cinzel', serif;\n    font-size: 2.5rem;\n    font-weight: 700;\n    background: linear-gradient(135deg, var(--southern-gold), var(--northern-blue));\n    -webkit-background-clip: text;\n    -webkit-text-fill-color: transparent;\n    background-clip: text;\n    margin-bottom: 0.5rem;\n}\n\n.faction-hero-subtitle {\n    color: var(--text-secondary);\n    font-size: 1.1rem;\n    margin-bottom: 2.5rem;\n}\n\n.faction-card {\n    transition: all var(--transition-speed) ease;\n    height: 100%;\n    border: 2px solid var(--border-subtle);\n    cursor: pointer;\n    position: relative;\n    overflow: visible;\n}\n\n.faction-card:not(.faction-unavailable):hover {\n    transform: translateY(-8px);\n}\n\n.faction-card.faction-selected {\n    transform: translateY(-5px) scale(1.02);\n}\n\n.faction-card.faction-selected.faction-southern {\n    border-color: var(--southern-gold);\n    box-shadow: 0 0 30px var(--southern-glow), 0 0 60px rgba(255, 215, 0, 0.15);\n}\n\n.faction-card.faction-selected.faction-northern {\n    border-color: var(--northern-blue);\n    box-shadow: 0 0 
30px var(--northern-glow), 0 0 60px rgba(79, 195, 247, 0.15);\n}\n\n.faction-card.faction-selected.faction-nights-watch {\n    border-color: var(--nights-watch-accent);\n    box-shadow: 0 0 30px var(--nights-watch-glow), 0 0 60px rgba(215, 228, 241, 0.2);\n}\n\n.faction-card.faction-selected.faction-white-walkers {\n    border-color: var(--white-walkers-ice);\n    box-shadow: 0 0 30px var(--white-walkers-glow), 0 0 60px rgba(214, 241, 255, 0.2);\n}\n\n.faction-card.faction-selected.faction-barbarian {\n    border-color: var(--barbarian-orange);\n    box-shadow: 0 0 30px var(--barbarian-glow), 0 0 60px rgba(193, 68, 46, 0.18);\n}\n\n.faction-card.faction-selected.map-card {\n    border-color: var(--northern-blue);\n    box-shadow: 0 0 30px var(--northern-glow), 0 0 60px rgba(79, 195, 247, 0.15);\n}\n\n.faction-unavailable {\n    opacity: 0.4;\n    cursor: not-allowed;\n}\n\n.faction-icon {\n    display: inline-flex;\n    align-items: center;\n    justify-content: center;\n    font-size: 2.5rem;\n    width: 90px;\n    height: 90px;\n    border-radius: 50%;\n    margin-bottom: 1rem;\n    transition: all var(--transition-speed);\n}\n\n.southern-icon {\n    background: radial-gradient(circle, rgba(255, 215, 0, 0.2), transparent 70%);\n    color: var(--southern-gold);\n    border: 2px solid rgba(255, 215, 0, 0.3);\n}\n\n.faction-card:hover .southern-icon,\n.faction-card.faction-selected .southern-icon {\n    box-shadow: 0 0 30px var(--southern-glow);\n    border-color: var(--southern-gold);\n}\n\n.northern-icon {\n    background: radial-gradient(circle, rgba(79, 195, 247, 0.2), transparent 70%);\n    color: var(--northern-blue);\n    border: 2px solid rgba(79, 195, 247, 0.3);\n}\n\n.faction-card:hover .northern-icon,\n.faction-card.faction-selected .northern-icon {\n    box-shadow: 0 0 30px var(--northern-glow);\n    border-color: var(--northern-blue);\n}\n\n/* --- WWA faction icons --- */\n.nights-watch-icon {\n    background: radial-gradient(circle, rgba(215, 228, 241, 
0.18), transparent 70%);\n    color: var(--nights-watch-accent);\n    border: 2px solid rgba(215, 228, 241, 0.3);\n}\n.faction-card:hover .nights-watch-icon,\n.faction-card.faction-selected .nights-watch-icon {\n    box-shadow: 0 0 30px var(--nights-watch-glow);\n    border-color: var(--nights-watch-accent);\n}\n\n.white-walkers-icon {\n    background: radial-gradient(circle, rgba(136, 196, 230, 0.22), transparent 70%);\n    color: var(--white-walkers-ice);\n    border: 2px solid rgba(136, 196, 230, 0.35);\n}\n.faction-card:hover .white-walkers-icon,\n.faction-card.faction-selected .white-walkers-icon {\n    box-shadow: 0 0 30px var(--white-walkers-glow);\n    border-color: var(--white-walkers-ice);\n}\n\n.barbarian-icon {\n    background: radial-gradient(circle, rgba(193, 68, 46, 0.2), transparent 70%);\n    color: var(--barbarian-orange);\n    border: 2px solid rgba(193, 68, 46, 0.32);\n}\n\n/* --- Wall-hold HUD overlay for WWA --- */\n.wall-hold-hud {\n    position: absolute;\n    top: 20px;\n    right: 20px;\n    z-index: 10;\n    background: var(--bg-glass);\n    backdrop-filter: blur(var(--glass-blur));\n    border: 1px solid var(--border-subtle);\n    border-radius: 10px;\n    padding: 0.75rem 1rem;\n    color: var(--text-primary);\n    min-width: 220px;\n    font-size: 0.9rem;\n}\n.wall-hold-hud h6 {\n    margin: 0 0 0.35rem 0;\n    color: var(--nights-watch-accent);\n    font-size: 0.85rem;\n    text-transform: uppercase;\n    letter-spacing: 0.05em;\n}\n.wall-hold-hud .hold-row {\n    display: flex;\n    justify-content: space-between;\n    margin: 0.1rem 0;\n}\n.wall-hold-hud .hold-row .ticks {\n    font-family: monospace;\n    font-weight: 600;\n}\n.wall-hold-hud .hold-row.nights_watch .ticks { color: var(--nights-watch-accent); }\n.wall-hold-hud .hold-row.white_walkers .ticks { color: var(--white-walkers-ice); }\n\n@keyframes iconFloat {\n    0%, 100% { transform: translateY(0); }\n    50% { transform: translateY(-6px); }\n}\n\n.faction-card:hover 
.faction-icon,\n.faction-card.faction-selected .faction-icon {\n    animation: iconFloat 2s ease-in-out infinite;\n}\n\n.faction-card .card-body {\n    text-align: center;\n    padding: 2rem 1.5rem;\n}\n\n.faction-card h4 {\n    font-family: 'Cinzel', serif;\n    font-weight: 700;\n    margin-bottom: 0.75rem;\n}\n\n.faction-card .faction-motto {\n    color: var(--text-secondary);\n    font-style: italic;\n    font-size: 0.95rem;\n}\n\n.faction-card .faction-start {\n    color: var(--text-muted);\n    font-size: 0.85rem;\n}\n\n/* --- Game HUD --- */\n.game-hud {\n    background: var(--bg-card);\n    backdrop-filter: blur(var(--glass-blur));\n    border: 1px solid var(--border-subtle);\n    border-radius: 10px;\n    padding: 0.6rem 1.2rem;\n    display: flex;\n    align-items: center;\n    gap: 1.5rem;\n    font-size: 0.85rem;\n    margin-bottom: 0.75rem;\n}\n\n.hud-item {\n    display: flex;\n    align-items: center;\n    gap: 0.4rem;\n    color: var(--text-secondary);\n}\n\n.hud-item i {\n    font-size: 0.9rem;\n}\n\n.hud-value {\n    font-weight: 700;\n    color: var(--text-primary);\n    font-family: 'Inter', sans-serif;\n}\n\n.hud-item.southern .hud-value { color: var(--southern-gold); }\n.hud-item.northern .hud-value { color: var(--northern-blue); }\n\n.hud-divider {\n    width: 1px;\n    height: 20px;\n    background: var(--border-subtle);\n}\n\n/* --- Event Feed --- */\n.event-feed {\n    background: var(--bg-card);\n    backdrop-filter: blur(var(--glass-blur));\n    border: 1px solid var(--border-subtle);\n    border-radius: 10px;\n    max-height: 160px;\n    overflow-y: auto;\n}\n\n.event-feed-header {\n    padding: 0.5rem 1rem;\n    border-bottom: 1px solid var(--border-subtle);\n    font-family: 'Cinzel', serif;\n    font-size: 0.8rem;\n    color: var(--text-secondary);\n    text-transform: uppercase;\n    letter-spacing: 1px;\n    position: sticky;\n    top: 0;\n    background: var(--bg-card);\n    z-index: 1;\n}\n\n.event-item {\n    padding: 0.4rem 
1rem;\n    border-bottom: 1px solid rgba(240, 246, 252, 0.04);\n    display: flex;\n    align-items: flex-start;\n    gap: 0.6rem;\n    font-size: 0.78rem;\n    line-height: 1.4;\n}\n\n.event-item:last-child {\n    border-bottom: none;\n}\n\n.event-time {\n    color: var(--text-muted);\n    font-size: 0.7rem;\n    white-space: nowrap;\n    min-width: 48px;\n    font-family: 'Inter', sans-serif;\n}\n\n.event-icon {\n    font-size: 0.75rem;\n    min-width: 16px;\n    text-align: center;\n}\n\n.event-icon.southern { color: var(--southern-gold); }\n.event-icon.northern { color: var(--northern-blue); }\n.event-icon.neutral { color: var(--neutral-silver); }\n\n.event-message {\n    color: var(--text-secondary);\n}\n\n/* --- Map Container --- */\n#mapContainer {\n    background: #1a1f2e !important;\n    border: 1px solid var(--border-subtle) !important;\n    border-radius: 10px !important;\n    overflow: hidden;\n}\n\n/* --- Action Panel (right sidebar) --- */\n.action-panel .card {\n    border: 1px solid var(--border-subtle);\n}\n\n.action-panel .card-header {\n    font-family: 'Cinzel', serif;\n    font-size: 0.95rem;\n}\n\n.action-panel .btn {\n    font-size: 0.85rem;\n    padding: 0.5rem 0.75rem;\n}\n\n/* --- AI Toggle --- */\n.ai-toggle-card .form-check-input:checked {\n    background-color: #22c55e;\n    border-color: #22c55e;\n}\n\n.ai-status-dot {\n    display: inline-block;\n    width: 8px;\n    height: 8px;\n    border-radius: 50%;\n    margin-right: 6px;\n}\n\n.ai-status-dot.active {\n    background: #22c55e;\n    box-shadow: 0 0 8px rgba(34, 197, 94, 0.6);\n    animation: statusPulse 2s infinite;\n}\n\n.ai-status-dot.inactive {\n    background: var(--text-muted);\n}\n\n@keyframes statusPulse {\n    0%, 100% { opacity: 1; }\n    50% { opacity: 0.4; }\n}\n\n/* --- Game Over Overlay --- */\n#gameOverOverlay {\n    background: rgba(13, 17, 23, 0.92);\n    z-index: 100;\n    color: white;\n    backdrop-filter: blur(8px);\n}\n\n.victory-text {\n    font-family: 
'Cinzel', serif;\n    text-shadow: 0 0 20px var(--southern-gold), 0 0 40px rgba(255, 215, 0, 0.3);\n    animation: victoryPulse 2s infinite;\n}\n\n.defeat-text {\n    font-family: 'Cinzel', serif;\n    text-shadow: 0 0 20px #ef4444, 0 0 40px rgba(239, 68, 68, 0.3);\n    animation: defeatPulse 2s infinite;\n}\n\n.victory-icon {\n    animation: bounce 2s infinite;\n}\n\n.defeat-icon {\n    animation: shake 2s infinite;\n}\n\n@keyframes victoryPulse {\n    0% { text-shadow: 0 0 20px var(--southern-gold); }\n    50% { text-shadow: 0 0 40px var(--southern-gold), 0 0 60px rgba(255, 215, 0, 0.4); }\n    100% { text-shadow: 0 0 20px var(--southern-gold); }\n}\n\n@keyframes defeatPulse {\n    0% { text-shadow: 0 0 20px #ef4444; }\n    50% { text-shadow: 0 0 40px #ef4444, 0 0 60px rgba(239, 68, 68, 0.4); }\n    100% { text-shadow: 0 0 20px #ef4444; }\n}\n\n@keyframes bounce {\n    0%, 20%, 50%, 80%, 100% { transform: translateY(0); }\n    40% { transform: translateY(-30px); }\n    60% { transform: translateY(-15px); }\n}\n\n@keyframes shake {\n    0%, 100% { transform: translateX(0); }\n    10%, 30%, 50%, 70%, 90% { transform: translateX(-10px); }\n    20%, 40%, 60%, 80% { transform: translateX(10px); }\n}\n\n/* --- Transfer Indicators --- */\n.transfer-indicator {\n    position: absolute;\n    transform-origin: 50% 50%;\n    z-index: 15;\n    opacity: 0.9;\n    pointer-events: none;\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    font-size: 24px;\n    color: white;\n    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);\n    background: rgba(13, 17, 23, 0.7);\n    width: 40px;\n    height: 40px;\n    border-radius: 50%;\n    backdrop-filter: blur(4px);\n}\n\n.transfer-indicator.southern {\n    border: 2px solid var(--southern-gold);\n    box-shadow: 0 0 12px var(--southern-glow);\n}\n\n.transfer-indicator.northern {\n    border: 2px solid var(--northern-blue);\n    box-shadow: 0 0 12px var(--northern-glow);\n}\n\n.transfer-indicator.attack i {\n   
 color: #ff4444;\n}\n\n.transfer-indicator.resources i {\n    color: var(--southern-gold);\n}\n\n/* --- Move Army Modal (dark) --- */\n.modal-content {\n    background: var(--bg-secondary);\n    border: 1px solid var(--border-subtle);\n    color: var(--text-primary);\n    border-radius: 12px;\n}\n\n.modal-header {\n    border-bottom-color: var(--border-subtle);\n}\n\n.modal-header .modal-title {\n    font-family: 'Cinzel', serif;\n}\n\n.modal-footer {\n    border-top-color: var(--border-subtle);\n}\n\n.modal-header .btn-close {\n    filter: invert(1);\n}\n\n.list-group-item {\n    background: rgba(255, 255, 255, 0.03);\n    border-color: var(--border-subtle);\n    color: var(--text-primary);\n}\n\n.list-group-item:hover,\n.list-group-item-action:hover {\n    background: rgba(255, 255, 255, 0.08);\n    color: var(--text-primary);\n}\n\n.destination-item {\n    cursor: pointer;\n    transition: all var(--transition-speed);\n}\n\n.destination-item:hover {\n    background: rgba(255, 255, 255, 0.08) !important;\n    border-color: var(--southern-gold);\n}\n\n/* --- Replay Page Styles --- */\n.attribute-item {\n    border-left: 3px solid var(--northern-blue);\n    padding-left: 8px;\n    margin-bottom: 8px;\n}\n\n#span-attributes {\n    max-height: 200px;\n    overflow-y: auto;\n}\n\n/* Movement arrow (replay) */\n.movement-arrow {\n    position: absolute;\n    z-index: 15;\n    pointer-events: none;\n    color: #fff;\n    font-size: 24px;\n    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.7);\n    animation: moveAlongPath 2s ease-in-out;\n}\n\n@keyframes moveAlongPath {\n    0% { opacity: 0; transform: scale(0.5); }\n    50% { opacity: 1; transform: scale(1); }\n    100% { opacity: 0; transform: scale(0.5); }\n}\n\n/* Action highlight (replay) */\n.location-marker.active {\n    animation: markerPulse 1.5s infinite;\n}\n\n.location-marker.action-highlight {\n    animation: actionPulse 1.5s ease-in-out;\n}\n\n@keyframes markerPulse {\n    0% { transform: translate(-50%, -50%) 
scale(1); }\n    50% { transform: translate(-50%, -50%) scale(1.2); }\n    100% { transform: translate(-50%, -50%) scale(1); }\n}\n\n@keyframes actionPulse {\n    0% { transform: translate(-50%, -50%) scale(1); }\n    50% { transform: translate(-50%, -50%) scale(1.3); box-shadow: 0 0 30px rgba(255, 255, 0, 0.6); }\n    100% { transform: translate(-50%, -50%) scale(1); }\n}\n\n/* --- Spinner (dark) --- */\n.spinner-border {\n    color: var(--northern-blue) !important;\n}\n\n/* --- Small text helpers --- */\n.text-muted {\n    color: var(--text-muted) !important;\n}\n\nsmall.text-muted {\n    color: var(--text-muted) !important;\n}\n\n/* --- Traveling Unit Animations --- */\n.traveling-unit {\n    position: absolute;\n    z-index: 20;\n    pointer-events: none;\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    border-radius: 50%;\n    font-size: 18px;\n    transition: none;\n}\n\n.traveling-unit.army {\n    width: 42px;\n    height: 42px;\n    background: rgba(13, 17, 23, 0.9);\n    border: 2px solid;\n    backdrop-filter: blur(4px);\n    animation: unitBob 0.6s ease-in-out infinite;\n}\n\n.traveling-unit.army.southern {\n    border-color: var(--southern-gold);\n    box-shadow: 0 0 16px var(--southern-glow), 0 0 32px rgba(255, 215, 0, 0.2);\n    color: var(--southern-gold);\n}\n\n.traveling-unit.army.northern {\n    border-color: var(--northern-blue);\n    box-shadow: 0 0 16px var(--northern-glow), 0 0 32px rgba(79, 195, 247, 0.2);\n    color: var(--northern-blue);\n}\n\n.traveling-unit.cart {\n    width: 36px;\n    height: 36px;\n    background: rgba(13, 17, 23, 0.9);\n    border: 2px solid var(--southern-gold);\n    box-shadow: 0 0 14px rgba(255, 215, 0, 0.35);\n    color: var(--southern-gold);\n    font-size: 15px;\n    animation: unitBob 0.8s ease-in-out infinite;\n}\n\n.traveling-unit.cart.northern {\n    border-color: var(--northern-blue);\n    box-shadow: 0 0 14px rgba(79, 195, 247, 0.35);\n    color: 
var(--northern-blue);\n}\n\n/* Army count badge on traveling unit */\n.traveling-unit .army-count {\n    position: absolute;\n    top: -8px;\n    right: -8px;\n    min-width: 18px;\n    height: 18px;\n    border-radius: 9px;\n    background: var(--southern-crimson);\n    color: #fff;\n    font-size: 10px;\n    font-weight: 700;\n    font-family: 'Inter', sans-serif;\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    padding: 0 4px;\n    line-height: 1;\n}\n\n@keyframes unitBob {\n    0%, 100% { margin-top: 0; }\n    50% { margin-top: -4px; }\n}\n\n/* Trail particles left behind by traveling units */\n.trail-particle {\n    position: absolute;\n    z-index: 18;\n    pointer-events: none;\n    width: 6px;\n    height: 6px;\n    border-radius: 50%;\n    animation: trailFade 1.5s ease-out forwards;\n}\n\n.trail-particle.southern {\n    background: var(--southern-gold);\n    box-shadow: 0 0 8px var(--southern-glow), 0 0 16px rgba(255, 215, 0, 0.15);\n}\n\n.trail-particle.northern {\n    background: var(--northern-blue);\n    box-shadow: 0 0 8px var(--northern-glow), 0 0 16px rgba(79, 195, 247, 0.15);\n}\n\n.trail-particle.resource {\n    background: var(--southern-gold);\n    box-shadow: 0 0 6px rgba(255, 215, 0, 0.5);\n    width: 5px;\n    height: 5px;\n}\n\n/* Persistent glowing trail line segment */\n.trail-line-segment {\n    position: absolute;\n    z-index: 17;\n    pointer-events: none;\n    height: 2px;\n    transform-origin: 0 50%;\n    animation: trailLineFade 2.5s ease-out forwards;\n}\n\n.trail-line-segment.southern {\n    background: linear-gradient(90deg, transparent, var(--southern-gold), transparent);\n    box-shadow: 0 0 6px var(--southern-glow);\n}\n\n.trail-line-segment.northern {\n    background: linear-gradient(90deg, transparent, var(--northern-blue), transparent);\n    box-shadow: 0 0 6px var(--northern-glow);\n}\n\n.trail-line-segment.resource {\n    background: linear-gradient(90deg, transparent, 
var(--southern-gold), transparent);\n    box-shadow: 0 0 4px rgba(255, 215, 0, 0.3);\n    height: 1.5px;\n}\n\n@keyframes trailFade {\n    0% { opacity: 0.9; transform: scale(1.2); }\n    50% { opacity: 0.4; transform: scale(0.8); }\n    100% { opacity: 0; transform: scale(0.1); }\n}\n\n@keyframes trailLineFade {\n    0% { opacity: 0.7; }\n    60% { opacity: 0.3; }\n    100% { opacity: 0; }\n}\n\n/* Clash/explosion effect at destination */\n.clash-burst {\n    position: absolute;\n    z-index: 25;\n    pointer-events: none;\n    width: 80px;\n    height: 80px;\n    border-radius: 50%;\n    transform: translate(-50%, -50%);\n    animation: clashExpand 1.2s ease-out forwards;\n}\n\n.clash-burst.attack {\n    background: radial-gradient(circle, rgba(239, 68, 68, 0.7), rgba(255, 165, 0, 0.3) 50%, rgba(239, 68, 68, 0) 70%);\n    box-shadow: 0 0 40px rgba(239, 68, 68, 0.5), 0 0 80px rgba(239, 68, 68, 0.2);\n}\n\n.clash-burst.capture {\n    background: radial-gradient(circle, rgba(34, 197, 94, 0.6), rgba(255, 215, 0, 0.3) 50%, rgba(34, 197, 94, 0) 70%);\n    box-shadow: 0 0 40px rgba(34, 197, 94, 0.4), 0 0 80px rgba(34, 197, 94, 0.15);\n}\n\n.clash-burst.reinforce {\n    background: radial-gradient(circle, rgba(99, 102, 241, 0.5), rgba(99, 102, 241, 0) 70%);\n    box-shadow: 0 0 30px rgba(99, 102, 241, 0.4);\n}\n\n@keyframes clashExpand {\n    0% { transform: translate(-50%, -50%) scale(0.2); opacity: 1; }\n    30% { transform: translate(-50%, -50%) scale(1.2); opacity: 0.9; }\n    60% { transform: translate(-50%, -50%) scale(1.8); opacity: 0.5; }\n    100% { transform: translate(-50%, -50%) scale(2.5); opacity: 0; }\n}\n\n/* Capture sparkle particles */\n.capture-sparkle {\n    position: absolute;\n    z-index: 24;\n    pointer-events: none;\n    width: 6px;\n    height: 6px;\n    border-radius: 50%;\n    animation: sparkleFloat 1.2s ease-out forwards;\n}\n\n@keyframes sparkleFloat {\n    0% { opacity: 1; transform: translate(-50%, -50%) scale(1); }\n    100% { opacity: 
0; transform: translate(-50%, -50%) translateY(-40px) scale(0); }\n}\n\n/* Marker captured flash */\n.location-marker.just-captured {\n    animation: capturedFlash 0.6s ease-out;\n}\n\n@keyframes capturedFlash {\n    0% { filter: brightness(1); }\n    30% { filter: brightness(2.5); }\n    100% { filter: brightness(1); }\n}\n\n/* Glowing connection line pulse during movement */\n.connection-pulse {\n    position: absolute;\n    z-index: 16;\n    pointer-events: none;\n    width: 10px;\n    height: 10px;\n    border-radius: 50%;\n    animation: connectionGlow 1s ease-in-out infinite;\n}\n\n.connection-pulse.southern {\n    background: var(--southern-gold);\n    box-shadow: 0 0 12px var(--southern-glow), 0 0 24px rgba(255, 215, 0, 0.15);\n}\n\n.connection-pulse.northern {\n    background: var(--northern-blue);\n    box-shadow: 0 0 12px var(--northern-glow), 0 0 24px rgba(79, 195, 247, 0.15);\n}\n\n@keyframes connectionGlow {\n    0%, 100% { opacity: 0.3; transform: translate(-50%, -50%) scale(0.6); }\n    50% { opacity: 1; transform: translate(-50%, -50%) scale(1.4); }\n}\n\n/* --- Responsive --- */\n@media (max-width: 768px) {\n    .location-marker {\n        width: 32px !important;\n        height: 32px !important;\n    }\n\n    .location-marker.capital {\n        width: 42px !important;\n        height: 42px !important;\n    }\n\n    .faction-hero-title {\n        font-size: 1.8rem;\n    }\n\n    .game-hud {\n        flex-wrap: wrap;\n        gap: 0.75rem;\n    }\n\n    .location-label {\n        font-size: 0.55rem;\n    }\n}\n"
  },
  {
    "path": "game-of-tracing/war_map/telemetry.py",
    "content": "import os\n\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry import trace\n\n# Logging setup\nimport logging\nfrom opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry._logs import set_logger_provider\n\n# Profiling setup (Pyroscope v2 + OTel span-profile linking)\nimport pyroscope\nfrom pyroscope.otel import PyroscopeSpanProcessor\n\nclass GameTelemetry:\n    def __init__(self, service_name, logging_endpoint=\"http://alloy:4318\", tracing_endpoint=\"http://alloy:4317\"):\n        self.service_name = service_name\n        self.logging_endpoint = logging_endpoint\n        self.tracing_endpoint = tracing_endpoint\n        self.resource = Resource.create(attributes={\n            SERVICE_NAME: service_name\n        })\n\n        self._setup_logging()\n        self._setup_tracing()\n        self._setup_profiling()\n        \n    def _setup_logging(self):\n        \"\"\"Configure OpenTelemetry logging\"\"\"\n        self.logger_provider = LoggerProvider(resource=self.resource)\n        set_logger_provider(self.logger_provider)\n        \n        log_exporter = OTLPLogExporter(\n            endpoint=f\"{self.logging_endpoint}/v1/logs\"\n        )\n        \n        self.logger_provider.add_log_record_processor(\n            BatchLogRecordProcessor(\n                exporter=log_exporter,\n                max_queue_size=30,\n                max_export_batch_size=5\n            )\n        )\n        \n        # Setup root logger\n        handler = LoggingHandler(\n            level=logging.NOTSET,\n            logger_provider=self.logger_provider\n     
   )\n        logging.getLogger().addHandler(handler)\n        logging.getLogger().setLevel(logging.INFO)\n        \n        self.logger = logging.getLogger(self.service_name)\n    \n    def _setup_tracing(self):\n        \"\"\"Configure OpenTelemetry tracing\"\"\"\n        trace.set_tracer_provider(TracerProvider(resource=self.resource))\n        \n        otlp_exporter = OTLPSpanExporter(\n            endpoint=f\"{self.tracing_endpoint}/v1/traces\",\n            insecure=True\n        )\n        \n        span_processor = BatchSpanProcessor(\n            span_exporter=otlp_exporter,\n            max_export_batch_size=1\n        )\n        \n        trace.get_tracer_provider().add_span_processor(span_processor)\n        self.tracer = trace.get_tracer(__name__)\n\n    def _setup_profiling(self):\n        \"\"\"Configure Pyroscope profiling + OTel span-profile linkage.\"\"\"\n        pyroscope.configure(\n            application_name=self.service_name,\n            server_address=os.getenv(\"PYROSCOPE_SERVER_ADDRESS\", \"http://alloy:9999\"),\n            tags={\"service_name\": self.service_name},\n            oncpu=True,\n            gil_only=True,\n        )\n        trace.get_tracer_provider().add_span_processor(PyroscopeSpanProcessor())\n\n    def get_tracer(self):\n        \"\"\"Get the configured tracer\"\"\"\n        return self.tracer\n\n    def get_logger(self):\n        \"\"\"Get the configured logger\"\"\"\n        return self.logger\n\n    def shutdown(self):\n        \"\"\"Flush and shutdown all telemetry providers.\"\"\"\n        try:\n            trace.get_tracer_provider().shutdown()\n        except Exception:\n            pass\n        try:\n            self.logger_provider.shutdown()\n        except Exception:\n            pass\n"
  },
  {
    "path": "game-of-tracing/war_map/templates/index.html",
    "content": "{% extends \"layout.html\" %}\n\n{% block title %}\n    {% if single_player %}Take the Black — {{ map_meta.display_name }}{% else %}Choose Your Faction{% endif %}\n{% endblock %}\n\n{% block content %}\n<div class=\"faction-hero\">\n    <div class=\"col-lg-8 col-xl-7\">\n        <div class=\"text-center mb-5\">\n            <h1 class=\"faction-hero-title\">\n                {% if single_player %}{{ map_meta.display_name }}{% else %}A Game of Traces{% endif %}\n            </h1>\n            <p class=\"faction-hero-subtitle\">\n                {% if single_player %}{{ map_meta.description }}\n                {% else %}Choose your kingdom. Command your armies. Master distributed tracing.\n                {% endif %}\n            </p>\n            <p class=\"small\">\n                <a href=\"{{ url_for('map_picker') }}\" class=\"text-decoration-none\">\n                    <i class=\"fas fa-map me-1\"></i>Pick a different map\n                </a>\n            </p>\n        </div>\n\n        <!-- Show reset status if coming from restart -->\n        {% if request.args.get('reset') %}\n        <div class=\"alert {% if request.args.get('reset') == 'success' %}alert-success{% else %}alert-danger{% endif %} mb-4\">\n            <i class=\"fas {% if request.args.get('reset') == 'success' %}fa-check-circle{% else %}fa-exclamation-triangle{% endif %} me-2\"></i>\n            {{ request.args.get('message', 'Game reset status unknown') }}\n        </div>\n        {% endif %}\n\n        {% if error %}\n        <div class=\"alert alert-danger mb-4\">{{ error }}</div>\n        {% endif %}\n\n        <form method=\"POST\" action=\"{{ url_for('select_faction') }}\" id=\"factionForm\">\n            <div class=\"mb-4\">\n                <label for=\"player_name\" class=\"form-label\">\n                    {% if single_player %}Your name, brother of the Watch{% else %}Commander Name{% endif %}\n                </label>\n                <input type=\"text\" 
class=\"form-control form-control-lg\" id=\"player_name\" name=\"player_name\"\n                       placeholder=\"Enter your name...\" required\n                       style=\"max-width: 400px; margin: 0 auto; text-align: center;\">\n            </div>\n            <input type=\"hidden\" name=\"faction\" id=\"factionInput\" value=\"{% if single_player %}{{ player_faction }}{% endif %}\" required>\n\n            {% if single_player %}\n            <!-- Single-player: one preset faction card, auto-selected. -->\n            <div class=\"row g-4 faction-selection justify-content-center mb-4\">\n                <div class=\"col-md-6\">\n                    <div class=\"card faction-card faction-nights-watch faction-selected {% if not player_available %}faction-unavailable{% endif %}\"\n                         data-faction=\"{{ player_faction }}\">\n                        <div class=\"card-body\">\n                            <span class=\"faction-icon nights-watch-icon\">\n                                <i class=\"fas fa-shield-halved\"></i>\n                            </span>\n                            <h4>The Night's Watch</h4>\n                            <p class=\"faction-motto\">\"Night gathers, and now my watch begins.\"</p>\n                            <p class=\"faction-start\">\n                                <i class=\"fas fa-map-marker-alt me-1\"></i>Castle Black\n                            </p>\n                            {% if not player_available %}\n                            <div class=\"mt-2\">\n                                <span class=\"badge bg-danger\"><i class=\"fas fa-ban me-1\"></i>Already taken — reset the game</span>\n                            </div>\n                            {% endif %}\n                        </div>\n                    </div>\n                </div>\n            </div>\n\n            <div class=\"text-center\">\n                <button type=\"submit\" id=\"enterGameBtn\" class=\"btn btn-primary btn-lg 
px-5 py-2\"\n                        {% if not player_available %}disabled{% endif %}>\n                    <i class=\"fas fa-chess-knight me-2\"></i>Take the Black\n                </button>\n            </div>\n            {% else %}\n            <!-- Two-faction WoK selection -->\n            <div class=\"row g-4 faction-selection justify-content-center mb-4\">\n                <div class=\"col-md-5\">\n                    <div class=\"card faction-card faction-southern {% if not southern_available %}faction-unavailable{% endif %}\"\n                         data-faction=\"southern\" {% if not southern_available %}aria-disabled=\"true\"{% endif %}>\n                        <div class=\"card-body\">\n                            <span class=\"faction-icon southern-icon\">\n                                <i class=\"fas fa-sun\"></i>\n                            </span>\n                            <h4>Southern Kingdom</h4>\n                            <p class=\"faction-motto\">\"Glory and Honor!\"</p>\n                            <p class=\"faction-start\">\n                                <i class=\"fas fa-map-marker-alt me-1\"></i>Start at Southern Capital\n                            </p>\n                            {% if not southern_available %}\n                            <div class=\"mt-2\">\n                                <span class=\"badge bg-danger\"><i class=\"fas fa-ban me-1\"></i>Already taken</span>\n                            </div>\n                            {% endif %}\n                        </div>\n                    </div>\n                </div>\n\n                <div class=\"col-md-5\">\n                    <div class=\"card faction-card faction-northern {% if not northern_available %}faction-unavailable{% endif %}\"\n                         data-faction=\"northern\" {% if not northern_available %}aria-disabled=\"true\"{% endif %}>\n                        <div class=\"card-body\">\n                            <span 
class=\"faction-icon northern-icon\">\n                                <i class=\"fas fa-snowflake\"></i>\n                            </span>\n                            <h4>Northern Kingdom</h4>\n                            <p class=\"faction-motto\">\"Strength and Unity\"</p>\n                            <p class=\"faction-start\">\n                                <i class=\"fas fa-map-marker-alt me-1\"></i>Start at Northern Capital\n                            </p>\n                            {% if not northern_available %}\n                            <div class=\"mt-2\">\n                                <span class=\"badge bg-danger\"><i class=\"fas fa-ban me-1\"></i>Already taken</span>\n                            </div>\n                            {% endif %}\n                        </div>\n                    </div>\n                </div>\n            </div>\n\n            <div class=\"text-center\">\n                <button type=\"submit\" id=\"enterGameBtn\" class=\"btn btn-primary btn-lg px-5 py-2\" disabled>\n                    <i class=\"fas fa-dungeon me-2\"></i>Enter The Game\n                </button>\n            </div>\n            {% endif %}\n        </form>\n\n        <!-- Reset & Replay links -->\n        <div class=\"row g-3 mt-4 justify-content-center\">\n            <div class=\"col-auto\">\n                <button id=\"reset-game-btn\" class=\"btn btn-outline-light btn-sm\">\n                    <i class=\"fas fa-redo-alt me-1\"></i>Reset Game\n                </button>\n                <div id=\"reset-status\" class=\"mt-2 text-center small\"></div>\n            </div>\n            <div class=\"col-auto\">\n                <a href=\"/replay\" class=\"btn btn-outline-info btn-sm\">\n                    <i class=\"fas fa-play me-1\"></i>View Game Replays\n                </a>\n            </div>\n        </div>\n    
</div>\n</div>\n{% endblock %}\n\n{% block scripts %}\n<script>\n    $(document).ready(function() {\n        const factionInput = $('#factionInput');\n        const enterBtn = $('#enterGameBtn');\n\n        // Make the entire card clickable for faction selection\n        $('.faction-card:not(.faction-unavailable)').click(function() {\n            const faction = $(this).data('faction');\n            factionInput.val(faction);\n\n            // Update visual selection\n            $('.faction-card').removeClass('faction-selected');\n            $(this).addClass('faction-selected');\n\n            // Enable submit button\n            enterBtn.prop('disabled', false);\n        });\n\n        // Prevent submit if no faction\n        $('#factionForm').on('submit', function(e) {\n            if (!factionInput.val()) {\n                e.preventDefault();\n                // Flash the cards briefly\n                $('.faction-card:not(.faction-unavailable)').addClass('border-warning');\n                setTimeout(() => $('.faction-card').removeClass('border-warning'), 1000);\n            }\n        });\n\n        // Reset Game button handler\n        $('#reset-game-btn').click(function() {\n            if (!confirm('Are you sure you want to reset the game? 
This will clear all progress.')) return;\n\n            $('#reset-game-btn').prop('disabled', true);\n            $('#reset-status').html('<span style=\"color: var(--northern-blue);\">Resetting game...</span>');\n\n            fetch('/api/reset_game', {method: 'POST'})\n                .then(response => response.json())\n                .then(data => {\n                    if (data.success) {\n                        $('#reset-status').html('<span style=\"color: #22c55e;\">' + data.message + '</span>');\n                        setTimeout(() => { window.location.reload(); }, 1000);\n                    } else {\n                        $('#reset-status').html('<span style=\"color: #ef4444;\">Failed to reset game.</span>');\n                        $('#reset-game-btn').prop('disabled', false);\n                    }\n                })\n                .catch(() => {\n                    $('#reset-status').html('<span style=\"color: #ef4444;\">Network error. Try again.</span>');\n                    $('#reset-game-btn').prop('disabled', false);\n                });\n        });\n    });\n</script>\n{% endblock %}\n"
  },
  {
    "path": "game-of-tracing/war_map/templates/layout.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\" data-bs-theme=\"dark\">\n<head>\n    <meta charset=\"UTF-8\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    <meta name=\"theme-color\" content=\"#0d1117\">\n    <title>A Game of Traces - {% block title %}Game Map{% endblock %}</title>\n    <link rel=\"preconnect\" href=\"https://fonts.googleapis.com\">\n    <link rel=\"preconnect\" href=\"https://fonts.gstatic.com\" crossorigin>\n    <link href=\"https://fonts.googleapis.com/css2?family=Cinzel:wght@400;600;700&family=Inter:wght@300;400;500;600;700&display=swap\" rel=\"stylesheet\">\n    <link rel=\"stylesheet\" href=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css\">\n    <link href=\"https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\n    <link rel=\"stylesheet\" href=\"{{ url_for('static', filename='css/style.css') }}\">\n    {% block extra_css %}{% endblock %}\n</head>\n<body>\n    <nav class=\"navbar navbar-expand-lg navbar-dark\">\n        <div class=\"container-fluid\">\n            <a class=\"navbar-brand\" href=\"{{ url_for('index') }}\">\n                <i class=\"fas fa-chess-rook me-2\"></i>A Game of Traces\n            </a>\n            <button class=\"navbar-toggler\" type=\"button\" data-bs-toggle=\"collapse\" data-bs-target=\"#navbarNav\" aria-controls=\"navbarNav\" aria-expanded=\"false\" aria-label=\"Toggle navigation\">\n                <span class=\"navbar-toggler-icon\"></span>\n            </button>\n            <div class=\"collapse navbar-collapse\" id=\"navbarNav\">\n                <ul class=\"navbar-nav\">\n                    <li class=\"nav-item\">\n                        <a class=\"nav-link\" href=\"{{ url_for('index') }}\">Home</a>\n                    </li>\n                    {% if session.get('faction') %}\n                    <li class=\"nav-item\">\n                        <a class=\"nav-link\" href=\"{{ 
url_for('game_map') }}\">Game Map</a>\n                    </li>\n                    <li class=\"nav-item\">\n                        <a class=\"nav-link\" href=\"{{ url_for('replay_page') }}\">\n                            <i class=\"fas fa-history me-1\"></i>Replay\n                        </a>\n                    </li>\n                    {% endif %}\n                </ul>\n                <ul class=\"navbar-nav ms-auto\">\n                    {% if session.get('faction') %}\n                    <li class=\"nav-item\">\n                        <span class=\"nav-link\">\n                            {% if session.get('faction') == 'southern' %}\n                            <i class=\"fas fa-sun me-1\" style=\"color: var(--southern-gold);\"></i>\n                            {% else %}\n                            <i class=\"fas fa-snowflake me-1\" style=\"color: var(--northern-blue);\"></i>\n                            {% endif %}\n                            {{ session.get('player_name', 'Player') }} <span class=\"text-muted\">({{ session.get('faction', '').capitalize() }})</span>\n                        </span>\n                    </li>\n                    <li class=\"nav-item\">\n                        <a class=\"nav-link\" href=\"{{ url_for('logout') }}\">Logout</a>\n                    </li>\n                    {% endif %}\n                </ul>\n            </div>\n        </div>\n    </nav>\n\n    <div class=\"container-fluid mt-4\">\n        {% block content %}{% endblock %}\n    </div>\n\n    <footer class=\"text-center py-3 mt-5\">\n        <div class=\"container\">\n            <p class=\"mb-0\">A Game of Traces</p>\n        </div>\n    </footer>\n\n    <script src=\"https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js\"></script>\n    <script src=\"https://code.jquery.com/jquery-3.6.0.min.js\"></script>\n    {% block scripts %}{% endblock %}\n</body>\n</html>\n"
  },
  {
    "path": "game-of-tracing/war_map/templates/map.html",
    "content": "{% extends \"layout.html\" %}\n\n{% block title %}Game Map{% endblock %}\n\n{% block content %}\n<!-- Template variables for JavaScript -->\n<script type=\"application/json\" id=\"templateData\">\n    {\n        \"locations\": {{ locations | tojson }},\n        \"connections\": {{ connections | tojson }},\n        \"playerFaction\": \"{{ faction }}\",\n        \"gameOver\": {{ game_over | tojson }},\n        \"winner\": {% if winner %}\"{{ winner }}\"{% else %}null{% endif %},\n        \"victoryMessage\": {% if victory_message %}\"{{ victory_message }}\"{% else %}null{% endif %}\n    }\n</script>\n\n<!-- Game HUD -->\n<div class=\"game-hud\" id=\"gameHud\">\n    <div class=\"hud-item {{ faction }}\">\n        {% if faction == 'southern' %}\n        <i class=\"fas fa-sun\" style=\"color: var(--southern-gold);\"></i>\n        {% else %}\n        <i class=\"fas fa-snowflake\" style=\"color: var(--northern-blue);\"></i>\n        {% endif %}\n        <span class=\"hud-value\">{{ faction|capitalize }}</span>\n    </div>\n    <div class=\"hud-divider\"></div>\n    <div class=\"hud-item\">\n        <i class=\"fas fa-coins\" style=\"color: var(--southern-gold);\"></i>\n        <span>Resources:</span>\n        <span class=\"hud-value\" id=\"hudResources\">0</span>\n    </div>\n    <div class=\"hud-divider\"></div>\n    <div class=\"hud-item\">\n        <i class=\"fas fa-shield-alt\" style=\"color: #ef4444;\"></i>\n        <span>Armies:</span>\n        <span class=\"hud-value\" id=\"hudArmies\">0</span>\n    </div>\n    <div class=\"hud-divider\"></div>\n    <div class=\"hud-item\">\n        <i class=\"fas fa-map-marked-alt\" style=\"color: #22c55e;\"></i>\n        <span>Territory:</span>\n        <span class=\"hud-value\" id=\"hudTerritory\">0/8</span>\n    </div>\n    <div class=\"ms-auto\">\n        <button id=\"refreshMapBtn\" class=\"btn btn-outline-light btn-sm\">\n            <i class=\"fas fa-sync-alt\"></i>\n        </button>\n    
</div>\n</div>\n\n<div class=\"row g-3\">\n    <!-- Game Map -->\n    <div class=\"col-lg-9\">\n        <div id=\"mapContainer\" class=\"position-relative w-100\" style=\"height: 65vh; min-height: 400px;\">\n            <!-- Map Canvas -->\n            <canvas id=\"mapCanvas\" class=\"h-100 w-100\"></canvas>\n\n            <!-- Location Markers (added dynamically) -->\n            <div id=\"mapMarkers\"></div>\n\n            {% if wall_hold %}\n            <!-- Wall-hold HUD (WWA win-condition tracker) -->\n            <div class=\"wall-hold-hud\" id=\"wallHoldHud\">\n                <h6><i class=\"fas fa-gavel me-1\"></i>Wall Hold</h6>\n                <div class=\"small mb-1\">Hold every keep for {{ wall_hold.threshold }} ticks to win.</div>\n                <div class=\"hold-row nights_watch\">\n                    <span>Night's Watch</span>\n                    <span class=\"ticks\" id=\"hudHoldNightsWatch\">\n                        {{ wall_hold.holds.get('nights_watch', 0) }}/{{ wall_hold.threshold }}\n                    </span>\n                </div>\n                <div class=\"hold-row white_walkers\">\n                    <span>White Walkers</span>\n                    <span class=\"ticks\" id=\"hudHoldWhiteWalkers\">\n                        {{ wall_hold.holds.get('white_walkers', 0) }}/{{ wall_hold.threshold }}\n                    </span>\n                </div>\n            </div>\n            {% endif %}\n\n            <!-- Alert messages -->\n            <div id=\"mapAlert\" class=\"position-absolute top-0 start-0 end-0 alert alert-danger m-3 d-none\">\n                An error occurred\n            </div>\n\n            <!-- Game Over Overlay -->\n            <div id=\"gameOverOverlay\" class=\"position-absolute top-0 start-0 w-100 h-100 d-none\">\n                <div class=\"h-100 d-flex flex-column justify-content-center align-items-center text-center p-4\">\n                    <h1 id=\"gameOverTitle\" class=\"mb-4\"></h1>\n                  
  <p id=\"gameOverMessage\" class=\"mb-5 fs-4\"></p>\n                    <div id=\"victoryAnimation\" class=\"d-none\">\n                        <i class=\"fas fa-crown fa-5x mb-4 victory-icon\" style=\"color: var(--southern-gold);\"></i>\n                    </div>\n                    <div id=\"defeatAnimation\" class=\"d-none\">\n                        <i class=\"fas fa-skull-crossbones fa-5x mb-4 defeat-icon\" style=\"color: #ef4444;\"></i>\n                    </div>\n                    <a href=\"/restart-game\" class=\"btn btn-primary btn-lg mt-4\">\n                        <i class=\"fas fa-redo-alt me-2\"></i>Restart Game\n                    </a>\n                </div>\n            </div>\n        </div>\n\n        <!-- Event Feed -->\n        <div class=\"event-feed mt-2\" id=\"eventFeed\">\n            <div class=\"event-feed-header\">\n                <i class=\"fas fa-scroll me-1\"></i> Battle Log\n            </div>\n            <div id=\"eventFeedBody\">\n                <div class=\"event-item\">\n                    <span class=\"event-time\">--:--</span>\n                    <span class=\"event-icon neutral\"><i class=\"fas fa-flag\"></i></span>\n                    <span class=\"event-message\">Game started. 
Select a location on the map to begin.</span>\n                </div>\n            </div>\n        </div>\n    </div>\n\n    <!-- Right Panel -->\n    <div class=\"col-lg-3 action-panel\">\n        <!-- AI Opponent Control -->\n        <div class=\"card ai-toggle-card mb-3\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\"><i class=\"fas fa-robot me-2\"></i>AI Opponent</h6>\n            </div>\n            <div class=\"card-body py-2\">\n                <div class=\"form-check form-switch\">\n                    <input class=\"form-check-input\" type=\"checkbox\" id=\"aiToggle\" role=\"switch\">\n                    <label class=\"form-check-label small\" for=\"aiToggle\">\n                        Enable AI\n                    </label>\n                </div>\n                <div id=\"aiStatus\" class=\"mt-1 small\" style=\"color: var(--text-muted);\">\n                    <span class=\"ai-status-dot inactive\"></span>AI is inactive\n                </div>\n            </div>\n        </div>\n\n        <!-- Location Details -->\n        <div class=\"card mb-3\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\" id=\"locationName\">Select a Location</h6>\n            </div>\n            <div class=\"card-body\">\n                <div id=\"emptyState\" class=\"text-center py-4\">\n                    <i class=\"fas fa-map-marker-alt fa-2x mb-2\" style=\"color: var(--text-muted);\"></i>\n                    <p class=\"small mb-0\" style=\"color: var(--text-secondary);\">Click on a location on the map</p>\n                </div>\n\n                <div id=\"locationDetails\" style=\"display: none;\">\n                    <div class=\"d-flex justify-content-between mb-3\">\n                        <span class=\"small\" style=\"color: var(--text-secondary);\">Faction</span>\n                        <span id=\"locationFaction\" class=\"badge\">Neutral</span>\n                    </div>\n\n                    <!-- 
Resources bar -->\n                    <div class=\"d-flex justify-content-between mb-1\">\n                        <small style=\"color: var(--text-secondary);\"><i class=\"fas fa-coins me-1\" style=\"color: var(--southern-gold);\"></i>Resources</small>\n                        <small id=\"resourcesValue\" class=\"fw-bold\">0</small>\n                    </div>\n                    <div class=\"progress mb-3\">\n                        <div id=\"resourcesBar\" class=\"progress-bar bg-warning\" role=\"progressbar\" style=\"width: 0%\"></div>\n                    </div>\n\n                    <!-- Army bar -->\n                    <div class=\"d-flex justify-content-between mb-1\">\n                        <small style=\"color: var(--text-secondary);\"><i class=\"fas fa-shield-alt me-1\" style=\"color: #ef4444;\"></i>Army</small>\n                        <small id=\"armyValue\" class=\"fw-bold\">0</small>\n                    </div>\n                    <div class=\"progress mb-3\">\n                        <div id=\"armyBar\" class=\"progress-bar bg-danger\" role=\"progressbar\" style=\"width: 0%\"></div>\n                    </div>\n\n                    <div id=\"actionButtons\">\n                        <button id=\"collectResourcesBtn\" class=\"btn btn-warning btn-sm w-100 mb-2\" style=\"display: none;\">\n                            <i class=\"fas fa-coins me-1\"></i> Collect Resources\n                            <small id=\"resourceCooldown\" class=\"d-none\">(Wait: <span>0</span>s)</small>\n                        </button>\n\n                        <button id=\"createArmyBtn\" class=\"btn btn-danger btn-sm w-100 mb-2\" style=\"display: none;\">\n                            <i class=\"fas fa-shield-alt me-1\"></i> Create Army <small class=\"opacity-75\">(30 res)</small>\n                        </button>\n\n                        <button id=\"allOutAttackBtn\" class=\"btn btn-sm w-100 mb-2\" style=\"display: none; background: linear-gradient(135deg, 
#ef4444, #991b1b); border: none; color: #fff;\">\n                            <i class=\"fas fa-skull-crossbones me-1\"></i> All Out Attack\n                            <small class=\"d-block opacity-75\">Send all armies to enemy capital</small>\n                        </button>\n\n                        <button id=\"sendResourcesBtn\" class=\"btn btn-success btn-sm w-100 mb-2\" style=\"display: none;\">\n                            <i class=\"fas fa-route me-1\"></i> Send Resources to Capital\n                        </button>\n\n                        <button id=\"moveArmyBtn\" class=\"btn btn-info btn-sm w-100\" data-bs-toggle=\"modal\" data-bs-target=\"#moveArmyModal\">\n                            <i class=\"fas fa-people-arrows me-1\"></i> Move Army\n                        </button>\n                    </div>\n\n                    <div id=\"actionStatus\" class=\"alert alert-info mt-3 small\" style=\"display: none;\">\n                        Action result will appear here\n                    </div>\n                </div>\n            </div>\n        </div>\n    </div>\n</div>\n\n<!-- Move Army Modal -->\n<div class=\"modal fade\" id=\"moveArmyModal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"moveArmyModalLabel\">\n    <div class=\"modal-dialog\" role=\"document\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h5 class=\"modal-title\" id=\"moveArmyModalLabel\">Move Army</h5>\n                <button type=\"button\" class=\"btn-close\" data-bs-dismiss=\"modal\" aria-label=\"Close\"></button>\n            </div>\n            <div class=\"modal-body\">\n                <p>Select a destination for your army from <strong id=\"sourceLocationName\">this location</strong>:</p>\n\n                <div class=\"list-group\" id=\"destinationsList\" role=\"listbox\" aria-label=\"Available destinations\">\n                </div>\n\n                <div id=\"moveArmyStatus\" class=\"alert alert-warning 
mt-3\" style=\"display: none;\" role=\"alert\">\n                    Status message will appear here\n                </div>\n            </div>\n            <div class=\"modal-footer\">\n                <button type=\"button\" class=\"btn btn-secondary\" data-bs-dismiss=\"modal\">Cancel</button>\n            </div>\n        </div>\n    </div>\n</div>\n{% endblock %}\n\n{% block extra_css %}\n<style>\n    #mapCanvas {\n        position: absolute;\n        top: 0;\n        left: 0;\n        z-index: 5;\n    }\n</style>\n{% endblock %}\n\n{% block scripts %}\n<script>\n    // Parse the JSON data from the template\n    const templateData = JSON.parse(document.getElementById('templateData').textContent);\n\n    // Store the current game state\n    let gameState = {\n        locations: templateData.locations,\n        connections: templateData.connections,\n        selectedLocation: null,\n        playerFaction: templateData.playerFaction,\n        gameOver: templateData.gameOver,\n        winner: templateData.winner,\n        victoryMessage: templateData.victoryMessage\n    };\n\n    // Event feed log (client-side)\n    let eventLog = [];\n\n    // Animation tracking -- prevents map refresh from destroying in-flight animations\n    let activeAnimations = 0;\n    let pendingRefresh = false;\n\n    // DOM elements\n    const mapContainer = document.getElementById('mapContainer');\n    const mapCanvas = document.getElementById('mapCanvas');\n    const markersContainer = document.getElementById('mapMarkers');\n    const gameOverOverlay = document.getElementById('gameOverOverlay');\n\n    // --- HUD ---\n    function updateHUD() {\n        let totalResources = 0;\n        let totalArmies = 0;\n        let territoryCount = 0;\n        const totalLocations = Object.keys(gameState.locations).length;\n\n        for (const [id, loc] of Object.entries(gameState.locations)) {\n            if (loc.faction === gameState.playerFaction) {\n                totalResources += 
loc.resources;\n                totalArmies += loc.army;\n                territoryCount++;\n            }\n        }\n\n        document.getElementById('hudResources').textContent = totalResources;\n        document.getElementById('hudArmies').textContent = totalArmies;\n        document.getElementById('hudTerritory').textContent = `${territoryCount}/${totalLocations}`;\n    }\n\n    // --- Event Feed ---\n    function addEvent(message, faction, icon) {\n        const now = new Date();\n        const timeStr = now.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' });\n        const factionClass = faction || 'neutral';\n        const iconClass = icon || 'fa-circle';\n\n        eventLog.push({ time: timeStr, message, factionClass, iconClass });\n\n        const feedBody = document.getElementById('eventFeedBody');\n        const eventEl = document.createElement('div');\n        eventEl.className = 'event-item';\n        eventEl.innerHTML = `\n            <span class=\"event-time\">${timeStr}</span>\n            <span class=\"event-icon ${factionClass}\"><i class=\"fas ${iconClass}\"></i></span>\n            <span class=\"event-message\">${message}</span>\n        `;\n\n        // Insert at top (newest first)\n        feedBody.insertBefore(eventEl, feedBody.firstChild.nextSibling);\n\n        // Keep max 30 events\n        while (feedBody.children.length > 31) {\n            feedBody.removeChild(feedBody.lastChild);\n        }\n    }\n\n    // --- Map Init ---\n    function initMap() {\n        clearMap();\n        drawConnections();\n        createLocationMarkers();\n        updateHUD();\n\n        document.getElementById('locationDetails').style.display = 'none';\n        document.getElementById('emptyState').style.display = 'block';\n\n        checkGameOver();\n    }\n\n    function checkGameOver() {\n        if (gameState.gameOver) {\n            const isWinner = gameState.winner === gameState.playerFaction;\n            const 
gameOverTitle = document.getElementById('gameOverTitle');\n            const gameOverMessage = document.getElementById('gameOverMessage');\n\n            if (isWinner) {\n                gameOverTitle.textContent = \"VICTORY!\";\n                gameOverTitle.classList.add('victory-text');\n                gameOverMessage.textContent = gameState.victoryMessage;\n                document.getElementById('victoryAnimation').classList.remove('d-none');\n                document.getElementById('defeatAnimation').classList.add('d-none');\n            } else {\n                gameOverTitle.textContent = \"DEFEAT!\";\n                gameOverTitle.classList.add('defeat-text');\n                if (gameState.winner === 'southern') {\n                    gameOverMessage.textContent = \"The Southern Kingdom has conquered your capital! Glory to the South!\";\n                } else {\n                    gameOverMessage.textContent = \"The Northern Kingdom has conquered your capital! Victory through unity!\";\n                }\n                document.getElementById('victoryAnimation').classList.add('d-none');\n                document.getElementById('defeatAnimation').classList.remove('d-none');\n            }\n\n            gameOverOverlay.classList.remove('d-none');\n            document.querySelectorAll('#actionButtons button').forEach(b => b.disabled = true);\n        }\n    }\n\n    function clearMap() {\n        markersContainer.innerHTML = '';\n        const ctx = mapCanvas.getContext('2d');\n        ctx.clearRect(0, 0, mapCanvas.width, mapCanvas.height);\n    }\n\n    // --- Draw Connections (faction-colored) ---\n    function drawConnections() {\n        const canvas = mapCanvas;\n        canvas.width = mapContainer.clientWidth;\n        canvas.height = mapContainer.clientHeight;\n        const ctx = canvas.getContext('2d');\n\n        // Subtle grid overlay\n        ctx.strokeStyle = 'rgba(255, 255, 255, 0.02)';\n        ctx.lineWidth = 1;\n        const gridSize 
= 40;\n        for (let x = 0; x < canvas.width; x += gridSize) {\n            ctx.beginPath(); ctx.moveTo(x, 0); ctx.lineTo(x, canvas.height); ctx.stroke();\n        }\n        for (let y = 0; y < canvas.height; y += gridSize) {\n            ctx.beginPath(); ctx.moveTo(0, y); ctx.lineTo(canvas.width, y); ctx.stroke();\n        }\n\n        // Draw connections\n        ctx.lineWidth = 2;\n\n        gameState.connections.forEach(connection => {\n            const loc1 = gameState.locations[connection[0]];\n            const loc2 = gameState.locations[connection[1]];\n\n            if (loc1 && loc2) {\n                const x1 = mapContainer.clientWidth * (loc1.x / 100);\n                const y1 = mapContainer.clientHeight * (loc1.y / 100);\n                const x2 = mapContainer.clientWidth * (loc2.x / 100);\n                const y2 = mapContainer.clientHeight * (loc2.y / 100);\n\n                // Determine line color based on factions\n                const f1 = loc1.faction;\n                const f2 = loc2.faction;\n\n                const factionLineColors = {\n                    southern: 'rgba(255, 215, 0, 0.35)',\n                    northern: 'rgba(79, 195, 247, 0.35)',\n                    nights_watch: 'rgba(215, 228, 241, 0.45)',\n                    white_walkers: 'rgba(136, 196, 230, 0.45)',\n                    barbarian: 'rgba(193, 68, 46, 0.35)'\n                };\n                if (f1 !== 'neutral' && f1 === f2) {\n                    ctx.strokeStyle = factionLineColors[f1] || 'rgba(120, 144, 156, 0.2)';\n                    ctx.setLineDash([]);\n                } else if (f1 !== 'neutral' && f2 !== 'neutral' && f1 !== f2) {\n                    // Any two non-neutral, non-identical factions = contested.\n                    ctx.strokeStyle = 'rgba(239, 68, 68, 0.3)';\n                    ctx.setLineDash([8, 6]);\n                } else {\n                    ctx.strokeStyle = 'rgba(120, 144, 156, 0.2)';\n                    
ctx.setLineDash([]);\n                }\n\n                ctx.beginPath();\n                ctx.moveTo(x1, y1);\n                ctx.lineTo(x2, y2);\n                ctx.stroke();\n                ctx.setLineDash([]);\n            }\n        });\n    }\n\n    // --- Create Location Markers (with labels) ---\n    function createLocationMarkers() {\n        for (const [locationId, locationData] of Object.entries(gameState.locations)) {\n            const marker = document.createElement('div');\n            marker.id = `marker-${locationId}`;\n            marker.className = `location-marker ${locationData.faction} ${locationData.type}`;\n            marker.dataset.locationId = locationId;\n            marker.style.left = `${locationData.x}%`;\n            marker.style.top = `${locationData.y}%`;\n\n            // Icon — chosen by (type, faction).\n            let icon = document.createElement('i');\n            const factionIcons = {\n                southern: 'fas fa-sun',\n                northern: 'fas fa-snowflake',\n                nights_watch: 'fas fa-shield-halved',\n                white_walkers: 'fas fa-icicles',\n                barbarian: 'fas fa-campground',\n                neutral: 'fas fa-chess-rook'\n            };\n            if (locationData.type === 'wall') {\n                // Wall keeps always render as a gate/tower regardless of holder;\n                // the colour on the marker conveys the faction.\n                icon.className = 'fas fa-tower-cell';\n            } else if (locationData.type === 'capital') {\n                icon.className = factionIcons[locationData.faction] || 'fas fa-chess-rook';\n            } else if (locationData.faction === 'barbarian') {\n                icon.className = 'fas fa-campground';\n            } else {\n                icon.className = 'fas fa-map-marker-alt';\n            }\n            marker.appendChild(icon);\n\n            // Army badge\n            if (locationData.army > 0) {\n                
const armyBadge = document.createElement('span');\n                armyBadge.className = 'position-absolute top-0 start-100 translate-middle badge rounded-pill bg-danger';\n                armyBadge.textContent = locationData.army;\n                armyBadge.style.fontSize = '0.65em';\n                armyBadge.style.zIndex = '20';\n                marker.appendChild(armyBadge);\n            }\n\n            // Location label below marker\n            const label = document.createElement('span');\n            label.className = 'location-label';\n            label.textContent = locationData.name;\n            marker.appendChild(label);\n\n            // Click handler\n            marker.addEventListener('click', () => selectLocation(locationId));\n\n            markersContainer.appendChild(marker);\n        }\n    }\n\n    // --- Select Location ---\n    function selectLocation(locationId) {\n        if (gameState.selectedLocation) {\n            const prevMarker = document.getElementById(`marker-${gameState.selectedLocation}`);\n            if (prevMarker) prevMarker.classList.remove('selected');\n        }\n\n        gameState.selectedLocation = locationId;\n\n        const marker = document.getElementById(`marker-${locationId}`);\n        if (marker) marker.classList.add('selected');\n\n        const location = gameState.locations[locationId];\n\n        document.getElementById('locationName').textContent = location.name;\n\n        const factionBadge = document.getElementById('locationFaction');\n        factionBadge.textContent = location.faction.charAt(0).toUpperCase() + location.faction.slice(1);\n        factionBadge.className = `badge ${location.faction}`;\n\n        // Update resource bar\n        const resourcePercentage = Math.min(location.resources / 200 * 100, 100);\n        document.getElementById('resourcesBar').style.width = `${resourcePercentage}%`;\n        document.getElementById('resourcesValue').textContent = location.resources;\n\n        // 
Update army bar — faction colored\n        const armyPercentage = Math.min(location.army / 10 * 100, 100);\n        const armyBar = document.getElementById('armyBar');\n        armyBar.style.width = `${armyPercentage}%`;\n        document.getElementById('armyValue').textContent = location.army;\n\n        // Show/hide buttons\n        const isCapital = location.type === 'capital';\n        const isVillage = location.type === 'village';\n        const isPlayerLocation = location.faction === gameState.playerFaction;\n\n        document.getElementById('collectResourcesBtn').style.display =\n            (isCapital && isPlayerLocation) ? 'block' : 'none';\n        document.getElementById('createArmyBtn').style.display =\n            (isCapital && isPlayerLocation) ? 'block' : 'none';\n        document.getElementById('allOutAttackBtn').style.display =\n            (isCapital && isPlayerLocation) ? 'block' : 'none';\n        document.getElementById('sendResourcesBtn').style.display =\n            (isVillage && isPlayerLocation) ? 
'block' : 'none';\n\n        // Cooldown\n        const cooldownSpan = document.getElementById('resourceCooldown');\n        if (isCapital && location.resource_cooldown) {\n            cooldownSpan.classList.remove('d-none');\n            cooldownSpan.querySelector('span').textContent = location.resource_cooldown;\n            if (location.resource_cooldown > 0) startCooldownTimer(location.resource_cooldown);\n        } else {\n            cooldownSpan.classList.add('d-none');\n        }\n\n        // Button states\n        if (isCapital) {\n            document.getElementById('collectResourcesBtn').disabled = !isPlayerLocation || gameState.gameOver || location.resource_cooldown > 0;\n            document.getElementById('createArmyBtn').disabled = !isPlayerLocation || location.resources < 30 || gameState.gameOver;\n            document.getElementById('allOutAttackBtn').disabled = !isPlayerLocation || location.army <= 0 || gameState.gameOver;\n        }\n        document.getElementById('moveArmyBtn').disabled = !isPlayerLocation || location.army <= 0 || gameState.gameOver;\n        document.getElementById('sendResourcesBtn').disabled = !isPlayerLocation || location.resources <= 0 || gameState.gameOver;\n\n        document.getElementById('sourceLocationName').textContent = location.name;\n\n        document.getElementById('locationDetails').style.display = 'block';\n        document.getElementById('emptyState').style.display = 'none';\n\n        updateMoveArmyDestinations(locationId);\n    }\n\n    // --- Move Army Destinations ---\n    function updateMoveArmyDestinations(sourceId) {\n        const destinationsList = document.getElementById('destinationsList');\n        destinationsList.innerHTML = '';\n\n        const sourceLocation = gameState.locations[sourceId];\n        const sourceConnections = [];\n\n        gameState.connections.forEach(conn => {\n            if (conn[0] === sourceId) sourceConnections.push(conn[1]);\n            else if (conn[1] === 
sourceId) sourceConnections.push(conn[0]);\n        });\n\n        sourceConnections.forEach(destId => {\n            const destLocation = gameState.locations[destId];\n\n            const item = document.createElement('button');\n            item.className = 'list-group-item list-group-item-action destination-item';\n            item.setAttribute('role', 'option');\n            item.setAttribute('aria-selected', 'false');\n            item.setAttribute('id', `dest-${destId}`);\n\n            const isFriendly = destLocation.faction === sourceLocation.faction;\n            const attackWarning = !isFriendly ?\n                `<span style=\"color: #ef4444;\">(Attack! ${sourceLocation.army} vs ${destLocation.army})</span>` :\n                `<span style=\"color: #22c55e;\">(Friendly reinforcement)</span>`;\n\n            item.innerHTML = `\n                <div class=\"d-flex justify-content-between align-items-center\">\n                    <div>\n                        <h6 class=\"mb-0\">${destLocation.name}\n                            <small style=\"color: var(--text-muted);\">${destLocation.faction}</small>\n                        </h6>\n                        <small>${attackWarning}</small>\n                    </div>\n                    <span class=\"badge bg-danger\">Army: ${destLocation.army}</span>\n                </div>\n            `;\n\n            item.addEventListener('click', () => {\n                document.querySelectorAll('#destinationsList [role=\"option\"]')\n                    .forEach(opt => opt.setAttribute('aria-selected', 'false'));\n                item.setAttribute('aria-selected', 'true');\n                moveArmy(sourceId, destId);\n            });\n\n            item.addEventListener('keydown', (e) => {\n                if (e.key === 'Enter' || e.key === ' ') { e.preventDefault(); item.click(); }\n            });\n\n            destinationsList.appendChild(item);\n        });\n    }\n\n    // --- Actions ---\n    async function 
collectResources(locationId) {\n        try {\n            const response = await fetch('/api/collect_resources', {\n                method: 'POST',\n                headers: { 'Content-Type': 'application/json' },\n                body: JSON.stringify({ location_id: locationId })\n            });\n            const result = await response.json();\n\n            if (result.success) {\n                showActionStatus('success', result.message);\n                gameState.locations[locationId].resources = result.current_resources;\n                selectLocation(locationId);\n                updateHUD();\n                addEvent(`Resources collected at ${gameState.locations[locationId].name}`, gameState.playerFaction, 'fa-coins');\n            } else {\n                if (result.cooldown && result.cooldown_seconds) {\n                    startCooldownTimer(result.cooldown_seconds);\n                    showActionStatus('warning', result.message);\n                } else {\n                    showActionStatus('danger', result.message || 'Failed to collect resources');\n                }\n            }\n        } catch (error) {\n            showActionStatus('danger', 'Network error. 
Please try again.');\n        }\n    }\n\n    async function createArmy(locationId) {\n        try {\n            const response = await fetch('/api/create_army', {\n                method: 'POST',\n                headers: { 'Content-Type': 'application/json' },\n                body: JSON.stringify({ location_id: locationId })\n            });\n            const result = await response.json();\n\n            if (result.success) {\n                showActionStatus('success', result.message);\n                gameState.locations[locationId].resources = result.current_resources;\n                gameState.locations[locationId].army = result.current_army;\n                refreshMap();\n                selectLocation(locationId);\n                updateHUD();\n                addEvent(`Army created at ${gameState.locations[locationId].name}`, gameState.playerFaction, 'fa-shield-alt');\n            } else {\n                showActionStatus('danger', result.message || 'Failed to create army');\n            }\n        } catch (error) {\n            showActionStatus('danger', 'Network error. 
Please try again.');\n        }\n    }\n\n    async function moveArmy(sourceId, targetId) {\n        showMoveArmyStatus('warning', 'Moving army...');\n\n        try {\n            const response = await fetch('/api/move_army', {\n                method: 'POST',\n                headers: { 'Content-Type': 'application/json' },\n                body: JSON.stringify({ source_id: sourceId, target_id: targetId })\n            });\n            const result = await response.json();\n\n            if (result.success || result.message) {\n                showMoveArmyStatus('success', result.message);\n\n                // Animated army march from source to target\n                const targetFaction = gameState.locations[targetId]?.faction;\n                const isAttack = targetFaction && targetFaction !== gameState.playerFaction;\n                animateArmyMove(sourceId, targetId, gameState.playerFaction, isAttack);\n\n                const srcName = gameState.locations[sourceId].name;\n                const tgtName = gameState.locations[targetId].name;\n                addEvent(`Army moved from ${srcName} to ${tgtName}`, gameState.playerFaction, 'fa-people-arrows');\n\n                // Delay periodic refresh until animation completes (~4s max)\n                let updateCount = 0;\n                const maxUpdates = 6;\n                setTimeout(() => {\n                    const updateInterval = setInterval(() => {\n                        refreshMapData();\n                        updateCount++;\n                        if (updateCount >= maxUpdates) {\n                            clearInterval(updateInterval);\n                            setTimeout(checkGameStatus, 1000);\n                        }\n                    }, 2500);\n                }, 4500);\n\n                if (result.game_over) {\n                    gameState.gameOver = true;\n                    gameState.winner = result.winner;\n                    gameState.victoryMessage = 
result.victory_message;\n                    checkGameOver();\n                }\n\n                const moveArmyModal = bootstrap.Modal.getInstance(document.getElementById('moveArmyModal'));\n                if (moveArmyModal) moveArmyModal.hide();\n            } else {\n                showMoveArmyStatus('danger', result.error || 'Failed to move army');\n            }\n        } catch (error) {\n            showMoveArmyStatus('danger', 'Network error. Please try again.');\n        }\n    }\n\n    function showActionStatus(type, message) {\n        const statusElement = document.getElementById('actionStatus');\n        statusElement.className = `alert alert-${type} mt-3 small`;\n        statusElement.textContent = message;\n        statusElement.style.display = 'block';\n        setTimeout(() => { statusElement.style.display = 'none'; }, 5000);\n    }\n\n    function showMoveArmyStatus(type, message) {\n        const statusElement = document.getElementById('moveArmyStatus');\n        statusElement.className = `alert alert-${type} mt-3`;\n        statusElement.textContent = message;\n        statusElement.style.display = 'block';\n    }\n\n    function refreshMap() {\n        clearMap();\n        drawConnections();\n        createLocationMarkers();\n        updateHUD();\n        checkGameOver();\n\n        if (gameState.selectedLocation) {\n            selectLocation(gameState.selectedLocation);\n        }\n    }\n\n    async function refreshMapData() {\n        try {\n            const response = await fetch('/api/map_data');\n            const data = await response.json();\n\n            gameState.locations = data.locations;\n            gameState.gameOver = data.game_over;\n            gameState.winner = data.winner;\n            gameState.victoryMessage = data.victory_message;\n\n            // Update the Wall Hold HUD if present (White Walkers Attack).\n            if (data.wall_hold) {\n                const nw = 
document.getElementById('hudHoldNightsWatch');\n                const ww = document.getElementById('hudHoldWhiteWalkers');\n                const threshold = data.wall_hold.threshold || 5;\n                const holds = data.wall_hold.holds || {};\n                if (nw) nw.textContent = `${holds.nights_watch || 0}/${threshold}`;\n                if (ww) ww.textContent = `${holds.white_walkers || 0}/${threshold}`;\n            }\n\n            // Defer visual rebuild while animations are playing\n            if (activeAnimations > 0) {\n                pendingRefresh = true;\n                updateHUD(); // HUD is safe to update immediately\n                return;\n            }\n\n            refreshMap();\n\n            const moveArmyModal = bootstrap.Modal.getInstance(document.getElementById('moveArmyModal'));\n            if (moveArmyModal) moveArmyModal.hide();\n        } catch (error) {\n            console.error('Error refreshing map data:', error);\n            const mapAlert = document.getElementById('mapAlert');\n            mapAlert.textContent = 'Failed to refresh map data. 
Please try again.';\n            mapAlert.classList.remove('d-none');\n            setTimeout(() => { mapAlert.classList.add('d-none'); }, 5000);\n        }\n    }\n\n    // Flush a deferred refresh once all animations finish\n    function flushPendingRefresh() {\n        if (pendingRefresh && activeAnimations === 0) {\n            pendingRefresh = false;\n            refreshMap();\n        }\n    }\n\n    window.addEventListener('resize', () => {\n        drawConnections();\n        document.querySelectorAll('.transfer-arrow').forEach(el => el.remove());\n    });\n\n    // --- Init ---\n    document.addEventListener('DOMContentLoaded', function() {\n        initMap();\n\n        document.getElementById('collectResourcesBtn').addEventListener('click', () => {\n            if (gameState.selectedLocation) collectResources(gameState.selectedLocation);\n        });\n\n        document.getElementById('createArmyBtn').addEventListener('click', () => {\n            if (gameState.selectedLocation) createArmy(gameState.selectedLocation);\n        });\n\n        document.getElementById('refreshMapBtn').addEventListener('click', refreshMapData);\n\n        // Poll every 5 s so the wall-hold counter and resource HUD reflect\n        // the wall-tick thread (30 s cadence) and AI moves within seconds\n        // rather than up to a minute later.\n        setInterval(refreshMapData, 5000);\n        setInterval(checkGameStatus, 5000);\n\n        document.getElementById('sendResourcesBtn').addEventListener('click', () => {\n            if (gameState.selectedLocation) sendResourcesToCapital(gameState.selectedLocation);\n        });\n\n        document.getElementById('allOutAttackBtn').addEventListener('click', () => {\n            if (!gameState.selectedLocation) return;\n            const location = gameState.locations[gameState.selectedLocation];\n            if (!confirm(`Launch an all-out attack with ${location.army} armies from ${location.name}? 
This cannot be undone!`)) return;\n            launchAllOutAttack(gameState.selectedLocation);\n        });\n\n        // AI Toggle\n        const aiToggle = document.getElementById('aiToggle');\n        const aiStatus = document.getElementById('aiStatus');\n\n        checkAIStatus();\n\n        aiToggle.addEventListener('change', async function() {\n            const enable = this.checked;\n            try {\n                const response = await fetch('/api/ai_toggle', {\n                    method: 'POST',\n                    headers: { 'Content-Type': 'application/json' },\n                    body: JSON.stringify({ enable: enable })\n                });\n                const result = await response.json();\n\n                if (result.success) {\n                    if (enable) {\n                        aiStatus.innerHTML = '<span class=\"ai-status-dot active\"></span>AI is active - ' + result.message;\n                        addEvent('AI opponent activated', 'neutral', 'fa-robot');\n                    } else {\n                        aiStatus.innerHTML = '<span class=\"ai-status-dot inactive\"></span>AI is inactive';\n                        addEvent('AI opponent deactivated', 'neutral', 'fa-robot');\n                    }\n                } else {\n                    this.checked = !enable;\n                    alert('Failed to toggle AI: ' + result.message);\n                }\n            } catch (error) {\n                this.checked = !enable;\n                alert('Failed to connect to AI service');\n            }\n        });\n\n        setInterval(checkAIStatus, 10000);\n\n        async function checkAIStatus() {\n            try {\n                const response = await fetch('/api/ai_status');\n                const status = await response.json();\n\n                if (status.active) {\n                    aiToggle.checked = true;\n                    aiStatus.innerHTML = `<span class=\"ai-status-dot active\"></span>AI is active 
(${status.faction} faction)`;\n                } else {\n                    aiToggle.checked = false;\n                    aiStatus.innerHTML = '<span class=\"ai-status-dot inactive\"></span>AI is inactive';\n                }\n            } catch (error) {\n                console.error('Failed to check AI status:', error);\n            }\n        }\n    });\n\n    // =============================================\n    // ANIMATED TRAVEL SYSTEM\n    // =============================================\n\n    // Animate a unit traveling from source to target location\n    // type: 'army' | 'cart' -- faction: 'southern' | 'northern'\n    // onArrive: callback when the unit reaches the destination\n    // opts: { armyCount: number } optional extra data\n    function animateTravelingUnit(fromLocId, toLocId, faction, type, onArrive, opts) {\n        const fromLoc = gameState.locations[fromLocId];\n        const toLoc = gameState.locations[toLocId];\n        if (!fromLoc || !toLoc) return;\n\n        activeAnimations++;\n\n        const fromX = mapContainer.clientWidth * (fromLoc.x / 100);\n        const fromY = mapContainer.clientHeight * (fromLoc.y / 100);\n        const toX = mapContainer.clientWidth * (toLoc.x / 100);\n        const toY = mapContainer.clientHeight * (toLoc.y / 100);\n\n        // Create the traveling unit element\n        const unit = document.createElement('div');\n        unit.className = `traveling-unit ${type} ${faction}`;\n\n        const icon = document.createElement('i');\n        if (type === 'army') {\n            icon.className = 'fas fa-chess-knight';\n        } else {\n            icon.className = 'fas fa-coins';\n        }\n        unit.appendChild(icon);\n\n        // Add army count badge if provided\n        if (opts && opts.armyCount && opts.armyCount > 0) {\n            const badge = document.createElement('span');\n            badge.className = 'army-count';\n            badge.textContent = opts.armyCount;\n            
unit.appendChild(badge);\n        }\n\n        // Start position\n        unit.style.left = `${fromX}px`;\n        unit.style.top = `${fromY}px`;\n        unit.style.transform = 'translate(-50%, -50%)';\n        mapContainer.appendChild(unit);\n\n        // Calculate travel distance and duration -- longer for more drama\n        const dx = toX - fromX;\n        const dy = toY - fromY;\n        const distance = Math.sqrt(dx * dx + dy * dy);\n        const duration = Math.max(2000, Math.min(4000, distance * 6)); // 2s - 4s\n\n        // Add connection pulse dots along the path\n        const pulseCount = Math.max(3, Math.round(distance / 80));\n        const pulses = [];\n        for (let i = 1; i <= pulseCount; i++) {\n            const t = i / (pulseCount + 1);\n            const pulse = document.createElement('div');\n            pulse.className = `connection-pulse ${faction}`;\n            pulse.style.left = `${fromX + dx * t}px`;\n            pulse.style.top = `${fromY + dy * t}px`;\n            pulse.style.animationDelay = `${i * 0.15}s`;\n            mapContainer.appendChild(pulse);\n            pulses.push(pulse);\n        }\n\n        // Collect trail segments for cleanup\n        const trailElements = [];\n\n        // Animate with requestAnimationFrame for smooth movement\n        const startTime = performance.now();\n        let trailTimer = 0;\n        let prevX = fromX;\n        let prevY = fromY;\n\n        function step(now) {\n            const elapsed = now - startTime;\n            const progress = Math.min(elapsed / duration, 1);\n\n            // Ease-in-out curve\n            const ease = progress < 0.5\n                ? 
2 * progress * progress\n                : 1 - Math.pow(-2 * progress + 2, 2) / 2;\n\n            const cx = fromX + dx * ease;\n            const cy = fromY + dy * ease;\n            unit.style.left = `${cx}px`;\n            unit.style.top = `${cy}px`;\n\n            // Drop trail particles every ~60ms\n            trailTimer += (now - (step._lastFrame || now));\n            step._lastFrame = now;\n            if (trailTimer > 60) {\n                trailTimer = 0;\n\n                // Glowing particle\n                const particle = document.createElement('div');\n                particle.className = `trail-particle ${type === 'cart' ? 'resource' : faction}`;\n                particle.style.left = `${cx + (Math.random() - 0.5) * 8}px`;\n                particle.style.top = `${cy + (Math.random() - 0.5) * 8}px`;\n                mapContainer.appendChild(particle);\n                trailElements.push(particle);\n                setTimeout(() => particle.remove(), 1500);\n\n                // Glowing trail line segment from previous position\n                const segDx = cx - prevX;\n                const segDy = cy - prevY;\n                const segLen = Math.sqrt(segDx * segDx + segDy * segDy);\n                if (segLen > 2) {\n                    const seg = document.createElement('div');\n                    seg.className = `trail-line-segment ${type === 'cart' ? 
'resource' : faction}`;\n                    seg.style.left = `${prevX}px`;\n                    seg.style.top = `${prevY}px`;\n                    seg.style.width = `${segLen}px`;\n                    seg.style.transform = `rotate(${Math.atan2(segDy, segDx)}rad)`;\n                    mapContainer.appendChild(seg);\n                    trailElements.push(seg);\n                    setTimeout(() => seg.remove(), 2500);\n                }\n\n                prevX = cx;\n                prevY = cy;\n            }\n\n            if (progress < 1) {\n                requestAnimationFrame(step);\n            } else {\n                // Arrived! Clean up unit and pulses\n                unit.remove();\n                pulses.forEach(p => p.remove());\n\n                activeAnimations--;\n                if (onArrive) onArrive();\n                flushPendingRefresh();\n            }\n        }\n\n        requestAnimationFrame(step);\n    }\n\n    // Show a burst effect at a location\n    function showClashEffect(locationId, type) {\n        const loc = gameState.locations[locationId];\n        if (!loc) return;\n\n        const x = mapContainer.clientWidth * (loc.x / 100);\n        const y = mapContainer.clientHeight * (loc.y / 100);\n\n        // Main burst\n        const burst = document.createElement('div');\n        burst.className = `clash-burst ${type}`;\n        burst.style.left = `${x}px`;\n        burst.style.top = `${y}px`;\n        mapContainer.appendChild(burst);\n        setTimeout(() => burst.remove(), 900);\n\n        // Sparkle particles for captures\n        if (type === 'capture') {\n            for (let i = 0; i < 8; i++) {\n                const sparkle = document.createElement('div');\n                sparkle.className = 'capture-sparkle';\n                const angle = (i / 8) * Math.PI * 2;\n                const radius = 20 + Math.random() * 15;\n                sparkle.style.left = `${x + Math.cos(angle) * radius}px`;\n                
sparkle.style.top = `${y + Math.sin(angle) * radius}px`;\n                sparkle.style.background = Math.random() > 0.5 ? 'var(--southern-gold)' : '#22c55e';\n                sparkle.style.boxShadow = `0 0 4px ${sparkle.style.background}`;\n                sparkle.style.animationDelay = `${Math.random() * 0.3}s`;\n                mapContainer.appendChild(sparkle);\n                setTimeout(() => sparkle.remove(), 1500);\n            }\n        }\n\n        // Flash the marker\n        const marker = document.getElementById(`marker-${locationId}`);\n        if (marker) {\n            marker.classList.add('just-captured');\n            setTimeout(() => marker.classList.remove('just-captured'), 700);\n        }\n    }\n\n    // Animate army movement with travel + clash at arrival\n    function animateArmyMove(sourceId, targetId, faction, isAttack) {\n        animateTravelingUnit(sourceId, targetId, faction, 'army', () => {\n            if (isAttack) {\n                showClashEffect(targetId, 'attack');\n            } else {\n                showClashEffect(targetId, 'reinforce');\n            }\n        });\n    }\n\n    // Animate resource cart along a multi-hop path\n    function animateResourcePath(path, faction) {\n        if (path.length < 2) return;\n\n        let hopIndex = 0;\n        function nextHop() {\n            if (hopIndex < path.length - 1) {\n                animateTravelingUnit(path[hopIndex], path[hopIndex + 1], faction, 'cart', () => {\n                    // Small flash at intermediate stops\n                    if (hopIndex < path.length - 2) {\n                        showClashEffect(path[hopIndex + 1], 'reinforce');\n                    } else {\n                        // Final destination — gold sparkle\n                        showClashEffect(path[hopIndex + 1], 'capture');\n                    }\n                    hopIndex++;\n                    setTimeout(nextHop, 300);\n                });\n            }\n        }\n        
nextHop();\n    }\n\n    // Legacy compatibility wrapper\n    function createTransferIndicator(fromLoc, toLoc, faction, type) {\n        if (type === 'resources') {\n            animateTravelingUnit(fromLoc, toLoc, faction, 'cart', () => {\n                showClashEffect(toLoc, 'reinforce');\n            });\n        } else {\n            const targetFaction = gameState.locations[toLoc]?.faction;\n            const isAttack = targetFaction && targetFaction !== faction && targetFaction !== 'neutral';\n            animateArmyMove(fromLoc, toLoc, faction, isAttack || targetFaction === 'neutral');\n        }\n    }\n\n    async function sendResourcesToCapital(locationId) {\n        try {\n            const response = await fetch('/api/send_resources_to_capital', {\n                method: 'POST',\n                headers: { 'Content-Type': 'application/json' },\n                body: JSON.stringify({ location_id: locationId })\n            });\n            const result = await response.json();\n\n            if (result.success) {\n                showActionStatus('success', result.message);\n                addEvent(`Resources sent from ${gameState.locations[locationId].name} to capital`, gameState.playerFaction, 'fa-route');\n\n                // Animate resource cart along the full path\n                animateResourcePath(result.path, gameState.playerFaction);\n                startResourceTransferUpdates(result.path);\n            } else {\n                showActionStatus('danger', result.message || 'Failed to send resources');\n            }\n        } catch (error) {\n            showActionStatus('danger', 'Network error. 
Please try again.');\n        }\n    }\n\n    function startResourceTransferUpdates(path) {\n        // Delay refresh until cart animation finishes (~4s per hop)\n        const animDelay = (path.length - 1) * 4500 + 1000;\n        let updateCount = 0;\n        const maxUpdates = path.length * 2;\n        setTimeout(() => {\n            const updateInterval = setInterval(() => {\n                refreshMapData();\n                updateCount++;\n                if (updateCount >= maxUpdates) clearInterval(updateInterval);\n            }, 2500);\n        }, animDelay);\n    }\n\n    function startCooldownTimer(seconds) {\n        const cooldownSpan = document.getElementById('resourceCooldown');\n        const timeSpan = cooldownSpan.querySelector('span');\n        const collectBtn = document.getElementById('collectResourcesBtn');\n        let timeLeft = seconds;\n\n        cooldownSpan.classList.remove('d-none');\n        collectBtn.disabled = true;\n\n        const timer = setInterval(() => {\n            timeLeft--;\n            timeSpan.textContent = timeLeft;\n            if (timeLeft <= 0) {\n                clearInterval(timer);\n                cooldownSpan.classList.add('d-none');\n                if (gameState.selectedLocation) {\n                    const location = gameState.locations[gameState.selectedLocation];\n                    if (location.faction === gameState.playerFaction) collectBtn.disabled = false;\n                }\n            }\n        }, 1000);\n    }\n\n    async function launchAllOutAttack(locationId) {\n        try {\n            const response = await fetch('/api/all_out_attack', {\n                method: 'POST',\n                headers: { 'Content-Type': 'application/json', 'Accept': 'application/json' },\n                body: JSON.stringify({ location_id: locationId })\n            });\n\n            if (!response.ok) throw new Error(`HTTP error! 
status: ${response.status}`);\n            const result = await response.json();\n\n            if (result.success) {\n                showActionStatus('success', result.message);\n                addEvent(`All-out attack launched from ${gameState.locations[locationId].name}!`, gameState.playerFaction, 'fa-skull-crossbones');\n\n                // Animate army marching along the full attack path, hop by hop\n                const hops = (result.path && result.path.length >= 2) ? result.path.length - 1 : 0;\n                if (hops > 0) {\n                    let hopIdx = 0;\n                    function nextAttackHop() {\n                        if (hopIdx < result.path.length - 1) {\n                            const isLastHop = hopIdx === result.path.length - 2;\n                            animateTravelingUnit(result.path[hopIdx], result.path[hopIdx + 1], gameState.playerFaction, 'army', () => {\n                                if (isLastHop) {\n                                    showClashEffect(result.path[hopIdx + 1], 'attack');\n                                } else {\n                                    showClashEffect(result.path[hopIdx + 1], 'reinforce');\n                                }\n                                hopIdx++;\n                                setTimeout(nextAttackHop, 200);\n                            });\n                        }\n                    }\n                    nextAttackHop();\n                }\n\n                // Delay refreshes until animations finish (~4s per hop + buffer)\n                const animDelay = hops * 4500 + 1000;\n                let updateCount = 0;\n                const maxUpdates = Math.max(3, hops * 2);\n                setTimeout(() => {\n                    const updateInterval = setInterval(() => {\n                        refreshMapData();\n                        updateCount++;\n                        if (updateCount >= maxUpdates) {\n                            
clearInterval(updateInterval);\n                            setTimeout(checkGameStatus, 1000);\n                        }\n                    }, 2000);\n                }, animDelay);\n\n                if (result.game_over) {\n                    gameState.gameOver = true;\n                    gameState.winner = result.winner;\n                    gameState.victoryMessage = result.victory_message;\n                    checkGameOver();\n                }\n            } else {\n                showActionStatus('danger', result.message || 'Failed to launch attack');\n            }\n        } catch (error) {\n            showActionStatus('danger', 'Failed to launch attack. Check console for details.');\n        }\n    }\n\n    async function checkGameStatus() {\n        try {\n            const response = await fetch('/api/game_status');\n            const status = await response.json();\n\n            if (status.game_over && !gameState.gameOver) {\n                gameState.gameOver = true;\n                gameState.winner = status.winner;\n                gameState.victoryMessage = status.victory_message;\n                await refreshMapData();\n                checkGameOver();\n                addEvent(`Game over! ${status.winner.charAt(0).toUpperCase() + status.winner.slice(1)} wins!`, status.winner, 'fa-crown');\n            }\n        } catch (error) {\n            console.error('Error checking game status:', error);\n        }\n    }\n\n    // Expose animation functions for debugging/testing\n    window._gameAnimations = {\n        animateArmyMove,\n        animateTravelingUnit,\n        showClashEffect,\n        animateResourcePath,\n        addEvent\n    };\n</script>\n{% endblock %}\n"
  },
  {
    "path": "game-of-tracing/war_map/templates/map_picker.html",
    "content": "{% extends \"layout.html\" %}\n\n{% block title %}Pick a Map{% endblock %}\n\n{% block content %}\n<div class=\"faction-hero\">\n    <div class=\"col-lg-9 col-xl-8\">\n        <div class=\"text-center mb-5\">\n            <h1 class=\"faction-hero-title\">A Game of Traces</h1>\n            <p class=\"faction-hero-subtitle\">Pick a battlefield. Each map has its own factions, economy, and win conditions.</p>\n        </div>\n\n        <form method=\"POST\" action=\"{{ url_for('select_map') }}\" id=\"mapPickerForm\">\n            <input type=\"hidden\" name=\"map_id\" id=\"mapIdInput\" value=\"\" required>\n\n            <div class=\"row g-4 justify-content-center mb-4\">\n                {% for map_id, meta in maps.items() %}\n                <div class=\"col-md-6\">\n                    {# role/tabindex make the card focusable and operable from the keyboard #}\n                    <div class=\"card faction-card map-card\" data-map-id=\"{{ map_id }}\" role=\"button\" tabindex=\"0\" aria-pressed=\"false\">\n                        <div class=\"card-body\">\n                            <span class=\"faction-icon\">\n                                <i class=\"fas {{ meta.icon }}\"></i>\n                            </span>\n                            <h4>{{ meta.display_name }}</h4>\n                            <p class=\"faction-motto\">\n                                {% if meta.single_player %}Single-player · Hold to win\n                                {% else %}Two-player · Capture to win\n                                {% endif %}\n                            </p>\n                            <p class=\"faction-start\">{{ meta.description }}</p>\n                            <p class=\"small mt-2 mb-0\">\n                                <strong>Factions:</strong>\n                                {{ meta.factions | join(', ') | replace('_', ' ') | title }}\n                            </p>\n                        </div>\n                    </div>\n                </div>\n                {% endfor %}\n            </div>\n\n            <div class=\"text-center\">\n                <button type=\"submit\" id=\"enterMapBtn\" class=\"btn btn-primary btn-lg px-5 py-2\" disabled>\n                    <i class=\"fas fa-play me-2\"></i>Enter The Realm\n                </button>\n            </div>\n        </form>\n    </div>\n</div>\n{% endblock %}\n\n{% block scripts %}\n<script>\n    // Map picker: choosing a card stores its id in the hidden form field and\n    // enables the submit button.\n    $(document).ready(function() {\n        const mapInput = $('#mapIdInput');\n        const enterBtn = $('#enterMapBtn');\n        const mapCards = $('.map-card');\n\n        // Mark the given card as the chosen map and unlock the submit button.\n        function selectMap(card) {\n            // Read the raw attribute so the id stays the exact string rendered\n            // by the template; .data() may convert values to other types.\n            mapInput.val(card.attr('data-map-id'));\n            mapCards.removeClass('faction-selected').attr('aria-pressed', 'false');\n            card.addClass('faction-selected').attr('aria-pressed', 'true');\n            enterBtn.prop('disabled', false);\n        }\n\n        mapCards.on('click', function() {\n            selectMap($(this));\n        });\n\n        // Cards are plain divs, so Enter/Space activation is wired up by hand.\n        mapCards.on('keydown', function(e) {\n            if (e.key === 'Enter' || e.key === ' ') {\n                e.preventDefault();\n                selectMap($(this));\n            }\n        });\n\n        // Block submission until a map is chosen and briefly highlight the cards.\n        $('#mapPickerForm').on('submit', function(e) {\n            if (!mapInput.val()) {\n                e.preventDefault();\n                mapCards.addClass('border-warning');\n                setTimeout(() => mapCards.removeClass('border-warning'), 900);\n            }\n        });\n    });\n</script>\n{% endblock %}\n"
  },
  {
    "path": "game-of-tracing/war_map/templates/replay.html",
    "content": "{% extends \"layout.html\" %}\n\n{% block title %}Game Replay{% endblock %}\n\n{% block content %}\n<div class=\"row g-3\">\n    <div class=\"col-md-8\">\n        <div class=\"card\">\n            <div class=\"card-header\">\n                <h4 class=\"mb-0\"><i class=\"fas fa-history me-2\"></i>Game Session Replay</h4>\n            </div>\n            <div class=\"card-body\">\n                <p class=\"mb-4\" style=\"color: var(--text-secondary);\">Replay previous game sessions using span links and distributed tracing. Each session shows the complete chain of actions linked together through OpenTelemetry spans.</p>\n\n                <div id=\"loading\" class=\"text-center py-4\">\n                    <div class=\"spinner-border\" role=\"status\">\n                        <span class=\"visually-hidden\">Loading...</span>\n                    </div>\n                    <p class=\"mt-2\" style=\"color: var(--text-secondary);\">Loading game sessions from Tempo...</p>\n                </div>\n\n                <div id=\"sessions-list\" style=\"display: none;\">\n                    <h5>Available Sessions</h5>\n                    <div class=\"table-responsive\">\n                        <table class=\"table table-striped\">\n                            <thead>\n                                <tr>\n                                    <th>Session ID</th>\n                                    <th>Action</th>\n                                </tr>\n                            </thead>\n                            <tbody id=\"sessions-tbody\">\n                            </tbody>\n                        </table>\n                    </div>\n                </div>\n\n                <div id=\"no-sessions\" class=\"alert alert-info\" style=\"display: none;\">\n                    <i class=\"fas fa-info-circle me-2\"></i>\n                    No game sessions found. Play a game first to create replay data!\n                </div>\n\n                <div id=\"error-message\" class=\"alert alert-danger\" style=\"display: none;\">\n                    <i class=\"fas fa-exclamation-triangle me-2\"></i>\n                    <span id=\"error-text\"></span>\n                </div>\n            </div>\n        </div>\n    </div>\n\n    <div class=\"col-md-4\">\n        <div class=\"card mb-3\">\n            <div class=\"card-header\">\n                <h5 class=\"mb-0\"><i class=\"fas fa-question-circle me-2\"></i>How It Works</h5>\n            </div>\n            <div class=\"card-body\">\n                <h6 style=\"color: var(--northern-blue);\">Span Links & Replay</h6>\n                <p class=\"small\" style=\"color: var(--text-secondary);\">Each game action creates a span link to the previous action, forming a chain across different traces.</p>\n\n                <h6 style=\"color: var(--northern-blue);\">What You'll See</h6>\n                <ul class=\"small\" style=\"color: var(--text-secondary);\">\n                    <li><strong>Action Sequence</strong> - Chronological order of game moves</li>\n                    <li><strong>Span Links</strong> - How actions connect to each other</li>\n                    <li><strong>Trace Context</strong> - Full distributed tracing information</li>\n                    <li><strong>Game Narrative</strong> - Complete story of how the game unfolded</li>\n                </ul>\n\n                <h6 style=\"color: var(--northern-blue);\">Educational Value</h6>\n                <ul class=\"small\" style=\"color: var(--text-secondary);\">\n                    <li>Cross-trace relationships</li>\n                    <li>Tempo API integration</li>\n                    <li>TraceQL queries</li>\n                    <li>Game state reconstruction</li>\n                </ul>\n\n                <div class=\"mt-3\">\n                    <a href=\"https://grafana.com/docs/tempo/latest/traceql/\" target=\"_blank\" rel=\"noopener noreferrer\" class=\"btn btn-sm btn-outline-info\">\n                        <i class=\"fas fa-external-link-alt me-1\"></i>Learn TraceQL\n                    </a>\n                </div>\n            </div>\n        </div>\n\n        <div class=\"card\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\"><i class=\"fas fa-search me-2\"></i>Try TraceQL Queries</h6>\n            </div>\n            <div class=\"card-body\">\n                <p class=\"small\" style=\"color: var(--text-secondary);\">Use these queries in Grafana Tempo:</p>\n                <div class=\"mb-2\">\n                    <code>{game.session.id!=\"\"}</code>\n                    <small class=\"d-block\" style=\"color: var(--text-muted);\">Find all game sessions</small>\n                </div>\n                <div class=\"mb-2\">\n                    <code>{link.type=\"game_sequence\"}</code>\n                    <small class=\"d-block\" style=\"color: var(--text-muted);\">Find spans with links</small>\n                </div>\n                <div class=\"mb-2\">\n                    <code>{game.action.type=\"move_army\"}</code>\n                    <small class=\"d-block\" style=\"color: var(--text-muted);\">Find specific actions</small>\n                </div>\n            </div>\n        </div>\n    </div>\n</div>\n{% endblock %}\n\n{% block scripts %}\n<script>\ndocument.addEventListener('DOMContentLoaded', function() {\n    loadGameSessions();\n});\n\n// Fetch the recorded sessions and show the table, the empty-state notice,\n// or the error box depending on the outcome.\nasync function loadGameSessions() {\n    try {\n        const response = await fetch('/api/replay/sessions');\n        // Fail loudly on HTTP errors rather than trying to parse an error page as JSON.\n        if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);\n        const data = await response.json();\n\n        document.getElementById('loading').style.display = 'none';\n\n        if (data.success && data.sessions && data.sessions.length > 0) {\n            displaySessions(data.sessions);\n        } else {\n            document.getElementById('no-sessions').style.display = 'block';\n        }\n    } catch (error) {\n        document.getElementById('loading').style.display = 'none';\n        document.getElementById('error-message').style.display = 'block';\n        document.getElementById('error-text').textContent = 'Failed to load game sessions: ' + error.message;\n    }\n}\n\n// Render one table row per session: the id plus a link to its replay page.\nfunction displaySessions(sessions) {\n    const tbody = document.getElementById('sessions-tbody');\n    tbody.innerHTML = '';\n\n    sessions.forEach(session => {\n        // The id comes from the API response, so it is inserted as text and\n        // URL-encoded instead of being interpolated into an HTML string.\n        const sessionId = String(session.session_id);\n\n        const idCode = document.createElement('code');\n        idCode.textContent = sessionId;\n        const idCell = document.createElement('td');\n        idCell.appendChild(idCode);\n\n        const replayLink = document.createElement('a');\n        replayLink.href = '/replay/' + encodeURIComponent(sessionId);\n        replayLink.className = 'btn btn-sm btn-primary';\n        // Static markup only -- no session data is interpolated here.\n        replayLink.innerHTML = '<i class=\"fas fa-play me-1\"></i>Replay';\n        const actionCell = document.createElement('td');\n        actionCell.appendChild(replayLink);\n\n        const row = document.createElement('tr');\n        row.appendChild(idCell);\n        row.appendChild(actionCell);\n        tbody.appendChild(row);\n    });\n\n    document.getElementById('sessions-list').style.display = 'block';\n}\n</script>\n{% endblock %}\n"
  },
  {
    "path": "game-of-tracing/war_map/templates/replay_session.html",
    "content": "{% extends \"layout.html\" %}\n\n{% block title %}Session Replay{% endblock %}\n\n{% block content %}\n<div class=\"row g-3\">\n    <!-- Map Replay Area -->\n    <div class=\"col-md-8\">\n        <div class=\"card\">\n            <div class=\"card-header d-flex justify-content-between align-items-center\">\n                <h4 class=\"mb-0\"><i class=\"fas fa-play me-2\"></i>Visual Game Replay</h4>\n                <div>\n                    <button id=\"play-pause-btn\" class=\"btn btn-primary btn-sm me-1\">\n                        <i class=\"fas fa-play\"></i> Play\n                    </button>\n                    <button id=\"step-btn\" class=\"btn btn-outline-light btn-sm me-1\">\n                        <i class=\"fas fa-step-forward\"></i> Step\n                    </button>\n                    <button id=\"reset-btn\" class=\"btn btn-outline-light btn-sm me-1\">\n                        <i class=\"fas fa-undo\"></i> Reset\n                    </button>\n                    <a href=\"/replay\" class=\"btn btn-outline-light btn-sm\">\n                        <i class=\"fas fa-arrow-left me-1\"></i>Back\n                    </a>\n                </div>\n            </div>\n            <div class=\"card-body\">\n                <div id=\"loading\" class=\"text-center py-4\">\n                    <div class=\"spinner-border\" role=\"status\">\n                        <span class=\"visually-hidden\">Loading...</span>\n                    </div>\n                    <p class=\"mt-2\" style=\"color: var(--text-secondary);\">Loading session data from Tempo...</p>\n                </div>\n\n                <div id=\"replay-content\" style=\"display: none;\">\n                    <!-- Progress Bar -->\n                    <div class=\"mb-3\">\n                        <div class=\"d-flex justify-content-between align-items-center mb-2\">\n                            <small style=\"color: var(--text-secondary);\"><strong>Session:</strong> <code 
id=\"session-id\">{{ session_id }}</code></small>\n                            <small style=\"color: var(--text-secondary);\">Action <span id=\"current-step\">0</span> of <span id=\"total-steps\">0</span></small>\n                        </div>\n                        <div class=\"progress\">\n                            <div id=\"replay-progress\" class=\"progress-bar bg-success\" role=\"progressbar\" style=\"width: 0%\"></div>\n                        </div>\n                    </div>\n\n                    <!-- Game Map -->\n                    <div id=\"mapContainer\" class=\"position-relative\" style=\"height: 500px;\">\n                        <canvas id=\"mapCanvas\" class=\"h-100 w-100\"></canvas>\n                        <div id=\"mapMarkers\"></div>\n\n                        <!-- Action Indicator -->\n                        <div id=\"actionIndicator\" class=\"position-absolute top-0 start-0 m-3 p-2 rounded d-none\"\n                             style=\"background: var(--bg-card); border: 1px solid var(--border-subtle); backdrop-filter: blur(8px);\">\n                            <strong id=\"actionType\" style=\"color: var(--text-primary);\">Action</strong>\n                            <small id=\"actionDetails\" class=\"d-block\" style=\"color: var(--text-secondary);\"></small>\n                        </div>\n\n                        <!-- Replay Speed Control -->\n                        <div class=\"position-absolute bottom-0 start-0 m-3\">\n                            <label for=\"speed-control\" class=\"form-label small\" style=\"color: var(--text-muted);\">Speed:</label>\n                            <select id=\"speed-control\" class=\"form-select form-select-sm\" style=\"width: 100px;\">\n                                <option value=\"3000\">Slow</option>\n                                <option value=\"2000\" selected>Normal</option>\n                                <option value=\"1000\">Fast</option>\n                                <option 
value=\"500\">Very Fast</option>\n                            </select>\n                        </div>\n                    </div>\n                </div>\n\n                <div id=\"error-message\" class=\"alert alert-danger\" style=\"display: none;\">\n                    <i class=\"fas fa-exclamation-triangle me-2\"></i>\n                    <span id=\"error-text\"></span>\n                </div>\n            </div>\n        </div>\n    </div>\n\n    <!-- Span Details Panel -->\n    <div class=\"col-md-4\">\n        <!-- Current Action Details -->\n        <div class=\"card mb-3\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\"><i class=\"fas fa-crosshairs me-2\" style=\"color: var(--southern-gold);\"></i>Current Action</h6>\n            </div>\n            <div class=\"card-body\">\n                <div id=\"current-action-details\">\n                    <p style=\"color: var(--text-muted);\">Click play to start replay</p>\n                </div>\n            </div>\n        </div>\n\n        <!-- Span Attributes -->\n        <div class=\"card mb-3\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\"><i class=\"fas fa-tags me-2\" style=\"color: var(--northern-blue);\"></i>Span Attributes</h6>\n            </div>\n            <div class=\"card-body\">\n                <div id=\"span-attributes\">\n                    <p class=\"small\" style=\"color: var(--text-muted);\">No action selected</p>\n                </div>\n            </div>\n        </div>\n\n        <!-- Session Information -->\n        <div class=\"card mb-3\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\"><i class=\"fas fa-info-circle me-2\"></i>Session Info</h6>\n            </div>\n            <div class=\"card-body\">\n                <div id=\"session-info\">\n                    <p class=\"small mb-2\"><strong>Player:</strong> <span id=\"player-name\" style=\"color: 
var(--text-secondary);\">Loading...</span></p>\n                    <p class=\"small mb-2\"><strong>Faction:</strong> <span id=\"faction-badge\" class=\"badge\">Loading...</span></p>\n                    <p class=\"small mb-2\"><strong>Total Actions:</strong> <span id=\"total-actions\">0</span></p>\n                    <p class=\"small mb-0\"><strong>Data Source:</strong> <span id=\"data-source\" class=\"badge bg-info\">Tempo</span></p>\n                </div>\n            </div>\n        </div>\n\n        <!-- Span Links -->\n        <div class=\"card\">\n            <div class=\"card-header\">\n                <h6 class=\"mb-0\"><i class=\"fas fa-link me-2\" style=\"color: #8b5cf6;\"></i>Span Links</h6>\n            </div>\n            <div class=\"card-body\">\n                <div id=\"span-links-info\">\n                    <p class=\"small\" style=\"color: var(--text-muted);\">Loading span link analysis...</p>\n                </div>\n            </div>\n        </div>\n    </div>\n</div>\n{% endblock %}\n\n{% block extra_css %}\n<style>\n    .location-marker .badge {\n        font-size: 0.7em !important;\n        min-width: 1.5em;\n        height: 1.5em;\n        line-height: 1.3;\n        border: 1px solid rgba(255, 255, 255, 0.8);\n        font-weight: bold;\n        text-align: center;\n        display: flex;\n        align-items: center;\n        justify-content: center;\n    }\n\n    .location-marker .badge.bg-warning {\n        color: #000 !important;\n        background-color: #ffc107 !important;\n    }\n\n    .location-marker .badge.bg-danger {\n        color: #fff !important;\n        background-color: #dc3545 !important;\n    }\n\n    #mapCanvas {\n        position: absolute;\n        top: 0;\n        left: 0;\n        z-index: 5;\n    }\n</style>\n{% endblock %}\n\n{% block scripts %}\n<script>\n// Game state and replay control\nlet sessionData = null;\nlet currentStep = 0;\nlet isPlaying = false;\nlet replayInterval = null;\nlet gameLocations = 
{};\nlet replaySpeed = 2000;\n\n// Layout for the map this session was played on — provided server-side\n// from ``LOCATION_POSITIONS_BY_MAP[map_id]`` so the replay matches WWA or\n// any future map, not just the WoK default.\nconst REPLAY_MAP_ID = {{ map_id | tojson }};\nconst LOCATION_POSITIONS = {{ location_positions | tojson }};\nconst LOCATION_CONNECTIONS = {{ location_connections | tojson }};\n\ndocument.addEventListener('DOMContentLoaded', function() {\n    loadSessionData();\n    setupEventListeners();\n});\n\nfunction setupEventListeners() {\n    document.getElementById('play-pause-btn').addEventListener('click', togglePlayPause);\n    document.getElementById('step-btn').addEventListener('click', stepForward);\n    document.getElementById('reset-btn').addEventListener('click', resetReplay);\n    document.getElementById('speed-control').addEventListener('change', function(e) {\n        replaySpeed = parseInt(e.target.value);\n    });\n}\n\nasync function loadSessionData() {\n    const sessionId = '{{ session_id }}';\n\n    try {\n        const response = await fetch(`/api/replay/session/${sessionId}`);\n        const data = await response.json();\n\n        document.getElementById('loading').style.display = 'none';\n\n        if (data.success) {\n            sessionData = data;\n            initializeReplay();\n            initializeMap();\n            displaySessionInfo(data);\n            displaySpanLinkChain(data.span_link_chain || []);\n        } else {\n            showError(data.error || 'Failed to load session');\n        }\n    } catch (error) {\n        document.getElementById('loading').style.display = 'none';\n        showError('Failed to load session: ' + error.message);\n    }\n}\n\nfunction initializeReplay() {\n    document.getElementById('replay-content').style.display = 'block';\n    currentStep = 0;\n    updateProgress();\n\n    Object.keys(LOCATION_POSITIONS).forEach(locationId => {\n        gameLocations[locationId] = {\n            
...LOCATION_POSITIONS[locationId],\n            faction: getInitialFaction(locationId),\n            army: getInitialArmy(locationId),\n            resources: getInitialResources(locationId)\n        };\n    });\n}\n\nfunction getInitialFaction(locationId) {\n    if (locationId === 'southern_capital') return 'southern';\n    if (locationId === 'northern_capital') return 'northern';\n    return 'neutral';\n}\n\nfunction getInitialArmy(locationId) {\n    if (locationId === 'southern_capital' || locationId === 'northern_capital') return 2;\n    return 0;\n}\n\nfunction getInitialResources(locationId) {\n    if (locationId === 'southern_capital' || locationId === 'northern_capital') return 50;\n    if (locationId.startsWith('village_')) return 10;\n    return 0;\n}\n\nfunction initializeMap() {\n    const mapContainer = document.getElementById('mapContainer');\n    const mapCanvas = document.getElementById('mapCanvas');\n\n    mapCanvas.width = mapContainer.clientWidth;\n    mapCanvas.height = mapContainer.clientHeight;\n\n    drawConnections();\n    createLocationMarkers();\n}\n\nfunction drawConnections() {\n    const canvas = document.getElementById('mapCanvas');\n    const ctx = canvas.getContext('2d');\n\n    // Subtle grid\n    ctx.strokeStyle = 'rgba(255, 255, 255, 0.02)';\n    ctx.lineWidth = 1;\n    const gridSize = 40;\n    for (let x = 0; x < canvas.width; x += gridSize) {\n        ctx.beginPath(); ctx.moveTo(x, 0); ctx.lineTo(x, canvas.height); ctx.stroke();\n    }\n    for (let y = 0; y < canvas.height; y += gridSize) {\n        ctx.beginPath(); ctx.moveTo(0, y); ctx.lineTo(canvas.width, y); ctx.stroke();\n    }\n\n    // Connections with faction colors\n    ctx.lineWidth = 2;\n\n    LOCATION_CONNECTIONS.forEach(connection => {\n        const loc1 = gameLocations[connection[0]] || LOCATION_POSITIONS[connection[0]];\n        const loc2 = gameLocations[connection[1]] || LOCATION_POSITIONS[connection[1]];\n\n        if (loc1 && loc2) {\n            const x1 = 
canvas.width * (loc1.x / 100);\n            const y1 = canvas.height * (loc1.y / 100);\n            const x2 = canvas.width * (loc2.x / 100);\n            const y2 = canvas.height * (loc2.y / 100);\n\n            const f1 = (gameLocations[connection[0]] || {}).faction || 'neutral';\n            const f2 = (gameLocations[connection[1]] || {}).faction || 'neutral';\n\n            if (f1 !== 'neutral' && f1 === f2) {\n                ctx.strokeStyle = f1 === 'southern' ? 'rgba(255, 215, 0, 0.35)' : 'rgba(79, 195, 247, 0.35)';\n                ctx.setLineDash([]);\n            } else if (f1 !== 'neutral' && f2 !== 'neutral' && f1 !== f2) {\n                ctx.strokeStyle = 'rgba(239, 68, 68, 0.3)';\n                ctx.setLineDash([8, 6]);\n            } else {\n                ctx.strokeStyle = 'rgba(120, 144, 156, 0.2)';\n                ctx.setLineDash([]);\n            }\n\n            ctx.beginPath();\n            ctx.moveTo(x1, y1);\n            ctx.lineTo(x2, y2);\n            ctx.stroke();\n            ctx.setLineDash([]);\n        }\n    });\n}\n\nfunction createLocationMarkers() {\n    const markersContainer = document.getElementById('mapMarkers');\n    markersContainer.innerHTML = '';\n\n    Object.keys(gameLocations).forEach(locationId => {\n        const location = gameLocations[locationId];\n        const marker = document.createElement('div');\n\n        marker.id = `marker-${locationId}`;\n        marker.className = `location-marker ${location.faction} ${location.type}`;\n        marker.style.left = `${location.x}%`;\n        marker.style.top = `${location.y}%`;\n\n        marker.title = `${location.name}\\nFaction: ${location.faction}\\nArmy: ${location.army}\\nResources: ${location.resources}`;\n\n        let icon = document.createElement('i');\n        if (location.type === 'capital') {\n            icon.className = location.faction === 'southern' ? 'fas fa-sun' :\n                             location.faction === 'northern' ? 
'fas fa-snowflake' :\n                             'fas fa-chess-rook';\n        } else {\n            icon.className = 'fas fa-map-marker-alt';\n        }\n        marker.appendChild(icon);\n\n        if (location.army > 0) {\n            const armyBadge = document.createElement('span');\n            armyBadge.className = 'position-absolute top-0 start-100 translate-middle badge rounded-pill bg-danger';\n            armyBadge.textContent = Math.floor(location.army);\n            armyBadge.style.fontSize = '0.7em';\n            armyBadge.style.minWidth = '1.5em';\n            armyBadge.title = `Army: ${location.army}`;\n            marker.appendChild(armyBadge);\n        }\n\n        if (location.resources > 0) {\n            const resourcesBadge = document.createElement('span');\n            resourcesBadge.className = 'position-absolute bottom-0 start-100 translate-middle badge rounded-pill bg-warning text-dark';\n            resourcesBadge.textContent = Math.floor(location.resources);\n            resourcesBadge.style.fontSize = '0.7em';\n            resourcesBadge.style.minWidth = '1.5em';\n            resourcesBadge.title = `Resources: ${location.resources}`;\n            marker.appendChild(resourcesBadge);\n        }\n\n        // Label\n        const label = document.createElement('span');\n        label.className = 'location-label';\n        label.textContent = location.name;\n        marker.appendChild(label);\n\n        markersContainer.appendChild(marker);\n    });\n}\n\nfunction displaySessionInfo(data) {\n    if (data.actions && data.actions.length > 0) {\n        const firstAction = data.actions[0];\n        document.getElementById('player-name').textContent = firstAction.player_name || 'Unknown';\n\n        const factionBadge = document.getElementById('faction-badge');\n        const faction = firstAction.faction || 'unknown';\n        factionBadge.textContent = faction.charAt(0).toUpperCase() + faction.slice(1);\n        factionBadge.className = 
`badge ${faction}`;\n    }\n\n    document.getElementById('total-actions').textContent = data.total_actions || 0;\n    document.getElementById('data-source').textContent = data.data_source || 'Tempo';\n}\n\nfunction displaySpanLinkChain(spanLinkChain) {\n    const linksInfo = document.getElementById('span-links-info');\n\n    if (spanLinkChain.length === 0) {\n        linksInfo.innerHTML = '<p class=\"small\" style=\"color: var(--text-muted);\">No span link data available</p>';\n        return;\n    }\n\n    const validChain = spanLinkChain.every(link => link.valid_chain);\n\n    let html = `<div class=\"mb-2\">\n        <span class=\"badge ${validChain ? 'bg-success' : 'bg-warning'}\">\n            ${validChain ? 'Valid Chain' : 'Issues Found'}\n        </span>\n    </div>`;\n\n    spanLinkChain.slice(0, 5).forEach((link) => {\n        html += `<div class=\"small mb-2 p-2\" style=\"border-left: 3px solid ${link.valid_chain ? '#22c55e' : '#f59e0b'}; padding-left: 8px;\">\n            <strong style=\"color: var(--text-primary);\">Step ${link.sequence}:</strong> ${link.action_type}<br>\n            <span style=\"color: var(--text-muted);\">${link.note}</span>\n        </div>`;\n    });\n\n    if (spanLinkChain.length > 5) {\n        html += `<small style=\"color: var(--text-muted);\">... 
and ${spanLinkChain.length - 5} more</small>`;\n    }\n\n    linksInfo.innerHTML = html;\n}\n\nfunction togglePlayPause() {\n    if (isPlaying) pauseReplay();\n    else startReplay();\n}\n\nfunction startReplay() {\n    if (!sessionData || !sessionData.actions) return;\n\n    isPlaying = true;\n    document.getElementById('play-pause-btn').innerHTML = '<i class=\"fas fa-pause\"></i> Pause';\n\n    replayInterval = setInterval(() => {\n        if (currentStep < sessionData.actions.length) {\n            executeAction(sessionData.actions[currentStep]);\n            currentStep++;\n            updateProgress();\n        } else {\n            pauseReplay();\n        }\n    }, replaySpeed);\n}\n\nfunction pauseReplay() {\n    isPlaying = false;\n    if (replayInterval) {\n        clearInterval(replayInterval);\n        replayInterval = null;\n    }\n    document.getElementById('play-pause-btn').innerHTML = '<i class=\"fas fa-play\"></i> Play';\n}\n\nfunction stepForward() {\n    if (currentStep < sessionData.actions.length) {\n        executeAction(sessionData.actions[currentStep]);\n        currentStep++;\n        updateProgress();\n    }\n}\n\nfunction resetReplay() {\n    pauseReplay();\n    currentStep = 0;\n\n    Object.keys(LOCATION_POSITIONS).forEach(locationId => {\n        gameLocations[locationId] = {\n            ...LOCATION_POSITIONS[locationId],\n            faction: getInitialFaction(locationId),\n            army: getInitialArmy(locationId),\n            resources: getInitialResources(locationId)\n        };\n    });\n\n    // Redraw\n    const canvas = document.getElementById('mapCanvas');\n    const ctx = canvas.getContext('2d');\n    ctx.clearRect(0, 0, canvas.width, canvas.height);\n    drawConnections();\n    createLocationMarkers();\n    updateProgress();\n    clearActionIndicator();\n    clearCurrentActionDetails();\n    clearSpanAttributes();\n}\n\nfunction clearSpanAttributes() {\n    document.getElementById('span-attributes').innerHTML = '<p 
class=\"small\" style=\"color: var(--text-muted);\">No action selected</p>';\n}\n\nfunction executeAction(action) {\n    showActionIndicator(action);\n    updateCurrentActionDetails(action);\n    updateSpanAttributes(action);\n\n    switch(action.action_type) {\n        case 'collect_resources':\n            simulateCollectResources(action);\n            highlightLocation(action.location_id || action.source_location);\n            break;\n        case 'create_army':\n            simulateCreateArmy(action);\n            highlightLocation(action.location_id || action.source_location);\n            break;\n        case 'move_army':\n            if (action.source_location && action.target_location) {\n                simulateMoveArmy(action);\n                highlightMovement(action.source_location, action.target_location);\n            }\n            break;\n        case 'all_out_attack':\n            simulateAllOutAttack(action);\n            highlightLocation(action.location_id || action.source_location, 'attack');\n            break;\n        case 'send_resources_to_capital':\n            simulateSendResources(action);\n            highlightLocation(action.location_id || action.source_location);\n            break;\n    }\n\n    // Redraw connections for updated factions then recreate markers\n    const canvas = document.getElementById('mapCanvas');\n    const ctx = canvas.getContext('2d');\n    ctx.clearRect(0, 0, canvas.width, canvas.height);\n    drawConnections();\n    createLocationMarkers();\n}\n\nfunction simulateCollectResources(action) {\n    const locationId = action.location_id || action.source_location;\n    if (gameLocations[locationId]) {\n        gameLocations[locationId].resources = Math.floor(gameLocations[locationId].resources + 10);\n    }\n}\n\nfunction simulateCreateArmy(action) {\n    const locationId = action.location_id || action.source_location;\n    if (gameLocations[locationId]) {\n        gameLocations[locationId].resources = 
Math.max(0, Math.floor(gameLocations[locationId].resources - 30));\n        gameLocations[locationId].army = Math.floor(gameLocations[locationId].army + 1);\n    }\n}\n\nfunction simulateMoveArmy(action) {\n    const sourceId = action.source_location;\n    const targetId = action.target_location;\n\n    if (gameLocations[sourceId] && gameLocations[targetId]) {\n        const sourceLocation = gameLocations[sourceId];\n        const targetLocation = gameLocations[targetId];\n\n        if (sourceLocation.army > 0) {\n            const movingArmies = Math.floor(sourceLocation.army);\n            sourceLocation.army = 0;\n\n            if (sourceLocation.faction !== targetLocation.faction) {\n                if (movingArmies > targetLocation.army) {\n                    targetLocation.faction = sourceLocation.faction;\n                    targetLocation.army = Math.floor(movingArmies - targetLocation.army);\n                } else {\n                    targetLocation.army = Math.max(1, Math.floor(targetLocation.army - movingArmies));\n                }\n            } else {\n                targetLocation.army = Math.floor(targetLocation.army + movingArmies);\n            }\n        }\n    }\n}\n\nfunction simulateAllOutAttack(action) {\n    const locationId = action.location_id || action.source_location;\n    if (gameLocations[locationId]) {\n        const location = gameLocations[locationId];\n        const faction = location.faction;\n\n        let enemyCapital = faction === 'southern' ? 'northern_capital' :\n                           faction === 'northern' ? 
'southern_capital' : null;\n\n        if (enemyCapital && gameLocations[enemyCapital]) {\n            const attackingArmies = Math.floor(location.army);\n            location.army = 0;\n\n            const enemyLocation = gameLocations[enemyCapital];\n            if (attackingArmies > enemyLocation.army) {\n                enemyLocation.faction = faction;\n                enemyLocation.army = Math.floor(attackingArmies - enemyLocation.army);\n            } else {\n                enemyLocation.army = Math.max(1, Math.floor(enemyLocation.army - attackingArmies));\n            }\n        }\n    }\n}\n\nfunction simulateSendResources(action) {\n    const locationId = action.location_id || action.source_location;\n    if (gameLocations[locationId]) {\n        const location = gameLocations[locationId];\n        const resources = Math.floor(location.resources);\n        location.resources = 0;\n\n        let capitalId = location.faction === 'southern' ? 'southern_capital' :\n                        location.faction === 'northern' ? 
'northern_capital' : null;\n\n        if (capitalId && gameLocations[capitalId]) {\n            gameLocations[capitalId].resources = Math.floor(gameLocations[capitalId].resources + resources);\n        }\n    }\n}\n\nfunction highlightLocation(locationId) {\n    document.querySelectorAll('.location-marker').forEach(marker => {\n        marker.classList.remove('active', 'action-highlight');\n    });\n\n    const marker = document.getElementById(`marker-${locationId}`);\n    if (marker) {\n        marker.classList.add('action-highlight');\n        setTimeout(() => { marker.classList.remove('action-highlight'); }, 2000);\n    }\n}\n\nfunction highlightMovement(sourceId, targetId) {\n    const sourcePos = LOCATION_POSITIONS[sourceId];\n    const targetPos = LOCATION_POSITIONS[targetId];\n    if (!sourcePos || !targetPos) return;\n\n    highlightLocation(sourceId);\n\n    const mapContainer = document.getElementById('mapContainer');\n    const arrow = document.createElement('div');\n    arrow.className = 'movement-arrow';\n    arrow.innerHTML = '<i class=\"fas fa-arrow-right\"></i>';\n\n    const dx = targetPos.x - sourcePos.x;\n    const dy = targetPos.y - sourcePos.y;\n    const angle = Math.atan2(dy, dx) * (180 / Math.PI);\n\n    const avgX = (sourcePos.x + targetPos.x) / 2;\n    const avgY = (sourcePos.y + targetPos.y) / 2;\n    arrow.style.left = `${avgX}%`;\n    arrow.style.top = `${avgY}%`;\n    arrow.style.transform = `translate(-50%, -50%) rotate(${angle}deg)`;\n\n    mapContainer.appendChild(arrow);\n\n    setTimeout(() => { highlightLocation(targetId); }, 1000);\n    setTimeout(() => { arrow.remove(); }, 2500);\n}\n\nfunction showActionIndicator(action) {\n    const indicator = document.getElementById('actionIndicator');\n    const actionType = document.getElementById('actionType');\n    const actionDetails = document.getElementById('actionDetails');\n\n    actionType.textContent = action.action_type || 'Unknown Action';\n\n    let details = `Sequence: 
${action.sequence || 'N/A'}`;\n    if (action.player_name) details += ` | Player: ${action.player_name}`;\n    if (action.source_location) details += ` | From: ${action.source_location}`;\n    if (action.target_location) details += ` | To: ${action.target_location}`;\n\n    actionDetails.textContent = details;\n    indicator.classList.remove('d-none');\n\n    setTimeout(() => { indicator.classList.add('d-none'); }, 3000);\n}\n\nfunction clearActionIndicator() {\n    document.getElementById('actionIndicator').classList.add('d-none');\n}\n\nfunction updateCurrentActionDetails(action) {\n    const detailsDiv = document.getElementById('current-action-details');\n    const timestamp = new Date(action.start_time / 1000000);\n    const duration = action.duration ? (action.duration / 1000000).toFixed(2) + 'ms' : 'N/A';\n    const displaySequence = action.sequence || currentStep;\n\n    detailsDiv.innerHTML = `\n        <h6 style=\"color: var(--text-primary);\">${action.action_type || action.operation}</h6>\n        <p class=\"small mb-2\"><strong>Step:</strong> ${displaySequence}</p>\n        <p class=\"small mb-2\"><strong>Player:</strong> ${action.player_name || 'Unknown'}</p>\n        <p class=\"small mb-2\"><strong>Faction:</strong> ${action.faction || 'Unknown'}</p>\n        <p class=\"small mb-2\"><strong>Time:</strong> ${timestamp.toLocaleTimeString()}</p>\n        <p class=\"small mb-2\"><strong>Duration:</strong> ${duration}</p>\n        <p class=\"small mb-0\"><strong>Span Links:</strong> ${action.span_links ? 
action.span_links.length : 0}</p>\n    `;\n}\n\nfunction clearCurrentActionDetails() {\n    document.getElementById('current-action-details').innerHTML = '<p style=\"color: var(--text-muted);\">Click play to start replay</p>';\n}\n\nfunction updateSpanAttributes(action) {\n    const attributesDiv = document.getElementById('span-attributes');\n\n    if (!action.attributes || Object.keys(action.attributes).length === 0) {\n        attributesDiv.innerHTML = '<p class=\"small\" style=\"color: var(--text-muted);\">No attributes available</p>';\n        return;\n    }\n\n    let html = '';\n    Object.entries(action.attributes).forEach(([key, value]) => {\n        html += `<div class=\"attribute-item\">\n            <strong class=\"small\" style=\"color: var(--text-secondary);\">${key}</strong><br>\n            <code class=\"small\">${value}</code>\n        </div>`;\n    });\n\n    attributesDiv.innerHTML = html;\n}\n\nfunction updateProgress() {\n    const totalSteps = sessionData.actions.length;\n    const progress = totalSteps > 0 ? (currentStep / totalSteps) * 100 : 0;\n\n    document.getElementById('replay-progress').style.width = `${progress}%`;\n    document.getElementById('current-step').textContent = Math.max(0, currentStep);\n\n    const totalStepsElement = document.getElementById('total-steps');\n    if (totalStepsElement) totalStepsElement.textContent = totalSteps;\n}\n\nfunction showError(message) {\n    document.getElementById('error-message').style.display = 'block';\n    document.getElementById('error-text').textContent = message;\n}\n</script>\n{% endblock %}\n"
  },
  {
    "path": "gelf-log-ingestion/README.md",
    "content": "# GELF Log Ingestion Scenario\n\nThis scenario demonstrates how to ingest GELF (Graylog Extended Log Format) logs using Grafana Alloy's `loki.source.gelf` component. A Python application sends structured GELF messages over UDP to Alloy, which relabels GELF metadata (host, level, facility) into Loki labels before forwarding to Loki for storage and querying in Grafana.\n\n## Architecture\n\n```\ngelf-logger (Python/pygelf) --UDP:12201--> Alloy (loki.source.gelf) --> Loki --> Grafana\n```\n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/gelf-log-ingestion\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345` to inspect the Alloy pipeline and live debugging output.\n\n### Step 4: Access Grafana UI\nOpen your browser and go to `http://localhost:3000`. Navigate to **Explore** and select the **Loki** datasource. Query logs using `{host=\"gelf-logger\"}` or filter by label (e.g., `{level=\"6\"}` for INFO).\n\n## GELF Level Mapping\n\n| GELF Level | Syslog Severity |\n|------------|-----------------|\n| 0          | Emergency       |\n| 1          | Alert           |\n| 2          | Critical        |\n| 3          | Error           |\n| 4          | Warning         |\n| 5          | Notice          |\n| 6          | Informational   |\n| 7          | Debug           |\n"
  },
  {
    "path": "gelf-log-ingestion/app/main.py",
    "content": "import logging\nimport time\nimport random\nfrom pygelf import GelfUdpHandler\n\nlogger = logging.getLogger(\"gelf-demo\")\nlogger.setLevel(logging.DEBUG)\n# include_extra_fields=True makes pygelf send each record's `extra` dict as GELF\n# additional fields; it is off by default, which would silently drop the\n# structured fields attached to every message below.\nhandler = GelfUdpHandler(host=\"alloy\", port=12201, compress=False, include_extra_fields=True)\nlogger.addHandler(handler)\n\nmessages = [\n    (logging.INFO, \"User authentication successful\", {\"user_id\": \"42\", \"method\": \"oauth2\"}),\n    (logging.WARNING, \"Slow database query detected\", {\"query_time_ms\": \"2500\", \"table\": \"orders\"}),\n    (logging.ERROR, \"Failed to connect to payment gateway\", {\"gateway\": \"stripe\", \"retry_count\": \"3\"}),\n    (logging.INFO, \"Order processed successfully\", {\"order_id\": \"ORD-12345\", \"total\": \"99.99\"}),\n    (logging.DEBUG, \"Cache lookup completed\", {\"cache_hit\": \"true\", \"key\": \"user:42:profile\"}),\n    (logging.CRITICAL, \"Disk space critically low\", {\"mount\": \"/data\", \"available_pct\": \"2\"}),\n    (logging.INFO, \"Health check passed\", {\"service\": \"api\", \"response_ms\": \"12\"}),\n    (logging.WARNING, \"Rate limit approaching threshold\", {\"client_ip\": \"10.0.1.50\", \"requests\": \"980\"}),\n]\n\n# flush=True: stdout is block-buffered when not attached to a TTY, so without it\n# this banner would stay in the buffer and never reach `docker logs` while the\n# endless loop below runs.\nprint(\"Starting GELF log generator...\", flush=True)\nwhile True:\n    level, msg, extra = random.choice(messages)\n    logger.log(level, msg, extra=extra)\n    time.sleep(random.uniform(1, 3))\n"
  },
  {
    "path": "gelf-log-ingestion/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\n// Receive GELF logs over UDP.\n// The __gelf_message_* labels are internal: loki.source.gelf removes every label\n// starting with __ before forwarding entries, so the relabel rules have to be\n// applied inside the source through relabel_rules, not by a downstream component.\nloki.source.gelf \"default\" {\n\tforward_to    = [loki.write.local.receiver]\n\trelabel_rules = loki.relabel.gelf.rules\n}\n\n// Relabel GELF metadata into useful labels.\n// Only the exported rules are used (by loki.source.gelf above), so this component\n// forwards nothing itself.\nloki.relabel \"gelf\" {\n\tforward_to = []\n\n\trule {\n\t\tsource_labels = [\"__gelf_message_host\"]\n\t\ttarget_label  = \"host\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__gelf_message_level\"]\n\t\ttarget_label  = \"level\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__gelf_message_facility\"]\n\t\ttarget_label  = \"facility\"\n\t}\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "gelf-log-ingestion/docker-compose.coda.yml",
    "content": "services:\n  gelf-logger:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: gelf-logger\n    volumes:\n      - ./app/main.py:/app/main.py\n    command: [\"sh\", \"-c\", \"pip install pygelf && python3 /app/main.py\"]\n"
  },
  {
    "path": "gelf-log-ingestion/docker-compose.yml",
    "content": "\nservices:\n\n  # GELF log generator using pygelf\n  gelf-logger:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: gelf-logger\n    # Pin the hostname: container_name alone leaves it as the container ID, and\n    # pygelf uses the hostname as the GELF `host` field that Alloy maps to the\n    # `host` label (queried as gelf-logger in the README).\n    hostname: gelf-logger\n    volumes:\n      - ./app/main.py:/app/main.py\n    command: [\"sh\", \"-c\", \"pip install pygelf && python3 /app/main.py\"]\n    depends_on:\n      - alloy\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 12201:12201/udp\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n"
  },
  {
    "path": "gelf-log-ingestion/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n"
  },
  {
    "path": "image-versions.env",
    "content": "# Centralized Docker image versions for all examples.\n#\n# Renovate tracks each variable below — the `# renovate:` annotation\n# tells the bot which docker image the version refers to. Bumps to\n# this file land via renovate PRs that also bump the matching\n# `${VAR:-default}` fallback in every docker-compose file. Both sides\n# are driven by customManagers in renovate.json: one for this file,\n# one generic rule that captures the depName from the `image:` line\n# in compose files. Keep them in lockstep — the check-image-versions\n# workflow will fail PRs where they drift.\n#\n# Adding a new image: declare `# renovate: datasource=docker depName=<image>`\n# + `<NAME>_VERSION=<value>` here, and reference it in compose as\n# `image: <image>:${<NAME>_VERSION:-<value>}`. No renovate.json edit needed.\n\n# Grafana images\n# renovate: datasource=docker depName=grafana/loki\nGRAFANA_LOKI_VERSION=3.6.10\n# renovate: datasource=docker depName=grafana/grafana\nGRAFANA_VERSION=13.0.1\n# renovate: datasource=docker depName=grafana/alloy\nGRAFANA_ALLOY_VERSION=v1.16.1\n# renovate: datasource=docker depName=grafana/tempo\nGRAFANA_TEMPO_VERSION=2.10.4\n# renovate: datasource=docker depName=grafana/pyroscope\nGRAFANA_PYROSCOPE_VERSION=2.0.1\n\n# Prometheus images\n# renovate: datasource=docker depName=prom/prometheus\nPROMETHEUS_VERSION=v3.11.3\n\n# Other images\n# renovate: datasource=docker depName=python\nPYTHON_VERSION=3.11-slim\n\n# nginx-monitoring scenario\n# renovate: datasource=docker depName=nginx\nNGINX_VERSION=1.30-alpine\n# renovate: datasource=docker depName=nginx/nginx-prometheus-exporter\nNGINX_EXPORTER_VERSION=1.5.1\n# renovate: datasource=docker depName=curlimages/curl\nCURL_VERSION=8.20.0\n\n# rabbitmq-monitoring scenario\n# renovate: datasource=docker depName=rabbitmq\nRABBITMQ_VERSION=4.3.0-management\n# renovate: datasource=docker depName=pivotalrabbitmq/perf-test\nRABBITMQ_PERF_TEST_VERSION=2.24.0\n# vault-secrets scenario\n# renovate: 
datasource=docker depName=hashicorp/vault\nVAULT_VERSION=2.0.0\n\n# cloudwatch-metrics scenario\n# renovate: datasource=docker depName=localstack/localstack\nLOCALSTACK_VERSION=4.4.0\n"
  },
  {
    "path": "k8s/README.md",
    "content": "\n# Monitor Kubernetes with Grafana Alloy\n\n> Note: this scenario works using the K8s Monitoring Helm chart. This abstracts away the need to configure Loki and deploys best practices for monitoring Kubernetes clusters. The chart supports metrics, logs, profiling, and tracing.\n\nIn this directory you will find a series of scenarios that demonstrate how to set up Alloy via the K8s Monitoring Helm chart. Examples specific to each telemetry source are provided in the respective directories.\n\n| Scenario | Description |\n| --- | --- |\n| [Logs](./logs) | Monitor Kubernetes logs with Grafana Alloy and Loki |\n| [Metrics](./metrics) | Monitor Kubernetes metrics with Grafana Alloy and Prometheus |\n| [Profiling](./profiling) | Monitor Kubernetes profiling with Grafana Alloy and Pyroscope |\n| [Tracing](./tracing) | Monitor Kubernetes tracing with Grafana Alloy and Tempo |\n\n"
  },
  {
    "path": "k8s/events/README.md",
    "content": "# Kubernetes events to Loki — without the k8s-monitoring Helm chart\n\nA focused scenario showing how `loki.source.kubernetes_events` works under the hood: Alloy is deployed as a plain `Deployment` with explicit RBAC and an Alloy `ConfigMap`, instead of being abstracted behind the [`k8s-monitoring` Helm chart](https://github.com/grafana/k8s-monitoring-helm) used in [`k8s/logs/`](../logs/).\n\n## How this differs from `k8s/logs/`\n\n| Aspect | `k8s/logs/` (existing) | `k8s/events/` (this) |\n|---|---|---|\n| Alloy deployment | `k8s-monitoring` Helm chart (collector preset) | Plain `kubectl apply` of ConfigMap + RBAC + Deployment |\n| `loki.source.kubernetes_events` | Hidden inside the chart | **Visible directly in `alloy-config.yaml`** |\n| Scope | Pod logs + cluster events (mixed) | **Cluster events only** with `type` / `reason` / `namespace` / `kind` labels |\n| Demo intent | \"ship everything for K8s monitoring\" | \"show how events ingestion actually works\" |\n\nIf you want production-grade Kubernetes observability, use `k8s/logs/`. 
If you're learning the component or want to extend it (custom filtering, namespace scoping, alerting on event reasons), this scenario is the minimal moving-parts version.\n\n## Prerequisites\n\n- [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/)\n- [Helm](https://helm.sh/docs/intro/install/)\n- The Grafana Helm repo: `helm repo add grafana https://grafana.github.io/helm-charts`\n\n## Step 1 — Create the cluster\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/k8s/events\n\nkind create cluster --config kind.yml\n```\n\n## Step 2 — Create the `meta` namespace and install Loki + Grafana\n\n```bash\nkubectl create namespace meta\n\nhelm install --values loki-values.yml loki    grafana/loki    -n meta\nhelm install --values grafana-values.yml grafana grafana/grafana -n meta\n```\n\nWait for them to be ready:\n\n```bash\nkubectl get pods -n meta -w\n```\n\n## Step 3 — Apply Alloy\n\n```bash\nkubectl apply -f alloy-rbac.yaml\nkubectl apply -f alloy-config.yaml\nkubectl apply -f alloy-deployment.yaml\n```\n\nThe RBAC grants cluster-wide `get/list/watch` on `events` (and only that). The ConfigMap holds the Alloy pipeline. 
The Deployment is **single-replica on purpose** — events are cluster-scoped, so multiple Alloy replicas would produce duplicate log lines.\n\n## Step 4 — Open Grafana\n\n```bash\nkubectl port-forward -n meta svc/grafana 3000:80\n```\n\nOpen http://localhost:3000 and log in with username `admin` and password `adminadminadmin` (it's a dev scenario — see `grafana-values.yml`).\n\n## Step 5 — Generate some events\n\n```bash\n# Trigger Created/Started/Pulled events\nkubectl run events-test --image=nginx --restart=Never\n\n# Trigger BackOff/Failed events\nkubectl run events-fail --image=does-not-exist --restart=Never\n\n# Wait, then trigger Killing\nsleep 30\nkubectl delete pod events-test events-fail\n```\n\n## Step 6 — Query in Loki\n\n```logql\n# All events\n{job=\"kubernetes-events\"}\n\n# Just warnings\n{job=\"kubernetes-events\", type=\"Warning\"}\n\n# Pod events in default namespace\n{job=\"kubernetes-events\", namespace=\"default\", kind=\"Pod\"}\n\n# Pull failures\n{job=\"kubernetes-events\", reason=\"Failed\"}\n\n# Backoff loops\n{job=\"kubernetes-events\", reason=\"BackOff\"}\n```\n\nThe promoted labels are `type`, `reason`, `namespace`, and `kind`. The involved-object name (`name`) is kept as **structured metadata** — too high-cardinality to index as a label, but still filterable at query time with a label filter expression, e.g. `{job=\"kubernetes-events\"} | name=\"events-fail\"`.\n\n## Inspecting the Alloy pipeline\n\n```bash\nkubectl port-forward -n meta svc/alloy 12345:12345\n```\n\nOpen http://localhost:12345 to see the component graph and use **livedebugging** to inspect events flowing through each stage.\n\n## Tear down\n\n```bash\nkind delete cluster\n```\n\n## Customization ideas\n\n- **Namespace scoping**: add `namespaces = [\"prod\", \"default\"]` to the `loki.source.kubernetes_events` block to filter at the source rather than at query time.\n- **Drop noisy reasons**: add a `stage.match` block dropping `reason=~\"Pulled|Pulling|Created\"` if you only care about Warnings.\n- **Alerting**: pair this with a Grafana alert on `count_over_time({type=\"Warning\"}[5m])` for cluster-health monitoring.\n"
  },
  {
    "path": "k8s/events/alloy-config.yaml",
    "content": "# Alloy pipeline as a ConfigMap. Mounted into the alloy Deployment at\n# /etc/alloy/config.alloy.\n#\n# Pipeline:\n#   loki.source.kubernetes_events  (cluster-wide events feed)\n#     → loki.process               (parse JSON, promote labels)\n#     → loki.write                 (push to Loki in this cluster)\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: alloy-config\n  namespace: meta\ndata:\n  config.alloy: |\n    livedebugging {}\n\n    loki.source.kubernetes_events \"cluster\" {\n      job_name   = \"kubernetes-events\"\n      log_format = \"json\"\n      forward_to = [loki.process.events.receiver]\n    }\n\n    loki.process \"events\" {\n      // The component emits a flat JSON envelope (top-level fields:\n      // type, reason, kind, name, count, msg, sourcecomponent, etc).\n      // The `namespace` label is already attached by the source component\n      // itself, so we don't need to extract it here.\n      stage.json {\n        expressions = {\n          type   = \"type\",\n          reason = \"reason\",\n          kind   = \"kind\",\n          name   = \"name\",\n        }\n      }\n\n      // Indexed labels — fast filtering for \"show all Warnings in\n      // namespace X with reason Y on a Pod\".\n      stage.labels {\n        values = {\n          type   = \"\",\n          reason = \"\",\n          kind   = \"\",\n        }\n      }\n\n      // High-cardinality fields kept out of the label index but still\n      // queryable via `| json` filters.\n      stage.structured_metadata {\n        values = {\n          name = \"\",\n        }\n      }\n\n      forward_to = [loki.write.loki.receiver]\n    }\n\n    loki.write \"loki\" {\n      endpoint {\n        url = \"http://loki-gateway.meta.svc.cluster.local/loki/api/v1/push\"\n      }\n    }\n"
  },
  {
    "path": "k8s/events/alloy-deployment.yaml",
    "content": "# A single-replica Deployment is the right shape for this scenario:\n# `loki.source.kubernetes_events` watches a cluster-scoped resource, so\n# more than one replica would just produce duplicate log lines for every\n# event. (A DaemonSet would be wrong for the same reason.)\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: alloy\n  namespace: meta\n  labels:\n    app.kubernetes.io/name: alloy\n    app.kubernetes.io/part-of: alloy-events\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: alloy\n  template:\n    metadata:\n      labels:\n        app.kubernetes.io/name: alloy\n    spec:\n      serviceAccountName: alloy\n      containers:\n        - name: alloy\n          image: grafana/alloy:v1.16.0\n          args:\n            - run\n            - /etc/alloy/config.alloy\n            - --server.http.listen-addr=0.0.0.0:12345\n            - --storage.path=/var/lib/alloy/data\n          ports:\n            - name: http\n              containerPort: 12345\n          volumeMounts:\n            - name: config\n              mountPath: /etc/alloy\n            - name: storage\n              mountPath: /var/lib/alloy/data\n      volumes:\n        - name: config\n          configMap:\n            name: alloy-config\n        - name: storage\n          emptyDir: {}\n---\n# Lightweight Service so the Alloy UI can be port-forwarded easily.\napiVersion: v1\nkind: Service\nmetadata:\n  name: alloy\n  namespace: meta\nspec:\n  selector:\n    app.kubernetes.io/name: alloy\n  ports:\n    - name: http\n      port: 12345\n      targetPort: 12345\n"
  },
  {
    "path": "k8s/events/alloy-rbac.yaml",
    "content": "# Minimal RBAC for `loki.source.kubernetes_events`.\n# It needs cluster-wide read/list/watch on events. Nothing else.\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: alloy\n  namespace: meta\n---\napiVersion: rbac.authorization.k8s.io/v1\nkind: ClusterRole\nmetadata:\n  name: alloy-events-reader\nrules:\n  - apiGroups: [\"\"]\n    resources: [\"events\"]\n    verbs: [\"get\", \"list\", \"watch\"]\n  - apiGroups: [\"events.k8s.io\"]\n    resources: [\"events\"]\n    verbs: [\"get\", \"list\", \"watch\"]\n---\napiVersion: rbac.authorization.k8s.io/v1\nkind: ClusterRoleBinding\nmetadata:\n  name: alloy-events-reader\nroleRef:\n  apiGroup: rbac.authorization.k8s.io\n  kind: ClusterRole\n  name: alloy-events-reader\nsubjects:\n  - kind: ServiceAccount\n    name: alloy\n    namespace: meta\n"
  },
  {
    "path": "k8s/events/grafana-values.yml",
    "content": "---\npersistence:\n  type: pvc\n  enabled: true\n\n# DO NOT DO THIS IN PRODUCTION USECASES\nadminUser: admin\nadminPassword: adminadminadmin\n# CONSIDER USING AN EXISTING SECRET\n# Use an existing secret for the admin user.\n# admin:\n  ## Name of the secret. Can be templated.\n#  existingSecret: \"\"\n#  userKey: admin-user\n#  passwordKey: admin-password\n\nservice:\n  enabled: true\n  type: ClusterIP\n\ndatasources:\n  datasources.yaml:\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki-gateway.meta.svc.cluster.local:80\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n\n"
  },
  {
    "path": "k8s/events/kind.yml",
    "content": "# 1 control-plane + 2 workers — matches the other k8s/ scenarios.\nkind: Cluster\napiVersion: kind.x-k8s.io/v1alpha4\nnodes:\n  - role: control-plane\n  - role: worker\n  - role: worker\n"
  },
  {
    "path": "k8s/events/loki-values.yml",
    "content": "---\nloki:\n  auth_enabled: false\n  commonConfig:\n    replication_factor: 1\n  schemaConfig:\n    configs:\n      - from: 2024-04-01\n        store: tsdb\n        object_store: s3\n        schema: v13\n        index:\n          prefix: loki_index_\n          period: 24h\n  ingester:\n    chunk_encoding: snappy\n  tracing:\n    enabled: true\n  pattern_ingester:\n      enabled: true\n  limits_config:\n    allow_structured_metadata: true\n    volume_enabled: true\n  ruler:\n    enable_api: true\n  querier:\n    # Default is 4, if you have enough memory and CPU you can increase, reduce if OOMing\n    max_concurrent: 4\n\nminio:\n  enabled: true\n      \ndeploymentMode: SingleBinary\nsingleBinary:\n  replicas: 1\n  resources:\n    limits:\n      cpu: 4\n      memory: 4Gi\n    requests:\n      cpu: 2\n      memory: 2Gi\n  extraEnv:\n    # Keep a little bit lower than memory limits\n    - name: GOMEMLIMIT\n      value: 3750MiB\n\nchunksCache:\n  # default is 500MB, with limited memory keep this smaller\n  writebackSizeLimit: 10MB\n\n\n# Zero out replica counts of other deployment modes\nbackend:\n  replicas: 0\nread:\n  replicas: 0\nwrite:\n  replicas: 0\n\ningester:\n  replicas: 0\nquerier:\n  replicas: 0\nqueryFrontend:\n  replicas: 0\nqueryScheduler:\n  replicas: 0\ndistributor:\n  replicas: 0\ncompactor:\n  replicas: 0\nindexGateway:\n  replicas: 0\nbloomCompactor:\n  replicas: 0\nbloomGateway:\n  replicas: 0"
  },
  {
    "path": "k8s/logs/README.md",
    "content": "\n# Monitor Kubernetes Logs with Grafana Alloy and Loki\n\n> Note this scenario works using the K8s Monitoring Helm chart. This abstracts the need to configure Alloy and deploys best practices for monitoring Kubernetes clusters. The chart supports; metrics, logs, profiling, and tracing. For this scenario, we will use the K8s Monitoring Helm chart to monitor Kubernetes logs. \n\nThis scenario demonstrates how to setup the Kubernetes monitoring helm and Loki. This scenario will install three Helm charts: Loki, Grafana, and k8s-monitoring-helm. Loki will be used to store the logs, Grafana will be used to visualize the logs, and Alloy (k8s-monitoring-helm) will be used to collect three different log sources:\n* Pod Logs\n* Kubernetes Events\n\n## Prerequisites\n\nClone the repository:\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\nChange to the directory:\n\n```bash\ncd alloy-scenarios/k8s/logs\n```\n\nNext you will need a Kubernetes cluster (In this example, we will configure a local Kubernetes cluster using [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/))\n\nAn example kind cluster configuration is provided in the `kind.yml` file. To create a kind cluster using this configuration, run the following command:\n\n```bash\nkind create cluster --config kind.yml\n```\n\nLastly you will need to make sure you install Helm on your local machine. You can install Helm by following the instructions [here](https://helm.sh/docs/intro/install/). You will also need to install the Grafana Helm repository:\n\n```bash\nhelm repo add grafana https://grafana.github.io/helm-charts\n```\n\n## Create the `meta` and `prod` namespaces\n\nThe first step is to create the `meta` and `prod` namespaces. To create the namespaces, run the following commands:\n\n```bash\nkubectl create namespace meta && \\\nkubectl create namespace prod\n```\n\n\n## Install the Loki Helm Chart\n\nThe first step is to install the Loki Helm chart. 
This will install Loki in the `meta` namespace. The `loki-values.yml` file contains the configuration for the Loki Helm chart. To install Loki, run the following command:\n\n```bash\nhelm install --values loki-values.yml loki grafana/loki -n meta\n```\n\nThis installs Loki in monolithic mode. For more information on Loki modes, see the [Loki documentation](https://grafana.com/docs/loki/latest/get-started/deployment-modes/).\n\n## Install the Grafana Helm Chart\n\nThe next step is to install the Grafana Helm chart. This will install Grafana in the `meta` namespace. The `grafana-values.yml` file contains the configuration for the Grafana Helm chart. To install Grafana, run the following command:\n\n```bash\nhelm install --values grafana-values.yml grafana grafana/grafana --namespace meta\n```\nNote that within the `grafana-values.yml` file, the `grafana.ini` configuration is set to use the Loki data source. This is done by setting the `datasources.datasources.yaml` field to the Loki data source configuration.\n\n## Install the K8s Monitoring Helm Chart\n\nThe final step is to install the K8s monitoring Helm chart. This will install Alloy in the `meta` namespace. The `k8s-monitoring-values.yml` file contains the configuration for the K8s monitoring Helm chart. This scenario requires `grafana/k8s-monitoring` chart v4 or later. To install the K8s monitoring Helm chart, run the following command:\n\n```bash\nhelm install --values ./k8s-monitoring-values.yml k8s grafana/k8s-monitoring --version \"^4.0.0\" -n meta --create-namespace\n```\nWithin the `k8s-monitoring-values.yml` file we declare the Alloy configuration. This configuration specifies the log sources that Alloy will collect logs from. In this scenario, we are collecting logs from two different sources: Pod Logs and Kubernetes Events.\n\n## Accessing the Grafana UI\n\nTo access the Grafana UI, you will need to port-forward the Grafana pod to your local machine. 
First, get the name of the Grafana pod:\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l \"app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana\" -o jsonpath=\"{.items[0].metadata.name}\")\n```\n\nNext, port-forward the Grafana pod to your local machine:\n\n```bash\nkubectl --namespace meta port-forward $POD_NAME 3000\n```\n\nOpen your browser and go to [http://localhost:3000](http://localhost:3000). You can log in with the default username `admin` and password `adminadminadmin`.\n\n## Accessing the Alloy UI\n\nTo access the Alloy UI, you will need to port-forward the Alloy pod to your local machine. First, get the name of the Alloy pod:\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l \"app.kubernetes.io/name=alloy-logs,app.kubernetes.io/instance=k8s\" -o jsonpath=\"{.items[0].metadata.name}\")\n```\n\nNext, port-forward the Alloy pod to your local machine:\n\n```bash\nkubectl --namespace meta port-forward $POD_NAME 12345\n```\n\n## View the logs using Explore Logs in Grafana\n\nExplore Logs is a new feature in Grafana which provides a queryless way to explore logs. To access Explore Logs, open a browser and go to [http://localhost:3000/a/grafana-lokiexplore-app](http://localhost:3000/a/grafana-lokiexplore-app).\n\n## Adding a demo prod app\n\nThe k8s monitoring app is configured to collect logs from two namespaces: `meta` and `prod`. To add a demo prod app, run the following command:\n\n```bash\nhelm install tempo grafana/tempo-distributed -n prod\n```\n\nThis will install the Tempo distributed tracing system in the `prod` namespace."
  },
  {
    "path": "k8s/logs/grafana-values.yml",
    "content": "---\npersistence:\n  type: pvc\n  enabled: true\n\n# DO NOT DO THIS IN PRODUCTION USECASES\nadminUser: admin\nadminPassword: adminadminadmin\n# CONSIDER USING AN EXISTING SECRET\n# Use an existing secret for the admin user.\n# admin:\n  ## Name of the secret. Can be templated.\n#  existingSecret: \"\"\n#  userKey: admin-user\n#  passwordKey: admin-password\n\nservice:\n  enabled: true\n  type: ClusterIP\n\ndatasources:\n  datasources.yaml:\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki-gateway.meta.svc.cluster.local:80\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n\n"
  },
  {
    "path": "k8s/logs/k8s-monitoring-values.yml",
    "content": "---\ncluster:\n  name: meta-monitoring-tutorial\n\ndestinations:\n  loki:\n    type: loki\n    url: http://loki-gateway.meta.svc.cluster.local/loki/api/v1/push\n\nclusterEvents:\n  enabled: true\n  collector: alloy-singleton\n  namespaces:\n    - meta\n    - prod\n\npodLogsViaKubernetesApi:\n  enabled: true\n  collector: alloy-logs\n  namespaces:\n    - meta\n    - prod\n  structuredMetadata:\n    pod: pod\n\ncollectors:\n  alloy-singleton:\n    presets: [singleton]\n  alloy-logs:\n    presets: [clustered]\n"
  },
  {
    "path": "k8s/logs/killercoda/loki-values.yml",
    "content": "---\nloki:\n  auth_enabled: false\n  commonConfig:\n    replication_factor: 1\n  schemaConfig:\n    configs:\n      - from: 2024-04-01\n        store: tsdb\n        object_store: s3\n        schema: v13\n        index:\n          prefix: loki_index_\n          period: 24h\n  ingester:\n    chunk_encoding: snappy\n  tracing:\n    enabled: true\n  pattern_ingester:\n      enabled: true\n  limits_config:\n    allow_structured_metadata: true\n    volume_enabled: true\n  ruler:\n    enable_api: true\n  querier:\n    # Default is 4, if you have enough memory and CPU you can increase, reduce if OOMing\n    max_concurrent: 4\n\nminio:\n  enabled: true\n      \ndeploymentMode: SingleBinary\nsingleBinary:\n  replicas: 1\n  resources:\n    limits:\n      cpu: 0.5\n      memory: 1Gi\n    requests:\n      cpu: 0.5\n      memory: 1Gi\n  extraEnv:\n    # Keep a little bit lower than memory limits\n    - name: GOMEMLIMIT\n      value: 750MiB\n  tolerations:\n      - key: \"node-role.kubernetes.io/control-plane\"\n        operator: \"Exists\"\n        effect: \"NoSchedule\"\n\n\nchunksCache:\n  # default is 500MB, with limited memory keep this smaller\n  writebackSizeLimit: 10MB\n  enabled: false\n\nresultsCache:\n  writebackSizeLimit: 10MB\n  enabled: false\n\ntest:\n  enabled: false\nlokiCanary:\n  enabled: false\n\n\n# Zero out replica counts of other deployment modes\nbackend:\n  replicas: 0\nread:\n  replicas: 0\nwrite:\n  replicas: 0\n\ningester:\n  replicas: 0\nquerier:\n  replicas: 0\nqueryFrontend:\n  replicas: 0\nqueryScheduler:\n  replicas: 0\ndistributor:\n  replicas: 0\ncompactor:\n  replicas: 0\nindexGateway:\n  replicas: 0\nbloomCompactor:\n  replicas: 0\nbloomGateway:\n  replicas: 0"
  },
  {
    "path": "k8s/logs/kind.yml",
    "content": "# a cluster with 3 control-plane nodes and 3 workers\nkind: Cluster\napiVersion: kind.x-k8s.io/v1alpha4\nnodes:\n- role: control-plane\n- role: worker\n- role: worker\n"
  },
  {
    "path": "k8s/logs/loki-values.yml",
    "content": "---\nloki:\n  auth_enabled: false\n  commonConfig:\n    replication_factor: 1\n  schemaConfig:\n    configs:\n      - from: 2024-04-01\n        store: tsdb\n        object_store: s3\n        schema: v13\n        index:\n          prefix: loki_index_\n          period: 24h\n  ingester:\n    chunk_encoding: snappy\n  tracing:\n    enabled: true\n  pattern_ingester:\n      enabled: true\n  limits_config:\n    allow_structured_metadata: true\n    volume_enabled: true\n  ruler:\n    enable_api: true\n  querier:\n    # Default is 4, if you have enough memory and CPU you can increase, reduce if OOMing\n    max_concurrent: 4\n\nminio:\n  enabled: true\n      \ndeploymentMode: SingleBinary\nsingleBinary:\n  replicas: 1\n  resources:\n    limits:\n      cpu: 4\n      memory: 4Gi\n    requests:\n      cpu: 2\n      memory: 2Gi\n  extraEnv:\n    # Keep a little bit lower than memory limits\n    - name: GOMEMLIMIT\n      value: 3750MiB\n\nchunksCache:\n  # default is 500MB, with limited memory keep this smaller\n  writebackSizeLimit: 10MB\n\n\n# Zero out replica counts of other deployment modes\nbackend:\n  replicas: 0\nread:\n  replicas: 0\nwrite:\n  replicas: 0\n\ningester:\n  replicas: 0\nquerier:\n  replicas: 0\nqueryFrontend:\n  replicas: 0\nqueryScheduler:\n  replicas: 0\ndistributor:\n  replicas: 0\ncompactor:\n  replicas: 0\nindexGateway:\n  replicas: 0\nbloomCompactor:\n  replicas: 0\nbloomGateway:\n  replicas: 0"
  },
  {
    "path": "k8s/metrics/README.md",
    "content": "\n# Monitor Kubernetes Metrics with Grafana Alloy and Prometheus\n\n> Note this scenario works using the K8s Monitoring Helm chart. This abstracts the need to configure Alloy and deploys best practices for monitoring Kubernetes clusters.\n\nThis scenario demonstrates how to set up the Kubernetes monitoring Helm chart with Prometheus. This scenario will install three Helm charts: Prometheus, Grafana, and k8s-monitoring. Prometheus will be used to store the metrics, Grafana will be used to visualize the metrics, and Alloy (k8s-monitoring) will be used to collect:\n* Cluster Metrics (kube-state-metrics, node-exporter, kubelet, cadvisor)\n* Annotation-based autodiscovery (Prometheus-style annotations on pods)\n\n## Prerequisites\n\nClone the repository:\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\nChange to the directory:\n\n```bash\ncd alloy-scenarios/k8s/metrics\n```\n\nNext you will need a Kubernetes cluster. An example Kind cluster configuration is provided in the `kind.yml` file:\n\n```bash\nkind create cluster --config kind.yml\n```\n\nInstall Helm and add required repositories:\n\n```bash\nhelm repo add grafana https://grafana.github.io/helm-charts\nhelm repo add prometheus-community https://prometheus-community.github.io/helm-charts\nhelm repo update\n```\n\n## Create the `meta` namespace\n\n```bash\nkubectl create namespace meta\n```\n\n## Install Prometheus\n\n```bash\nhelm install --values prometheus-values.yml prometheus prometheus-community/prometheus -n meta\n```\n\n## Install Grafana\n\n```bash\nhelm install --values grafana-values.yml grafana grafana/grafana -n meta\n```\n\n## Install the K8s Monitoring Helm Chart\n\nThis scenario requires `grafana/k8s-monitoring` chart v4 or later.\n\n```bash\nhelm install --values k8s-monitoring-values.yml k8s grafana/k8s-monitoring --version \"^4.0.0\" -n meta\n```\n\n## Accessing the Grafana UI\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l 
\"app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl --namespace meta port-forward $POD_NAME 3000\n```\n\nOpen [http://localhost:3000](http://localhost:3000) and log in with `admin` / `adminadminadmin`.\n\n## Accessing the Alloy UI\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l \"app.kubernetes.io/name=alloy-metrics,app.kubernetes.io/instance=k8s\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl --namespace meta port-forward $POD_NAME 12345\n```\n\n## Explore Metrics\n\nIn Grafana, go to **Explore** and select the **Prometheus** datasource. Try these queries:\n\n* `up` - See all targets being scraped\n* `container_cpu_usage_seconds_total` - Container CPU usage\n* `container_memory_working_set_bytes` - Container memory usage\n* `kube_pod_info` - Pod metadata from kube-state-metrics\n"
  },
  {
    "path": "k8s/metrics/grafana-values.yml",
    "content": "---\npersistence:\n  type: pvc\n  enabled: true\n\nadminUser: admin\nadminPassword: adminadminadmin\n\nservice:\n  enabled: true\n  type: ClusterIP\n\ndatasources:\n  datasources.yaml:\n    apiVersion: 1\n    datasources:\n    - name: Prometheus\n      type: prometheus\n      access: proxy\n      orgId: 1\n      url: http://prometheus-server.meta.svc.cluster.local:80\n      basicAuth: false\n      isDefault: true\n      version: 1\n      editable: false\n"
  },
  {
    "path": "k8s/metrics/k8s-monitoring-values.yml",
    "content": "---\ncluster:\n  name: meta-monitoring-tutorial\n\ndestinations:\n  prometheus:\n    type: prometheus\n    url: http://prometheus-server.meta.svc.cluster.local:80/api/v1/write\n\nclusterMetrics:\n  enabled: true\n\nannotationAutodiscovery:\n  enabled: true\n  collector: alloy-metrics\n\ncollectors:\n  alloy-metrics:\n    presets: [clustered, statefulset]\n\ntelemetryServices:\n  kube-state-metrics:\n    deploy: true\n"
  },
  {
    "path": "k8s/metrics/kind.yml",
    "content": "# a cluster with 1 control-plane node and 2 workers\nkind: Cluster\napiVersion: kind.x-k8s.io/v1alpha4\nnodes:\n- role: control-plane\n- role: worker\n- role: worker\n"
  },
  {
    "path": "k8s/metrics/prometheus-values.yml",
    "content": "server:\n  persistentVolume:\n    enabled: false\n  extraFlags:\n    - web.enable-remote-write-receiver\n    - enable-feature=native-histograms\n    - enable-feature=exemplar-storage\n\nalertmanager:\n  enabled: false\n\nkube-state-metrics:\n  enabled: false\n\nprometheus-node-exporter:\n  enabled: false\n\nprometheus-pushgateway:\n  enabled: false\n"
  },
  {
    "path": "k8s/profiling/README.md",
    "content": "# Monitor Kubernetes Profiles with Grafana Alloy and Pyroscope\n\n> Note this scenario works using the K8s Monitoring Helm chart. This abstracts the need to configure Alloy and deploys best practices for monitoring Kubernetes clusters.\n\nThis scenario demonstrates how to set up the Kubernetes monitoring Helm chart with Pyroscope for continuous profiling. This scenario will install three Helm charts: Pyroscope, Grafana, and k8s-monitoring. Pyroscope will store the profiles, Grafana will visualize them, and Alloy (k8s-monitoring) will scrape pprof endpoints from pods.\n\nAlloy discovers pods with profiling annotations and scrapes their pprof endpoints (CPU, memory, goroutine, etc.).\n\n## Prerequisites\n\nClone the repository:\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\nChange to the directory:\n\n```bash\ncd alloy-scenarios/k8s/profiling\n```\n\nNext you will need a Kubernetes cluster. An example Kind cluster configuration is provided in the `kind.yml` file:\n\n```bash\nkind create cluster --config kind.yml\n```\n\nInstall Helm and add the Grafana Helm repository:\n\n```bash\nhelm repo add grafana https://grafana.github.io/helm-charts\nhelm repo update\n```\n\n## Create the `meta` namespace\n\n```bash\nkubectl create namespace meta\n```\n\n## Install Pyroscope\n\n```bash\nhelm install --values pyroscope-values.yml pyroscope grafana/pyroscope -n meta\n```\n\n## Install Grafana\n\n```bash\nhelm install --values grafana-values.yml grafana grafana/grafana -n meta\n```\n\n## Install the K8s Monitoring Helm Chart\n\nThis scenario requires `grafana/k8s-monitoring` chart v4 or later.\n\n```bash\nhelm install --values k8s-monitoring-values.yml k8s grafana/k8s-monitoring --version \"^4.0.0\" -n meta\n```\n\n## Accessing the Grafana UI\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l \"app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl 
--namespace meta port-forward $POD_NAME 3000\n```\n\nOpen [http://localhost:3000](http://localhost:3000) and log in with `admin` / `adminadminadmin`.\n\n## Accessing the Alloy UI\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l \"app.kubernetes.io/name=alloy-profiles,app.kubernetes.io/instance=k8s\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl --namespace meta port-forward $POD_NAME 12345\n```\n\n## Enabling Profiling on Your Pods\n\nTo profile a Go application, ensure it exposes a pprof endpoint (typically at `:6060/debug/pprof/`) and add these annotations to the pod:\n\n```yaml\nmetadata:\n  annotations:\n    profiles.grafana.com/memory.scrape: \"true\"\n    profiles.grafana.com/memory.port_name: \"http-metrics\"\n    profiles.grafana.com/cpu.scrape: \"true\"\n    profiles.grafana.com/cpu.port_name: \"http-metrics\"\n    profiles.grafana.com/goroutine.scrape: \"true\"\n    profiles.grafana.com/goroutine.port_name: \"http-metrics\"\n```\n\n## Adding a Demo App\n\nDeploy Pyroscope's demo Ride Share app to generate profiles:\n\n```bash\nkubectl apply -n meta -f - <<EOF\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: ride-share-go\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app: ride-share-go\n  template:\n    metadata:\n      labels:\n        app: ride-share-go\n      annotations:\n        profiles.grafana.com/memory.scrape: \"true\"\n        profiles.grafana.com/memory.port: \"6060\"\n        profiles.grafana.com/cpu.scrape: \"true\"\n        profiles.grafana.com/cpu.port: \"6060\"\n        profiles.grafana.com/goroutine.scrape: \"true\"\n        profiles.grafana.com/goroutine.port: \"6060\"\n    spec:\n      containers:\n      - name: ride-share-go\n        image: grafana/pyroscope-rideshare-go:latest\n        ports:\n        - containerPort: 5000\n          name: http\n        - containerPort: 6060\n          name: pprof\n        env:\n        - name: REGION\n          value: us-east-1\nEOF\n```\n\n## Explore 
Profiles\n\nIn Grafana, navigate to the **Pyroscope** app or use **Explore** with the Pyroscope datasource. You can view:\n\n* CPU profiles - flame graphs showing where CPU time is spent\n* Memory profiles - heap allocation and usage\n* Goroutine profiles - concurrent goroutine analysis\n"
  },
  {
    "path": "k8s/profiling/grafana-values.yml",
    "content": "---\npersistence:\n  type: pvc\n  enabled: true\n\nadminUser: admin\nadminPassword: adminadminadmin\n\nservice:\n  enabled: true\n  type: ClusterIP\n\nplugins:\n  - grafana-pyroscope-app\n\ndatasources:\n  datasources.yaml:\n    apiVersion: 1\n    datasources:\n    - name: Pyroscope\n      type: grafana-pyroscope-datasource\n      access: proxy\n      orgId: 1\n      url: http://pyroscope.meta.svc.cluster.local:4040\n      basicAuth: false\n      isDefault: true\n      version: 1\n      editable: false\n"
  },
  {
    "path": "k8s/profiling/k8s-monitoring-values.yml",
    "content": "---\ncluster:\n  name: meta-monitoring-tutorial\n\ndestinations:\n  pyroscope:\n    type: pyroscope\n    url: http://pyroscope.meta.svc.cluster.local:4040\n\nprofiling:\n  enabled: true\n  collector: alloy-profiles\n  pprof:\n    enabled: true\n\ncollectors:\n  alloy-profiles:\n    presets: [privileged, daemonset]\n"
  },
  {
    "path": "k8s/profiling/kind.yml",
    "content": "# a cluster with 1 control-plane node and 2 workers\nkind: Cluster\napiVersion: kind.x-k8s.io/v1alpha4\nnodes:\n- role: control-plane\n- role: worker\n- role: worker\n"
  },
  {
    "path": "k8s/profiling/pyroscope-values.yml",
    "content": "pyroscope:\n  extraArgs:\n    store.max-block-duration: 5m\n  resources:\n    requests:\n      cpu: 500m\n      memory: 512Mi\n    limits:\n      cpu: 1\n      memory: 1Gi\n"
  },
  {
    "path": "k8s/tracing/README.md",
    "content": "# Monitor Kubernetes Traces with Grafana Alloy and Tempo\n\n> Note this scenario works using the K8s Monitoring Helm chart. This abstracts the need to configure Alloy and deploys best practices for monitoring Kubernetes clusters.\n\nThis scenario demonstrates how to set up the Kubernetes monitoring Helm chart with Tempo for distributed trace collection. This scenario will install three Helm charts: Tempo, Grafana, and k8s-monitoring. Tempo will store the traces, Grafana will visualize them, and Alloy (k8s-monitoring) will receive traces via OTLP and forward them to Tempo.\n\nApplications send traces to Alloy's OTLP endpoint, which then forwards them to Tempo.\n\n## Prerequisites\n\nClone the repository:\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\nChange to the directory:\n\n```bash\ncd alloy-scenarios/k8s/tracing\n```\n\nNext you will need a Kubernetes cluster. An example Kind cluster configuration is provided in the `kind.yml` file:\n\n```bash\nkind create cluster --config kind.yml\n```\n\nInstall Helm and add the Grafana Helm repository:\n\n```bash\nhelm repo add grafana https://grafana.github.io/helm-charts\nhelm repo update\n```\n\n## Create the `meta` and `prod` namespaces\n\n```bash\nkubectl create namespace meta && \\\nkubectl create namespace prod\n```\n\n## Install Tempo\n\n```bash\nhelm install --values tempo-values.yml tempo grafana/tempo -n meta\n```\n\n## Install Grafana\n\n```bash\nhelm install --values grafana-values.yml grafana grafana/grafana -n meta\n```\n\n## Install the K8s Monitoring Helm Chart\n\nThis scenario requires `grafana/k8s-monitoring` chart v4 or later.\n\n```bash\nhelm install --values k8s-monitoring-values.yml k8s grafana/k8s-monitoring --version \"^4.0.0\" -n meta\n```\n\nThis configures Alloy to receive OTLP traces on ports 4317 (gRPC) and 4318 (HTTP), then forward them to Tempo.\n\n## Accessing the Grafana UI\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l 
\"app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl --namespace meta port-forward $POD_NAME 3000\n```\n\nOpen [http://localhost:3000](http://localhost:3000) and log in with `admin` / `adminadminadmin`.\n\n## Accessing the Alloy UI\n\n```bash\nexport POD_NAME=$(kubectl get pods --namespace meta -l \"app.kubernetes.io/name=alloy-receiver,app.kubernetes.io/instance=k8s\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl --namespace meta port-forward $POD_NAME 12345\n```\n\n## Sending Traces\n\nApplications in your cluster should set their OTLP exporter endpoint to the Alloy receiver service:\n\n```\nOTEL_EXPORTER_OTLP_ENDPOINT=http://k8s-alloy-receiver.meta.svc.cluster.local:4317\n```\n\n## Adding a Demo App\n\nDeploy a sample instrumented application in the `prod` namespace to generate traces:\n\n```bash\nhelm install tempo-distributed grafana/tempo-distributed -n prod\n```\n\nOr deploy any application instrumented with OpenTelemetry SDK pointing to the Alloy OTLP endpoint above.\n\n## Explore Traces\n\nIn Grafana, go to **Explore** and select the **Tempo** datasource. Use TraceQL to search for traces:\n\n* `{}` - View all traces\n* `{resource.service.name=\"my-service\"}` - Filter by service name\n* `{status=error}` - Find error traces\n"
  },
  {
    "path": "k8s/tracing/grafana-values.yml",
    "content": "---\npersistence:\n  type: pvc\n  enabled: true\n\nadminUser: admin\nadminPassword: adminadminadmin\n\nservice:\n  enabled: true\n  type: ClusterIP\n\ndatasources:\n  datasources.yaml:\n    apiVersion: 1\n    datasources:\n    - name: Tempo\n      type: tempo\n      access: proxy\n      orgId: 1\n      url: http://tempo.meta.svc.cluster.local:3200\n      basicAuth: false\n      isDefault: true\n      version: 1\n      editable: false\n"
  },
  {
    "path": "k8s/tracing/k8s-monitoring-values.yml",
    "content": "---\ncluster:\n  name: meta-monitoring-tutorial\n\ndestinations:\n  tempo:\n    type: otlp\n    url: http://tempo.meta.svc.cluster.local:4317\n    metrics:\n      enabled: false\n    logs:\n      enabled: false\n    traces:\n      enabled: true\n\napplicationObservability:\n  enabled: true\n  collector: alloy-receiver\n  receivers:\n    otlp:\n      grpc:\n        enabled: true\n      http:\n        enabled: true\n  metrics:\n    enabled: false\n  logs:\n    enabled: false\n\ncollectors:\n  alloy-receiver:\n    presets: [deployment]\n"
  },
  {
    "path": "k8s/tracing/kind.yml",
    "content": "# a cluster with 1 control-plane node and 2 workers\nkind: Cluster\napiVersion: kind.x-k8s.io/v1alpha4\nnodes:\n- role: control-plane\n- role: worker\n- role: worker\n"
  },
  {
    "path": "k8s/tracing/tempo-values.yml",
    "content": "tempo:\n  storage:\n    trace:\n      backend: local\n      local:\n        path: /var/tempo/traces\n      wal:\n        path: /var/tempo/wal\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"0.0.0.0:4317\"\n        http:\n          endpoint: \"0.0.0.0:4318\"\n  server:\n    http_listen_port: 3200\n"
  },
  {
    "path": "kafka/README.md",
    "content": "# Kafka Scenarios\n\nLearn how to use Grafana Alloy to monitor logs from Kafka.\n\n## Overview\n\nThis demo showcases how to:\n- Collect logs from a Kafka topic\n- Process and transform JSON log data with Alloy\n- Forward processed logs to Loki\n- Visualize the logs in Grafana\n\n## Components\n\n- **Kafka**: Message broker storing logs\n- **Kafka Producer**: Generates sample logs and sends them to Kafka\n- **Grafana Alloy**: Observability pipeline that processes logs\n- **Loki**: Log aggregation system\n- **Grafana**: Visualization platform\n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/kafka\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345`. \n\n### Step 4: Access Grafana UI\nOpen your browser and go to `http://localhost:3000`.\n\nClick `drilldown` to see the logs in Grafana.\n\n## How It Works\n\n1. The `gen_log.sh` script generates random JSON logs with different log levels, applications, and messages\n2. These logs are sent to the Kafka topic `alloy-logs`\n3. Alloy reads from this Kafka topic, processes the JSON data, and forwards it to Loki\n4. Grafana connects to Loki to display and query the processed logs\n\nTry creating dashboards in Grafana to visualize log frequencies by application or error levels!\n\n\n"
  },
  {
    "path": "kafka/config.alloy",
    "content": "\n\nlivedebugging {\n  enabled = true\n}\n\nloki.source.kafka \"kafka\" {\n  brokers = [\"kafka:9092\"]\n  topics  = [\"alloy-logs\"]\n  labels   = {\n    source = \"kafka\",\n    component = \"loki.source.kafka\",\n\n  }\n  version = \"3.8.0\"\n  forward_to = [loki.process.log_data.receiver]\n\n}\n\nloki.process \"log_data\" {\n  forward_to = [loki.write.local.receiver]\n\n\n  stage.json {\n    drop_malformed = true,\n    expressions = {\n      level = \"\",\n      msg   = \"\",\n      app   = \"app\",\n    }\n  }\n\n  stage.json {\n    source = \"app\"\n    expressions = {\n      app_name    = \"name\",\n      app_version = \"version\",\n    }\n  }\n\n  stage.template {\n    source   = \"new_json\"\n    template = \"{\\\"level\\\":\\\"{{ .level }}\\\",\\\"msg\\\":\\\"{{ .msg }}\\\",\\\"app_name\\\":\\\"{{ .app_name }}\\\",\\\"app_version\\\":\\\"{{ .app_version }}\\\"}\"\n\n  }\n\n  stage.output {\n    source = \"new_json\"\n  }\n \n\n  \n}\n\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}\n\n"
  },
  {
    "path": "kafka/docker-compose.coda.yml",
    "content": "services:\n  kafka:\n    image: 'bitnami/kafka:3.8'\n    ports:\n      - \"9092:9092\"\n    volumes:\n      - kafka_data:/bitnami/kafka\n    environment:\n       # KRaft mode settings (required)\n      - KAFKA_CFG_NODE_ID=0\n      - KAFKA_CFG_PROCESS_ROLES=controller,broker\n      - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka:9093\n      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093\n      - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092\n      - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT\n      - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER\n    healthcheck:\n      test: [\"CMD\", \"kafka-topics.sh\", \"--bootstrap-server\", \"localhost:9092\", \"--version\"]\n      interval: 10s\n      timeout: 10s\n      retries: 5\n\n  kafka-producer:\n    image: 'bitnami/kafka:3.8'\n    volumes:\n      - ./gen_log.sh:/bin/gen_log.sh\n      - kafka_data:/bitnami/kafka\n    entrypoint: [\"sh\", \"-c\", \"/bin/gen_log.sh\"]\n\nvolumes:\n  kafka_data:\n"
  },
  {
    "path": "kafka/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # kafka server instance\n  kafka:\n    image: 'bitnami/kafka:3.8'\n    ports:\n      - \"9092:9092\"\n    volumes:\n      - kafka_data:/bitnami/kafka\n\n    environment:\n       #KRaft must\n      - KAFKA_CFG_NODE_ID=0\n      - KAFKA_CFG_PROCESS_ROLES=controller,broker\n      - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka:9093\n      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093\n      - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092\n      - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT\n      - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER\n    healthcheck:\n      test: [\"CMD\", \"kafka-topics.sh\", \"--bootstrap-server\", \"localhost:9092\", \"--version\"]\n      interval: 10s\n      timeout: 10s\n      retries: 5\n  kafka-producer:\n    image: 'bitnami/kafka:3.8'\n    volumes:\n      - ./gen_log.sh:/bin/gen_log.sh\n      - kafka_data:/bitnami/kafka\n    # change cmd \n    entrypoint: [\"sh\", \"-c\", \"/bin/gen_log.sh\"]\n\n    \n\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 4318:4318\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - ./logs:/temp/logs\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental  --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p 
/etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         EOF\n         /run.sh\nvolumes:\n  kafka_data:\n"
  },
  {
    "path": "kafka/gen_log.sh",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\nLEVELS=(info warn error debug)\nAPPS=(test auth payment order catalog)\nMSGS=(\n  \"Hello World from Grafana Alloy integration – log pipeline initialized successfully.\"\n  \"User authentication succeeded: user_id=42, ip=192.168.1.100, method=OAuth2.\"\n  \"Order created: order_id=12345, items=[{\\\"sku\\\":\\\"ABC\\\",\\\"qty\\\":2},{\\\"sku\\\":\\\"XYZ\\\",\\\"qty\\\":1}], total=USD 299.99.\"\n  \"Payment processing failed: transaction_id=67890, error_code=PMT-402, reason=Insufficient funds.\"\n  \"Cache miss on key user_profile_42; fetching from primary DB and repopulating cache.\"\n  \"Background job completed: task=metrics-aggregation, duration=12.34s, processed=2500 records.\"\n  \"High memory usage detected on host host-01: usage=87.5%, threshold=80% — consider scaling up.\"\n  \"Debug info: received payload with 15 fields, sample_field=\\\"some long detailed info here\\\", parsing succeeded.\"\n)\n\n# Always running, sending logs to kafka every two seconds.\nwhile true; do\n  level=${LEVELS[RANDOM % ${#LEVELS[@]}]}\n  msg=${MSGS[RANDOM % ${#MSGS[@]}]}\n  app=${APPS[RANDOM % ${#APPS[@]}]}\n  version=\"0.$((RANDOM % 10)).$((RANDOM % 100))\"\n\n  printf '{\"level\":\"%s\",\"msg\":\"%s\",\"app\":{\"name\":\"%s\",\"version\":\"%s\"}}\\n' \\\n    \"$level\" \"$msg\" \"$app\" \"$version\"\n  sleep 2\ndone | kafka-console-producer.sh \\\n    --bootstrap-server kafka:9092 \\\n    --topic alloy-logs\n"
  },
  {
    "path": "kafka/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\n# Note: We are setting the max chunk age far lower than the default expected value\n# This is due to the fact this scenario is used within the LogCLI demo and we need a short flush time.\n# To show how logcli stats --since 24h '{service_name=\"Delivery World\", package_size=\"Large\"}' works.\ningester:\n  max_chunk_age: 5m # Should be 2 hours"
  },
  {
    "path": "linux/README.md",
    "content": "# Monitoring Linux with Alloy\n\nGrafana Alloy can be used to monitor Linux servers and containers. In this guide, we will show you how to deploy Grafana Alloy in a Docker environment to monitor Linux system metrics and logs. The setup consists of:\n* Node Exporter metrics for system performance monitoring\n* System logs collection with Loki\n\n## Prerequisites\n\n* Git - You will need Git to clone the repository.\n* Docker and Docker Compose - This tutorial uses Docker to host Grafana, Loki, Prometheus, and Alloy.\n* Linux environment - Either a Linux host running Docker or a Linux VM.\n\n## About this Demo\n\nThis demo runs Alloy in a container alongside Grafana, Prometheus, and Loki, creating a self-contained monitoring stack. The Alloy container acts as a \"fake Linux server\" to demonstrate monitoring capabilities out of the box.\n\nIn a production environment, you would typically install Alloy directly on each Linux server you want to monitor.\n\n## Step 1: Clone the Repository\n\nClone the repository to your machine:\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/linux\n```\n\n## Step 2: Deploy the Monitoring Stack\n\nUse Docker Compose to deploy Grafana, Loki, Prometheus, and Alloy:\n\n```bash\ndocker-compose up -d\n```\n\nYou can check the status of the containers:\n\n```bash\ndocker ps\n```\n\nGrafana should be running on [http://localhost:3000](http://localhost:3000).\n\n## Step 3: Explore the Monitoring Data\n\nOnce the stack is running, you can explore the collected metrics and logs:\n\n1. Access Grafana at [http://localhost:3000](http://localhost:3000) (default credentials are admin/admin)\n2. 
Import the Node Exporter dashboard to visualize system metrics:\n   - Go to Dashboards → Import\n   - Upload the JSON file from [here](https://grafana.com/api/dashboards/1860/revisions/37/download)\n   - Select the Prometheus data source and click Import\n\nThis community dashboard provides comprehensive system metrics including CPU, memory, disk, and network usage.\n\n## Step 4: Viewing Logs\n\nOpen your browser and go to [http://localhost:3000/a/grafana-lokiexplore-app](http://localhost:3000/a/grafana-lokiexplore-app). This will take you to the Loki explorer in Grafana.\n\n## Deploying on Bare Metal\n\nTo monitor actual Linux servers in production, you would:\n\n1. Install Alloy directly on each Linux server\n\n2. Modify the `config.alloy` file to point to your Prometheus and Loki instances:\n   ```\n   prometheus.remote_write \"local\" {\n     endpoint {\n       url = \"http://localhost:9090/api/v1/write\"\n     }\n   }\n   \n   loki.write \"local\" {\n     endpoint {\n       url = \"http://localhost:3100/loki/api/v1/push\"\n     }\n   }\n   ```\n\n3. Run Alloy as a service:\n   ```bash\n   sudo alloy run /path/to/config.alloy\n   ```\n\n## Configuration Customization\n\nThe included `config.alloy` file sets up:\n\n1. Node Exporter integration to collect system metrics\n2. Log collection from system logs and journal\n3. Relabeling rules to organize metrics and logs\n4. Remote write endpoints for Prometheus and Loki\n\nYou can customize which collectors are enabled/disabled and adjust scrape intervals in the configuration file.\n\n## Troubleshooting\n\nIf you encounter issues:\n\n* Check container logs: `docker-compose logs`\n* Verify Alloy is running: `docker-compose ps`\n* Ensure ports are not conflicting with existing services\n* Review the Alloy configuration in `config.alloy`\n\n\n"
  },
  {
    "path": "linux/config.alloy",
    "content": "// This block relabels metrics coming from node_exporter to add standard labels\ndiscovery.relabel \"integrations_node_exporter\" {\n  targets = prometheus.exporter.unix.integrations_node_exporter.targets\n\n  rule {\n    // Set the instance label to the hostname of the machine\n    target_label = \"instance\"\n    replacement  = constants.hostname\n  }\n\n  rule {\n    // Set a standard job name for all node_exporter metrics\n    target_label = \"job\"\n    replacement = \"integrations/node_exporter\"\n  }\n}\n\n// Configure the node_exporter integration to collect system metrics\nprometheus.exporter.unix \"integrations_node_exporter\" {\n  // Disable unnecessary collectors to reduce overhead\n  disable_collectors = [\"ipvs\", \"btrfs\", \"infiniband\", \"xfs\", \"zfs\"]\n  enable_collectors = [\"meminfo\"]\n\n  filesystem {\n    // Exclude filesystem types that aren't relevant for monitoring\n    fs_types_exclude     = \"^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$\"\n    // Exclude mount points that aren't relevant for monitoring\n    mount_points_exclude = \"^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+)($|/)\"\n    // Timeout for filesystem operations\n    mount_timeout        = \"5s\"\n  }\n\n  netclass {\n    // Ignore virtual and container network interfaces\n    ignored_devices = \"^(veth.*|cali.*|[a-f0-9]{15})$\"\n  }\n\n  netdev {\n    // Exclude virtual and container network interfaces from device metrics\n    device_exclude = \"^(veth.*|cali.*|[a-f0-9]{15})$\"\n  }\n\n\n}\n\n// Define how to scrape metrics from the node_exporter\nprometheus.scrape \"integrations_node_exporter\" {\nscrape_interval = \"15s\"\n  // Use the targets with labels from the discovery.relabel component\n  targets    = discovery.relabel.integrations_node_exporter.output\n  // Send the scraped metrics to the 
remote_write component\n  forward_to = [prometheus.remote_write.local.receiver]\n}\n\n\n// Define where to send the metrics for storage\nprometheus.remote_write \"local\" {\n  endpoint {\n    // Send metrics to a locally running Prometheus instance\n    url = \"http://prometheus:9090/api/v1/write\"\n  }\n}\n\n// Collect logs from systemd journal for node_exporter integration\nloki.source.journal \"logs_integrations_integrations_node_exporter_journal_scrape\" {\n  // Only collect logs from the last 24 hours\n  max_age       = \"24h0m0s\"\n  // Apply relabeling rules to the logs\n  relabel_rules = discovery.relabel.logs_integrations_integrations_node_exporter_journal_scrape.rules\n  // Send logs to the local Loki instance\n  forward_to    = [loki.write.local.receiver]\n}\n\n// Define which log files to collect for node_exporter\nlocal.file_match \"logs_integrations_integrations_node_exporter_direct_scrape\" {\n  path_targets = [{\n    // Target localhost for log collection\n    __address__ = \"localhost\",\n    // Collect standard system logs\n    __path__    = \"/var/log/{syslog,messages,*.log}\",\n    // Add instance label with hostname\n    instance    = constants.hostname,\n    // Add job label for logs\n    job         = \"integrations/node_exporter\",\n  }]\n}\n\n// Define relabeling rules for systemd journal logs\ndiscovery.relabel \"logs_integrations_integrations_node_exporter_journal_scrape\" {\n  targets = []\n\n  rule {\n    // Extract systemd unit information into a label\n    source_labels = [\"__journal__systemd_unit\"]\n    target_label  = \"unit\"\n  }\n\n  rule {\n    // Extract boot ID information into a label\n    source_labels = [\"__journal__boot_id\"]\n    target_label  = \"boot_id\"\n  }\n\n  rule {\n    // Extract the hostname of the machine into a label\n    source_labels = [\"__journal__hostname\"]\n    target_label  = \"instance\"\n  }\n\n  rule {\n    // Extract the machine id into a label    \n    source_labels = 
[\"__journal__machine_id\"]\n    target_label  = \"machine_id\"\n  }\n\n  rule {\n    // Extract transport information into a label\n    source_labels = [\"__journal__transport\"]\n    target_label  = \"transport\"\n  }\n\n  rule {\n    // Extract log priority into a level label\n    source_labels = [\"__journal_priority_keyword\"]\n    target_label  = \"level\"\n  }\n}\n\n// Collect logs from files for node_exporter\nloki.source.file \"logs_integrations_integrations_node_exporter_direct_scrape\" {\n  // Use targets defined in local.file_match\n  targets    = local.file_match.logs_integrations_integrations_node_exporter_direct_scrape.targets\n  // Send logs to the local Loki instance\n  forward_to = [loki.write.local.receiver]\n}\n\n// Define where to send logs for storage\nloki.write \"local\" {\n    endpoint {\n        // Send logs to a locally running Loki instance\n        url =\"http://loki:3100/loki/api/v1/push\"\n    }\n}\n\n// Enable live debugging features (empty config means use defaults)\nlivedebugging{}\n"
  },
  {
    "path": "linux/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n\n  prometheus:\n     image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n     command:\n       - --web.enable-remote-write-receiver\n       - --config.file=/etc/prometheus/prometheus.yml\n     ports:\n      - 9090:9090/tcp\n     volumes:\n        - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         - name: Prometheus\n           type: prometheus\n           orgId: 1\n           url: http://prometheus:9090\n           basicAuth: false\n           isDefault: true\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n"
  },
  {
    "path": "linux/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\n# Note: We are setting the max chunk age far lower than the default expected value\n# This is due to the fact this scenario is used within the LogCLI demo and we need a short flush time.\n# To show how logcli stats --since 24h '{service_name=\"Delivery World\", package_size=\"Large\"}' works.\ningester:\n  max_chunk_age: 5m # Should be 2 hours"
  },
  {
    "path": "linux/prom-config.yaml",
    "content": "# my global config\nglobal:\n  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.\n  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.\n  # scrape_timeout is set to the global default (10s).\n\n\n"
  },
  {
    "path": "log-api-gateway/README.md",
    "content": "# Log API Gateway\n\nThis scenario demonstrates using **Grafana Alloy** as a centralized log gateway via the `loki.source.api` component. Instead of scraping logs from files or containers, Alloy exposes a Loki-compatible push API endpoint that applications can send logs to directly.\n\n## Architecture\n\n```\n┌──────────────────┐         ┌───────────────────────┐         ┌──────┐         ┌─────────┐\n│  log-producer    │──POST──▶│  Alloy (loki.source.  │──push──▶│ Loki │◀─query──│ Grafana │\n│  (Python script) │         │  api on :3500)        │         │      │         │         │\n└──────────────────┘         └───────────────────────┘         └──────┘         └─────────┘\n```\n\n1. **log-producer** - A Python script that simulates multiple microservices (auth, order, notification) pushing structured logs to Alloy's Loki push API endpoint.\n2. **Alloy** - Receives logs via `loki.source.api` on port 3500, enriches them with a `gateway=alloy` label, and forwards to Loki.\n3. **Loki** - Stores and indexes the logs.\n4. **Grafana** - Pre-configured with the Loki datasource for querying logs.\n\n## Running\n\n```bash\n# From the repo root (uses centralized image versions)\n./run-example.sh log-api-gateway\n\n# Or directly\ncd log-api-gateway && docker compose up -d\n```\n\n## Exploring\n\n- **Grafana**: [http://localhost:3000](http://localhost:3000) - Query logs in the Explore view using the Loki datasource\n- **Alloy UI**: [http://localhost:12345](http://localhost:12345) - Inspect the pipeline graph and component health\n\n### Example LogQL Queries\n\n```logql\n# All logs from a specific service\n{service_name=\"auth-service\"}\n\n# All logs passing through the gateway\n{gateway=\"alloy\"}\n\n# Filter by environment\n{environment=\"demo\"}\n```\n\n## How It Works\n\nThe `loki.source.api` component in Alloy exposes a Loki-compatible HTTP endpoint (`/loki/api/v1/push`) that any application can push logs to. 
This is useful when:\n\n- Applications already use the Loki push API format\n- You want a centralized gateway to enrich, filter, or route logs before they reach Loki\n- You need to decouple log producers from the storage backend\n\nThe Alloy pipeline in this scenario:\n\n1. **`loki.source.api`** - Listens on port 3500 for incoming log push requests\n2. **`loki.process`** - Adds a `gateway=alloy` static label to all received logs\n3. **`loki.write`** - Forwards the enriched logs to Loki\n\n## Stopping\n\n```bash\ncd log-api-gateway && docker compose down\n```\n"
  },
  {
    "path": "log-api-gateway/app/producer.py",
    "content": "import requests\nimport time\nimport random\nimport json\n\nALLOY_URL = \"http://alloy:3500/loki/api/v1/push\"\n\nservices = [\n    {\"name\": \"auth-service\", \"messages\": [\n        \"User login attempt from IP 10.0.1.50\",\n        \"Token refresh completed for user_id=42\",\n        \"Failed login: invalid credentials for user@example.com\",\n        \"Session expired for session_id=abc123\",\n    ]},\n    {\"name\": \"order-service\", \"messages\": [\n        \"New order created: ORD-98765\",\n        \"Payment processed for order ORD-98765\",\n        \"Order shipped: tracking_id=TRACK123\",\n        \"Inventory check: item SKU-001 has 5 units remaining\",\n    ]},\n    {\"name\": \"notification-service\", \"messages\": [\n        \"Email sent to user@example.com\",\n        \"SMS notification queued for +1234567890\",\n        \"Push notification delivered to device_id=xyz\",\n        \"Notification batch completed: 150 messages sent\",\n    ]},\n]\n\nprint(\"Starting log producer...\")\nwhile True:\n    service = random.choice(services)\n    message = random.choice(service[\"messages\"])\n\n    payload = {\n        \"streams\": [{\n            \"stream\": {\n                \"service_name\": service[\"name\"],\n                \"environment\": \"demo\",\n            },\n            \"values\": [\n                [str(int(time.time() * 1e9)), message]\n            ]\n        }]\n    }\n\n    try:\n        resp = requests.post(ALLOY_URL, json=payload, headers={\"Content-Type\": \"application/json\"})\n        if resp.status_code != 204:\n            print(f\"Unexpected status: {resp.status_code}\")\n    except Exception as e:\n        print(f\"Error sending log: {e}\")\n\n    time.sleep(random.uniform(0.5, 2.0))\n"
  },
  {
    "path": "log-api-gateway/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\n// Accept logs via Loki push API - acts as a centralized log gateway\nloki.source.api \"default\" {\n\thttp {\n\t\tlisten_address = \"0.0.0.0\"\n\t\tlisten_port    = 3500\n\t}\n\n\tforward_to = [loki.process.enrich.receiver]\n}\n\n// Enrich logs with gateway metadata\nloki.process \"enrich\" {\n\tforward_to = [loki.write.local.receiver]\n\n\tstage.static_labels {\n\t\tvalues = {\n\t\t\t\"gateway\" = \"alloy\",\n\t\t}\n\t}\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "log-api-gateway/docker-compose.coda.yml",
    "content": "services:\n  log-producer:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: log-producer\n    volumes:\n      - ./app/producer.py:/app/producer.py\n    command: sh -c \"pip install requests && python3 /app/producer.py\"\n"
  },
  {
    "path": "log-api-gateway/docker-compose.yml",
    "content": "\nservices:\n\n  # Python script that pushes logs to Alloy's Loki push API endpoint\n  log-producer:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: log-producer\n    volumes:\n      - ./app/producer.py:/app/producer.py\n    command: sh -c \"pip install requests && python3 /app/producer.py\"\n    depends_on:\n      - alloy\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 3500:3500\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: true\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n"
  },
  {
    "path": "log-api-gateway/loki-config.yaml",
    "content": "auth_enabled: false\n\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: debug\n  grpc_server_max_concurrent_streams: 1000\n\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\n\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\n\nlimits_config:\n  metric_aggregation_enabled: true\n\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\n\nruler:\n  alertmanager_url: http://localhost:9093\n\nfrontend:\n  encoding: protobuf\n\n# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration\n# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/\n#\n# Statistics help us better understand how Loki is used, and they show us performance\n# levels for most users. This helps us prioritize features and documentation.\n# For more information on what's sent, look at\n# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go\n# Refer to the buildReport method to see what goes into a report.\n#\n# If you would like to disable reporting, uncomment the following lines:\n#analytics:\n#  reporting_enabled: false\n"
  },
  {
    "path": "log-secret-filtering/README.md",
    "content": "# Log Secret Filtering\n\nDemonstrates how Grafana Alloy's `loki.secretfilter` component automatically redacts secrets from log lines before they reach Loki.\n\n## Overview\n\nA Python application continuously writes log lines -- some containing fake secrets (AWS keys, database connection strings, GitHub tokens, JWTs, Slack webhooks) -- to a shared log file. Alloy tails the file, passes every line through `loki.secretfilter` using built-in Gitleaks patterns, and forwards the sanitized output to Loki. By the time logs appear in Grafana, sensitive values have been replaced with `<REDACTED:$SECRET_NAME>`.\n\nThe example includes:\n\n- **secret-logger** -- Python app that emits a mix of normal and secret-containing log lines every 2 seconds.\n- **Alloy** -- Tails the log file, applies `loki.secretfilter`, and pushes to Loki. Runs with `--stability.level=experimental` because `loki.secretfilter` is an experimental component.\n- **Loki** -- Stores the redacted logs.\n- **Grafana** -- Visualize and query logs to verify secrets have been removed.\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd log-secret-filtering\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n\n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh log-secret-filtering\n   ```\n\n4. Access Grafana at [http://localhost:3000](http://localhost:3000)\n\n## What to Expect\n\n1. Open Grafana and navigate to **Explore**.\n2. Select the **Loki** datasource.\n3. Run the query `{job=\"secret-app\"}`.\n4. 
You should see log lines where secrets have been replaced, for example:\n   - `Found config: <REDACTED:aws-access-token> with secret`\n   - `Database connection: <REDACTED:generic-api-key>`\n   - Normal log lines (health checks, request timings) pass through unchanged.\n\n## Architecture\n\n```\n┌─────────────────┐      ┌───────────────────────────────────────┐      ┌──────┐      ┌─────────┐\n│  secret-logger  │─────▶│  Alloy                                │─────▶│ Loki │─────▶│ Grafana │\n│  (writes logs)  │ file │  local.file_match ─▶ loki.source.file │ push │      │ query│         │\n└─────────────────┘      │  ─▶ loki.secretfilter ─▶ loki.write   │      │      │      │         │\n                         └───────────────────────────────────────┘      └──────┘      └─────────┘\n```\n\n## Alloy Pipeline\n\nThe `config.alloy` pipeline:\n\n1. `local.file_match` -- discovers log files at `/tmp/logs/*.log`.\n2. `loki.source.file` -- tails matched files and forwards log entries.\n3. `loki.secretfilter` -- scans each log line against Gitleaks secret patterns and replaces matches with `<REDACTED:$SECRET_NAME>`.\n4. `loki.write` -- pushes sanitized logs to Loki.\n\nVisit the Alloy UI at [http://localhost:12345](http://localhost:12345) to inspect the running pipeline and use the live debugging view.\n"
  },
  {
    "path": "log-secret-filtering/app/main.py",
    "content": "import time\nimport random\nimport datetime\n\nsecrets = [\n    'Found config: AKIAIOSFODNN7EXAMPLE with secret',\n    'Database connection: postgresql://admin:SuperSecret123@db:5432/prod',\n    'Setting API_KEY=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef12',\n    'Bearer token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U',\n    'Slack webhook: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX',\n]\n\nnormal = [\n    'Processing request from 192.168.1.100',\n    'User login successful for user_id=42',\n    'Health check passed: all systems operational',\n    'Cache hit ratio: 94.2%',\n    'Request completed in 23ms',\n]\n\nwith open(\"/logs/app.log\", \"w\") as f:\n    pass\n\nwhile True:\n    line = random.choice(secrets + normal + normal)  # 2:1 ratio normal:secret\n    ts = datetime.datetime.now().isoformat()\n    with open(\"/logs/app.log\", \"a\") as f:\n        f.write(f\"{ts} {line}\\n\")\n    time.sleep(2)\n"
  },
  {
    "path": "log-secret-filtering/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\nlocal.file_match \"app_logs\" {\n\tpath_targets = [{\"__path__\" = \"/tmp/logs/*.log\", \"job\" = \"secret-app\"}]\n\tsync_period  = \"5s\"\n}\n\nloki.source.file \"log_scrape\" {\n\ttargets       = local.file_match.app_logs.targets\n\tforward_to    = [loki.secretfilter.default.receiver]\n\ttail_from_end = true\n}\n\n// Redact secrets from log lines before sending to Loki.\n// Uses built-in Gitleaks patterns to detect API keys, passwords, tokens, etc.\nloki.secretfilter \"default\" {\n\tforward_to = [loki.write.local.receiver]\n\n\tredact_with = \"<REDACTED:$SECRET_NAME>\"\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "log-secret-filtering/docker-compose.coda.yml",
    "content": "services:\n  secret-logger:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./app/main.py:/app/main.py\n      - ./logs:/logs\n    command: python3 /app/main.py\n"
  },
  {
    "path": "log-secret-filtering/docker-compose.yml",
    "content": "services:\n  # Python app that periodically logs fake secrets (API keys, passwords, tokens)\n  secret-logger:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./app/main.py:/app/main.py\n      - ./logs:/logs\n    command: python3 /app/main.py\n\n  # Alloy telemetry pipeline — scrapes log files and redacts secrets before forwarding to Loki\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - ./logs:/tmp/logs\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data --stability.level=experimental /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  # Loki for log aggregation\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n    depends_on:\n      - loki\n"
  },
  {
    "path": "log-secret-filtering/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 2h\n"
  },
  {
    "path": "logs-file/README.md",
    "content": "# File Scenarios\n\nLearn how to use Grafana Alloy to monitor logs from a file.\n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/logs-file\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345`. \n\n### Step 4: Access Grafana UI\nOpen your browser and go to `http://localhost:3000`.\n\n\n"
  },
  {
    "path": "logs-file/config.alloy",
    "content": "\n\nlivedebugging {\n  enabled = true\n}\n\nlocal.file_match \"local_files\" {\n    path_targets = [{\"__path__\" = \"/temp/logs/*.log\", \"job\" = \"python\", \"hostname\" = constants.hostname}]\n    sync_period  = \"5s\"\n}\n \nloki.source.file \"log_scrape\" {\n    targets    = local.file_match.local_files.targets\n    forward_to = [loki.write.local.receiver]\n    tail_from_end = true\n\n\n}\n\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "logs-file/docker-compose.coda.yml",
    "content": "services:\n  logs-file:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: logs-file\n    volumes:\n      - ./main.py:/main.py\n      - ./logs:/logs\n    command: [\"python3\", \"/main.py\"]\n"
  },
  {
    "path": "logs-file/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Log file generator using a Python script\n  logs-file:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: logs-file\n    volumes:\n      - ./main.py:/main.py  # Log generator script\n      - ./logs:/logs  # Directory to store the logs\n    depends_on:\n      - alloy\n    command: [\"python3\", \"/main.py\"]\n  \n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 4318:4318\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - ./logs:/temp/logs\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental  --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n"
  },
  {
    "path": "logs-file/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\n# Note: We are setting the max chunk age far lower than the default expected value\n# This is due to the fact this scenario is used within the LogCLI demo and we need a short flush time.\n# To show how logcli stats --since 24h '{service_name=\"Delivery World\", package_size=\"Large\"}' works.\ningester:\n  max_chunk_age: 5m # Should be 2 hours"
  },
  {
    "path": "logs-file/main.py",
    "content": "import logging\nimport time\nimport random\nimport os\n\n# Ensure the /logs directory exists\nlog_directory = \"/logs\"\nlog_file = os.path.join(log_directory, \"app.log\")\n\nif not os.path.exists(log_directory):\n    os.makedirs(log_directory)  # Create directory if it doesn't exist\n\n# Configure logging\nlogging.basicConfig(\n    level=logging.DEBUG,\n    format=\"%(asctime)s - %(levelname)s - %(message)s\",\n    handlers=[\n        logging.FileHandler(log_file),\n        logging.StreamHandler()\n    ]\n)\n\nlogger = logging.getLogger(\"ExampleApp\")\n\ndef simulate_process():\n    \"\"\"Simulates a process that generates logs every 5 seconds.\"\"\"\n    actions = [\"start\", \"process\", \"error\", \"complete\"]\n    \n    while True:  # Infinite loop\n        action = random.choice(actions)\n\n        if action == \"start\":\n            logger.info(\"Process started successfully.\")\n        elif action == \"process\":\n            logger.debug(\"Processing data...\")\n        elif action == \"error\":\n            logger.error(\"An error occurred during processing.\")\n        elif action == \"complete\":\n            logger.warning(\"Process completed with minor warnings.\")\n\n        time.sleep(5)  # Generate a log every 5 seconds\n\nif __name__ == \"__main__\":\n    logger.info(\"Application started.\")\n\n    try:\n        simulate_process()\n    except Exception as e:\n        logger.critical(f\"Unhandled exception: {e}\")\n\n    logger.info(\"Application finished.\")\n"
  },
  {
    "path": "logs-tcp/README.md",
    "content": "# Logs Over TCP Scenario\n\nThis scenario demonstrates how to send JSON-formatted logs to Alloy over TCP. We then use `loki.process` to parse the logs and extract fields from the JSON payload. These fields are used to generate labels and structured metadata for the logs.\n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/logs-tcp\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345`. \n\n### Step 4: Access Grafana UI\nOpen your browser and go to `http://localhost:3000`.\n\n\n"
  },
  {
    "path": "logs-tcp/config.alloy",
    "content": "\n\nlivedebugging {\n  enabled = true\n}\n\nloki.source.api \"loki_push_api\" {\n    http {\n        listen_address = \"0.0.0.0\"\n        listen_port = 9999\n    }\n    forward_to = [\n        loki.process.labels.receiver,\n    ]\n}\n\nloki.process \"labels\" {\n    stage.json {\n      expressions = { \"extracted_service\" = \"service_name\", \n                      \"extracted_code_line\" = \"code_line\", \n                      \"extracted_server\" = \"server_id\", \n                    }\n    }\n\n  stage.labels {\n    values = {\n      \"service_name\" = \"extracted_service\",\n    }\n  }\n\n  stage.structured_metadata {\n    values = {\n      \"code_line\" = \"extracted_code_line\",\n      \"server\" = \"extracted_server\",\n    }\n}\n\nforward_to = [loki.write.local.receiver]\n\n}\n\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "logs-tcp/docker-compose.coda.yml",
    "content": "services:\n  simulator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: simulator\n    volumes:\n      - ./simulator.py:/simulator.py\n    environment:\n      - TARGET_HOST=alloy\n      - TARGET_PORT=9999\n    command: [\"python3\", \"/simulator.py\"]\n"
  },
  {
    "path": "logs-tcp/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n\n  # JSON log simulator using a Python script\n  simulator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: simulator\n    volumes:\n      - ./simulator.py:/simulator.py  # JSON log simulator script\n    environment:\n      - TARGET_HOST=alloy\n      - TARGET_PORT=9999\n    command: [\"python3\", \"/simulator.py\"]\n    depends_on:\n      - alloy\n  \n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 4318:4318\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - ./logs:/tmp/app-logs/\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental  --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\nvolumes:\n  rsyslog_data:\n"
  },
  {
    "path": "logs-tcp/loki-config.yaml",
    "content": "auth_enabled: false\n\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: debug\n  grpc_server_max_concurrent_streams: 1000\n\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\n\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\n\nlimits_config:\n  metric_aggregation_enabled: true\n\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\n\nruler:\n  alertmanager_url: http://localhost:9093\n\nfrontend:\n  encoding: protobuf\n\n\n\n\n# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration\n# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/\n#\n# Statistics help us better understand how Loki is used, and they show us performance\n# levels for most users. This helps us prioritize features and documentation.\n# For more information on what's sent, look at\n# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go\n# Refer to the buildReport method to see what goes into a report.\n#\n# If you would like to disable reporting, uncomment the following lines:\n#analytics:\n#  reporting_enabled: false"
  },
  {
    "path": "logs-tcp/simulator.py",
    "content": "import socket\nimport time\nimport os\nimport random\nimport json\nfrom datetime import datetime\n\n# Get the target host and port from environment variables\ntarget_host = os.getenv('TARGET_HOST', 'localhost')\ntarget_port = int(os.getenv('TARGET_PORT', 5140))\n\n# Define the endpoint path\nendpoint_path = \"/loki/api/v1/raw\"\n\n# Create a TCP socket\nsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\ntry:\n    sock.connect((target_host, target_port))\nexcept socket.error as e:\n    print(f\"Failed to connect to {target_host}:{target_port} - {e}\")\n    exit(1)\n\n# Define log levels and messages\nlog_levels = [\"INFO\", \"WARNING\", \"ERROR\", \"DEBUG\", \"CRITICAL\"]\nmessages = [\n    \"System started successfully\",\n    \"User login successful\",\n    \"Configuration loaded\",\n    \"Connection to database failed\",\n    \"Data processed successfully\",\n    \"Invalid API request received\",\n    \"Memory usage high\",\n    \"Disk space low\",\n    \"Unknown error occurred\",\n    \"Service restarted\",\n]\n\n# Define extra fields for the log payload\nservice_names = [\"AuthService\", \"DataService\", \"PaymentService\", \"NotificationService\"]\nregions = [\"us-east-1\", \"eu-west-1\", \"ap-south-1\", \"sa-east-1\"]\nserver_ids = [\"srv-101\", \"srv-202\", \"srv-303\", \"srv-404\"]\n\n# Generate and send JSON log messages every few seconds\nwhile True:\n    try:\n        # Correct timestamp format\n        timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')\n        log_level = random.choice(log_levels)\n        message_text = random.choice(messages)\n        service_name = random.choice(service_names)\n        region = random.choice(regions)\n        server_id = random.choice(server_ids)\n        code_line = random.randint(20, 120)  # Simulate random code line numbers\n\n        # Create the JSON log payload\n        log_payload = {\n            \"timestamp\": timestamp,\n            \"severity\": log_level,\n            
\"body\": message_text,\n            \"service_name\": service_name,\n            \"code_line\": code_line,\n            \"region\": region,\n            \"server_id\": server_id\n        }\n\n        # Convert the log payload to JSON string\n        log_json = json.dumps(log_payload)\n\n        # Create the HTTP POST request to send the log\n        http_request = (\n            f\"POST {endpoint_path} HTTP/1.1\\r\\n\"\n            f\"Host: {target_host}\\r\\n\"\n            \"Content-Type: application/json\\r\\n\"\n            f\"Content-Length: {len(log_json)}\\r\\n\"\n            \"Connection: keep-alive\\r\\n\"\n            \"\\r\\n\"\n            f\"{log_json}\"\n        )\n\n        # Send the HTTP request over TCP\n        sock.sendall(http_request.encode())\n        print(f\"Sent JSON log message to {target_host}:{target_port} - {log_json}\")\n    except socket.error as e:\n        print(f\"Failed to send log message - {e}\")\n        break\n\n    # Wait for a few seconds before sending the next message\n    time.sleep(random.randint(3, 8))  # Send a message every 3-8 seconds\n"
  },
  {
    "path": "mail-house/README.md",
    "content": "# Mail House Scenario\n\nLearn how to parse structured logs into Labels and Structured Metadata.\n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/mail-house\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345`. \n\n### Step 4: Access Grafana UI\nOpen your browser and go to `http://localhost:3000`.\n\n\n"
  },
  {
    "path": "mail-house/config.alloy",
    "content": "\n\nlivedebugging {\n  enabled = true\n}\n\nloki.source.api \"loki_push_api\" {\n    http {\n        listen_address = \"0.0.0.0\"\n        listen_port = 9999\n    }\n    forward_to = [\n        loki.process.labels.receiver,\n    ]\n}\n\nloki.process \"labels\" {\n    stage.json {\n      expressions = { \n                      \"timestamp\" = \"\",\n                      \"state\" = \"\", \n                      \"package_size\" = \"\", \n                      \"package_status\" = \"\", \n                      \"package_id\" = \"\",\n                      \"mail_house_id\" = \"\",\n                    }\n    }\n\n  stage.timestamp {\n    source = \"timestamp\"\n    format = \"RFC3339\"\n}\n\n  stage.labels {\n    values = {\n      \"state\" = \"\",\n      \"package_size\" = \"\",\n      \"mail_house_id\" = \"\",\n    }\n  }\n\n  stage.structured_metadata {\n    values = {\n      \"package_status\" = \"\",\n      \"package_id\" = \"\",\n    }\n  }\n\n  stage.static_labels {\n    values = {\n      \"service_name\" = \"Delivery World\",\n    }\n  }\n\n  stage.output {\n    source = \"message\"\n}\n  \n\n\nforward_to = [loki.write.local.receiver]\n\n}\n\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "mail-house/docker-compose.coda.yml",
    "content": "services:\n  mail-house-01:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./main.py:/main.py\n    command: [\"python3\", \"/main.py\"]\n    environment:\n      - MAIL_HOUSE_ID=DEPOT-01\n    restart: unless-stopped\n\n  mail-house-02:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./main.py:/main.py\n    command: [\"python3\", \"/main.py\"]\n    environment:\n      - MAIL_HOUSE_ID=DEPOT-02\n    restart: unless-stopped\n\n  mail-house-03:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./main.py:/main.py\n    command: [\"python3\", \"/main.py\"]\n    environment:\n      - MAIL_HOUSE_ID=DEPOT-03\n    restart: unless-stopped\n"
  },
  {
    "path": "mail-house/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  mail-house-01:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./main.py:/main.py\n    depends_on:\n      - alloy\n    command: [\"python3\", \"/main.py\"]\n    environment:\n      - MAIL_HOUSE_ID=DEPOT-01\n    restart: unless-stopped\n  \n  mail-house-02:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./main.py:/main.py\n    depends_on:\n      - alloy\n    command: [\"python3\", \"/main.py\"]\n    environment:\n      - MAIL_HOUSE_ID=DEPOT-02\n    restart: unless-stopped\n    \n  mail-house-03:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./main.py:/main.py\n    depends_on:\n      - alloy\n    command: [\"python3\", \"/main.py\"]\n    environment:\n      - MAIL_HOUSE_ID=DEPOT-03\n    restart: unless-stopped\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 4318:4318\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental  --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n     
      url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\n"
  },
  {
    "path": "mail-house/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\n# Note: We are setting the max chunk age far lower than the default expected value\n# This is due to the fact this scenario is used within the LogCLI demo and we need a short flush time.\n# To show how logcli stats --since 24h '{service_name=\"Delivery World\", package_size=\"Large\"}' works.\ningester:\n  max_chunk_age: 5m # Should be 2 hours"
  },
  {
    "path": "mail-house/main.py",
    "content": "import random\nimport json\nimport time\nimport socket\nfrom datetime import datetime\nimport os\n\n\n# Get the target host and port from environment variables\ntarget_host = os.getenv('TARGET_HOST', 'alloy')\ntarget_port = int(os.getenv('TARGET_PORT', 9999))\n# Get the mail house ID from environment variables\nmail_house_id = os.getenv('MAIL_HOUSE_ID', 'MAIL-01')\n\n# Define the endpoint path\nendpoint_path = \"/loki/api/v1/raw\"\n\n# List of states and cities in America (abbreviated version)\nSTATES_CITIES = {\n    \"California\": [\"Los Angeles\", \"San Francisco\", \"San Diego\"],\n    \"Texas\": [\"Houston\", \"Dallas\", \"Austin\"],\n    \"New York\": [\"New York City\", \"Buffalo\", \"Rochester\"],\n    \"Florida\": [\"Miami\", \"Orlando\", \"Tampa\"],\n    \"Illinois\": [\"Chicago\", \"Springfield\", \"Naperville\"],\n}\n\n# Package statuses and metadata\nPACKAGE_SIZES = [\"Small\", \"Medium\", \"Large\"]\nPACKAGE_TYPES = [\"Documents\", \"Electronics\", \"Clothing\", \"Food\", \"Furniture\"]\nPACKAGE_STATUS_LEVELS = [\"info\", \"warning\", \"critical\", \"error\"]\nPACKAGE_NOTES = [\n    \"In transit\",\n    \"Out for delivery\",\n    \"Delivered successfully\",\n    \"Delayed due to weather\",\n    \"Address not found\",\n    \"Returned to sender\",\n    \"Damaged during transit\",\n]\n\n\ndef generate_log_entry():\n    state = random.choice(list(STATES_CITIES.keys()))\n    city = random.choice(STATES_CITIES[state])\n    \n    log_entry = {\n        \"timestamp\": datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S.%f\") + \"Z\",\n        \"state\": state,\n        \"city\": city,\n        \"package_id\": f\"PKG{random.randint(10000, 99999)}\",\n        \"package_type\": random.choice(PACKAGE_TYPES),\n        \"package_size\": random.choice(PACKAGE_SIZES),\n        \"package_status\": random.choice(PACKAGE_STATUS_LEVELS),\n        \"note\": random.choice(PACKAGE_NOTES),\n        \"sender\": {\n            \"name\": f\"Sender{random.randint(1, 
100)}\",\n            \"address\": f\"{random.randint(100, 999)} {random.choice(['Main St', 'Broadway', 'Elm St', 'Maple Ave'])}, {city}, {state}\",\n        },\n        \"receiver\": {\n            \"name\": f\"Receiver{random.randint(1, 100)}\",\n            \"address\": f\"{random.randint(100, 999)} {random.choice(['Oak St', 'Pine Rd', 'Cedar Blvd', 'Willow Ln'])}, {random.choice(STATES_CITIES[state])}, {state}\",\n        },\n        \"mail_house_id\": mail_house_id, \n    }\n    return log_entry\n\n\ndef main():\n    # Create a TCP socket\n    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n    try:\n        sock.connect((target_host, target_port))\n    except socket.error as e:\n        print(f\"Failed to connect to {target_host}:{target_port} - {e}\")\n        time.sleep(1)\n        main()\n    \n    while True:\n        try:\n            log_entry = generate_log_entry()\n            log_entry_json = json.dumps(log_entry)\n\n            http_request = (\n                f\"POST {endpoint_path} HTTP/1.1\\r\\n\"\n                f\"Host: {target_host}\\r\\n\"\n                \"Content-Type: application/json\\r\\n\"\n                f\"Content-Length: {len(log_entry_json)}\\r\\n\"\n                \"Connection: keep-alive\\r\\n\"\n                \"\\r\\n\"\n                f\"{log_entry_json}\"\n            )\n\n            # Send the HTTP request over TCP\n            sock.sendall(http_request.encode())\n            print(f\"Sent JSON log message to {target_host}:{target_port} - {log_entry_json}\")\n\n            # Wait for a few seconds before sending the next log\n            time.sleep(1)\n        except socket.error as e:\n            print(f\"Failed to send log message - {e}\")\n            # Close the socket and exit\n            sock.close()\n            exit(1)\n            \n\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "memcached-monitoring/README.md",
    "content": "# Memcached Monitoring with Grafana Alloy\n\nThis scenario demonstrates how to monitor a Memcached instance using Grafana Alloy's built-in `prometheus.exporter.memcached` component.\n\n## Architecture\n\n- **Memcached** - The monitored Memcached instance\n- **Grafana Alloy** - Collects Memcached metrics via `prometheus.exporter.memcached` and remote writes them to Prometheus\n- **Prometheus** - Stores the scraped metrics\n- **Grafana** - Visualizes Memcached metrics (auto-provisioned with Prometheus datasource)\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root using centralized image versions\n./run-example.sh memcached-monitoring\n```\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login required)\n- **Alloy UI**: http://localhost:12345\n- **Prometheus**: http://localhost:9090\n\n## Key Metrics\n\nOnce running, you can query Memcached metrics in Grafana or Prometheus. Some useful metrics include:\n\n- `memcached_up` - Whether Memcached is reachable\n- `memcached_current_connections` - Number of current connections\n- `memcached_current_bytes` - Current number of bytes stored\n- `memcached_current_items` - Current number of items stored\n- `memcached_commands_total` - Total commands by command type (get, set, etc.)\n- `memcached_items_evicted_total` - Total number of items evicted\n- `memcached_read_bytes_total` / `memcached_written_bytes_total` - Network throughput\n\n## Stopping\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "memcached-monitoring/config.alloy",
    "content": "// Memcached Monitoring with Grafana Alloy\n// This configuration scrapes Memcached metrics using the built-in prometheus.exporter.memcached component\n// and remote writes them to Prometheus.\n\nlivedebugging {\n\tenabled = true\n}\n\nprometheus.exporter.memcached \"default\" {\n\taddress = \"memcached:11211\"\n}\n\nprometheus.scrape \"memcached\" {\n\ttargets    = prometheus.exporter.memcached.default.targets\n\tforward_to = [prometheus.remote_write.default.receiver]\n}\n\nprometheus.remote_write \"default\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "memcached-monitoring/docker-compose.coda.yml",
    "content": "services:\n  memcached:\n    image: memcached:1.6@sha256:277e0c4f249b118e95ab10e535bae2fa1af772271d9152f3468e58d59348db56\n    ports:\n      - \"11211:11211\"\n"
  },
  {
    "path": "memcached-monitoring/docker-compose.yml",
    "content": "services:\n  memcached:\n    image: memcached:1.6@sha256:277e0c4f249b118e95ab10e535bae2fa1af772271d9152f3468e58d59348db56\n    ports:\n      - \"11211:11211\"\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - memcached\n      - prometheus\n"
  },
  {
    "path": "memcached-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "mysql-monitoring/README.md",
    "content": "# MySQL Monitoring with Grafana Alloy\n\nThis scenario demonstrates how to monitor a MySQL database using Grafana Alloy's `prometheus.exporter.mysql` component. Alloy scrapes MySQL metrics and remote-writes them to Prometheus, which Grafana queries for visualization.\n\n## Prerequisites\n\n- Docker and Docker Compose installed\n\n## Getting Started\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/mysql-monitoring\ndocker compose up -d\n```\n\n## Access Points\n\n| Service    | URL                          |\n|------------|------------------------------|\n| Grafana    | http://localhost:3000        |\n| Alloy UI   | http://localhost:12345       |\n| Prometheus | http://localhost:9090        |\n\n## What to Expect\n\nOnce the stack is running, Alloy connects to the MySQL instance and exposes metrics via the `prometheus.exporter.mysql` component. These metrics are scraped every 15 seconds and forwarded to Prometheus using remote write.\n\nOpen Grafana at http://localhost:3000, navigate to **Explore**, select the **Prometheus** datasource, and query for `mysql_` prefixed metrics (e.g., `mysql_up`, `mysql_global_status_connections`, `mysql_global_status_threads_connected`).\n\nYou can also inspect the Alloy pipeline at http://localhost:12345 to verify that the exporter, scrape, and remote write components are healthy. Live debugging is enabled for real-time pipeline inspection.\n\n## Stopping the Scenario\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "mysql-monitoring/config.alloy",
    "content": "// ###############################\n// #### Metrics Configuration ####\n// ###############################\n\n// Enable live debugging for the Alloy UI.\nlivedebugging {\n\tenabled = true\n}\n\n// Expose MySQL metrics using the prometheus.exporter.mysql component.\nprometheus.exporter.mysql \"default\" {\n\tdata_source_name = \"root:alloy@(mysql:3306)/\"\n}\n\n// Configure a prometheus.scrape component to collect MySQL metrics.\nprometheus.scrape \"mysql\" {\n\ttargets    = prometheus.exporter.mysql.default.targets\n\tforward_to = [prometheus.remote_write.default.receiver]\n\n\tscrape_interval = \"15s\"\n}\n\n// Configure a prometheus.remote_write component to send metrics to Prometheus.\nprometheus.remote_write \"default\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "mysql-monitoring/docker-compose.coda.yml",
    "content": "services:\n  mysql:\n    image: mysql:9.7@sha256:f0ef1d92fa650fcfa5b85f1d82bb1a56a6dd579bf256b8f8f2a5a0b1b61c8b0b\n    environment:\n      - MYSQL_ROOT_PASSWORD=alloy\n      - MYSQL_DATABASE=alloy\n    ports:\n      - \"3306:3306\"\n    healthcheck:\n      test: [\"CMD\", \"mysqladmin\", \"ping\", \"-h\", \"localhost\", \"-u\", \"root\", \"-palloy\"]\n      interval: 10s\n      timeout: 5s\n      retries: 5\n"
  },
  {
    "path": "mysql-monitoring/docker-compose.yml",
    "content": "services:\n  mysql:\n    image: mysql:9.7@sha256:f0ef1d92fa650fcfa5b85f1d82bb1a56a6dd579bf256b8f8f2a5a0b1b61c8b0b\n    environment:\n      - MYSQL_ROOT_PASSWORD=alloy\n      - MYSQL_DATABASE=alloy\n    ports:\n      - \"3306:3306\"\n    healthcheck:\n      test: [\"CMD\", \"mysqladmin\", \"ping\", \"-h\", \"localhost\", \"-u\", \"root\", \"-palloy\"]\n      interval: 10s\n      timeout: 5s\n      retries: 5\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      mysql:\n        condition: service_healthy\n"
  },
  {
    "path": "mysql-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "nginx-monitoring/README.md",
    "content": "# NGINX Monitoring with Grafana Alloy\n\nEnd-to-end NGINX observability with a single Alloy pipeline:\n\n- **Logs** — `loki.source.file` tails NGINX access and error logs; `loki.process` parses the combined log format and promotes `method` and `status` to labels.\n- **Metrics** — `prometheus.scrape` scrapes `nginx-prometheus-exporter` (which itself reads NGINX's built-in `stub_status`) and remote-writes to Prometheus.\n\n## Architecture\n\n- **NGINX** — the monitored web server (`/nginx_status` enabled, access/error logs written to a shared volume)\n- **nginx-prometheus-exporter** — translates `stub_status` into Prometheus metrics on `:9113`\n- **loadgen** — small `curl` loop that hits NGINX once per second so the demo has visible activity (200s and 404s)\n- **Grafana Alloy** — the pipeline above, exposed at `:12345`\n- **Loki / Prometheus / Grafana** — backends and visualization, with Loki and Prometheus datasources auto-provisioned\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root using centralized image versions\n./run-example.sh nginx-monitoring\n```\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login required)\n- **Alloy UI**: http://localhost:12345 — verify components are healthy and inspect the live data flow\n- **Prometheus**: http://localhost:9090\n- **NGINX**: http://localhost:8080 — `/` returns \"ok\", `/nginx_status` returns connection counters\n\n## Trying it out\n\nThe `loadgen` container hits NGINX once per second (alternating a 200 response and a 404). Within ~30 seconds you should see:\n\n### Logs (Loki)\n\n```logql\n# All access logs\n{job=\"nginx\", log_type=\"access\"}\n\n# Just 4xx\n{job=\"nginx\", log_type=\"access\", status=~\"4..\"}\n\n# Error log\n{job=\"nginx\", log_type=\"error\"}\n```\n\nThe combined-log regex extracts `remote_addr`, `time_local`, `method`, `path`, `status`, and `bytes_sent`. 
Of those, `method` and `status` are promoted to Loki labels for fast filtering; the rest stay in the line text.\n\n### Metrics (Prometheus)\n\n```promql\n# Active connections\nnginx_connections_active\n\n# Accepted-since-start counter (per second)\nrate(nginx_connections_accepted[1m])\n\n# Total HTTP requests\nnginx_http_requests_total\n```\n\n## Customization\n\n- **Different log format**: edit the regex in `config.alloy` under `loki.process.nginx`. The default expects NGINX's built-in `combined` format.\n- **Different exporter target**: change the `--nginx.scrape-uri` flag on `nginx-exporter` in `docker-compose.yml`.\n- **More log sources**: add entries to `local.file_match.nginx.path_targets`.\n\n## Stopping\n\n```bash\ndocker compose down -v\n```\n\nThe `-v` removes the shared `nginx-logs` volume so the next run starts with a clean log file.\n"
  },
  {
    "path": "nginx-monitoring/config.alloy",
    "content": "// NGINX Monitoring with Grafana Alloy.\n// Logs: tail access.log + error.log via loki.source.file, parse the access log\n// with a combined-format regex, and ship to Loki with method/status labels.\n// Metrics: scrape nginx-prometheus-exporter and remote_write to Prometheus.\n\nlivedebugging {\n\tenabled = true\n}\n\n// --- logs pipeline ---\n\nlocal.file_match \"nginx\" {\n\tpath_targets = [\n\t\t{\n\t\t\t__path__ = \"/var/log/nginx-data/access.log\",\n\t\t\tjob      = \"nginx\",\n\t\t\tlog_type = \"access\",\n\t\t},\n\t\t{\n\t\t\t__path__ = \"/var/log/nginx-data/error.log\",\n\t\t\tjob      = \"nginx\",\n\t\t\tlog_type = \"error\",\n\t\t},\n\t]\n\tsync_period = \"5s\"\n}\n\nloki.source.file \"nginx\" {\n\ttargets       = local.file_match.nginx.targets\n\tforward_to    = [loki.process.nginx.receiver]\n\ttail_from_end = true\n}\n\nloki.process \"nginx\" {\n\t// Extract `method` and `status` from access logs (combined format).\n\t// Error logs pass through unchanged.\n\tstage.match {\n\t\tselector = \"{log_type=\\\"access\\\"}\"\n\n\t\tstage.regex {\n\t\t\texpression = `^(?P<remote_addr>\\S+) - (?P<remote_user>\\S+) \\[(?P<time_local>[^\\]]+)\\] \"(?P<method>\\S+) (?P<path>\\S+) [^\"]+\" (?P<status>\\d+) (?P<bytes_sent>\\d+)`\n\t\t}\n\n\t\tstage.labels {\n\t\t\tvalues = {\n\t\t\t\tmethod = \"\",\n\t\t\t\tstatus = \"\",\n\t\t\t}\n\t\t}\n\t}\n\n\tforward_to = [loki.write.local.receiver]\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n\n// --- metrics pipeline ---\n\nprometheus.scrape \"nginx\" {\n\ttargets = [{\n\t\t__address__ = \"nginx-exporter:9113\",\n\t\tjob         = \"nginx\",\n\t}]\n\tforward_to      = [prometheus.remote_write.local.receiver]\n\tscrape_interval = \"15s\"\n}\n\nprometheus.remote_write \"local\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "nginx-monitoring/docker-compose.yml",
    "content": "services:\n  nginx:\n    image: nginx:${NGINX_VERSION:-1.30-alpine}\n    container_name: nginx-monitoring-nginx\n    ports:\n      - \"8080:80\"\n    volumes:\n      - ./nginx.conf:/etc/nginx/nginx.conf:ro\n      - nginx-logs:/var/log/nginx-data\n\n  nginx-exporter:\n    image: nginx/nginx-prometheus-exporter:${NGINX_EXPORTER_VERSION:-1.5.1}\n    container_name: nginx-monitoring-exporter\n    command:\n      - --nginx.scrape-uri=http://nginx:80/nginx_status\n    ports:\n      - \"9113:9113\"\n    depends_on:\n      - nginx\n\n  loadgen:\n    image: curlimages/curl:${CURL_VERSION:-8.20.0}\n    container_name: nginx-monitoring-loadgen\n    entrypoint:\n      - sh\n      - -c\n      - |\n        until curl -s -o /dev/null --max-time 2 http://nginx/; do sleep 1; done\n        while true; do\n          curl -s -o /dev/null http://nginx/\n          curl -s -o /dev/null http://nginx/missing-page\n          sleep 1\n        done\n    depends_on:\n      - nginx\n    restart: unless-stopped\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    container_name: nginx-monitoring-alloy\n    ports:\n      - \"12345:12345\"\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - nginx-logs:/var/log/nginx-data:ro\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - nginx\n      - nginx-exporter\n      - loki\n      - prometheus\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    container_name: nginx-monitoring-loki\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    container_name: nginx-monitoring-prometheus\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    
volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    container_name: nginx-monitoring-grafana\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Prometheus\n          type: prometheus\n          access: proxy\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\nvolumes:\n  nginx-logs:\n"
  },
  {
    "path": "nginx-monitoring/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n    - from: 2020-05-15\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 5m\n"
  },
  {
    "path": "nginx-monitoring/nginx.conf",
    "content": "worker_processes 1;\nevents {\n    worker_connections 1024;\n}\n\nhttp {\n    # Use the nginx built-in \"combined\" log format:\n    # '$remote_addr - $remote_user [$time_local] \"$request\" '\n    # '$status $body_bytes_sent \"$http_referer\" \"$http_user_agent\"'\n    #\n    # Write to a fresh path outside /var/log/nginx — that directory\n    # has access.log/error.log pre-symlinked to /dev/stdout in the\n    # nginx image, which Alloy's tailer cannot follow across containers.\n    access_log /var/log/nginx-data/access.log combined;\n    error_log  /var/log/nginx-data/error.log warn;\n\n    server {\n        listen 80;\n        server_name _;\n\n        location = / {\n            add_header Content-Type text/plain;\n            return 200 \"ok\\n\";\n        }\n\n        location = /missing-page {\n            return 404;\n        }\n\n        location /nginx_status {\n            stub_status on;\n            access_log off;\n        }\n    }\n}\n"
  },
  {
    "path": "nginx-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "otel-basic-tracing/README.md",
    "content": "# OpenTelemetry Basic Tracing with Grafana Alloy\n\nThis example demonstrates how to collect and visualize OpenTelemetry traces using Grafana Alloy and Tempo.\n\n## Overview\n\nThe example includes:\n\n- A sample Python Flask application that generates various types of traces\n- Grafana Alloy as the telemetry pipeline\n- Tempo for trace storage and querying\n- Prometheus for metrics collection (service graphs)\n- Grafana for visualization\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd otel-basic-tracing\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n   \n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh otel-basic-tracing\n   ```\n\n4. Access the demo application at http://localhost:8080\n5. Access Grafana at http://localhost:3000\n6. Access Prometheus at http://localhost:9090\n\n## What to Expect\n\nThe demo application provides several endpoints that generate different types of traces:\n\n- **/simple**: Generates a simple trace with a single span\n- **/nested**: Generates a trace with nested spans (parent-child relationships)\n- **/error**: Generates a trace that includes an error\n- **/chain**: Simulates a chain of service calls to demonstrate distributed tracing\n\nAfter accessing these endpoints, you can view the traces in Grafana by:\n\n1. Opening http://localhost:3000\n2. Navigating to Explore\n3. Selecting the Tempo data source\n4. Using the Search tab to find and visualize traces\n\n## Service Graphs\n\nThis example includes service graph visualization capabilities. As you generate traces with the demo app (especially with the `/chain` endpoint), Tempo will generate service graph metrics that are sent to Prometheus.\n\nTo view the service graph:\n\n1. Open Grafana (http://localhost:3000)\n2. Navigate to Explore\n3. 
Select the Tempo data source\n4. Click on the \"Service Graph\" tab\n5. You should see a visual representation of the relationships between services\n\n## Architecture\n\n```\n┌────────────┐     ┌──────────┐      ┌───────┐      ┌─────────┐\n│ Demo App   │────▶│ Alloy    │─────▶│ Tempo │─────▶│ Grafana │\n│ (OTel SDK) │     │          │      │       │      │         │\n└────────────┘     └──────────┘      └───┬───┘      └─────────┘\n                                         │                ▲\n                                         ▼                │\n                                    ┌──────────┐          │\n                                    │Prometheus│──────────┘\n                                    └──────────┘\n```\n\nThe Demo App generates traces using the OpenTelemetry SDK and sends them to Alloy, which processes and forwards them to Tempo. Tempo generates service graph metrics and sends them to Prometheus. Grafana queries both Tempo and Prometheus to visualize traces and service graphs.\n\n## Customizing\n\nThe Alloy configuration is a simple placeholder. You can modify `config.alloy` to add processors, filters, or additional exporters to demonstrate more complex telemetry pipelines. "
  },
  {
    "path": "otel-basic-tracing/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\n\nCOPY app.py .\n\nCMD [\"python\", \"app.py\"] "
  },
  {
    "path": "otel-basic-tracing/app/app.py",
    "content": "import os\nimport random\nimport time\nfrom flask import Flask, request\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\nimport requests\n\n# Configure the tracer\nresource = Resource.create(attributes={\n    SERVICE_NAME: \"trace-demo\"\n})\ntrace.set_tracer_provider(TracerProvider(resource=resource))\n\n# Configure the OTLP exporter using environment variables\n# OTEL_EXPORTER_OTLP_ENDPOINT will be used automatically\notlp_exporter = OTLPSpanExporter(endpoint=\"http://alloy:4317/v1/traces\", insecure=True)\nspan_processor = BatchSpanProcessor(span_exporter=otlp_exporter, max_export_batch_size=1)\ntrace.get_tracer_provider().add_span_processor(span_processor)\n\n# Create a tracer\ntracer = trace.get_tracer(__name__)\n\n# Create a Flask application\napp = Flask(__name__)\n\n# Instrument Flask\nFlaskInstrumentor().instrument_app(app)\n\n# Instrument requests\nRequestsInstrumentor().instrument()\n\n@app.route('/')\ndef home():\n    return \"\"\"\n    <h1>OpenTelemetry Demo</h1>\n    <p>This app demonstrates OpenTelemetry tracing with Grafana Alloy.</p>\n    <ul>\n        <li><a href=\"/simple\">Simple Trace</a></li>\n        <li><a href=\"/nested\">Nested Trace</a></li>\n        <li><a href=\"/error\">Error Trace</a></li>\n        <li><a href=\"/chain\">Chain of Services</a></li>\n        <li><a href=\"/delayed-chain\">Delayed Chain (with Service D having high latency)</a></li>\n    </ul>\n    \"\"\"\n\n@app.route('/simple')\ndef simple_trace():\n    with tracer.start_as_current_span(\"simple-operation\") as span:\n        span.set_attribute(\"operation.type\", 
\"simple\")\n        span.set_attribute(\"operation.value\", random.randint(1, 100))\n        time.sleep(0.1)  # Simulate work\n        return {\"status\": \"ok\", \"message\": \"Simple trace generated\"}\n\n@app.route('/nested')\ndef nested_trace():\n    with tracer.start_as_current_span(\"parent-operation\") as parent:\n        parent.set_attribute(\"operation.type\", \"parent\")\n        time.sleep(0.05)  # Simulate work\n        \n        with tracer.start_as_current_span(\"child-operation-1\") as child1:\n            child1.set_attribute(\"operation.type\", \"child\")\n            child1.set_attribute(\"child.number\", 1)\n            time.sleep(0.05)  # Simulate work\n            \n        with tracer.start_as_current_span(\"child-operation-2\") as child2:\n            child2.set_attribute(\"operation.type\", \"child\")\n            child2.set_attribute(\"child.number\", 2)\n            time.sleep(0.05)  # Simulate work\n            \n            with tracer.start_as_current_span(\"grandchild-operation\") as grandchild:\n                grandchild.set_attribute(\"operation.type\", \"grandchild\")\n                time.sleep(0.05)  # Simulate work\n                \n        return {\"status\": \"ok\", \"message\": \"Nested trace generated\"}\n\n@app.route('/error')\ndef error_trace():\n    with tracer.start_as_current_span(\"error-operation\") as span:\n        span.set_attribute(\"operation.type\", \"error\")\n        try:\n            # Simulate an error\n            result = 1 / 0\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Error trace generated\"}\n\n@app.route('/chain')\ndef chain_trace():\n    with tracer.start_as_current_span(\"chain-root\") as span:\n        span.set_attribute(\"operation.step\", \"start\")\n        \n        # Simulate a chain of service calls\n        try:\n            # Call ourselves to 
simulate microservice calls\n            # In a real world example these would be different services\n            service_b_url = f\"http://localhost:8080/service/b?id={random.randint(1000, 9999)}\"\n            response = requests.get(service_b_url)\n            return {\"status\": \"ok\", \"message\": \"Chain trace generated\", \"data\": response.json()}\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Failed to complete chain\"}\n\n@app.route('/service/b')\ndef service_b():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(f\"service-b-handler\") as span:\n        span.set_attribute(\"service\", \"B\")\n        span.set_attribute(\"request.id\", req_id)\n        time.sleep(0.1)  # Simulate work\n        \n        # Call service C\n        service_c_url = f\"http://localhost:8080/service/c?id={req_id}\"\n        response = requests.get(service_c_url)\n        return {\"status\": \"ok\", \"message\": \"Service B completed\", \"data\": response.json()}\n\n@app.route('/service/c')\ndef service_c():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(f\"service-c-handler\") as span:\n        span.set_attribute(\"service\", \"C\")\n        span.set_attribute(\"request.id\", req_id)\n        time.sleep(0.15)  # Simulate work\n        \n        # Randomly fail sometimes to show error traces\n        if random.random() < 0.2:  # 20% chance of failure\n            span.set_status(trace.StatusCode.ERROR, \"Random failure\")\n            return {\"status\": \"error\", \"message\": \"Service C failed randomly\"}\n        \n        return {\"status\": \"ok\", \"message\": \"Service C completed successfully\"}\n\n# New delayed chain implementation\n@app.route('/delayed-chain')\ndef delayed_chain_trace():\n    with tracer.start_as_current_span(\"delayed-chain-root\") 
as span:\n        span.set_attribute(\"operation.step\", \"start\")\n        span.set_attribute(\"operation.type\", \"delayed-chain\")\n        \n        try:\n            # Start the chain with Service A\n            service_a_url = f\"http://localhost:8080/delayed/service-a?id={random.randint(1000, 9999)}\"\n            response = requests.get(service_a_url)\n            return {\n                \"status\": \"ok\", \n                \"message\": \"Delayed chain trace generated\", \n                \"data\": response.json()\n            }\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Failed to complete delayed chain\"}\n\n@app.route('/delayed/service-a')\ndef delayed_service_a():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-a-handler\") as span:\n        span.set_attribute(\"service\", \"A\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        time.sleep(0.1)  # Normal latency\n        \n        # Call service B\n        service_b_url = f\"http://localhost:8080/delayed/service-b?id={req_id}\"\n        response = requests.get(service_b_url)\n        return {\"status\": \"ok\", \"message\": \"Service A completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-b')\ndef delayed_service_b():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-b-handler\") as span:\n        span.set_attribute(\"service\", \"B\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        time.sleep(0.15)  # Normal latency\n        \n        # Call service C\n        service_c_url = f\"http://localhost:8080/delayed/service-c?id={req_id}\"\n        response = requests.get(service_c_url)\n        return 
{\"status\": \"ok\", \"message\": \"Service B completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-c')\ndef delayed_service_c():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-c-handler\") as span:\n        span.set_attribute(\"service\", \"C\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        time.sleep(0.2)  # Normal latency\n        \n        # Call the slow service D\n        service_d_url = f\"http://localhost:8080/delayed/service-d?id={req_id}\"\n        response = requests.get(service_d_url)\n        return {\"status\": \"ok\", \"message\": \"Service C completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-d')\ndef delayed_service_d():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-d-handler\") as span:\n        span.set_attribute(\"service\", \"D\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"high\")\n        span.set_attribute(\"latency.category\", \"bottleneck\")\n        \n        # This service consistently has high latency (3-4 seconds)\n        delay = random.uniform(3.0, 4.0)\n        span.set_attribute(\"latency.seconds\", delay)\n        time.sleep(delay)  # High latency\n        \n        # Call final service E\n        service_e_url = f\"http://localhost:8080/delayed/service-e?id={req_id}\"\n        response = requests.get(service_e_url)\n        return {\"status\": \"ok\", \"message\": \"Service D completed (with delay)\", \"data\": response.json()}\n\n@app.route('/delayed/service-e')\ndef delayed_service_e():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-e-handler\") as span:\n        span.set_attribute(\"service\", \"E\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", 
\"normal\")\n        time.sleep(0.1)  # Normal latency\n        \n        return {\"status\": \"ok\", \"message\": \"Service E completed (chain end)\"}\n\nif __name__ == '__main__':\n    app.run(host='0.0.0.0', port=8080) "
  },
  {
    "path": "otel-basic-tracing/app/requirements.txt",
    "content": "flask\nrequests\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests"
  },
  {
    "path": "otel-basic-tracing/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for OpenTelemetry Trace Collection\n#\n# This is the OTel-native equivalent of config.alloy for use with the Alloy OTel Engine.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [otlp/tempo]\n"
  },
  {
    "path": "otel-basic-tracing/config.alloy",
    "content": "/*\n * Alloy Configuration for OpenTelemetry Trace Collection\n */\n\notelcol.receiver.otlp \"default\" {\n  http {}\n  grpc {}\n\n  output {\n    traces = [otelcol.processor.batch.default.input]\n  }\n}\n\notelcol.processor.batch \"default\" {\n  output {\n    traces = [otelcol.exporter.otlp.tempo.input]\n  }\n}\n\notelcol.exporter.otlp \"tempo\" {\n  client {\n    endpoint = \"tempo:4317\"\n    tls {\n\t\t\tinsecure = true\n\t\t}\n  }\n} "
  },
  {
    "path": "otel-basic-tracing/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the River/HCL config.alloy file.\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n      - 4317:4317      # OTLP gRPC\n      - 4318:4318      # OTLP HTTP\n\n  # Override demo-app endpoint to use standard OTLP gRPC port\n  demo-app:\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo\n"
  },
  {
    "path": "otel-basic-tracing/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:12345\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo\n"
  },
  {
    "path": "otel-basic-tracing/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --enable-feature=native-histograms\n      - --enable-feature=exemplar-storage\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for tracing\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp    # tempo\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n    depends_on:\n      - prometheus\n  \n  memcached:\n    image: memcached:1.6.40@sha256:572b011ce33954ee809066d8cecbeb3ec98912109ee3be3663a3197425fd81ac\n    container_name: memcached\n    ports:\n      - \"11211:11211\"\n    environment:\n      - MEMCACHED_MAX_MEMORY=64m  # Set the maximum memory usage\n      - MEMCACHED_THREADS=4       # Number of threads to use\n\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n      - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-exploretraces-app/grafana-exploretraces-app-latest.zip;grafana-traces-app\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo\n          
type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Alloy for telemetry pipeline\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n\n  # Demo app that generates OpenTelemetry traces\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:12345\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo "
  },
  {
    "path": "otel-basic-tracing/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "otel-basic-tracing/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\n\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search  \n    memcached: \n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h                # maximum duration of a metrics query, increase for local setups\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:                           # this configuration will listen on all ports and protocols that tempo is capable of.\n    jaeger:                            # the receivers all come from the OpenTelemetry collector.  more configuration information can\n      protocols:                       # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver\n        thrift_http:                   #\n          endpoint: \"tempo:14268\"      # for a production deployment you should only enable the receivers you need!\n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m               # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally\n\ncompactor:\n  compaction:\n    block_retention: 720h                # overall Tempo trace retention. 
set for demo purposes\n\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local                     # backend configuration to use\n    wal:\n      path: /var/tempo/wal             # where to store the wal locally\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator\n      generate_native_histograms: both\n      "
  },
  {
    "path": "otel-examples/README.md",
    "content": "# OTel Engine Examples\n\nThese scenarios use the **Alloy OTel Engine** -- an experimental feature introduced in Alloy v1.14 that lets you run standard [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) YAML configurations directly inside Alloy. Instead of writing Alloy's River/HCL syntax, you use the exact same YAML format that the upstream OTel Collector uses.\n\n## What is the Alloy OTel Engine?\n\nGrafana Alloy has traditionally used its own **River** configuration language (HCL-like syntax in `config.alloy` files). Starting with v1.14, Alloy ships an experimental **OTel Engine** that accepts standard OTel Collector YAML. This means:\n\n- **No new language to learn** -- if you already know OTel Collector config, you can use Alloy directly\n- **Copy-paste from upstream docs** -- OTel Collector examples work as-is\n- **Migration path** -- move from vanilla OTel Collector to Alloy without rewriting configs\n- **Best of both worlds** -- Alloy's single-binary distribution with OTel Collector's YAML config\n\nThe OTel Engine is started with:\n\n```bash\nalloy otel --config=<CONFIG_FILE>\n```\n\nYou can validate configs before running:\n\n```bash\nalloy otel validate --config=<CONFIG_FILE>\n```\n\n## Running These Examples\n\nEach scenario has a `docker-compose.yml` with the full stack:\n\n```bash\ncd <scenario-dir> && docker compose up -d\n```\n\nOr from the repo root with centralized image versions:\n\n```bash\ncd otel-examples/<scenario-dir> && docker compose --env-file ../../image-versions.env up -d\n```\n\n### Access the stack\n\n- **Grafana**: [http://localhost:3000](http://localhost:3000) (no login required)\n- **Alloy UI**: [http://localhost:12345](http://localhost:12345) (pipeline debugging UI, enabled via the `alloyengine` extension)\n\n### Stop\n\n```bash\ndocker compose down\n```\n\n## Scenarios\n\n| Scenario | Description | Key OTel Components |\n|----------|-------------|-------------------|\n| 
[filelog-processing](filelog-processing/) | Collect and parse mixed-format log files (JSON + plaintext) using the filelog receiver's operator chain | `filelog` receiver, `json_parser`, `regex_parser`, `severity_parser` operators |\n| [pii-redaction](pii-redaction/) | Scrub credit cards, emails, and IP addresses from traces and logs using OTTL `replace_pattern` | `transform` processor (OTTL) |\n| [routing-multi-tenant](routing-multi-tenant/) | Route logs to different Loki tenants based on resource attributes using fan-out + filter | `forward` connector, `filter` processor, `resource` processor |\n| [cost-control](cost-control/) | Drop health checks, filter debug logs, and apply head-based sampling to reduce telemetry volume | `filter` processor, `probabilistic_sampler` processor |\n| [resource-enrichment](resource-enrichment/) | Auto-discover and attach host/OS/Docker metadata to all telemetry signals | `resourcedetection` processor (env, system, docker) |\n| [count-connector](count-connector/) | Derive count metrics (request rate, error rate) from traces and logs | `count` connector |\n| [ottl-transform](ottl-transform/) | A cookbook of OTTL patterns: JSON parsing, severity mapping, attribute promotion, truncation | `transform` processor (OTTL) |\n| [host-metrics](host-metrics/) | Collect CPU, memory, disk, network metrics -- an OTel-native replacement for node_exporter | `hostmetrics` receiver |\n| [multi-pipeline-fanout](multi-pipeline-fanout/) | Send traces to two backends with different processing per destination (full vs. sampled) | `forward` connector, `probabilistic_sampler` processor |\n| [kafka-buffer](kafka-buffer/) | Buffer traces through Kafka for durability and backpressure handling | `kafka` receiver/exporter |\n\n## Alloy UI and the `alloyengine` Extension\n\nEach scenario includes an `alloyengine` extension in `config-otel.yaml` that starts the Alloy River UI alongside the OTel pipeline. 
This gives you the visual pipeline debugging UI at [http://localhost:12345](http://localhost:12345).\n\nIf you prefer a pure OTel Collector config without the Alloy UI, you can remove the `alloyengine` extension block and the `extensions: [alloyengine]` line from the `service:` section in `config-otel.yaml`, and drop the `config.alloy` volume mount from `docker-compose.yml`. The OTel pipeline will work identically -- you just won't have the UI.\n\n## OTel Engine vs. River Configs\n\nFor comparison, the parent repo's existing scenarios (e.g., `otel-basic-tracing/`, `otel-span-metrics/`) also have OTel YAML alternatives alongside their River configs. Run those with:\n\n```bash\ndocker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n```\n\n## Available Connectors\n\nThe Alloy OTel Engine supports these connectors: `count`, `grafanacloud`, `servicegraph`, `spanmetrics`, `forward`.\n\n## Further Reading\n\n- [Alloy OTel Engine Documentation](https://grafana.com/docs/alloy/latest/set-up/otel_engine/)\n- [OpenTelemetry Collector Configuration](https://opentelemetry.io/docs/collector/configuration/)\n- [OTTL (OpenTelemetry Transformation Language)](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl)\n"
  },
  {
    "path": "otel-examples/cost-control/README.md",
    "content": "# Telemetry Cost Control\n\nReduce observability costs by filtering noisy telemetry and applying probabilistic sampling in the Alloy OTel pipeline, before data reaches your backends.\n\n## What This Demonstrates\n\n- **Filter processor** to drop unwanted spans (health checks, readiness probes, metrics endpoints)\n- **Filter processor** to drop low-severity logs (DEBUG level)\n- **Probabilistic sampler** for head-based trace sampling (keeps 25% of remaining traces)\n- **Transform processor** to strip high-cardinality attributes (`http.user_agent`, cookies) that inflate storage\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000).\n\n### Verify filtering is working\n\n1. **Traces (Tempo):** Go to Explore > Tempo. Search for traces from `cost-control-demo`. You should see `/api/order` and `/api/error` spans but **no** `/health`, `/ready`, or `/metrics` spans -- those are dropped by the filter processor.\n\n2. **Logs (Loki):** Go to Explore > Loki. Query `{service_name=\"cost-control-demo\"}`. You should see INFO and ERROR logs but **no** DEBUG logs.\n\n3. **Sampling:** Only ~25% of the remaining (non-filtered) traces make it through. 
Compare the demo app's request rate with the trace count in Tempo to see the reduction.\n\n### Sample Loki query\n\n```logql\n{service_name=\"cost-control-demo\"} | json\n```\n\n### Check the Alloy OTel pipeline\n\nVisit the Alloy OTel HTTP server at [http://localhost:8888](http://localhost:8888).\n\n## Key Configuration\n\nThe `config-otel.yaml` pipeline applies four cost-control stages:\n\n1. **`filter/traces`** -- Drops spans where `http.target` or `http.route` matches `/health`, `/ready`, or `/metrics`. These high-frequency probes generate enormous trace volume with no diagnostic value.\n\n2. **`filter/logs`** -- Drops log records with `severity_number < 9` (below INFO). DEBUG logs are useful in development but costly at scale.\n\n3. **`probabilistic_sampler`** -- Keeps 25% of remaining traces via consistent head-based sampling. Adjust `sampling_percentage` to trade off between cost and visibility.\n\n4. **`transform/strip`** -- Removes `http.user_agent` and `http.request.header.cookie` attributes from spans. These high-cardinality fields consume significant index and storage space.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/cost-control/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/cost-control/app/app.py",
    "content": "\"\"\"\nDemo Flask app for the cost-control scenario.\n\nGenerates a noisy mix of telemetry: frequent health/ready checks, DEBUG logs,\nand occasional real business traces. The Alloy OTel pipeline filters out the\nnoise using filter processors and probabilistic sampling.\n\"\"\"\n\nimport logging\nimport random\nimport threading\nimport time\n\nfrom flask import Flask, jsonify\nfrom opentelemetry import trace\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.sdk.resources import Resource\nfrom opentelemetry.trace import StatusCode\n\n# --- OTel Setup ---\nresource = Resource.create({\n    \"service.name\": \"cost-control-demo\",\n    \"service.version\": \"1.0.0\",\n})\n\n# Traces\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(\n    BatchSpanProcessor(OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True))\n)\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(__name__)\n\n# Logs via OTel\nlogger_provider = LoggerProvider(resource=resource)\nlogger_provider.add_log_record_processor(\n    BatchLogRecordProcessor(OTLPLogExporter(endpoint=\"alloy:4317\", insecure=True))\n)\nhandler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider)\nlogging.basicConfig(level=logging.DEBUG)\nlogger = logging.getLogger(\"cost-control-demo\")\nlogger.addHandler(handler)\n\n# --- Flask App ---\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\n\n\n@app.route(\"/health\")\ndef health():\n    \"\"\"Noisy health check endpoint - called 
very frequently.\"\"\"\n    logger.debug(\"Health check OK\")\n    return jsonify({\"status\": \"healthy\"})\n\n\n@app.route(\"/ready\")\ndef ready():\n    \"\"\"Noisy readiness probe endpoint.\"\"\"\n    logger.debug(\"Readiness check OK\")\n    return jsonify({\"status\": \"ready\"})\n\n\n@app.route(\"/api/order\")\ndef order():\n    \"\"\"Real business endpoint that produces useful traces.\"\"\"\n    with tracer.start_as_current_span(\"process-order\") as span:\n        order_id = f\"ORD-{random.randint(1000, 9999)}\"\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"order.amount\", round(random.uniform(10.0, 500.0), 2))\n        span.set_attribute(\"customer.tier\", random.choice([\"gold\", \"silver\", \"bronze\"]))\n\n        # Simulate processing time\n        time.sleep(random.uniform(0.05, 0.2))\n\n        logger.info(\"Order %s processed successfully\", order_id)\n        return jsonify({\"order_id\": order_id, \"status\": \"completed\"})\n\n\n@app.route(\"/api/error\")\ndef error():\n    \"\"\"Endpoint that occasionally generates errors.\"\"\"\n    with tracer.start_as_current_span(\"handle-error\") as span:\n        error_code = random.choice([\"TIMEOUT\", \"INVALID_INPUT\", \"DB_ERROR\"])\n        span.set_attribute(\"error.code\", error_code)\n        span.set_status(StatusCode.ERROR, f\"Simulated error: {error_code}\")\n        span.record_exception(Exception(f\"Simulated {error_code}\"))\n\n        logger.error(\"Request failed with error: %s\", error_code)\n        return jsonify({\"error\": error_code}), 500\n\n\ndef load_generator():\n    \"\"\"Background thread that generates traffic with a noisy distribution.\"\"\"\n    import requests\n\n    base_url = \"http://localhost:8080\"\n    # Wait for Flask to start\n    time.sleep(5)\n\n    while True:\n        r = random.random()\n        try:\n            if r < 0.70:\n                requests.get(f\"{base_url}/health\", timeout=2)\n            elif r < 0.80:\n        
        requests.get(f\"{base_url}/ready\", timeout=2)\n            elif r < 0.95:\n                requests.get(f\"{base_url}/api/order\", timeout=2)\n            else:\n                requests.get(f\"{base_url}/api/error\", timeout=2)\n        except Exception:\n            pass\n\n        # Also emit frequent DEBUG logs (noise)\n        logger.debug(\"Background tick at %s\", time.time())\n        time.sleep(random.uniform(0.2, 1.0))\n\n\nif __name__ == \"__main__\":\n    thread = threading.Thread(target=load_generator, daemon=True)\n    thread.start()\n    app.run(host=\"0.0.0.0\", port=8080)\n"
  },
  {
    "path": "otel-examples/cost-control/app/requirements.txt",
    "content": "flask\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\n"
  },
  {
    "path": "otel-examples/cost-control/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Telemetry Cost Control\n#\n# Demonstrates using filter and probabilistic_sampler processors\n# to drop noisy telemetry (health checks, debug logs) and apply\n# head-based sampling to reduce observability costs.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  # Drop health check and readiness probe spans\n  filter/traces:\n    error_mode: ignore\n    traces:\n      span:\n        - attributes[\"http.target\"] == \"/health\"\n        - attributes[\"http.target\"] == \"/ready\"\n        - attributes[\"http.target\"] == \"/metrics\"\n        - attributes[\"http.route\"] == \"/health\"\n        - attributes[\"http.route\"] == \"/ready\"\n\n  # Drop DEBUG-level logs\n  filter/logs:\n    error_mode: ignore\n    logs:\n      log_record:\n        - severity_number < 9\n\n  # Head-based probabilistic sampling: keep 25% of remaining traces\n  probabilistic_sampler:\n    sampling_percentage: 25\n\n  # Strip high-cardinality attributes to reduce storage\n  transform/strip:\n    error_mode: ignore\n    trace_statements:\n      - context: span\n        statements:\n          - delete_key(attributes, \"http.user_agent\")\n          - delete_key(attributes, \"http.request.header.cookie\")\n\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  otlphttp/loki:\n    endpoint: http://loki:3100/otlp\n\n  debug:\n    verbosity: basic\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [filter/traces, probabilistic_sampler, transform/strip, batch]\n      exporters: [otlp/tempo]\n    logs:\n      receivers: [otlp]\n      processors: [filter/logs, batch]\n      exporters: [otlphttp/loki, debug]\n"
  },
  {
    "path": "otel-examples/cost-control/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/cost-control/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n      - OTEL_SERVICE_NAME=cost-control-demo\n"
  },
  {
    "path": "otel-examples/cost-control/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Loki for log storage\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    command: -config.file=/etc/loki/local-config.yaml\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n\n  # Tempo for trace storage\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n    depends_on:\n      - loki\n      - tempo\n\n  # Alloy in OTel engine mode\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    command: otel --config=/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888       # OTel engine HTTP server\n      - 4317:4317       # OTLP gRPC\n      - 4318:4318       # OTLP HTTP\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - 
./config.alloy:/etc/alloy/config.alloy\n    depends_on:\n      - loki\n      - tempo\n\n  # Demo app that generates noisy telemetry\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=cost-control-demo\n    depends_on:\n      - alloy\n"
  },
  {
    "path": "otel-examples/cost-control/loki-config.yaml",
    "content": "auth_enabled: false\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: info\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\nlimits_config:\n  metric_aggregation_enabled: true\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\nruler:\n  alertmanager_url: http://localhost:9093\nfrontend:\n  encoding: protobuf\n"
  },
  {
    "path": "otel-examples/cost-control/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\noverrides:\n  defaults: {}\n"
  },
  {
    "path": "otel-examples/count-connector/README.md",
    "content": "# Count Connector (Derive Metrics from Signals)\n\nUse the OTel count connector to automatically derive count metrics from traces and logs -- the \"metrics from signals\" pattern -- without additional instrumentation.\n\n## What This Demonstrates\n\n- **Count connector** deriving metrics from trace spans and log records\n- Generating error rate metrics (`span.error.count`, `log.error.count`) from signal status codes\n- Generating volume metrics (`span.count`, `log.count`) for throughput monitoring\n- Routing derived metrics to Prometheus while original signals go to Tempo and Loki\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000).\n\n### View derived metrics in Prometheus\n\nGo to Explore > Prometheus and query the following metrics:\n\n```promql\n# Total span count (rate per second)\nrate(span_count_total[5m])\n\n# Error span count (rate per second)\nrate(span_error_count_total[5m])\n\n# Error rate as a percentage\nrate(span_error_count_total[5m]) / rate(span_count_total[5m]) * 100\n\n# Total log record count\nrate(log_count_total[5m])\n\n# Error log count\nrate(log_error_count_total[5m])\n```\n\n### View original traces in Tempo\n\nGo to Explore > Tempo and search for `count-connector-demo` traces. 
You will see both successful (OK) and error traces.\n\n### View original logs in Loki\n\nGo to Explore > Loki and query:\n\n```logql\n{service_name=\"count-connector-demo\"} | json\n```\n\n### Check the Alloy OTel pipeline\n\nVisit the Alloy OTel HTTP server at [http://localhost:8888](http://localhost:8888).\n\n## Key Configuration\n\nThe `config-otel.yaml` pipeline uses the **count connector** to bridge signals:\n\n1. **`connectors/count`** -- Defines four derived metrics:\n   - `span.count` -- Total number of spans received\n   - `span.error.count` -- Spans where `status.code == 2` (ERROR)\n   - `log.count` -- Total number of log records received\n   - `log.error.count` -- Logs where `severity_number >= 17` (ERROR and above)\n\n2. **Pipeline wiring:**\n   - `traces` pipeline: receives OTLP, exports to both `count` connector and `otlp/tempo`\n   - `logs` pipeline: receives OTLP, exports to both `count` connector and `otlphttp/loki`\n   - `metrics` pipeline: receives from `count` connector, exports to `otlphttp/prometheus`\n\nThe count connector acts as both an exporter (in the traces/logs pipelines) and a receiver (in the metrics pipeline), bridging signals without any application changes.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/count-connector/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/count-connector/app/app.py",
    "content": "\"\"\"\nDemo Flask app for the count-connector scenario.\n\nGenerates a mix of successful and error traces plus log records at various\nseverity levels. The Alloy OTel pipeline uses the count connector to derive\nmetrics (span.count, span.error.count, log.count, log.error.count) from\nthese signals.\n\"\"\"\n\nimport logging\nimport random\nimport threading\nimport time\n\nfrom flask import Flask, jsonify\nfrom opentelemetry import trace\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.sdk.resources import Resource\nfrom opentelemetry.trace import StatusCode\n\n# --- OTel Setup ---\nresource = Resource.create({\n    \"service.name\": \"count-connector-demo\",\n    \"service.version\": \"1.0.0\",\n})\n\n# Traces\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(\n    BatchSpanProcessor(OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True))\n)\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(__name__)\n\n# Logs via OTel\nlogger_provider = LoggerProvider(resource=resource)\nlogger_provider.add_log_record_processor(\n    BatchLogRecordProcessor(OTLPLogExporter(endpoint=\"alloy:4317\", insecure=True))\n)\nhandler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider)\nlogging.basicConfig(level=logging.DEBUG)\nlogger = logging.getLogger(\"count-connector-demo\")\nlogger.addHandler(handler)\n\n# --- Flask App ---\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\n\n\n@app.route(\"/api/process\")\ndef process():\n    
\"\"\"Simulates a processing request. ~80% success, ~20% error.\"\"\"\n    with tracer.start_as_current_span(\"process-request\") as span:\n        request_id = f\"REQ-{random.randint(1000, 9999)}\"\n        span.set_attribute(\"request.id\", request_id)\n\n        time.sleep(random.uniform(0.02, 0.15))\n\n        if random.random() < 0.20:\n            error_type = random.choice([\"ValidationError\", \"TimeoutError\", \"DatabaseError\"])\n            span.set_status(StatusCode.ERROR, f\"Simulated {error_type}\")\n            span.set_attribute(\"error.type\", error_type)\n            span.record_exception(Exception(f\"Simulated {error_type}\"))\n            logger.error(\"Request %s failed: %s\", request_id, error_type)\n            return jsonify({\"request_id\": request_id, \"error\": error_type}), 500\n\n        logger.info(\"Request %s processed successfully\", request_id)\n        return jsonify({\"request_id\": request_id, \"status\": \"ok\"})\n\n\n@app.route(\"/api/notify\")\ndef notify():\n    \"\"\"Simulates sending a notification.\"\"\"\n    with tracer.start_as_current_span(\"send-notification\") as span:\n        channel = random.choice([\"email\", \"sms\", \"push\"])\n        span.set_attribute(\"notification.channel\", channel)\n\n        time.sleep(random.uniform(0.01, 0.1))\n\n        if random.random() < 0.10:\n            span.set_status(StatusCode.ERROR, \"Notification delivery failed\")\n            logger.error(\"Notification via %s failed\", channel)\n            return jsonify({\"channel\": channel, \"status\": \"failed\"}), 500\n\n        logger.info(\"Notification sent via %s\", channel)\n        return jsonify({\"channel\": channel, \"status\": \"sent\"})\n\n\n@app.route(\"/health\")\ndef health():\n    return jsonify({\"status\": \"healthy\"})\n\n\ndef load_generator():\n    \"\"\"Background thread generating continuous traffic every 2 seconds.\"\"\"\n    import requests\n\n    base_url = \"http://localhost:8080\"\n    time.sleep(5)\n\n  
  while True:\n        try:\n            endpoint = random.choice([\"/api/process\", \"/api/process\", \"/api/notify\"])\n            requests.get(f\"{base_url}{endpoint}\", timeout=5)\n        except Exception:\n            pass\n\n        # Also emit some standalone log records\n        severity = random.choices(\n            [\"info\", \"warn\", \"error\"],\n            weights=[60, 25, 15],\n            k=1,\n        )[0]\n        if severity == \"info\":\n            logger.info(\"Background task check - all systems normal\")\n        elif severity == \"warn\":\n            logger.warning(\"Background task check - queue depth elevated\")\n        else:\n            logger.error(\"Background task check - connectivity issue detected\")\n\n        time.sleep(2)\n\n\nif __name__ == \"__main__\":\n    thread = threading.Thread(target=load_generator, daemon=True)\n    thread.start()\n    app.run(host=\"0.0.0.0\", port=8080)\n"
  },
  {
    "path": "otel-examples/count-connector/app/requirements.txt",
    "content": "flask\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\n"
  },
  {
    "path": "otel-examples/count-connector/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Count Connector (Derive Metrics from Signals)\n#\n# Demonstrates using the count connector to derive count metrics\n# from traces and logs: error rates, request counts per service,\n# log volume by severity -- \"metrics from signals\" pattern.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nconnectors:\n  count:\n    spans:\n      span.count:\n        description: Total number of spans received\n      span.error.count:\n        description: Number of error spans\n        conditions:\n          - status.code == 2\n    logs:\n      log.count:\n        description: Total number of log records\n      log.error.count:\n        description: Number of error log records\n        conditions:\n          - severity_number >= 17\n\nprocessors:\n  batch: {}\n\n  # Convert delta temporality (from count connector) to cumulative for Prometheus\n  deltatocumulative: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  otlphttp/loki:\n    endpoint: http://loki:3100/otlp\n\n  otlphttp/prometheus:\n    endpoint: http://prometheus:9090/api/v1/otlp\n    tls:\n      insecure: true\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    # Ingest traces and forward to count connector + Tempo\n    traces:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [count, otlp/tempo]\n    # Ingest logs and forward to count connector + Loki\n    logs:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [count, otlphttp/loki]\n    # Export derived count metrics to Prometheus\n    metrics:\n      receivers: [count]\n      processors: [deltatocumulative, batch]\n      exporters: [otlphttp/prometheus]\n"
  },
  {
    "path": "otel-examples/count-connector/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/count-connector/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n      - OTEL_SERVICE_NAME=count-connector-demo\n"
  },
  {
    "path": "otel-examples/count-connector/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Loki for log storage\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    command: -config.file=/etc/loki/local-config.yaml\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n\n  # Prometheus for metrics storage (receives derived count metrics via OTLP)\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for trace storage\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: 
proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - loki\n      - prometheus\n      - tempo\n\n  # Alloy in OTel engine mode\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    command: otel --config=/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888       # OTel engine HTTP server\n      - 4317:4317       # OTLP gRPC\n      - 4318:4318       # OTLP HTTP\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n    depends_on:\n      - loki\n      - prometheus\n      - tempo\n\n  # Demo app that generates traces and logs for count connector\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=count-connector-demo\n    depends_on:\n      - alloy\n"
  },
  {
    "path": "otel-examples/count-connector/loki-config.yaml",
    "content": "auth_enabled: false\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: info\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\nlimits_config:\n  metric_aggregation_enabled: true\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\nruler:\n  alertmanager_url: http://localhost:9093\nfrontend:\n  encoding: protobuf\n"
  },
  {
    "path": "otel-examples/count-connector/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\notlp:\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - deployment.environment\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-examples/count-connector/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks]\n      generate_native_histograms: both\n"
  },
  {
    "path": "otel-examples/filelog-processing/README.md",
    "content": "# Filelog Processing\n\nDemonstrates the OTel Collector **filelog receiver** with operator chains to parse mixed-format log files. A log generator writes both JSON and plaintext log lines to a shared volume, and Alloy (running the OTel engine) reads, parses, and ships them to Loki.\n\n## What This Demonstrates\n\n- **Filelog receiver** reading log files from disk using glob patterns\n- **Conditional operator chains** that detect log format and apply the correct parser (JSON vs regex)\n- **Severity parsing** to map log levels to OTel severity\n- **Resource attribute injection** to tag all logs with a service name\n- Exporting parsed logs to **Loki via OTLP/HTTP**\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\n1. Open Grafana at [http://localhost:3000](http://localhost:3000) (no login required).\n2. Go to **Explore** and select the **Loki** datasource.\n3. Try these LogQL queries:\n\n```logql\n{service_name=\"log-demo\"}\n```\n\n```logql\n{service_name=\"log-demo\"} | json\n```\n\n```logql\n{service_name=\"log-demo\"} |= \"ERROR\"\n```\n\n4. 
Observe that both JSON and plaintext lines are ingested, with severity levels and timestamps correctly parsed.\n\n## Key Configuration\n\nThe `config-otel.yaml` defines a filelog receiver with chained operators:\n\n- **`json_parser`** (conditional) -- fires when the log line starts with `{`, extracting structured fields and timestamps.\n- **`regex_parser`** (conditional) -- fires when the log line starts with a date pattern, capturing timestamp, level, and message.\n- **`severity_parser`** -- maps the parsed `level` attribute to OTel severity.\n- **`add` operator** -- injects `service.name` as a resource attribute.\n\nLogs are batched and exported to Loki's OTLP endpoint at `http://loki:3100/otlp`.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/filelog-processing/app/generate_logs.py",
    "content": "\"\"\"\nLog generator that writes mixed-format log lines to /var/log/app/demo.log.\n\nAlternates between JSON and plaintext formats with random log levels\nto exercise the filelog receiver's operator chains.\n\"\"\"\n\nimport json\nimport os\nimport random\nimport time\nfrom datetime import datetime, timezone\n\nLOG_DIR = \"/var/log/app\"\nLOG_FILE = os.path.join(LOG_DIR, \"demo.log\")\n\nLEVELS = [\"DEBUG\", \"INFO\", \"INFO\", \"INFO\", \"WARN\", \"ERROR\"]\n\nJSON_MESSAGES = [\n    (\"User logged in\", {\"user_id\": \"u123\", \"region\": \"us-east\"}),\n    (\"Order placed\", {\"order_id\": \"ord-9876\", \"amount\": 49.99}),\n    (\"Cache hit\", {\"cache_key\": \"session:abc\", \"ttl\": 300}),\n    (\"Payment processed\", {\"user_id\": \"u456\", \"method\": \"credit_card\"}),\n    (\"Item shipped\", {\"order_id\": \"ord-5432\", \"carrier\": \"fedex\"}),\n    (\"User signed up\", {\"user_id\": \"u789\", \"plan\": \"premium\"}),\n]\n\nPLAIN_MESSAGES = [\n    \"Failed to process request for user u456\",\n    \"Connection timeout reaching database primary\",\n    \"Rate limit exceeded for API key ak-1234\",\n    \"Scheduled cleanup completed, removed 42 expired sessions\",\n    \"Health check passed for service order-api\",\n    \"Retrying failed webhook delivery attempt 3/5\",\n    \"Disk usage at 78% on volume /data\",\n]\n\n\ndef write_json_line(f, level):\n    msg, extra = random.choice(JSON_MESSAGES)\n    record = {\n        \"timestamp\": datetime.now(timezone.utc).strftime(\"%Y-%m-%dT%H:%M:%S.%f\")[:-3] + \"Z\",\n        \"level\": level,\n        \"message\": msg,\n        **extra,\n    }\n    f.write(json.dumps(record) + \"\\n\")\n\n\ndef write_plain_line(f, level):\n    ts = datetime.now(timezone.utc).strftime(\"%Y-%m-%d %H:%M:%S,%f\")[:-3]\n    msg = random.choice(PLAIN_MESSAGES)\n    f.write(f\"{ts} {level} {msg}\\n\")\n\n\ndef main():\n    os.makedirs(LOG_DIR, exist_ok=True)\n    print(f\"Writing logs to {LOG_FILE}\")\n\n    while 
True:\n        level = random.choice(LEVELS)\n        with open(LOG_FILE, \"a\") as f:\n            if random.random() < 0.5:\n                write_json_line(f, level)\n            else:\n                write_plain_line(f, level)\n        time.sleep(2)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "otel-examples/filelog-processing/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: File Log Processing\n#\n# Demonstrates the filelog receiver with operator chains for parsing\n# mixed-format log files (JSON and plaintext).\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  filelog:\n    include:\n      - /var/log/app/*.log\n    operators:\n      # Try to parse as JSON first\n      - type: json_parser\n        if: body matches \"^\\\\{\"\n        parse_from: body\n        parse_to: attributes\n        timestamp:\n          parse_from: attributes.timestamp\n          layout: \"%Y-%m-%dT%H:%M:%S.%fZ\"\n\n      # For non-JSON lines, extract with regex\n      - type: regex_parser\n        if: body matches \"^\\\\d{4}-\\\\d{2}-\\\\d{2}\"\n        regex: \"^(?P<timestamp>\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2},\\\\d{3}) (?P<level>\\\\w+) (?P<message>.*)\"\n        timestamp:\n          parse_from: attributes.timestamp\n          layout: \"%Y-%m-%d %H:%M:%S,%f\"\n\n      # Map severity from parsed level\n      - type: severity_parser\n        parse_from: attributes.level\n        if: attributes.level != nil\n\n      # Add a static resource attribute\n      - type: add\n        field: resource[\"service.name\"]\n        value: log-demo\n\nprocessors:\n  batch:\n    timeout: 2s\n    send_batch_size: 256\n\nexporters:\n  otlphttp/loki:\n    endpoint: http://loki:3100/otlp\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    logs:\n      receivers: [filelog]\n      processors: [batch]\n      exporters: [otlphttp/loki]\n"
  },
  {
    "path": "otel-examples/filelog-processing/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/filelog-processing/docker-compose.coda.yml",
    "content": "services:\n  log-generator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    volumes:\n      - ./app/generate_logs.py:/app/generate_logs.py\n      - /var/log/alloy-demo:/var/log/app\n    command: [\"python3\", \"/app/generate_logs.py\"]\n"
  },
  {
    "path": "otel-examples/filelog-processing/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n      - app-logs:/var/log/app\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - loki\n\n  log-generator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./app/generate_logs.py:/app/generate_logs.py\n      - app-logs:/var/log/app\n    command: [\"python3\", \"/app/generate_logs.py\"]\n    depends_on:\n      - alloy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - loki\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\nvolumes:\n  app-logs:\n"
  },
  {
    "path": "otel-examples/filelog-processing/loki-config.yaml",
    "content": "auth_enabled: false\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: info\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\nlimits_config:\n  metric_aggregation_enabled: true\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\nruler:\n  alertmanager_url: http://localhost:9093\nfrontend:\n  encoding: protobuf\n"
  },
  {
    "path": "otel-examples/host-metrics/README.md",
    "content": "# Host Metrics with OTel Hostmetrics Receiver\n\nCollect CPU, memory, disk, filesystem, network, and process metrics using the OpenTelemetry `hostmetrics` receiver -- an OTel-native replacement for Prometheus node_exporter. Metrics are exported via OTLP to Prometheus.\n\n## What This Demonstrates\n\n- **Hostmetrics receiver**: Collects system-level metrics without a separate exporter binary\n- **Scrapers**: CPU (with utilization), memory (with utilization), disk, filesystem, network, load, and process scrapers\n- **Resource detection**: Automatically adds host metadata (hostname, OS type) to all metrics\n- **OTLP export to Prometheus**: Metrics are sent via OTLP to Prometheus's native OTLP receiver\n- **Stress testing**: A stress container generates CPU and memory load to produce interesting metric data\n\n## Metrics Collected\n\n| Scraper    | Example Metrics                                                    |\n|------------|-------------------------------------------------------------------|\n| CPU        | `system_cpu_time`, `system_cpu_utilization`                        |\n| Memory     | `system_memory_usage`, `system_memory_utilization`                 |\n| Disk       | `system_disk_io`, `system_disk_operations`                         |\n| Filesystem | `system_filesystem_usage`, `system_filesystem_utilization`         |\n| Network    | `system_network_io`, `system_network_packets`                      |\n| Load       | `system_cpu_load_average_1m`, `system_cpu_load_average_5m`         |\n| Process    | `process_cpu_time`, `process_memory_physical_usage`                |\n\n## Prerequisites\n\n- Docker and Docker Compose\n- Linux host (hostmetrics requires access to `/proc` and `/sys`)\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). 
This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the Alloy UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000) and go to **Explore > Prometheus**.\n\n### Sample PromQL Queries\n\n**CPU utilization:**\n```promql\nsystem_cpu_utilization{state=\"user\"}\n```\n\n**Memory usage (bytes):**\n```promql\nsystem_memory_usage{state=\"used\"}\n```\n\n**Disk I/O rate:**\n```promql\nrate(system_disk_io_total[5m])\n```\n\n**Network bytes transmitted:**\n```promql\nrate(system_network_io_total{direction=\"transmit\"}[5m])\n```\n\n**System load averages:**\n```promql\nsystem_cpu_load_average_1m\n```\n\n**Top processes by CPU:**\n```promql\ntopk(10, rate(process_cpu_time_total[5m]))\n```\n\n## Key Configuration\n\nThe `config-otel.yaml` configures:\n\n1. **`hostmetrics` receiver**: Enables all major scrapers with a 15s collection interval. CPU and memory utilization metrics are explicitly enabled.\n2. **`resourcedetection` processor**: Uses `env` and `system` detectors to add hostname and OS metadata.\n3. **`otlphttp/prometheus` exporter**: Sends metrics via OTLP to Prometheus's native OTLP endpoint.\n\nThe Alloy container runs with `pid: host` and mounts `/proc`, `/sys`, and `/` from the host to enable full system visibility.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/host-metrics/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Host Metrics Collection\n#\n# Demonstrates using the hostmetrics receiver to collect CPU, memory,\n# disk, filesystem, and network metrics -- an OTel-native replacement\n# for Prometheus node_exporter.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  hostmetrics:\n    collection_interval: 15s\n    scrapers:\n      cpu:\n        metrics:\n          system.cpu.utilization:\n            enabled: true\n      memory:\n        metrics:\n          system.memory.utilization:\n            enabled: true\n      disk: {}\n      filesystem: {}\n      network: {}\n      load: {}\n      process:\n        include:\n          match_type: regexp\n          names: [\".*\"]\n        mute_process_exe_error: true\n        mute_process_io_error: true\n        mute_process_user_error: true\n\nprocessors:\n  # Detect host metadata automatically\n  resourcedetection:\n    detectors: [env, system]\n    system:\n      hostname_sources: [\"os\"]\n\n  batch:\n    timeout: 10s\n    send_batch_size: 512\n\nexporters:\n  otlphttp/prometheus:\n    endpoint: http://prometheus:9090/api/v1/otlp\n    tls:\n      insecure: true\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    metrics:\n      receivers: [hostmetrics]\n      processors: [resourcedetection, batch]\n      exporters: [otlphttp/prometheus]\n"
  },
  {
    "path": "otel-examples/host-metrics/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/host-metrics/docker-compose.coda.yml",
    "content": "services:\n  stress:\n    image: polinux/stress@sha256:b6144f84f9c15dac80deb48d3a646b55c7043ab1d83ea0a697c09097aaad21aa\n    command: [\"stress\", \"--cpu\", \"1\", \"--vm\", \"1\", \"--vm-bytes\", \"64M\"]\n"
  },
  {
    "path": "otel-examples/host-metrics/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n      - /proc:/hostfs/proc:ro\n      - /sys:/hostfs/sys:ro\n      - /:/hostfs:ro\n    environment:\n      - HOST_PROC=/hostfs/proc\n      - HOST_SYS=/hostfs/sys\n      - HOST_ETC=/hostfs/etc\n      - HOST_VAR=/hostfs/var\n      - HOST_RUN=/hostfs/run\n    pid: host\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - prometheus\n\n  stress:\n    image: polinux/stress@sha256:b6144f84f9c15dac80deb48d3a646b55c7043ab1d83ea0a697c09097aaad21aa\n    command: [\"stress\", \"--cpu\", \"1\", \"--vm\", \"1\", \"--vm-bytes\", \"64M\"]\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - prometheus\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "otel-examples/host-metrics/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\notlp:\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - deployment.environment\n    - host.name\n    - os.type\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-examples/kafka-buffer/README.md",
    "content": "# Kafka-Buffered Trace Pipeline\n\nDemonstrates using Apache Kafka as a durable buffer in an OpenTelemetry trace pipeline. Alloy runs both the agent tier (OTLP receiver to Kafka) and the gateway tier (Kafka to Tempo) in a single collector instance, showcasing the two-tier architecture pattern.\n\n## What This Demonstrates\n\n- **Kafka as a durable buffer**: Traces are written to Kafka before being exported to Tempo, providing resilience against backend outages\n- **Two-tier collector architecture**: The agent tier ingests OTLP and writes to Kafka; the gateway tier reads from Kafka and exports to Tempo\n- **Single-collector demo**: Both tiers run in one Alloy instance for simplicity, but in production these would be separate deployments\n- **KRaft mode Kafka**: Uses Bitnami Kafka with KRaft (no ZooKeeper required)\n- **Auto topic creation**: The `otlp-traces` topic is created automatically on first write\n\n## Architecture\n\n```\nApp --OTLP--> Alloy (agent tier) --Kafka--> Alloy (gateway tier) --OTLP--> Tempo\n```\n\nIn this demo, both tiers are the same Alloy instance with two separate pipelines:\n\n1. **`traces/ingest`**: `otlp` receiver -> `kafka` exporter\n2. **`traces/export`**: `kafka` receiver -> `batch` processor -> `otlp/tempo` exporter\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\nWait about 30 seconds for Kafka to initialize before traces start flowing.\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). 
This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the Alloy UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000) and go to **Explore > Tempo**.\n\nSearch for traces from `kafka-buffer-demo`. You should see traces for HTTP endpoints (`/api/items`, `/api/checkout`, `/api/health`) with database query child spans.\n\n### Demonstrate Resilience\n\nThe key benefit of the Kafka buffer is resilience. Try this experiment:\n\n1. Let the demo run for a minute to generate some traces\n2. Stop Tempo: `docker compose stop tempo`\n3. Wait 30 seconds (traces are buffering in Kafka)\n4. Restart Tempo: `docker compose start tempo`\n5. Check Grafana -- the buffered traces should appear in Tempo\n\nThis works because Kafka retains messages until the consumer (gateway tier) successfully reads them.\n\n## Key Configuration\n\nThe `config-otel.yaml` defines:\n\n1. **`kafka` exporter**: Writes OTLP-encoded trace data to the `otlp-traces` Kafka topic\n2. **`kafka` receiver**: Reads from the same topic and deserializes traces\n3. **Two pipelines**: `traces/ingest` (app -> Kafka) and `traces/export` (Kafka -> Tempo)\n\nThe Kafka exporter uses `otlp_proto` encoding, which preserves full trace fidelity through the buffer.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/kafka-buffer/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/kafka-buffer/app/app.py",
    "content": "\"\"\"\nFlask app generating traces for the Kafka buffer demo.\n\nProduces varied HTTP traces that flow through the Alloy pipeline:\n  app -> OTLP -> Alloy -> Kafka -> Alloy -> Tempo\n\nA background thread generates continuous load against the Flask endpoints.\n\"\"\"\n\nimport random\nimport time\nimport threading\n\nfrom flask import Flask, jsonify\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.sdk.resources import Resource\n\nresource = Resource.create({\n    \"service.name\": \"kafka-buffer-demo\",\n    \"service.version\": \"1.0.0\",\n    \"deployment.environment\": \"demo\",\n})\n\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(\n    BatchSpanProcessor(OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True))\n)\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(\"kafka-demo\")\n\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\n\n\n@app.route(\"/api/items\", methods=[\"GET\"])\ndef list_items():\n    with tracer.start_as_current_span(\"query-items-db\") as span:\n        span.set_attribute(\"db.system\", \"postgresql\")\n        span.set_attribute(\"db.statement\", \"SELECT * FROM items LIMIT 20\")\n        time.sleep(random.uniform(0.01, 0.04))\n    return jsonify({\"items\": [{\"id\": i, \"name\": f\"item-{i}\"} for i in range(5)]})\n\n\n@app.route(\"/api/items/<int:item_id>\", methods=[\"GET\"])\ndef get_item(item_id):\n    with tracer.start_as_current_span(\"query-single-item\") as span:\n        span.set_attribute(\"db.system\", \"postgresql\")\n        span.set_attribute(\"db.statement\", f\"SELECT * FROM items WHERE id = {item_id}\")\n        span.set_attribute(\"app.item_id\", item_id)\n       
 time.sleep(random.uniform(0.005, 0.02))\n    return jsonify({\"id\": item_id, \"name\": f\"item-{item_id}\", \"price\": round(random.uniform(5, 100), 2)})\n\n\n@app.route(\"/api/checkout\", methods=[\"POST\"])\ndef checkout():\n    with tracer.start_as_current_span(\"process-checkout\") as span:\n        span.set_attribute(\"app.cart_size\", random.randint(1, 10))\n        span.set_attribute(\"app.payment_method\", random.choice([\"credit_card\", \"paypal\", \"apple_pay\"]))\n        time.sleep(random.uniform(0.05, 0.15))\n\n        # Simulate occasional failures\n        if random.random() < 0.1:\n            span.set_attribute(\"error\", True)\n            span.set_attribute(\"error.message\", \"Payment gateway timeout\")\n            return jsonify({\"error\": \"Payment failed\"}), 500\n\n    return jsonify({\"order_id\": random.randint(10000, 99999), \"status\": \"confirmed\"}), 201\n\n\n@app.route(\"/api/health\")\ndef health():\n    return jsonify({\"status\": \"ok\"})\n\n\ndef generate_load():\n    \"\"\"Background thread that sends requests to the Flask app.\"\"\"\n    import urllib.request\n\n    time.sleep(5)  # Wait for Flask to start\n    base = \"http://localhost:8080\"\n    endpoints = [\n        (\"GET\", f\"{base}/api/items\"),\n        (\"GET\", f\"{base}/api/items/1\"),\n        (\"GET\", f\"{base}/api/items/2\"),\n        (\"GET\", f\"{base}/api/items/3\"),\n        (\"POST\", f\"{base}/api/checkout\"),\n        (\"GET\", f\"{base}/api/health\"),\n    ]\n\n    while True:\n        method, url = random.choice(endpoints)\n        try:\n            req = urllib.request.Request(url, method=method)\n            if method == \"POST\":\n                req.add_header(\"Content-Type\", \"application/json\")\n                req.data = b'{\"items\": [1, 2, 3]}'\n            urllib.request.urlopen(req)\n        except Exception:\n            pass\n        time.sleep(random.uniform(0.5, 2.0))\n\n\nif __name__ == \"__main__\":\n    load_thread = 
threading.Thread(target=generate_load, daemon=True)\n    load_thread.start()\n    app.run(host=\"0.0.0.0\", port=8080)\n"
  },
  {
    "path": "otel-examples/kafka-buffer/app/requirements.txt",
    "content": "flask\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests\n"
  },
  {
    "path": "otel-examples/kafka-buffer/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Kafka-Buffered Pipeline (Gateway)\n#\n# Demonstrates a two-tier collector architecture:\n#   Agent:   otlp receiver -> kafka exporter (writes to Kafka)\n#   Gateway: kafka receiver -> batch -> otlp exporter (reads from Kafka, writes to backends)\n#\n# This config runs BOTH tiers in a single collector for demo purposes,\n# using Kafka as a durable buffer between ingest and export.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  # Tier 1: Accept OTLP from applications\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\n  # Tier 2: Read back from Kafka\n  kafka:\n    brokers:\n      - kafka:9092\n    protocol_version: \"3.0.0\"\n    traces:\n      topic: otlp-traces\n\nprocessors:\n  batch: {}\n\nexporters:\n  # Tier 1: Write to Kafka buffer\n  kafka:\n    brokers:\n      - kafka:9092\n    topic: otlp-traces\n    protocol_version: \"3.0.0\"\n    encoding: otlp_proto\n\n  # Tier 2: Write to Tempo\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    # Agent tier: ingest OTLP and buffer to Kafka\n    traces/ingest:\n      receivers: [otlp]\n      exporters: [kafka]\n    # Gateway tier: read from Kafka and export to backend\n    traces/export:\n      receivers: [kafka]\n      processors: [batch]\n      exporters: [otlp/tempo]\n"
  },
  {
    "path": "otel-examples/kafka-buffer/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/kafka-buffer/docker-compose.coda.yml",
    "content": "services:\n  kafka:\n    image: apache/kafka:4.2.0@sha256:9516fb7634bad307d17c33b589fde9023003b0cb761374f500002b980a3149b9\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - KAFKA_NODE_ID=0\n      - KAFKA_PROCESS_ROLES=broker,controller\n      - KAFKA_CONTROLLER_QUORUM_VOTERS=0@localhost:9093\n      - KAFKA_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093\n      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092\n      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT\n      - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER\n      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1\n      - CLUSTER_ID=kafka-buffer-demo-cluster-001\n\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n"
  },
  {
    "path": "otel-examples/kafka-buffer/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  kafka:\n    image: apache/kafka:4.2.0@sha256:9516fb7634bad307d17c33b589fde9023003b0cb761374f500002b980a3149b9\n    ports:\n      - 9092:9092\n    environment:\n      - KAFKA_NODE_ID=0\n      - KAFKA_PROCESS_ROLES=broker,controller\n      - KAFKA_CONTROLLER_QUORUM_VOTERS=0@kafka:9093\n      - KAFKA_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093\n      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092\n      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT\n      - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER\n      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1\n      - CLUSTER_ID=kafka-buffer-demo-cluster-001\n\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 4317:4317\n      - 4318:4318\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - kafka\n      - tempo\n\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    depends_on:\n      - alloy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - tempo\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        
datasources:\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "otel-examples/kafka-buffer/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"0.0.0.0:4317\"\n        http:\n          endpoint: \"0.0.0.0:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\noverrides:\n  defaults: {}\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/README.md",
    "content": "# Multi-Pipeline Fan-Out\n\nDemonstrates sending the same traces to multiple backends with different processing per destination using the OpenTelemetry forward connector. Full-fidelity traces go to a primary Tempo instance, while sampled and attribute-stripped traces go to a secondary instance. This is a common pattern for migrations and tiered storage strategies.\n\n## What This Demonstrates\n\n- **Forward connector**: The `forward/sampled` connector duplicates trace data from one pipeline into another\n- **Fan-out pattern**: A single intake pipeline fans out to two export pipelines with independent processing\n- **Probabilistic sampling**: The secondary pipeline only keeps 10% of traces\n- **Attribute stripping**: The secondary pipeline removes sensitive/large attributes (user agent, cookies, request body) and truncates remaining attributes to 128 characters\n- **Dual Tempo instances**: Two independent Tempo backends receiving different subsets and fidelity levels of the same trace data\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000).\n\n### Compare Primary vs Secondary\n\n1. Go to **Explore** and select **Tempo Primary** datasource\n2. Search for traces from `fanout-demo-app`\n3. Pick a trace and note the attributes: full `http.request.header.user_agent`, `http.request.header.cookie`, `http.request.body` values\n4. Switch datasource to **Tempo Secondary**\n5. 
Search for the same service -- you will see far fewer traces (only ~10%)\n6. On traces that do appear, the user agent, cookie, and request body attributes are gone, and remaining attributes are truncated to 128 characters\n\n### What to Look For\n\n| Aspect              | Tempo Primary                  | Tempo Secondary                  |\n|---------------------|-------------------------------|----------------------------------|\n| Trace volume        | 100% of traces                | ~10% of traces                   |\n| Attribute fidelity  | Full (all attributes present) | Stripped (no UA, cookies, body)  |\n| Attribute length    | Unlimited                     | Truncated to 128 chars           |\n\n## Key Configuration\n\nThe `config-otel.yaml` defines two pipelines:\n\n1. **`traces/intake`**: Receives OTLP, batches, then exports to both `otlp/tempo-primary` and `forward/sampled`\n2. **`traces/sampled`**: Receives from the forward connector, applies probabilistic sampling (10%), strips attributes, and exports to `otlp/tempo-secondary`\n\nThe forward connector (`forward/sampled`) acts as the bridge that duplicates data from the intake pipeline to the sampled pipeline.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/app/app.py",
    "content": "\"\"\"\nFlask app generating varied traces for the multi-pipeline fan-out demo.\n\nProduces traces with large attribute values, user agents, cookies, and\nrequest bodies to demonstrate how the secondary pipeline strips these\nwhile the primary retains full fidelity.\n\"\"\"\n\nimport random\nimport time\nimport threading\n\nfrom flask import Flask, jsonify, request\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.sdk.resources import Resource\n\nresource = Resource.create({\n    \"service.name\": \"fanout-demo-app\",\n    \"service.version\": \"1.0.0\",\n    \"deployment.environment\": \"demo\",\n})\n\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(\n    BatchSpanProcessor(OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True))\n)\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(\"fanout-demo\")\n\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\n\nUSER_AGENTS = [\n    \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36\",\n    \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15\",\n    \"Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 Chrome/120.0.0.0 Mobile Safari/537.36\",\n    \"curl/8.4.0\",\n]\n\nCOOKIES = [\n    \"session=abc123def456; preferences=dark_mode; tracking_id=xx-\" + \"a\" * 200,\n    \"session=xyz789; cart=item1,item2,item3; locale=en-US\",\n    \"\",\n]\n\n\n@app.route(\"/api/orders\", methods=[\"GET\"])\ndef list_orders():\n    with tracer.start_as_current_span(\"fetch-orders-from-db\") as span:\n        span.set_attribute(\"db.system\", \"postgresql\")\n        span.set_attribute(\"db.statement\", \"SELECT 
* FROM orders WHERE status = 'active'\")\n        time.sleep(random.uniform(0.01, 0.05))\n    return jsonify({\"orders\": [{\"id\": i, \"status\": \"active\"} for i in range(5)]})\n\n\n@app.route(\"/api/orders\", methods=[\"POST\"])\ndef create_order():\n    with tracer.start_as_current_span(\"insert-order\") as span:\n        span.set_attribute(\"db.system\", \"postgresql\")\n        span.set_attribute(\"db.statement\", \"INSERT INTO orders (product, qty) VALUES ($1, $2)\")\n        span.set_attribute(\"http.request.body\", '{\"product\": \"widget\", \"qty\": 10, \"notes\": \"' + \"x\" * 500 + '\"}')\n        time.sleep(random.uniform(0.02, 0.08))\n    return jsonify({\"id\": random.randint(1000, 9999), \"status\": \"created\"}), 201\n\n\n@app.route(\"/api/health\")\ndef health():\n    return jsonify({\"status\": \"ok\"})\n\n\ndef generate_load():\n    \"\"\"Background thread that sends requests to the Flask app.\"\"\"\n    import urllib.request\n\n    time.sleep(5)  # Wait for Flask to start\n    base = \"http://localhost:8080\"\n    endpoints = [\n        (\"GET\", f\"{base}/api/orders\"),\n        (\"POST\", f\"{base}/api/orders\"),\n        (\"GET\", f\"{base}/api/health\"),\n    ]\n\n    while True:\n        method, url = random.choice(endpoints)\n        try:\n            req = urllib.request.Request(url, method=method)\n            # Add varied headers that will become span attributes\n            req.add_header(\"User-Agent\", random.choice(USER_AGENTS))\n            cookie = random.choice(COOKIES)\n            if cookie:\n                req.add_header(\"Cookie\", cookie)\n            if method == \"POST\":\n                req.add_header(\"Content-Type\", \"application/json\")\n                req.data = b'{\"product\": \"widget\", \"qty\": 1}'\n            urllib.request.urlopen(req)\n        except Exception:\n            pass\n        time.sleep(random.uniform(0.5, 2.0))\n\n\nif __name__ == \"__main__\":\n    load_thread = 
threading.Thread(target=generate_load, daemon=True)\n    load_thread.start()\n    app.run(host=\"0.0.0.0\", port=8080)\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/app/requirements.txt",
    "content": "flask\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Multi-Pipeline Fan-Out\n#\n# Demonstrates sending the same traces to multiple backends with\n# different processing per destination: full-fidelity traces to Tempo,\n# and sampled traces with reduced attributes to a second store.\n# Common for migrations and tiered storage strategies.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nconnectors:\n  forward/sampled: {}\n\nprocessors:\n  batch: {}\n\n  # For the sampled pipeline: only keep 10% of traces\n  probabilistic_sampler:\n    sampling_percentage: 10\n\n  # Strip detailed attributes for the sampled/cheap store\n  transform/strip:\n    error_mode: ignore\n    trace_statements:\n      - context: span\n        statements:\n          - delete_key(attributes, \"http.request.header.user_agent\")\n          - delete_key(attributes, \"http.request.header.cookie\")\n          - delete_key(attributes, \"http.request.body\")\n          - truncate_all(attributes, 128)\n\nexporters:\n  # Primary: full-fidelity traces to Tempo\n  otlp/tempo-primary:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  # Secondary: sampled + stripped traces to second Tempo instance\n  otlp/tempo-secondary:\n    endpoint: tempo-secondary:4317\n    tls:\n      insecure: true\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    # Intake pipeline: receive and fan out to both destinations\n    traces/intake:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [otlp/tempo-primary, forward/sampled]\n    # Sampled pipeline: reduced data to secondary store\n    traces/sampled:\n      receivers: [forward/sampled]\n      processors: [probabilistic_sampler, transform/strip, batch]\n      exporters: [otlp/tempo-secondary]\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  tempo-secondary:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3201:3200\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 4317:4317\n      - 4318:4318\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - tempo\n      - tempo-secondary\n\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    depends_on:\n      - alloy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - tempo\n      - tempo-secondary\n      - prometheus\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n     
     type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo Primary\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        - name: Tempo Secondary\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo-secondary:3200\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-examples/multi-pipeline-fanout/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"0.0.0.0:4317\"\n        http:\n          endpoint: \"0.0.0.0:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks]\n      generate_native_histograms: both\n"
  },
  {
    "path": "otel-examples/ottl-transform/README.md",
    "content": "# OTTL Transform Cookbook\n\nA cookbook of the most useful OpenTelemetry Transformation Language (OTTL) patterns running in Grafana Alloy's OTel engine. Demonstrates JSON body parsing, severity mapping, attribute promotion, truncation, pattern replacement, and conditional transforms.\n\n## What This Demonstrates\n\n- **JSON body parsing**: Log records arrive with JSON string bodies; OTTL parses them and promotes fields to attributes\n- **Severity mapping**: String severity levels (\"INFO\", \"WARN\", \"ERROR\") are mapped to proper OTel severity numbers\n- **Attribute cleanup**: Promoted fields like `level` and `timestamp` are deleted after extraction\n- **Tier labeling**: Trace spans are automatically tagged with `app.tier=frontend` (when `http.target` is present) or `app.tier=backend` (when `db.system` is present)\n- **Attribute truncation**: All span attributes are truncated to 256 characters\n- **Resource enrichment**: A `deployment.environment=demo` attribute is added to all trace resources\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\n### Logs in Loki\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000) and go to **Explore > Loki**.\n\nQuery to see parsed JSON attributes:\n\n```logql\n{service_name=\"ottl-demo-app\"}\n```\n\nYou should see that JSON fields from the log body (`order_id`, `message`, `amount`, `error_code`, etc.) have been promoted to log attributes. The `level` and `timestamp` fields should be removed after promotion. 
Severity should be correctly set (INFO=9, WARN=13, ERROR=17).\n\n### Traces in Tempo\n\nSwitch to **Explore > Tempo** and search for traces from `ottl-demo-app`.\n\nLook for:\n- `app.tier` label on spans: `frontend` for HTTP spans, `backend` for database spans\n- Long attribute values (like `http.user_agent` or `db.connection_string`) truncated to 256 characters\n- `deployment.environment=demo` on trace resources\n\n## Key Configuration\n\nThe `config-otel.yaml` defines three transform processors:\n\n1. **`transform/parse-logs`**: Parses JSON string bodies with `ParseJSON(body)`, maps severity, and cleans up attributes\n2. **`transform/traces`**: Adds tier labels based on attribute presence, truncates all attributes to 256 chars\n3. **`transform/resources`**: Adds `deployment.environment=demo` to trace resources\n\nThese are wired into separate pipelines for traces and logs.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/ottl-transform/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/ottl-transform/app/app.py",
    "content": "\"\"\"\nDemo app that sends \"messy\" telemetry to exercise OTTL transform patterns.\n\nSends:\n- Log records with JSON string bodies (to test JSON parsing + attribute promotion)\n- Log records with string severity fields but no severity_number set\n- Traces with varied attributes (http.target, db.system, long values)\n\"\"\"\n\nimport json\nimport time\nimport random\nimport logging\n\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\n\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter\n\nfrom opentelemetry.sdk.resources import Resource\n\nresource = Resource.create({\n    \"service.name\": \"ottl-demo-app\",\n    \"service.version\": \"1.0.0\",\n})\n\n# --- Tracing setup ---\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(\n    BatchSpanProcessor(OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True))\n)\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(\"ottl-demo\")\n\n# --- Logging setup ---\nlogger_provider = LoggerProvider(resource=resource)\nlogger_provider.add_log_record_processor(\n    BatchLogRecordProcessor(OTLPLogExporter(endpoint=\"alloy:4317\", insecure=True))\n)\nhandler = LoggingHandler(logger_provider=logger_provider)\nlogger = logging.getLogger(\"ottl-demo\")\nlogger.addHandler(handler)\nlogger.setLevel(logging.DEBUG)\n\n\ndef send_json_log_records():\n    \"\"\"Send log records with JSON string bodies for OTTL JSON parsing.\"\"\"\n    orders = [\n        {\"timestamp\": \"2024-01-15T10:30:00Z\", \"level\": \"INFO\", \"message\": \"Order processed\", \"order_id\": \"ORD-123\", \"amount\": 49.99},\n        {\"timestamp\": 
\"2024-01-15T10:30:01Z\", \"level\": \"ERROR\", \"message\": \"Payment failed\", \"order_id\": \"ORD-456\", \"error_code\": \"INSUFFICIENT_FUNDS\"},\n        {\"timestamp\": \"2024-01-15T10:30:02Z\", \"level\": \"WARN\", \"message\": \"Inventory low\", \"product_id\": \"SKU-789\", \"remaining\": 3},\n        {\"timestamp\": \"2024-01-15T10:30:03Z\", \"level\": \"INFO\", \"message\": \"User login\", \"user_id\": \"USR-101\", \"ip\": \"192.168.1.42\"},\n        {\"timestamp\": \"2024-01-15T10:30:04Z\", \"level\": \"ERROR\", \"message\": \"Database timeout\", \"query\": \"SELECT * FROM orders\", \"duration_ms\": 30000},\n    ]\n    record = random.choice(orders)\n    # Send as a JSON string body -- OTTL will parse this\n    logger.info(json.dumps(record))\n\n\ndef send_traces():\n    \"\"\"Send traces with varied attributes to exercise OTTL trace transforms.\"\"\"\n    # Frontend-style span with http.target\n    with tracer.start_as_current_span(\"GET /api/orders\") as span:\n        span.set_attribute(\"http.method\", \"get\")\n        span.set_attribute(\"http.target\", \"/api/orders?page=1&limit=50\")\n        span.set_attribute(\"http.status_code\", 200)\n        span.set_attribute(\"http.user_agent\", \"Mozilla/5.0 \" + \"x\" * 300)  # Very long value\n        time.sleep(random.uniform(0.01, 0.05))\n\n        # Backend-style span with db.system\n        with tracer.start_as_current_span(\"SELECT orders\") as db_span:\n            db_span.set_attribute(\"db.system\", \"postgresql\")\n            db_span.set_attribute(\"db.statement\", \"SELECT id, status, amount FROM orders WHERE user_id = $1 ORDER BY created_at DESC LIMIT 50\")\n            db_span.set_attribute(\"db.name\", \"shop\")\n            db_span.set_attribute(\"db.operation\", \"SELECT\")\n            # Very long attribute to test truncation\n            db_span.set_attribute(\"db.connection_string\", \"host=db.internal port=5432 dbname=shop user=app \" + \"extra_param=value \" * 50)\n            
time.sleep(random.uniform(0.02, 0.08))\n\n    # Another trace pattern\n    with tracer.start_as_current_span(\"POST /api/checkout\") as span:\n        span.set_attribute(\"http.method\", \"post\")\n        span.set_attribute(\"http.target\", \"/api/checkout\")\n        span.set_attribute(\"http.status_code\", random.choice([200, 201, 400, 500]))\n        time.sleep(random.uniform(0.05, 0.15))\n\n\ndef main():\n    print(\"OTTL demo app started. Sending messy telemetry every 3 seconds...\")\n    while True:\n        try:\n            send_json_log_records()\n            send_traces()\n        except Exception as e:\n            print(f\"Error sending telemetry: {e}\")\n        time.sleep(3)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "otel-examples/ottl-transform/app/requirements.txt",
    "content": "opentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp-proto-grpc\n"
  },
  {
    "path": "otel-examples/ottl-transform/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: OTTL Transform Cookbook\n#\n# A \"cookbook\" of the most useful OTTL transformation patterns:\n# JSON parsing, attribute promotion, severity mapping, conditional\n# transforms, pattern replacement, and key deletion.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  # Transform 1: Parse JSON log bodies and promote fields\n  transform/parse-logs:\n    error_mode: ignore\n    log_statements:\n      - context: log\n        statements:\n          # Parse JSON string body into a map\n          - merge_maps(attributes, ParseJSON(body), \"upsert\") where IsString(body) and IsMatch(body, \"^[{]\")\n          # Map string severity to proper severity number\n          - set(severity_text, attributes[\"level\"]) where attributes[\"level\"] != nil\n          - set(severity_number, 9) where attributes[\"level\"] == \"INFO\"\n          - set(severity_number, 13) where attributes[\"level\"] == \"WARN\"\n          - set(severity_number, 17) where attributes[\"level\"] == \"ERROR\"\n          # Clean up promoted attributes\n          - delete_key(attributes, \"level\")\n          - delete_key(attributes, \"timestamp\")\n\n  # Transform 2: Enrich and clean trace attributes\n  transform/traces:\n    error_mode: ignore\n    trace_statements:\n      - context: span\n        statements:\n          # Add deployment environment from resource\n          - set(attributes[\"app.tier\"], \"frontend\") where attributes[\"http.target\"] != nil\n          - set(attributes[\"app.tier\"], \"backend\") where attributes[\"db.system\"] != nil\n          # Truncate overly long attribute values\n          - truncate_all(attributes, 256)\n          # Normalize HTTP method to uppercase\n          - 
set(attributes[\"http.method\"], ConvertCase(attributes[\"http.method\"], \"upper\")) where attributes[\"http.method\"] != nil\n          # ConvertCase rewrites the value; the where-clause skips spans that carry no http.method\n\n  # Transform 3: Add computed resource attributes\n  transform/resources:\n    error_mode: ignore\n    trace_statements:\n      - context: resource\n        statements:\n          - set(attributes[\"deployment.environment\"], \"demo\")\n\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  otlphttp/loki:\n    endpoint: http://loki:3100/otlp\n\n  debug:\n    verbosity: detailed\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [transform/traces, transform/resources, batch]\n      exporters: [otlp/tempo]\n    logs:\n      receivers: [otlp]\n      processors: [transform/parse-logs, batch]\n      exporters: [otlphttp/loki, debug]\n"
  },
  {
    "path": "otel-examples/ottl-transform/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/ottl-transform/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n"
  },
  {
    "path": "otel-examples/ottl-transform/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 4317:4317\n      - 4318:4318\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - loki\n      - tempo\n\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    depends_on:\n      - alloy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - loki\n      - tempo\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: false\n          version: 1\n     
     editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "otel-examples/ottl-transform/loki-config.yaml",
    "content": "auth_enabled: false\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: info\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\nlimits_config:\n  metric_aggregation_enabled: true\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\nruler:\n  alertmanager_url: http://localhost:9093\nfrontend:\n  encoding: protobuf\n"
  },
  {
    "path": "otel-examples/ottl-transform/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\noverrides:\n  defaults: {}\n"
  },
  {
    "path": "otel-examples/pii-redaction/README.md",
    "content": "# PII Redaction\n\nDemonstrates using the OTel Collector **transform processor** with OTTL `replace_pattern` statements to scrub personally identifiable information (credit card numbers, email addresses, IP addresses) from traces and logs before they reach storage backends.\n\n## What This Demonstrates\n\n- **Transform processor** with OTTL expressions for pattern-based redaction\n- Scrubbing PII from **trace span attributes** (credit cards, emails, IPs)\n- Scrubbing PII from **log record bodies** (credit cards, emails)\n- A Flask demo app that intentionally emits telemetry containing sensitive data\n- Verifying that redacted data arrives in Tempo and Loki with masked values\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\nThe demo app automatically generates traffic every 3 seconds -- no manual interaction needed.\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\n1. Open Grafana at [http://localhost:3000](http://localhost:3000) (no login required).\n\n### Check Traces (Tempo)\n\n2. Go to **Explore** and select the **Tempo** datasource.\n3. Search for traces from `pii-demo-app`.\n4. Open a trace and inspect the `process-order` span attributes. You should see:\n   - `user.credit_card` = `****-****-****-****`\n   - `user.email` = `***@***.***`\n   - `client.ip` = `***.***.***.***`\n\n### Check Logs (Loki)\n\n5. Switch to the **Loki** datasource.\n6. Run:\n\n```logql\n{service_name=\"pii-demo-app\"}\n```\n\n7. 
Log messages should contain masked values like `Payment processed for card ****-****-****-**** by ***@***.*** from 192.168.1.100`. The client IP in the log body is expected to stay visible, because `transform/logs` only masks credit cards and emails.\n\n## Key Configuration\n\nThe `config-otel.yaml` defines two transform processors:\n\n- **`transform/traces`** -- applies `replace_pattern` on span attributes to mask credit card numbers, emails, and IP addresses using regex.\n- **`transform/logs`** -- applies `replace_pattern` on log bodies to mask credit cards and emails.\n\nBoth processors use `error_mode: ignore`, so an error while evaluating a statement is logged and skipped instead of failing the pipeline.\n\nThe pipeline receives OTLP data on ports 4317 (gRPC) and 4318 (HTTP), processes it through the transform stage, then exports traces to Tempo and logs to Loki.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/pii-redaction/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/pii-redaction/app/app.py",
    "content": "\"\"\"\nFlask app that generates traces and logs containing PII data.\n\nThe PII (credit cards, emails, IPs) should be redacted by the Alloy\ntransform processor before reaching Loki and Tempo.\n\"\"\"\n\nimport logging\nimport threading\nimport time\n\nimport requests\nfrom flask import Flask, jsonify\nfrom opentelemetry import trace\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.sdk.resources import Resource\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\n\n# --- Resource ---\nresource = Resource.create({\n    \"service.name\": \"pii-demo-app\",\n    \"service.version\": \"1.0.0\",\n})\n\n# --- Traces ---\ntrace_exporter = OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True)\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(__name__)\n\n# --- Logs ---\nlog_exporter = OTLPLogExporter(endpoint=\"alloy:4317\", insecure=True)\nlogger_provider = LoggerProvider(resource=resource)\nlogger_provider.add_log_record_processor(BatchLogRecordProcessor(log_exporter))\notel_handler = LoggingHandler(level=logging.INFO, logger_provider=logger_provider)\n\nlogger = logging.getLogger(\"pii-demo\")\nlogger.setLevel(logging.INFO)\nlogger.addHandler(otel_handler)\n\n# --- Flask App ---\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\n\n# Sample PII data used in requests\nORDERS = [\n    {\n        \"user\": \"alice\",\n        \"credit_card\": \"4532-1234-5678-9012\",\n        \"email\": 
\"alice@example.com\",\n        \"ip\": \"192.168.1.100\",\n    },\n    {\n        \"user\": \"bob\",\n        \"credit_card\": \"5425-9876-5432-1098\",\n        \"email\": \"bob@company.org\",\n        \"ip\": \"10.0.42.7\",\n    },\n    {\n        \"user\": \"charlie\",\n        \"credit_card\": \"3782-822463-10005\",\n        \"email\": \"charlie@startup.io\",\n        \"ip\": \"172.16.0.55\",\n    },\n]\n\norder_index = 0\n\n\n@app.route(\"/order\", methods=[\"GET\"])\ndef place_order():\n    global order_index\n    order = ORDERS[order_index % len(ORDERS)]\n    order_index += 1\n\n    with tracer.start_as_current_span(\"process-order\") as span:\n        # Set span attributes containing PII\n        span.set_attribute(\"user.credit_card\", order[\"credit_card\"])\n        span.set_attribute(\"user.email\", order[\"email\"])\n        span.set_attribute(\"client.ip\", order[\"ip\"])\n        span.set_attribute(\"order.user\", order[\"user\"])\n\n        # Emit a log record containing PII in the body\n        logger.info(\n            f\"Payment processed for card {order['credit_card']} \"\n            f\"by {order['email']} from {order['ip']}\"\n        )\n\n        return jsonify({\"status\": \"ok\", \"user\": order[\"user\"]})\n\n\n@app.route(\"/health\", methods=[\"GET\"])\ndef health():\n    return jsonify({\"status\": \"healthy\"})\n\n\ndef traffic_generator():\n    \"\"\"Background thread that calls /order every 3 seconds.\"\"\"\n    time.sleep(5)  # Wait for Flask to start\n    while True:\n        try:\n            requests.get(\"http://localhost:5000/order\", timeout=5)\n        except Exception:\n            pass\n        time.sleep(3)\n\n\nif __name__ == \"__main__\":\n    t = threading.Thread(target=traffic_generator, daemon=True)\n    t.start()\n    app.run(host=\"0.0.0.0\", port=5000)\n"
  },
  {
    "path": "otel-examples/pii-redaction/app/requirements.txt",
    "content": "flask\nrequests\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests\n"
  },
  {
    "path": "otel-examples/pii-redaction/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: PII Redaction Pipeline\n#\n# Demonstrates using the transform processor with OTTL to scrub\n# sensitive data (credit cards, emails, IPs) from trace attributes\n# and log bodies before export.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  # Scrub PII from trace span attributes\n  transform/traces:\n    error_mode: ignore\n    trace_statements:\n      - context: span\n        statements:\n          # Mask credit card numbers (16 digits with optional separators)\n          - replace_pattern(attributes[\"user.credit_card\"], \"\\\\d{4}[- ]?\\\\d{4}[- ]?\\\\d{4}[- ]?\\\\d{4}\", \"****-****-****-****\")\n          # Mask 15-digit card numbers in 4-6-5 grouping (e.g. American Express),\n          # which the 16-digit pattern above does not match\n          - replace_pattern(attributes[\"user.credit_card\"], \"\\\\d{4}[- ]?\\\\d{6}[- ]?\\\\d{5}\", \"****-****-****-****\")\n          # Mask email addresses\n          - replace_pattern(attributes[\"user.email\"], \"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}\", \"***@***.***\")\n          # Mask IP addresses\n          - replace_pattern(attributes[\"client.ip\"], \"\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\", \"***.***.***.***\")\n\n  # Scrub PII from log bodies\n  transform/logs:\n    error_mode: ignore\n    log_statements:\n      - context: log\n        statements:\n          # Mask credit card numbers in log body\n          - replace_pattern(body, \"\\\\d{4}[- ]?\\\\d{4}[- ]?\\\\d{4}[- ]?\\\\d{4}\", \"****-****-****-****\")\n          # Mask 15-digit card numbers in 4-6-5 grouping in log body\n          - replace_pattern(body, \"\\\\d{4}[- ]?\\\\d{6}[- ]?\\\\d{5}\", \"****-****-****-****\")\n          # Mask email addresses in log body\n          - replace_pattern(body, \"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}\", \"***@***.***\")\n\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  otlphttp/loki:\n    endpoint: http://loki:3100/otlp\n\n  debug:\n    verbosity: detailed\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [transform/traces, batch]\n      exporters: [otlp/tempo]\n    logs:\n      receivers: [otlp]\n      processors: [transform/logs, batch]\n      exporters: [otlphttp/loki, debug]\n"
  },
  {
    "path": "otel-examples/pii-redaction/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/pii-redaction/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      args:\n        PYTHON_VERSION: ${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n"
  },
  {
    "path": "otel-examples/pii-redaction/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 4317:4317/tcp\n      - 4318:4318/tcp\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - loki\n      - tempo\n\n  demo-app:\n    build:\n      context: ./app\n      args:\n        PYTHON_VERSION: ${PYTHON_VERSION:-3.11-slim}\n    depends_on:\n      - alloy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - loki\n      - tempo\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: 
false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "otel-examples/pii-redaction/loki-config.yaml",
    "content": "auth_enabled: false\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: info\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\nlimits_config:\n  metric_aggregation_enabled: true\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\nruler:\n  alertmanager_url: http://localhost:9093\nfrontend:\n  encoding: protobuf\n"
  },
  {
    "path": "otel-examples/pii-redaction/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\noverrides:\n  defaults: {}\n"
  },
  {
    "path": "otel-examples/resource-enrichment/README.md",
    "content": "# Resource Enrichment\n\nAutomatically enrich all telemetry signals with host, OS, and container metadata using the Alloy OTel pipeline -- without changing application code.\n\n## What This Demonstrates\n\n- **`resourcedetection` processor** with `env`, `system`, and `docker` detectors to discover environment metadata\n- **`resource` processor** to add custom attributes (`deployment.environment`, `service.namespace`)\n- How the collector adds context that apps do not set themselves (hostname, OS type, architecture)\n- **Debug exporter** with `detailed` verbosity to inspect enriched resource attributes\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\nOpen Grafana at [http://localhost:3000](http://localhost:3000).\n\n### Check enriched traces in Tempo\n\n1. Go to Explore > Tempo.\n2. Search for traces from `enrichment-demo`.\n3. Click on any trace and expand the resource attributes. You should see attributes the app did **not** set:\n   - `host.name` -- the collector container's hostname\n   - `os.type` -- detected OS\n   - `host.arch` -- CPU architecture\n   - `deployment.environment` = `demo`\n   - `service.namespace` = `otel-examples`\n\n### Check enriched metrics in Prometheus\n\n1. Go to Explore > Prometheus.\n2. 
Query `app_requests_total` -- the metric labels should include `deployment_environment`, `service_namespace`, and the other resource attributes listed under `promote_resource_attributes` in `prom-config.yaml`.\n\n### Inspect debug exporter output\n\n```bash\ndocker compose logs alloy\n```\n\nLook for the `debug` exporter output showing the full resource with detected attributes attached.\n\n### Check the Alloy OTel pipeline\n\nVisit the Alloy OTel HTTP server at [http://localhost:8888](http://localhost:8888).\n\n## Key Configuration\n\nThe `config-otel.yaml` pipeline uses two processors:\n\n1. **`resourcedetection`** -- Auto-detects environment metadata:\n   - `env` detector: reads the `OTEL_RESOURCE_ATTRIBUTES` environment variable\n   - `system` detector: discovers `host.name`, `os.type`, `host.arch`\n   - `docker` detector: queries the Docker daemon for `host.name` and `os.type` (requires Docker socket mount)\n   - `override: false` ensures attributes already present on the telemetry (such as those set by the app) are not overwritten\n\n2. **`resource`** -- Adds static attributes:\n   - `deployment.environment` = `demo`\n   - `service.namespace` = `otel-examples`\n   - Uses the `upsert` action, which inserts the attribute when it is missing and overwrites it when it already exists\n\nNote: The Alloy container mounts `/var/run/docker.sock` read-only to enable the Docker detector.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/resource-enrichment/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY app.py .\nCMD [\"python\", \"app.py\"]\n"
  },
  {
    "path": "otel-examples/resource-enrichment/app/app.py",
    "content": "\"\"\"\nDemo Flask app for the resource-enrichment scenario.\n\nA simple app that generates traces and metrics WITHOUT setting host/container\nmetadata. The Alloy OTel pipeline uses resourcedetection + resource processors\nto automatically enrich all signals with environment attributes.\n\"\"\"\n\nimport random\nimport threading\nimport time\n\nfrom flask import Flask, jsonify\nfrom opentelemetry import trace, metrics\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.sdk.metrics import MeterProvider\nfrom opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.sdk.resources import Resource\n\n# --- OTel Setup (minimal resource - no host/container info) ---\nresource = Resource.create({\n    \"service.name\": \"enrichment-demo\",\n    \"service.version\": \"1.0.0\",\n})\n\n# Traces\ntracer_provider = TracerProvider(resource=resource)\ntracer_provider.add_span_processor(\n    BatchSpanProcessor(OTLPSpanExporter(endpoint=\"alloy:4317\", insecure=True))\n)\ntrace.set_tracer_provider(tracer_provider)\ntracer = trace.get_tracer(__name__)\n\n# Metrics\nmetric_reader = PeriodicExportingMetricReader(\n    OTLPMetricExporter(endpoint=\"alloy:4317\", insecure=True),\n    export_interval_millis=10000,\n)\nmeter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])\nmetrics.set_meter_provider(meter_provider)\nmeter = metrics.get_meter(__name__)\n\n# Custom metrics\nrequest_counter = meter.create_counter(\"app.requests\", description=\"Total requests\")\nrequest_duration = meter.create_histogram(\"app.request.duration\", unit=\"ms\", description=\"Request duration\")\n\n# --- Flask App 
---\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\n\n\n@app.route(\"/api/users\")\ndef list_users():\n    \"\"\"Returns a list of mock users.\"\"\"\n    with tracer.start_as_current_span(\"fetch-users\") as span:\n        start = time.time()\n        user_count = random.randint(1, 50)\n        span.set_attribute(\"user.count\", user_count)\n        time.sleep(random.uniform(0.01, 0.1))\n\n        request_counter.add(1, {\"endpoint\": \"/api/users\", \"method\": \"GET\"})\n        request_duration.record((time.time() - start) * 1000, {\"endpoint\": \"/api/users\"})\n\n        return jsonify({\"users\": [f\"user-{i}\" for i in range(user_count)]})\n\n\n@app.route(\"/api/items\")\ndef list_items():\n    \"\"\"Returns a list of mock items.\"\"\"\n    with tracer.start_as_current_span(\"fetch-items\") as span:\n        start = time.time()\n        item_count = random.randint(1, 100)\n        span.set_attribute(\"item.count\", item_count)\n        time.sleep(random.uniform(0.01, 0.15))\n\n        request_counter.add(1, {\"endpoint\": \"/api/items\", \"method\": \"GET\"})\n        request_duration.record((time.time() - start) * 1000, {\"endpoint\": \"/api/items\"})\n\n        return jsonify({\"items\": [f\"item-{i}\" for i in range(item_count)]})\n\n\n@app.route(\"/health\")\ndef health():\n    return jsonify({\"status\": \"healthy\"})\n\n\ndef load_generator():\n    \"\"\"Background thread that hits endpoints every 2 seconds.\"\"\"\n    import requests\n\n    base_url = \"http://localhost:8080\"\n    time.sleep(5)\n\n    while True:\n        try:\n            endpoint = random.choice([\"/api/users\", \"/api/items\"])\n            requests.get(f\"{base_url}{endpoint}\", timeout=5)\n        except Exception:\n            pass\n        time.sleep(2)\n\n\nif __name__ == \"__main__\":\n    thread = threading.Thread(target=load_generator, daemon=True)\n    thread.start()\n    app.run(host=\"0.0.0.0\", port=8080)\n"
  },
  {
    "path": "otel-examples/resource-enrichment/app/requirements.txt",
    "content": "flask\nrequests\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests\n"
  },
  {
    "path": "otel-examples/resource-enrichment/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Resource Enrichment\n#\n# Demonstrates using the resourcedetection processor to automatically\n# discover and attach environment metadata (host, OS, Docker container)\n# to all telemetry signals without any app-level changes.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  # Auto-detect environment metadata\n  resourcedetection:\n    detectors: [env, system, docker]\n    system:\n      hostname_sources: [\"os\"]\n      resource_attributes:\n        host.name:\n          enabled: true\n        os.type:\n          enabled: true\n        host.arch:\n          enabled: true\n    docker:\n      resource_attributes:\n        host.name:\n          enabled: true\n        os.type:\n          enabled: true\n    timeout: 5s\n    override: false\n\n  # Add custom resource attributes\n  resource:\n    attributes:\n      - key: deployment.environment\n        value: demo\n        action: upsert\n      - key: service.namespace\n        value: otel-examples\n        action: upsert\n\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\n  otlphttp/prometheus:\n    endpoint: http://prometheus:9090/api/v1/otlp\n    tls:\n      insecure: true\n\n  debug:\n    verbosity: detailed\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [resourcedetection, resource, batch]\n      exporters: [otlp/tempo, debug]\n    metrics:\n      receivers: [otlp]\n      processors: [resourcedetection, resource, batch]\n      exporters: [otlphttp/prometheus]\n"
  },
  {
    "path": "otel-examples/resource-enrichment/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/resource-enrichment/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n      - OTEL_SERVICE_NAME=enrichment-demo\n"
  },
  {
    "path": "otel-examples/resource-enrichment/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Prometheus for metrics storage (with OTLP receiver)\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for trace storage\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Alloy in OTel engine mode (with Docker socket for container detection)\n  alloy:\n    image: 
grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    command: otel --config=/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888       # OTel engine HTTP server\n      - 4317:4317       # OTLP gRPC\n      - 4318:4318       # OTLP HTTP\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n      - /var/run/docker.sock:/var/run/docker.sock:ro\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Demo app with minimal resource attributes (collector enriches them)\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=enrichment-demo\n    depends_on:\n      - alloy\n"
  },
  {
    "path": "otel-examples/resource-enrichment/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\notlp:\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - deployment.environment\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-examples/resource-enrichment/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\ndistributor:\n  receivers:\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\ningester:\n  max_block_duration: 5m\ncompactor:\n  compaction:\n    block_retention: 720h\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks]\n      generate_native_histograms: both\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/README.md",
    "content": "# Routing Multi-Tenant\n\nDemonstrates using the OTel Collector **forward connector** and **filter processor** to route logs from different tenants into separate Loki organizations. A single OTLP intake pipeline fans out to per-tenant pipelines, each filtering by a `tenant` resource attribute and exporting with the correct `X-Scope-OrgID` header.\n\n## What This Demonstrates\n\n- **Forward connector** to fan out logs from one pipeline into multiple downstream pipelines\n- **Filter processor** to keep only logs matching a specific tenant\n- **Resource processor** to enrich logs with per-tenant attributes\n- **Multi-tenant Loki** with `auth_enabled: true` and `X-Scope-OrgID` header routing\n- Querying isolated tenant data in Grafana using separate datasources\n\n## Prerequisites\n\n- Docker and Docker Compose\n\n## Run\n\n```bash\ndocker compose up -d\n```\n\nThe log generator automatically sends logs for both tenants every 2 seconds.\n\n## Alloy UI\n\nThe Alloy pipeline debugging UI is available at [http://localhost:12345](http://localhost:12345). This is enabled by the `alloyengine` extension in `config-otel.yaml`, which runs the River UI alongside the OTel pipeline.\n\nIf you prefer a pure OTel config without the Alloy UI, remove the `extensions` block and the `extensions: [alloyengine]` line from `config-otel.yaml`.\n\n## Explore\n\n1. Open Grafana at [http://localhost:3000](http://localhost:3000) (no login required).\n2. Go to **Explore**.\n\n### Query team-a logs\n\n3. Select the **Loki (team-a)** datasource and run:\n\n```logql\n{service_name=\"frontend-service\"}\n```\n\nYou should only see logs from team-a (frontend-service messages).\n\n### Query team-b logs\n\n4. Switch to the **Loki (team-b)** datasource and run:\n\n```logql\n{service_name=\"order-service\"}\n```\n\nYou should only see logs from team-b (order-service messages).\n\n### Verify isolation\n\n5. 
Confirm that team-a's datasource cannot see team-b's logs and vice versa -- this is enforced by Loki's multi-tenant `X-Scope-OrgID` header.\n\n## Key Configuration\n\nThe `config-otel.yaml` uses a two-stage architecture built from three pipelines:\n\n1. **Intake pipeline** (`logs/intake`) -- receives all OTLP logs and exports to two forward connectors (`forward/team-a` and `forward/team-b`).\n2. **Per-tenant pipelines** (`logs/team-a`, `logs/team-b`) -- each receives from its forward connector, applies a filter processor that drops logs not matching the tenant, enriches with a resource processor, and exports to a tenant-specific Loki exporter with the appropriate `X-Scope-OrgID` header.\n\nThe filter processors use `resource.attributes[\"tenant\"] != \"team-a\"` (and `team-b`) to drop non-matching logs, effectively routing each tenant's data to its own Loki organization.\n\n## Stop\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/app/generate_logs.py",
    "content": "\"\"\"\nMulti-tenant log generator using OTel SDK.\n\nAlternates between sending logs with resource attribute tenant=\"team-a\"\nand tenant=\"team-b\" via OTLP gRPC to alloy:4317.\n\"\"\"\n\nimport logging\nimport time\nimport random\n\nfrom opentelemetry.sdk.resources import Resource\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter\n\nTEAM_A_MESSAGES = [\n    \"Team A: Deployed frontend v2.3.1 to production\",\n    \"Team A: User authentication service healthy\",\n    \"Team A: CDN cache invalidation completed\",\n    \"Team A: A/B test experiment-42 started for 10% of users\",\n    \"Team A: Search index rebuild finished in 23s\",\n    \"Team A: Rate limiter triggered for IP range 10.0.0.0/8\",\n]\n\nTEAM_B_MESSAGES = [\n    \"Team B: Payment gateway latency increased to 450ms\",\n    \"Team B: Inventory sync completed for warehouse-west\",\n    \"Team B: Order fulfillment pipeline processed 1,247 orders\",\n    \"Team B: Database replica lag at 120ms\",\n    \"Team B: Shipping label API returned 503, retrying\",\n    \"Team B: Nightly report generation started\",\n]\n\nLEVELS = [logging.DEBUG, logging.INFO, logging.INFO, logging.WARNING, logging.ERROR]\n\n\ndef create_logger(tenant: str, service_name: str) -> logging.Logger:\n    \"\"\"Create an OTel-instrumented logger for a specific tenant.\"\"\"\n    resource = Resource.create({\n        \"service.name\": service_name,\n        \"tenant\": tenant,\n    })\n    exporter = OTLPLogExporter(endpoint=\"alloy:4317\", insecure=True)\n    provider = LoggerProvider(resource=resource)\n    provider.add_log_record_processor(BatchLogRecordProcessor(exporter))\n\n    handler = LoggingHandler(level=logging.DEBUG, logger_provider=provider)\n    logger = logging.getLogger(f\"tenant-{tenant}\")\n    logger.setLevel(logging.DEBUG)\n    
logger.addHandler(handler)\n    return logger\n\n\ndef main():\n    print(\"Starting multi-tenant log generator...\")\n    time.sleep(3)  # Wait for Alloy to be ready\n\n    logger_a = create_logger(\"team-a\", \"frontend-service\")\n    logger_b = create_logger(\"team-b\", \"order-service\")\n\n    while True:\n        # Send a team-a log\n        level = random.choice(LEVELS)\n        msg = random.choice(TEAM_A_MESSAGES)\n        logger_a.log(level, msg)\n\n        time.sleep(1)\n\n        # Send a team-b log\n        level = random.choice(LEVELS)\n        msg = random.choice(TEAM_B_MESSAGES)\n        logger_b.log(level, msg)\n\n        time.sleep(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/app/requirements.txt",
    "content": "opentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp-proto-grpc\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/config-otel.yaml",
    "content": "#\n# OTel Collector YAML: Multi-Tenant Routing\n#\n# Demonstrates using the forward connector to fan out logs into\n# multiple pipelines, then filter processors to route by tenant\n# attribute. Each tenant gets its own processing and Loki org ID.\n#\n# Available connectors in Alloy OTel Engine: count, grafanacloud,\n# servicegraph, spanmetrics, forward.\n#\n\nextensions:\n  alloyengine:\n    config:\n      file: /etc/alloy/config.alloy\n    flags:\n      server.http.listen-addr: 0.0.0.0:12345\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nconnectors:\n  # Fork logs into per-tenant pipelines\n  forward/team-a: {}\n  forward/team-b: {}\n\nprocessors:\n  batch: {}\n\n  # Keep only team-a logs\n  filter/team-a:\n    error_mode: ignore\n    logs:\n      log_record:\n        - resource.attributes[\"tenant\"] != \"team-a\"\n\n  # Keep only team-b logs\n  filter/team-b:\n    error_mode: ignore\n    logs:\n      log_record:\n        - resource.attributes[\"tenant\"] != \"team-b\"\n\n  # Add team resource attribute for team-a\n  resource/team-a:\n    attributes:\n      - key: team\n        value: team-a\n        action: upsert\n\n  # Add team resource attribute for team-b\n  resource/team-b:\n    attributes:\n      - key: team\n        value: team-b\n        action: upsert\n\nexporters:\n  otlphttp/loki-team-a:\n    endpoint: http://loki:3100/otlp\n    headers:\n      X-Scope-OrgID: team-a\n\n  otlphttp/loki-team-b:\n    endpoint: http://loki:3100/otlp\n    headers:\n      X-Scope-OrgID: team-b\n\nservice:\n  extensions: [alloyengine]\n  pipelines:\n    # Intake: receive and fan out to both tenant pipelines\n    logs/intake:\n      receivers: [otlp]\n      exporters: [forward/team-a, forward/team-b]\n    # Team A pipeline: filter + enrich + export\n    logs/team-a:\n      receivers: [forward/team-a]\n      processors: [filter/team-a, resource/team-a, batch]\n      exporters: 
[otlphttp/loki-team-a]\n    # Team B pipeline: filter + enrich + export\n    logs/team-b:\n      receivers: [forward/team-b]\n      processors: [filter/team-b, resource/team-b, batch]\n      exporters: [otlphttp/loki-team-b]\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/config.alloy",
    "content": "// Minimal Alloy config to enable the Alloy UI alongside the OTel Engine.\n// The OTel pipeline is defined in config-otel.yaml.\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/docker-compose.coda.yml",
    "content": "services:\n  log-generator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    volumes:\n      - ./app/generate_logs.py:/app/generate_logs.py\n      - ./app/requirements.txt:/app/requirements.txt\n    command:\n      - sh\n      - -c\n      - \"pip install -r /app/requirements.txt && python /app/generate_logs.py\"\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 8888:8888\n      - 4317:4317/tcp\n      - 4318:4318/tcp\n      - 12345:12345     # Alloy UI\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: otel --config=/etc/alloy/config-otel.yaml\n    depends_on:\n      - loki\n\n  log-generator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./app/generate_logs.py:/app/generate_logs.py\n      - ./app/requirements.txt:/app/requirements.txt\n    command:\n      - sh\n      - -c\n      - \"pip install -r /app/requirements.txt && python /app/generate_logs.py\"\n    depends_on:\n      - alloy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    depends_on:\n      - loki\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki (team-a)\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            httpHeaderName1: X-Scope-OrgID\n          secureJsonData:\n            httpHeaderValue1: team-a\n        - name: Loki (team-b)\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n       
   basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n          jsonData:\n            httpHeaderName1: X-Scope-OrgID\n          secureJsonData:\n            httpHeaderValue1: team-b\n        EOF\n        /run.sh\n"
  },
  {
    "path": "otel-examples/routing-multi-tenant/loki-config.yaml",
    "content": "auth_enabled: true\nserver:\n  http_listen_port: 3100\n  grpc_listen_port: 9096\n  log_level: info\ncommon:\n  instance_addr: 127.0.0.1\n  path_prefix: /tmp/storage\n  storage:\n    filesystem:\n      chunks_directory: /tmp/storage/chunks\n      rules_directory: /tmp/storage/rules\n  replication_factor: 1\n  ring:\n    kvstore:\n      store: inmemory\nquery_range:\n  results_cache:\n    cache:\n      embedded_cache:\n        enabled: true\n        max_size_mb: 100\nlimits_config:\n  metric_aggregation_enabled: true\nschema_config:\n  configs:\n    - from: 2020-10-24\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\npattern_ingester:\n  enabled: true\n  metric_aggregation:\n    loki_address: localhost:3100\nruler:\n  alertmanager_url: http://localhost:9093\nfrontend:\n  encoding: protobuf\n"
  },
  {
    "path": "otel-metrics-pipeline/README.md",
    "content": "# OTel Metrics Pipeline\n\nDemonstrates a full OpenTelemetry metrics pipeline through Grafana Alloy: a Python application generates OTLP metrics which flow through Alloy (with batching and attribute transformation) into Prometheus, and are visualized in Grafana.\n\n## Overview\n\nThe pipeline includes:\n- **Python demo app** -- generates counters, histograms, and up-down counters via the OpenTelemetry SDK, sending them as OTLP/gRPC to Alloy.\n- **Grafana Alloy** -- receives OTLP metrics, batches them, applies a transform processor (adds a `deployment.environment` resource attribute), and exports via OTLP/HTTP to Prometheus.\n- **Prometheus** -- ingests metrics through its native OTLP receiver with native histogram support enabled.\n- **Grafana** -- auto-provisioned with a Prometheus datasource for exploring the metrics.\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd otel-metrics-pipeline\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n\n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh otel-metrics-pipeline\n   ```\n\n4. Access the services:\n   - **Grafana**: http://localhost:3000\n   - **Alloy UI**: http://localhost:12345\n   - **Prometheus**: http://localhost:9090\n\n## What to Expect\n\nAfter a few seconds the demo app begins emitting metrics. You can explore them in several ways:\n\n- **Prometheus** -- navigate to http://localhost:9090 and query for metrics such as `app_requests_total`, `app_errors_total`, `app_request_duration_milliseconds`, or `app_active_users`. 
Note that OTLP metric names are translated to Prometheus conventions (dots become underscores, units are appended as suffixes).\n- **Grafana Explore** -- open http://localhost:3000/explore, select the Prometheus datasource, and build PromQL queries against the ingested metrics.\n- **Alloy pipeline UI** -- visit http://localhost:12345 to inspect the live component graph showing the receiver, batch processor, transform processor, and exporter.\n\n## Metrics Generated\n\n| Metric | Type | Description |\n|---|---|---|\n| `app.requests.total` | Counter | Total HTTP requests by endpoint, method, and status |\n| `app.errors.total` | Counter | Total errors by endpoint |\n| `app.request.duration` | Histogram | Request latency in milliseconds |\n| `app.active_users` | UpDownCounter | Current active users by region |\n\n## Architecture\n\n```\n┌─────────────┐  OTLP/gRPC   ┌───────────────┐  OTLP/HTTP  ┌────────────┐\n│  Python App  │─────────────▶│  Grafana Alloy │────────────▶│ Prometheus │\n│ (metrics gen)│   :4317      │  (batch +      │   :9090     │            │\n└─────────────┘               │   transform)   │             └─────┬──────┘\n                              └───────────────┘                    │\n                                   :12345                          │\n                                 (Alloy UI)                        ▼\n                                                             ┌──────────┐\n                                                             │ Grafana  │\n                                                             │  :3000   │\n                                                             └──────────┘\n```\n"
  },
  {
    "path": "otel-metrics-pipeline/app/main.py",
    "content": "import time\nimport random\n\nfrom opentelemetry import metrics\nfrom opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter\nfrom opentelemetry.sdk.metrics import MeterProvider\nfrom opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader\nfrom opentelemetry.sdk.resources import Resource\n\nresource = Resource.create({\"service.name\": \"demo-metrics-app\"})\nexporter = OTLPMetricExporter()\nreader = PeriodicExportingMetricReader(exporter, export_interval_millis=5000)\nprovider = MeterProvider(resource=resource, metric_readers=[reader])\nmetrics.set_meter_provider(provider)\n\nmeter = metrics.get_meter(__name__)\n\n# Create different metric types\nrequest_counter = meter.create_counter(\"app.requests.total\", description=\"Total requests\", unit=\"requests\")\nerror_counter = meter.create_counter(\"app.errors.total\", description=\"Total errors\", unit=\"errors\")\nlatency_histogram = meter.create_histogram(\"app.request.duration\", description=\"Request duration\", unit=\"ms\")\nactive_users = meter.create_up_down_counter(\"app.active_users\", description=\"Active users\")\n\nprint(\"Starting OTLP metrics generator...\")\nwhile True:\n    # Simulate request metrics\n    endpoint = random.choice([\"/api/users\", \"/api/orders\", \"/api/products\", \"/health\"])\n    method = random.choice([\"GET\", \"POST\"])\n    status = random.choice([\"200\", \"200\", \"200\", \"200\", \"404\", \"500\"])\n\n    request_counter.add(1, {\"endpoint\": endpoint, \"method\": method, \"status\": status})\n\n    if status == \"500\":\n        error_counter.add(1, {\"endpoint\": endpoint})\n\n    latency = random.uniform(5, 500) if status != \"500\" else random.uniform(500, 2000)\n    latency_histogram.record(latency, {\"endpoint\": endpoint, \"method\": method})\n\n    # Simulate active users fluctuation\n    active_users.add(random.choice([-1, 0, 1]), {\"region\": random.choice([\"us-east\", \"eu-west\"])})\n\n    
time.sleep(1)\n"
  },
  {
    "path": "otel-metrics-pipeline/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for OpenTelemetry Metrics Pipeline\n#\n# This is the OTel-native equivalent of config.alloy for use with the Alloy OTel Engine.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  batch: {}\n\n  transform:\n    error_mode: ignore\n    metric_statements:\n      - context: resource\n        statements:\n          - set(attributes[\"deployment.environment\"], \"demo\")\n\nexporters:\n  otlphttp/prometheus:\n    endpoint: http://prometheus:9090/api/v1/otlp\n    tls:\n      insecure: true\n\nservice:\n  pipelines:\n    metrics:\n      receivers: [otlp]\n      processors: [batch, transform]\n      exporters: [otlphttp/prometheus]\n"
  },
  {
    "path": "otel-metrics-pipeline/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\n// Receive OTLP metrics from the demo app\notelcol.receiver.otlp \"default\" {\n\thttp { }\n\n\tgrpc { }\n\n\toutput {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t}\n}\n\n// Batch metrics for efficient export\notelcol.processor.batch \"default\" {\n\toutput {\n\t\tmetrics = [otelcol.processor.transform.default.input]\n\t}\n}\n\n// Transform metric attributes (example: add a deployment label)\notelcol.processor.transform \"default\" {\n\terror_mode = \"ignore\"\n\n\tmetric_statements {\n\t\tcontext    = \"resource\"\n\t\tstatements = [\n\t\t\t\"set(attributes[\\\"deployment.environment\\\"], \\\"demo\\\")\",\n\t\t]\n\t}\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.otlphttp.prometheus.input]\n\t}\n}\n\n// Export metrics to Prometheus via OTLP\notelcol.exporter.otlphttp \"prometheus\" {\n\tclient {\n\t\tendpoint = \"http://prometheus:9090/api/v1/otlp\"\n\n\t\ttls {\n\t\t\tinsecure = true\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "otel-metrics-pipeline/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the config.alloy file written in Alloy configuration syntax (formerly River).\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n"
  },
  {
    "path": "otel-metrics-pipeline/docker-compose.coda.yml",
    "content": "services:\n  app:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: sh -c \"pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp-proto-grpc && python3 main.py\"\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n"
  },
  {
    "path": "otel-metrics-pipeline/docker-compose.yml",
    "content": "\nservices:\n  # Python app that generates OTLP metrics\n  app:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: sh -c \"pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp-proto-grpc && python3 main.py\"\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n    depends_on:\n      - alloy\n\n  # Alloy for telemetry pipeline\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n      - 4317:4317        # OTLP gRPC\n      - 4318:4318        # OTLP HTTP\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - prometheus\n\n  # Prometheus for metrics storage\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: 
false\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n"
  },
  {
    "path": "otel-metrics-pipeline/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n\notlp:\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - deployment.environment\n\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-span-metrics/README.md",
    "content": "# OTel Span Metrics (RED Metrics from Traces)\n\nThis scenario demonstrates how to generate **RED metrics** (Request rate, Error rate, Duration) from OpenTelemetry traces using Grafana Alloy's `otelcol.connector.spanmetrics` component.\n\n## Overview\n\nInstead of relying on Tempo's built-in metrics generator, this approach uses Alloy's spanmetrics connector to derive metrics directly from trace spans in the telemetry pipeline. This gives you fine-grained control over which dimensions are extracted and how histograms are configured.\n\n### Architecture\n\n```\nFlask App ---(OTLP/gRPC)---> Alloy ---> Tempo (traces)\n                                |\n                                +---> spanmetrics connector ---> Prometheus (RED metrics)\n```\n\n### What Gets Generated\n\nThe `otelcol.connector.spanmetrics` component produces the following metrics from every span:\n\n- **`duration_milliseconds`** - Histogram of span durations (for latency/duration analysis)\n- **`calls`** - Counter of span calls, with `status_code` label (for request rate and error rate)\n\nAdditional dimensions extracted: `http.method`, `http.status_code`.\n\n## Running\n\n```bash\n# From repo root\n./run-example.sh otel-span-metrics\n\n# Or directly\ncd otel-span-metrics && docker compose up -d\n```\n\n## Accessing the UIs\n\n| Service    | URL                        |\n|------------|----------------------------|\n| Grafana    | http://localhost:3000      |\n| Alloy      | http://localhost:12345     |\n| Prometheus | http://localhost:9090      |\n| Tempo      | http://localhost:3200      |\n| Demo App   | http://localhost:5000      |\n\n## Exploring the Metrics\n\nOnce the scenario is running and the load generator has been active for a minute or so, open Grafana and navigate to the **Explore** page with the **Prometheus** datasource. 
Try these queries:\n\n```promql\n# Request rate by service and span name\nrate(duration_milliseconds_count[5m])\n\n# Error rate (spans with error status)\nrate(calls{status_code=\"STATUS_CODE_ERROR\"}[5m])\n\n# P95 latency by span name\nhistogram_quantile(0.95, rate(duration_milliseconds_bucket[5m]))\n```\n\n## Stopping\n\n```bash\ncd otel-span-metrics && docker compose down\n```\n"
  },
  {
    "path": "otel-span-metrics/app/load.py",
    "content": "import requests, time, random\nendpoints = [\"http://app:5000/\", \"http://app:5000/api/data\", \"http://app:5000/api/slow\"]\nwhile True:\n    try:\n        url = random.choice(endpoints[:2])  # mostly hit fast endpoints\n        if random.random() < 0.1:\n            url = endpoints[2]  # occasionally hit slow\n        requests.get(url, timeout=5)\n    except:\n        pass\n    time.sleep(random.uniform(0.5, 2.0))\n"
  },
  {
    "path": "otel-span-metrics/app/main.py",
    "content": "from flask import Flask, jsonify\nimport random, time\n\nfrom opentelemetry import trace\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.sdk.resources import Resource\n\nresource = Resource.create({\"service.name\": \"demo-app\"})\nprovider = TracerProvider(resource=resource)\nexporter = OTLPSpanExporter()\nprovider.add_span_processor(BatchSpanProcessor(exporter))\ntrace.set_tracer_provider(provider)\ntracer = trace.get_tracer(__name__)\n\napp = Flask(__name__)\n\n@app.route(\"/\")\ndef index():\n    with tracer.start_as_current_span(\"index\"):\n        time.sleep(random.uniform(0.01, 0.05))\n        return jsonify({\"status\": \"ok\"})\n\n@app.route(\"/api/data\")\ndef get_data():\n    with tracer.start_as_current_span(\"get-data\"):\n        time.sleep(random.uniform(0.02, 0.1))\n        if random.random() < 0.1:\n            raise Exception(\"Random error\")\n        return jsonify({\"data\": [1, 2, 3]})\n\n@app.route(\"/api/slow\")\ndef slow():\n    with tracer.start_as_current_span(\"slow-operation\"):\n        time.sleep(random.uniform(0.5, 2.0))\n        return jsonify({\"status\": \"done\"})\n\nif __name__ == \"__main__\":\n    app.run(host=\"0.0.0.0\", port=5000)\n"
  },
  {
    "path": "otel-span-metrics/app/requirements.txt",
    "content": "flask\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp-proto-grpc\n"
  },
  {
    "path": "otel-span-metrics/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for Span Metrics (RED Metrics from Traces)\n#\n# This is the OTel-native equivalent of config.alloy for use with the Alloy OTel Engine.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  batch: {}\n\nconnectors:\n  spanmetrics:\n    histogram:\n      explicit: {}\n    dimensions:\n      - name: http.method\n      - name: http.status_code\n    metrics_flush_interval: 5s\n\nexporters:\n  otlphttp/prometheus:\n    endpoint: http://prometheus:9090/api/v1/otlp\n    tls:\n      insecure: true\n\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [spanmetrics, otlp/tempo]\n    metrics:\n      receivers: [spanmetrics]\n      exporters: [otlphttp/prometheus]\n"
  },
  {
    "path": "otel-span-metrics/config.alloy",
    "content": "livedebugging {\n\tenabled = true\n}\n\n// Receive OTel traces from the demo app\notelcol.receiver.otlp \"default\" {\n\thttp { }\n\n\tgrpc { }\n\n\toutput {\n\t\ttraces = [otelcol.processor.batch.default.input]\n\t}\n}\n\n// Batch traces for efficiency\notelcol.processor.batch \"default\" {\n\toutput {\n\t\ttraces = [\n\t\t\totelcol.connector.spanmetrics.default.input,\n\t\t\totelcol.exporter.otlp.tempo.input,\n\t\t]\n\t}\n}\n\n// Generate RED metrics from spans\notelcol.connector.spanmetrics \"default\" {\n\thistogram {\n\t\texplicit { }\n\t}\n\n\tdimension {\n\t\tname = \"http.method\"\n\t}\n\n\tdimension {\n\t\tname = \"http.status_code\"\n\t}\n\n\tmetrics_flush_interval = \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.otlphttp.prometheus.input]\n\t}\n}\n\n// Send RED metrics to Prometheus\notelcol.exporter.otlphttp \"prometheus\" {\n\tclient {\n\t\tendpoint = \"http://prometheus:9090/api/v1/otlp\"\n\n\t\ttls {\n\t\t\tinsecure = true\n\t\t}\n\t}\n}\n\n// Send traces to Tempo\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = \"tempo:4317\"\n\n\t\ttls {\n\t\t\tinsecure = true\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "otel-span-metrics/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the River/HCL config.alloy file.\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n"
  },
  {
    "path": "otel-span-metrics/docker-compose.coda.yml",
    "content": "services:\n  app:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: sh -c \"pip install -r requirements.txt && python main.py\"\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317\n\n  load:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: sh -c \"pip install requests && python load.py\"\n"
  },
  {
    "path": "otel-span-metrics/docker-compose.yml",
    "content": "\nservices:\n  # Python Flask app that generates traces\n  app:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 5000:5000/tcp\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: sh -c \"pip install -r requirements.txt && python main.py\"\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n    depends_on:\n      - alloy\n\n  # Load generator to continuously hit the app endpoints\n  load:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    volumes:\n      - ./app:/app\n    working_dir: /app\n    command: sh -c \"pip install requests && python load.py\"\n    depends_on:\n      - app\n\n  # Alloy for telemetry pipeline with spanmetrics connector\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n      - 4317:4317        # OTLP gRPC\n      - 4318:4318        # OTLP HTTP\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for trace storage\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n    depends_on:\n      - tempo-init\n      - memcached\n\n  # Init container to set up Tempo storage directories\n  tempo-init:\n    image: 
grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    user: root\n    entrypoint:\n      - \"chown\"\n      - \"10001:10001\"\n      - \"/var/tempo\"\n    volumes:\n      - tempo-data:/var/tempo\n\n  memcached:\n    image: memcached:1.6@sha256:277e0c4f249b118e95ab10e535bae2fa1af772271d9152f3468e58d59348db56\n    container_name: memcached\n    ports:\n      - \"11211:11211\"\n    environment:\n      - MEMCACHED_MAX_MEMORY=64m\n      - MEMCACHED_THREADS=4\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\nvolumes:\n  tempo-data:\n"
  },
  {
    "path": "otel-span-metrics/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n\notlp:\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-span-metrics/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search\n    memcached:\n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:\n    jaeger:\n      protocols:\n        thrift_http:\n          endpoint: \"tempo:14268\"\n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m\n\ncompactor:\n  compaction:\n    block_retention: 720h\n\nstorage:\n  trace:\n    backend: local\n    wal:\n      path: /var/tempo/wal\n    local:\n      path: /var/tempo/blocks\n"
  },
  {
    "path": "otel-tail-sampling/README.md",
    "content": "# OpenTelemetry Tail Sampling with Grafana Alloy\n\nThis example demonstrates how to implement tail sampling for OpenTelemetry traces using Grafana Alloy, allowing you to intelligently filter and sample traces based on various criteria.\n\n## Overview\n\nThe example includes:\n\n- A Python Flask application that automatically generates different types of traces in the background\n- Grafana Alloy configured with tail sampling policies and transform processor\n- Tempo for trace storage and querying\n- Prometheus for metrics collection\n- Grafana for visualization\n- Live debugging for monitoring the sampling process\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd otel-tail-sampling\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n   \n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh otel-tail-sampling\n   ```\n\n4. Access the demo application at http://localhost:8080\n5. Access Grafana at http://localhost:3000\n6. Access Prometheus at http://localhost:9090\n7. Access Alloy's live debugging endpoint at http://localhost:12345/debug/livedebugging\n\n## What to Expect\n\nThe demo application automatically generates various types of traces in the background:\n\n- **Simple Traces**: Basic single-span traces\n- **Nested Traces**: Traces with parent-child relationships\n- **Error Traces**: Traces containing errors\n- **High Latency Traces**: Traces with execution times over 5 seconds\n- **Delayed Chain Traces**: Service chains with Service D consistently having high latency (3-4 seconds)\n\nYou can also manually trigger trace generation using the web UI. 
The application will continuously generate a mix of these trace types in the background at random intervals.\n\n## Processing Pipeline\n\nThis example demonstrates a more complex trace processing pipeline with the following components:\n\n> Note: The Tail Sampling Processor is deliberately placed before the Batch Processor. This ensures that trace spans are presented to the tail sampler as early as possible, so that a decision period includes all relevant spans for a trace. Batching spans before sampling can prevent them from arriving at the sampler before a sampling decision is made (the decision period starts once the first span for a trace has been seen). This can lead to incorrect decisions being made, and means that correct results start to rely on a cache being enabled for future sampling decisions.\n\n1. **OTLP Receiver**: Receives traces from the application via gRPC or HTTP\n2. **Tail Sampling Processor**: Applies sampling policies based on trace properties\n3. **Batch Processor**: Groups spans for efficient processing\n4. **OTLP Exporter**: Sends sampled traces to Tempo\n\n## Tail Sampling Configuration\n\nThis example uses Alloy's `otelcol.processor.tail_sampling` processor, which makes sampling decisions based on the entire trace, not just individual spans. This allows for more intelligent sampling based on trace-wide properties.\n\n> Note: Tempo indexes on trace IDs and span IDs, not resource attributes. Make sure you only send one copy of each span to Tempo. If duplicate spans (the same trace ID and span ID) are sent, then when requesting trace IDs or carrying out TraceQL queries, the returned traces will consist of whichever duplicate span is encountered first. This means that subsequent queries will potentially not yield the same result, and that the service names for spans in the same trace could be a mix of both raw-traces and trace-demo-tail-sampled, or a trace could appear to be sampled when it was in fact unsampled, or vice versa. To ensure consistency, only one set of spans with a unique span ID and trace ID should be emitted to Tempo. 
\n\nThe tail sampling configuration includes the following policies:\n\n1. **Attribute-Based Sampling**: Samples traces with a specific attribute value\n   ```\n   policy {\n     name = \"test-attribute-policy\"\n     type = \"string_attribute\"\n     \n     string_attribute {\n       key    = \"test_attr_key_1\"\n       values = [\"test_attr_val_1\"]\n     }\n   }\n   ```\n\n2. **Error Sampling**: Always samples traces with ERROR status\n   ```\n   policy {\n     name = \"error-policy\"\n     type = \"status_code\"\n     \n     status_code {\n       status_codes = [\"ERROR\"]\n     }\n   }\n   ```\n\n3. **Latency-Based Sampling**: Samples traces that exceed a latency threshold\n   ```\n   policy {\n     name = \"latency-policy\"\n     type = \"latency\"\n     \n     latency {\n       threshold_ms = 5000  // 5 seconds\n     }\n   }\n   ```\n\n4. **Numerical Range Sampling**: Samples traces with a numeric attribute in a specific range\n   ```\n   policy {\n     name = \"numeric-policy\"\n     type = \"numeric_attribute\"\n     \n     numeric_attribute {\n       key       = \"key1\"\n       min_value = 70\n       max_value = 100\n     }\n   }\n   ```\n\n5. **URL-Based Filtering**: Excludes health check and metrics endpoints\n   ```\n   policy {\n     name = \"url-filter-policy\"\n     type = \"string_attribute\"\n     \n     string_attribute {\n       key             = \"http.url\"\n       values          = [\"/health\", \"/metrics\"]\n       invert_match    = true\n     }\n   }\n   ```\n\n6. 
**Probabilistic Sampling**: Samples a percentage of remaining traces\n   ```\n   policy {\n     name = \"probabilistic-policy\"\n     type = \"probabilistic\"\n     \n     probabilistic {\n       sampling_percentage = 10\n     }\n   }\n   ```\n\n## Live Debugging\n\nThis example enables Alloy's live debugging feature, which provides real-time insights into the sampling process:\n\n```\nlivedebugging {\n  enabled = true\n}\n```\n\nAccess the live debugging interface at http://localhost:12345 to see:\n\n- Current processing pipeline state\n- Trace sampling decisions in real-time\n- Policy hit counts and performance metrics\n- Throughput statistics\n\n## Sampling Implications\n\nWith tail sampling enabled in this example:\n\n- All error traces are preserved for troubleshooting\n- High latency traces (>5s) are kept for performance analysis\n- Traces with specific attribute values used for monitoring are retained\n- Health check and metrics endpoints are filtered out to reduce noise\n- A small percentage of other traces are kept for baseline monitoring\n- Traces not matching any criteria are dropped, reducing storage needs\n- Raw traces are stored with a different service name for comparison\n\n## Viewing Traces in Grafana\n\nTo view the sampled traces:\n\n1. Open Grafana (http://localhost:3000)\n2. Navigate to Explore\n3. Select the Tempo data source\n4. 
Use the Search tab to find traces based on various criteria\n\n## Sample Queries\n\nTry these queries in Grafana's Tempo Explorer:\n\n- Find all traces for the sampled service:\n  ```\n  {resource.service.name=\"trace-demo-tail-sampled\"}\n  ```\n\n- Find error traces:\n  ```\n  {status=error}\n  ```\n\n- Find high latency traces:\n  ```\n  {duration>5s}\n  ```\n\n- Find traces with a specific attribute:\n  ```\n  {span.test_attr_key_1=\"test_attr_val_1\"}\n  ```\n  \n- Find traces with Service D bottleneck:\n  ```\n  {span.service.latency=\"high\" && span.latency.category=\"bottleneck\"}\n  ```\n\n## Customizing\n\nYou can modify the `config.alloy` file to adjust the sampling policies:\n\n- Change the decision wait time to balance memory usage vs. complete trace visibility\n- Adjust the sampling thresholds to capture more or fewer traces\n- Add additional sampling policies based on your specific needs\n- Modify the existing policies to match your application's attributes\n- Update the transform processor to add or modify different attributes\n\n## Further Resources\n\n- [Grafana Alloy Tail Sampling Documentation](https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.tail_sampling/)\n- [Grafana Alloy Transform Processor Documentation](https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/)\n- [OpenTelemetry Tail Sampling Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor)\n- [Live Debugging in Grafana Alloy](https://grafana.com/docs/alloy/latest/debug-alloy-flow/) "
  },
  {
    "path": "otel-tail-sampling/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\n\nCOPY app.py .\n\nCMD [\"python\", \"app.py\"] "
  },
  {
    "path": "otel-tail-sampling/app/app.py",
    "content": "import os\nimport random\nimport time\nimport threading\nimport logging\nimport uuid\nfrom flask import Flask, request\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.resources import Resource, SERVICE_NAME\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\nimport requests\nfrom opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator\n\n# Configure logging\nlogging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')\nlogger = logging.getLogger(__name__)\n\n# Configure the tracer\nresource = Resource.create(attributes={\n    SERVICE_NAME: \"trace-demo-tail-sampled\"\n})\ntrace.set_tracer_provider(TracerProvider(resource=resource))\n\n# Configure the OTLP exporter using environment variables\n# OTEL_EXPORTER_OTLP_ENDPOINT will be used automatically\notlp_exporter = OTLPSpanExporter(endpoint=\"http://alloy:4317/v1/traces\", insecure=True)\nspan_processor = BatchSpanProcessor(span_exporter=otlp_exporter, max_export_batch_size=1)\ntrace.get_tracer_provider().add_span_processor(span_processor)\n\n# Create a tracer\ntracer = trace.get_tracer(__name__)\n\n# Create a propagator for handling trace context\npropagator = TraceContextTextMapPropagator()\n\n# Create a Flask application\napp = Flask(__name__)\n\n# Instrument Flask\nFlaskInstrumentor().instrument_app(app)\n\n# Instrument requests\nRequestsInstrumentor().instrument()\n\n# Background trace generation functions\ndef generate_simple_trace():\n    with tracer.start_as_current_span(\"simple-operation\") as span:\n        span.set_attribute(\"operation.type\", \"simple\")\n        span.set_attribute(\"operation.value\", random.randint(1, 100))\n 
       # Set a sampling-relevant attribute\n        span.set_attribute(\"test_attr_key_1\", \"test_attr_val_1\" if random.random() < 0.3 else \"other_value\")\n        time.sleep(0.1)  # Simulate work\n        logger.info(\"Generated simple trace\")\n\ndef generate_nested_trace():\n    with tracer.start_as_current_span(\"parent-operation\") as parent:\n        parent.set_attribute(\"operation.type\", \"parent\")\n        parent.set_attribute(\"key1\", random.randint(1, 100))  # For numeric attribute sampling\n        time.sleep(0.05)  # Simulate work\n        \n        with tracer.start_as_current_span(\"child-operation-1\") as child1:\n            child1.set_attribute(\"operation.type\", \"child\")\n            child1.set_attribute(\"child.number\", 1)\n            child1.set_attribute(\"key2\", \"value1\" if random.random() < 0.5 else \"other_value\")  # For string attribute sampling\n            time.sleep(0.05)  # Simulate work\n            \n        with tracer.start_as_current_span(\"child-operation-2\") as child2:\n            child2.set_attribute(\"operation.type\", \"child\")\n            child2.set_attribute(\"child.number\", 2)\n            time.sleep(0.05)  # Simulate work\n            \n            with tracer.start_as_current_span(\"grandchild-operation\") as grandchild:\n                grandchild.set_attribute(\"operation.type\", \"grandchild\")\n                time.sleep(0.05)  # Simulate work\n                \n        logger.info(\"Generated nested trace\")\n\ndef generate_error_trace():\n    with tracer.start_as_current_span(\"error-operation\") as span:\n        span.set_attribute(\"operation.type\", \"error\")\n        try:\n            # Simulate an error\n            result = 1 / 0\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            logger.info(\"Generated error trace\")\n\ndef generate_high_latency_trace():\n    with 
tracer.start_as_current_span(\"high-latency-operation\") as span:\n        span.set_attribute(\"operation.type\", \"high-latency\")\n        # Add a randomized latency between 3-10 seconds\n        latency = random.uniform(3.0, 10.0)\n        span.set_attribute(\"latency.seconds\", latency)\n        time.sleep(latency)  # Simulate high latency work\n        logger.info(f\"Generated high latency trace with {latency:.2f}s delay\")\n\ndef generate_delayed_chain_trace():\n    \"\"\"Generate a chain of service calls with service D having high latency\"\"\"\n    try:\n        with tracer.start_as_current_span(\"delayed-chain-root\") as span:\n            span.set_attribute(\"operation.step\", \"start\")\n            span.set_attribute(\"operation.type\", \"delayed-chain\")\n            \n            # Start the chain with Service A\n            req_id = random.randint(1000, 9999)\n            \n            # Instead of making HTTP calls in the background, simulate the chain directly\n            with tracer.start_as_current_span(\"service-a-handler\") as span_a:\n                span_a.set_attribute(\"service\", \"A\")\n                span_a.set_attribute(\"request.id\", str(req_id))\n                span_a.set_attribute(\"service.latency\", \"normal\")\n                span_a.set_attribute(\"http.url\", \"/delayed/service-a\")\n                time.sleep(0.1)  # Normal latency\n                \n                with tracer.start_as_current_span(\"service-b-handler\") as span_b:\n                    span_b.set_attribute(\"service\", \"B\")\n                    span_b.set_attribute(\"request.id\", str(req_id))\n                    span_b.set_attribute(\"service.latency\", \"normal\")\n                    span_b.set_attribute(\"http.url\", \"/delayed/service-b\")\n                    time.sleep(0.15)  # Normal latency\n                    \n                    with tracer.start_as_current_span(\"service-c-handler\") as span_c:\n                        
span_c.set_attribute(\"service\", \"C\")\n                        span_c.set_attribute(\"request.id\", str(req_id))\n                        span_c.set_attribute(\"service.latency\", \"normal\")\n                        span_c.set_attribute(\"http.url\", \"/delayed/service-c\")\n                        time.sleep(0.2)  # Normal latency\n                        \n                        with tracer.start_as_current_span(\"service-d-handler\") as span_d:\n                            span_d.set_attribute(\"service\", \"D\")\n                            span_d.set_attribute(\"request.id\", str(req_id))\n                            span_d.set_attribute(\"service.latency\", \"high\")\n                            span_d.set_attribute(\"latency.category\", \"bottleneck\")\n                            span_d.set_attribute(\"http.url\", \"/delayed/service-d\")\n                            \n                            # This service consistently has high latency (3-4 seconds)\n                            delay = random.uniform(3.0, 4.0)\n                            span_d.set_attribute(\"latency.seconds\", delay)\n                            time.sleep(delay)  # High latency\n                            \n                            with tracer.start_as_current_span(\"service-e-handler\") as span_e:\n                                span_e.set_attribute(\"service\", \"E\")\n                                span_e.set_attribute(\"request.id\", str(req_id))\n                                span_e.set_attribute(\"service.latency\", \"normal\")\n                                span_e.set_attribute(\"http.url\", \"/delayed/service-e\")\n                                time.sleep(0.1)  # Normal latency\n            \n            logger.info(\"Generated delayed chain trace with high latency in Service D\")\n    except Exception as e:\n        logger.error(f\"Error generating delayed chain trace: {e}\")\n\n# New function for generating true multi-service traces\ndef 
generate_multi_service_trace_bg():\n    \"\"\"Generate a trace that spans multiple services with true service.name differentiation\"\"\"\n    try:\n        # Create a unique trace ID for correlating spans\n        trace_id = str(uuid.uuid4())\n        transaction_id = str(uuid.uuid4())[:8]\n        logger.info(f\"Generating multi-service trace. Transaction ID: {transaction_id}\")\n        \n        # Simulate a microservice architecture with:\n        # 1. Frontend service (web-ui)\n        # 2. API Gateway (api-gateway)\n        # 3. Authentication service (auth-service)\n        # 4. User service (user-service)\n        # 5. Notification service (notification-service)\n        # 6. Database service (db-service)\n        \n        # Create a custom resource for each service\n        web_ui_resource = Resource.create(attributes={SERVICE_NAME: \"web-ui\"})\n        api_gw_resource = Resource.create(attributes={SERVICE_NAME: \"api-gateway\"})\n        auth_resource = Resource.create(attributes={SERVICE_NAME: \"auth-service\"})\n        user_resource = Resource.create(attributes={SERVICE_NAME: \"user-service\"})\n        notif_resource = Resource.create(attributes={SERVICE_NAME: \"notification-service\"})\n        db_resource = Resource.create(attributes={SERVICE_NAME: \"db-service\"})\n        \n        # Create tracers for each service\n        web_ui_tracer = trace.get_tracer(\"web-ui-tracer\", resource=web_ui_resource)\n        api_gw_tracer = trace.get_tracer(\"api-gw-tracer\", resource=api_gw_resource)\n        auth_tracer = trace.get_tracer(\"auth-tracer\", resource=auth_resource)\n        user_tracer = trace.get_tracer(\"user-tracer\", resource=user_resource)\n        notif_tracer = trace.get_tracer(\"notif-tracer\", resource=notif_resource)\n        db_tracer = trace.get_tracer(\"db-tracer\", resource=db_resource)\n        \n        # 1. 
Frontend service (web-ui) - User logs in\n        with web_ui_tracer.start_as_current_span(\"login-page-render\") as web_span:\n            web_span.set_attribute(\"component\", \"web-ui\")\n            web_span.set_attribute(\"transaction.id\", transaction_id)\n            web_span.set_attribute(\"user.action\", \"login\")\n            web_span.set_attribute(\"http.method\", \"GET\")\n            web_span.set_attribute(\"http.url\", \"/login\")\n            time.sleep(0.1)\n            \n            # 2. Send login request to API Gateway\n            with api_gw_tracer.start_as_current_span(\"api-gateway-login-handler\") as api_span:\n                api_span.set_attribute(\"component\", \"api-gateway\")\n                api_span.set_attribute(\"transaction.id\", transaction_id)\n                api_span.set_attribute(\"endpoint\", \"/api/v1/login\")\n                api_span.set_attribute(\"http.method\", \"POST\")\n                time.sleep(0.15)\n                \n                # 3. API Gateway calls Authentication Service\n                with auth_tracer.start_as_current_span(\"authenticate-user\") as auth_span:\n                    auth_span.set_attribute(\"component\", \"auth-service\")\n                    auth_span.set_attribute(\"transaction.id\", transaction_id)\n                    auth_span.set_attribute(\"auth.method\", \"password\")\n                    time.sleep(0.2)\n                    \n                    # 4. Auth service calls User Service to retrieve user details\n                    with user_tracer.start_as_current_span(\"get-user-details\") as user_span:\n                        user_span.set_attribute(\"component\", \"user-service\")\n                        user_span.set_attribute(\"transaction.id\", transaction_id)\n                        user_span.set_attribute(\"user.id\", f\"user_{random.randint(1000, 9999)}\")\n                        \n                        # 5. 
User service calls DB Service\n                        with db_tracer.start_as_current_span(\"db-query\") as db_span:\n                            db_span.set_attribute(\"component\", \"db-service\")\n                            db_span.set_attribute(\"transaction.id\", transaction_id)\n                            db_span.set_attribute(\"db.operation\", \"SELECT\")\n                            db_span.set_attribute(\"db.table\", \"users\")\n                            \n                            # Randomly introduce database latency\n                            if random.random() < 0.3:\n                                delay = random.uniform(0.5, 1.5)\n                                db_span.set_attribute(\"db.latency\", delay)\n                                db_span.set_attribute(\"latency.category\", \"slow-query\")\n                                time.sleep(delay)\n                            else:\n                                time.sleep(0.1)\n                \n                # 6. 
After successful login, send notification\n                with notif_tracer.start_as_current_span(\"send-login-notification\") as notif_span:\n                    notif_span.set_attribute(\"component\", \"notification-service\")\n                    notif_span.set_attribute(\"transaction.id\", transaction_id)\n                    notif_span.set_attribute(\"notification.type\", \"login_alert\")\n                    notif_span.set_attribute(\"notification.channel\", random.choice([\"email\", \"sms\", \"push\"]))\n                    time.sleep(0.15)\n        \n        logger.info(f\"Generated multi-service trace with transaction ID: {transaction_id}\")\n        return transaction_id\n    except Exception as e:\n        logger.error(f\"Error generating multi-service trace: {e}\")\n        return None\n\ndef generate_trace_batch():\n    \"\"\"Generates a batch of different trace types\"\"\"\n    trace_generators = [\n        generate_simple_trace,\n        generate_nested_trace,\n        generate_error_trace,\n        generate_high_latency_trace,\n        generate_delayed_chain_trace,\n        generate_multi_service_trace_bg  # Add the new trace type\n    ]\n    \n    # Randomly select which traces to generate with weighted probabilities\n    weights = [0.20, 0.20, 0.15, 0.1, 0.15, 0.2]  # Add weight for multi-service trace\n    \n    for _ in range(random.randint(3, 8)):  # Generate 3-8 traces per batch\n        selected_generator = random.choices(trace_generators, weights=weights, k=1)[0]\n        selected_generator()\n        time.sleep(random.uniform(0.1, 0.5))  # Small delay between traces\n\ndef trace_generator_thread():\n    \"\"\"Background thread that generates traces at regular intervals\"\"\"\n    while True:\n        try:\n            generate_trace_batch()\n            # Wait between 5-15 seconds before generating the next batch\n            delay = random.uniform(5, 15)\n            logger.info(f\"Next trace batch in {delay:.2f} seconds\")\n            
time.sleep(delay)\n        except Exception as e:\n            logger.error(f\"Error in trace generation: {e}\")\n            time.sleep(5)  # Wait before retrying\n\n# API endpoints\n@app.route('/')\ndef home():\n    return \"\"\"\n    <h1>OpenTelemetry Tail Sampling Demo</h1>\n    <p>This app demonstrates OpenTelemetry tracing with Tail Sampling using Grafana Alloy.</p>\n    <p>The app automatically generates various types of traces in the background.</p>\n    <p>You can also trigger trace generation manually using these endpoints:</p>\n    <ul>\n        <li><a href=\"/simple\">Simple Trace</a></li>\n        <li><a href=\"/nested\">Nested Trace</a></li>\n        <li><a href=\"/error\">Error Trace</a></li>\n        <li><a href=\"/high-latency\">High Latency Trace</a></li>\n        <li><a href=\"/chain\">Chain of Services</a></li>\n        <li><a href=\"/delayed-chain\">Delayed Chain (with Service D having high latency)</a></li>\n        <li><a href=\"/multi-service\">Multi-Service Trace (with different service.name values)</a></li>\n        <li><a href=\"/batch\">Generate Trace Batch</a></li>\n    </ul>\n    \"\"\"\n\n@app.route('/simple')\ndef simple_trace():\n    generate_simple_trace()\n    return {\"status\": \"ok\", \"message\": \"Simple trace generated\"}\n\n@app.route('/nested')\ndef nested_trace():\n    generate_nested_trace()\n    return {\"status\": \"ok\", \"message\": \"Nested trace generated\"}\n\n@app.route('/error')\ndef error_trace():\n    generate_error_trace()\n    return {\"status\": \"ok\", \"message\": \"Error trace generated\"}\n\n@app.route('/high-latency')\ndef high_latency_trace():\n    generate_high_latency_trace()\n    return {\"status\": \"ok\", \"message\": \"High latency trace generated\"}\n\n@app.route('/batch')\ndef batch_trace():\n    generate_trace_batch()\n    return {\"status\": \"ok\", \"message\": \"Trace batch generated\"}\n\n@app.route('/multi-service')\ndef multi_service_trace():\n    transaction_id = 
generate_multi_service_trace_bg()\n    return {\n        \"status\": \"ok\", \n        \"message\": \"Multi-service trace generated\", \n        \"transaction_id\": transaction_id,\n        \"services\": [\"web-ui\", \"api-gateway\", \"auth-service\", \"user-service\", \"notification-service\", \"db-service\"]\n    }\n\n@app.route('/chain')\ndef chain_trace():\n    with tracer.start_as_current_span(\"chain-root\") as span:\n        span.set_attribute(\"operation.step\", \"start\")\n        \n        # Simulate a chain of service calls\n        try:\n            # Call ourselves to simulate microservice calls\n            # In a real world example these would be different services\n            service_b_url = f\"http://localhost:8080/service/b?id={random.randint(1000, 9999)}\"\n            response = requests.get(service_b_url)\n            return {\"status\": \"ok\", \"message\": \"Chain trace generated\", \"data\": response.json()}\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Failed to complete chain\"}\n\n@app.route('/service/b')\ndef service_b():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(f\"service-b-handler\") as span:\n        span.set_attribute(\"service\", \"B\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"http.url\", \"/service/b\")  # For URL-based sampling\n        time.sleep(0.1)  # Simulate work\n        \n        # Call service C\n        service_c_url = f\"http://localhost:8080/service/c?id={req_id}\"\n        response = requests.get(service_c_url)\n        return {\"status\": \"ok\", \"message\": \"Service B completed\", \"data\": response.json()}\n\n@app.route('/service/c')\ndef service_c():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(f\"service-c-handler\") as span:\n        
span.set_attribute(\"service\", \"C\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"http.url\", \"/service/c\")  # For URL-based sampling\n        time.sleep(0.15)  # Simulate work\n        \n        # Randomly fail sometimes to show error traces\n        if random.random() < 0.2:  # 20% chance of failure\n            span.set_status(trace.StatusCode.ERROR, \"Random failure\")\n            return {\"status\": \"error\", \"message\": \"Service C failed randomly\"}\n        \n        return {\"status\": \"ok\", \"message\": \"Service C completed successfully\"}\n\n# Add the delayed chain implementation\n@app.route('/delayed-chain')\ndef delayed_chain_trace_endpoint():\n    with tracer.start_as_current_span(\"delayed-chain-root\") as span:\n        span.set_attribute(\"operation.step\", \"start\")\n        span.set_attribute(\"operation.type\", \"delayed-chain\")\n        \n        try:\n            # Start the chain with Service A\n            service_a_url = f\"http://localhost:8080/delayed/service-a?id={random.randint(1000, 9999)}\"\n            response = requests.get(service_a_url)\n            return {\n                \"status\": \"ok\", \n                \"message\": \"Delayed chain trace generated\", \n                \"data\": response.json()\n            }\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Failed to complete delayed chain\"}\n\n@app.route('/delayed/service-a')\ndef delayed_service_a():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-a-handler\") as span:\n        span.set_attribute(\"service\", \"A\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.url\", \"/delayed/service-a\")\n        time.sleep(0.1)  # Normal 
latency\n        \n        # Call service B\n        service_b_url = f\"http://localhost:8080/delayed/service-b?id={req_id}\"\n        response = requests.get(service_b_url)\n        return {\"status\": \"ok\", \"message\": \"Service A completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-b')\ndef delayed_service_b():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-b-handler\") as span:\n        span.set_attribute(\"service\", \"B\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.url\", \"/delayed/service-b\")\n        time.sleep(0.15)  # Normal latency\n        \n        # Call service C\n        service_c_url = f\"http://localhost:8080/delayed/service-c?id={req_id}\"\n        response = requests.get(service_c_url)\n        return {\"status\": \"ok\", \"message\": \"Service B completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-c')\ndef delayed_service_c():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-c-handler\") as span:\n        span.set_attribute(\"service\", \"C\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.url\", \"/delayed/service-c\")\n        time.sleep(0.2)  # Normal latency\n        \n        # Call the slow service D\n        service_d_url = f\"http://localhost:8080/delayed/service-d?id={req_id}\"\n        response = requests.get(service_d_url)\n        return {\"status\": \"ok\", \"message\": \"Service C completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-d')\ndef delayed_service_d():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-d-handler\") as span:\n        span.set_attribute(\"service\", \"D\")\n        span.set_attribute(\"request.id\", 
req_id)\n        span.set_attribute(\"service.latency\", \"high\")\n        span.set_attribute(\"latency.category\", \"bottleneck\")\n        span.set_attribute(\"http.url\", \"/delayed/service-d\")\n        \n        # This service consistently has high latency (3-4 seconds)\n        delay = random.uniform(3.0, 4.0)\n        span.set_attribute(\"latency.seconds\", delay)\n        time.sleep(delay)  # High latency\n        \n        # Call final service E\n        service_e_url = f\"http://localhost:8080/delayed/service-e?id={req_id}\"\n        response = requests.get(service_e_url)\n        return {\"status\": \"ok\", \"message\": \"Service D completed (with delay)\", \"data\": response.json()}\n\n@app.route('/delayed/service-e')\ndef delayed_service_e():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-e-handler\") as span:\n        span.set_attribute(\"service\", \"E\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.url\", \"/delayed/service-e\")\n        time.sleep(0.1)  # Normal latency\n        \n        return {\"status\": \"ok\", \"message\": \"Service E completed (chain end)\"}\n\nif __name__ == '__main__':\n    # Start the background trace generator thread\n    trace_thread = threading.Thread(target=trace_generator_thread, daemon=True)\n    trace_thread.start()\n    \n    logger.info(\"Starting the application with background trace generation\")\n    app.run(host='0.0.0.0', port=8080) "
  },
  {
    "path": "otel-tail-sampling/app/requirements.txt",
    "content": "flask\nrequests\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests"
  },
  {
    "path": "otel-tail-sampling/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for Tail Sampling\n#\n# This is the OTel-native equivalent of config.alloy for use with the Alloy OTel Engine.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  # Tail Sampling: wait for complete traces before making sampling decisions\n  tail_sampling:\n    decision_wait: 10s\n    num_traces: 100\n    expected_new_traces_per_sec: 10\n    policies:\n      # Policy 1: Always sample traces with a specific attribute value\n      - name: test-attribute-policy\n        type: string_attribute\n        string_attribute:\n          key: test_attr_key_1\n          values: [test_attr_val_1]\n\n      # Policy 2: Sample error traces\n      - name: error-policy\n        type: status_code\n        status_code:\n          status_codes: [ERROR]\n\n      # Policy 3: Sample high latency traces (> 5s)\n      - name: latency-policy\n        type: latency\n        latency:\n          threshold_ms: 5000\n\n      # Policy 4: Sample traces matching a numeric attribute range\n      - name: numeric-policy\n        type: numeric_attribute\n        numeric_attribute:\n          key: key1\n          min_value: 70\n          max_value: 100\n\n      # Policy 5: URL-based policy to filter out health checks\n      - name: url-filter-policy\n        type: string_attribute\n        string_attribute:\n          key: http.url\n          values: [\"/health\", \"/metrics\"]\n          invert_match: true\n\n      # Policy 6: Probabilistic sampling as a fallback (sample 10% of remaining traces)\n      - name: probabilistic-policy\n        type: probabilistic\n        probabilistic:\n          sampling_percentage: 10\n\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\nservice:\n  pipelines:\n    traces:\n      receivers: 
[otlp]\n      processors: [tail_sampling, batch]\n      exporters: [otlp/tempo]\n"
  },
  {
    "path": "otel-tail-sampling/config.alloy",
    "content": "/*\n * Alloy Configuration for OpenTelemetry Trace Collection with Tail Sampling\n */\n\n// Receive OpenTelemetry traces\notelcol.receiver.otlp \"default\" {\n  http {}\n  grpc {}\n\n  output {\n    traces = [otelcol.processor.tail_sampling.default.input]\n  }\n}\n\n// Tail Sampling processor\notelcol.processor.tail_sampling \"default\" {\n  // Wait time to make a sampling decision\n  decision_wait = \"10s\"\n  \n  // Number of traces kept in memory\n  num_traces = 100\n  \n  // Expected new traces per second\n  expected_new_traces_per_sec = 10\n  \n  // Policy 1: Always sample traces with a specific attribute value\n  policy {\n    name = \"test-attribute-policy\"\n    type = \"string_attribute\"\n    \n    string_attribute {\n      key    = \"test_attr_key_1\"\n      values = [\"test_attr_val_1\"]\n    }\n  }\n  \n  // Policy 2: Sample error traces\n  policy {\n    name = \"error-policy\"\n    type = \"status_code\"\n    \n    status_code {\n      status_codes = [\"ERROR\"]\n    }\n  }\n  \n  // Policy 3: Sample high latency traces\n  policy {\n    name = \"latency-policy\"\n    type = \"latency\"\n    \n    latency {\n      threshold_ms = 5000  // 5 seconds\n    }\n  }\n  \n  // Policy 4: Sample traces matching a numeric attribute range\n  policy {\n    name = \"numeric-policy\"\n    type = \"numeric_attribute\"\n    \n    numeric_attribute {\n      key       = \"key1\"\n      min_value = 70\n      max_value = 100\n    }\n  }\n  \n  // Policy 5: URL-based policy to filter out health checks\n  policy {\n    name = \"url-filter-policy\"\n    type = \"string_attribute\"\n    \n    string_attribute {\n      key             = \"http.url\"\n      values          = [\"/health\", \"/metrics\"]\n      invert_match    = true  // Sample everything EXCEPT these URLs\n    }\n  }\n  \n  // Policy 6: Probabilistic sampling as a fallback (sample 10% of remaining traces)\n  policy {\n    name = \"probabilistic-policy\"\n    type = \"probabilistic\"\n    \n    
probabilistic {\n      sampling_percentage = 10\n    }\n  }\n  \n  output {\n    traces = [otelcol.processor.batch.default.input]\n  }\n}\n\n// Batch processor to improve performance\notelcol.processor.batch \"default\" {\n  output {\n    traces = [otelcol.exporter.otlp.tempo.input]\n  }\n}\n\n// Send sampled traces to Tempo\notelcol.exporter.otlp \"tempo\" {\n  client {\n    endpoint = \"tempo:4317\"\n    tls {\n      insecure = true\n    }\n  }\n} \n\nlivedebugging {\n  enabled = true\n}"
  },
  {
    "path": "otel-tail-sampling/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the config.alloy file, which is written in Alloy configuration syntax (formerly River).\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n"
  },
  {
    "path": "otel-tail-sampling/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo\n"
  },
  {
    "path": "otel-tail-sampling/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --enable-feature=exemplar-storage\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for tracing\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp    # tempo\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n    depends_on:\n      - prometheus\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n      - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-exploretraces-app/grafana-exploretraces-app-latest.zip;grafana-traces-app\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            
nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Alloy for telemetry pipeline and tail sampling\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n      - 4317:4317/tcp    # OTLP gRPC\n      - 4318:4318/tcp    # OTLP HTTP\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n\n  # Demo app that generates OpenTelemetry traces\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo "
  },
  {
    "path": "otel-tail-sampling/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "otel-tail-sampling/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\n\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search  \n    memcached: \n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h                # maximum duration of a metrics query, increase for local setups\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:                           # this configuration will listen on all ports and protocols that tempo is capable of.\n    jaeger:                            # the receives all come from the OpenTelemetry collector.  more configuration information can\n      protocols:                       # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver\n        thrift_http:                   #\n          endpoint: \"tempo:14268\"      # for a production deployment you should only enable the receivers you need!\n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m               # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally\n\ncompactor:\n  compaction:\n    block_retention: 720h                # overall Tempo trace retention. 
set for demo purposes\n\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local                     # backend configuration to use\n    wal:\n      path: /var/tempo/wal             # where to store the wal locally\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator\n      generate_native_histograms: both\n      "
  },
  {
    "path": "otel-tracing-service-graphs/README.md",
    "content": "# Alloy Service Graphs with OpenTelemetry\n\nThis example demonstrates how to use Grafana Alloy to generate service graphs from OpenTelemetry traces and send them to Prometheus via OTLP HTTP, instead of relying on Tempo's built-in metrics generator.\n\n## Overview\n\nThe example includes:\n\n- A sample Python Flask application that generates various types of traces\n- Grafana Alloy as the telemetry pipeline with service graph generation\n- Tempo for trace storage and querying (without metrics generation)\n- Prometheus with OTLP receiver enabled for metrics collection\n- Memcached for Tempo caching\n- Grafana for visualization\n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd otel-tracing-service-graphs\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n   \n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh otel-tracing-service-graphs\n   ```\n\n4. Access the demo application at http://localhost:8080\n5. Access Grafana at http://localhost:3000\n6. Access Prometheus at http://localhost:9090\n\n## What to Expect\n\nThe demo application provides several endpoints that generate different types of traces:\n\n- **/simple**: Generates a simple trace with a single span\n- **/nested**: Generates a trace with nested spans (parent-child relationships)\n- **/error**: Generates a trace that includes an error\n- **/chain**: Simulates a chain of service calls to demonstrate distributed tracing\n\nAfter accessing these endpoints, you can view the traces and service graphs in Grafana.\n\n## Alloy Service Graph Generation\n\nThis example demonstrates using Alloy's `otelcol.connector.servicegraph` component to generate service graphs from traces, which offers several advantages over using Tempo's built-in metrics generator:\n\n1. 
**More Flexibility**: Alloy's service graph connector allows for customization of dimensions and collection intervals\n2. **Pipeline Integration**: The service graph metrics can be part of a larger telemetry pipeline with additional processing\n3. **Reduced Load on Tempo**: By offloading the service graph generation to Alloy, Tempo can focus on trace storage and querying\n\nThe key component in the Alloy configuration is:\n\n```\notelcol.connector.servicegraph \"default\" {\n  metrics_flush_interval = \"10s\"\n  dimensions = [\"http.method\"]\n  \n  output {\n    metrics = [otelcol.exporter.otlphttp.prometheus.input]\n  }\n}\n```\n\n## Prometheus OTLP Integration\n\nThis example uses Prometheus's OTLP HTTP receiver endpoint. This approach has several benefits:\n\n1. **Native OTLP Integration**: Uses the OpenTelemetry Protocol directly between Alloy and Prometheus\n2. **Simplified Configuration**: Uses Prometheus's built-in OTLP receiver without needing special ports\n3. **Better Metadata Handling**: Resource attributes from OTLP are properly promoted to Prometheus labels\n\nThe OTLP HTTP exporter configuration in Alloy is:\n\n```\notelcol.exporter.otlphttp \"prometheus\" {\n  client {\n    endpoint = \"http://prometheus:9090/api/v1/otlp\"\n    tls {\n      insecure = true\n    }\n  }\n}\n```\n\nAnd in Prometheus, we've enabled the OTLP receiver and configured resource attributes to be promoted to labels:\n\n```\notlp:\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - deployment.environment\n    # ...and more relevant attributes\n```\n\n## Viewing Service Graphs\n\nTo view the service graph:\n\n1. Open Grafana (http://localhost:3000)\n2. Navigate to Explore\n3. Select the Tempo data source\n4. Click on the \"Service Graph\" tab\n5. 
You should see a visual representation of the relationships between services\n\nThe service graph metrics are stored in Prometheus with the following metrics:\n- `calls_total`: Total number of calls between services\n- `calls_failed_total`: Total number of failed calls between services\n- `latency`: Histogram of latencies between services\n\nThe metrics are segmented by HTTP method, allowing you to see which endpoints are being called.\n\n## Architecture\n\n```\n┌────────────┐     ┌──────────────────────┐      ┌───────┐      ┌─────────┐\n│ Demo App   │────▶│ Alloy                │─────▶│ Tempo │─────▶│ Grafana │\n│ (OTel SDK) │     │ ┌──────────────────┐ │      │       │      │         │\n└────────────┘     │ │Service Graph Gen.│ │      └───────┘      └─────────┘\n                   │ └────────┬─────────┘ │                          ▲\n                   └──────────┼───────────┘                          │\n                              │                                      │\n                              ▼                                      │\n                        ┌─────────┐                                  │\n                        │Prometheus│──────────────────────────────────┘\n                        │  (OTLP)  │\n                        └─────────┘\n```\n\nIn this architecture:\n1. The Demo App generates traces using the OpenTelemetry SDK and sends them to Alloy\n2. Alloy processes the traces and:\n   - Generates service graph metrics using the servicegraph connector\n   - Forwards the raw traces to Tempo\n3. Service graph metrics are sent to Prometheus via OTLP HTTP\n4. 
Grafana queries both Tempo for traces and Prometheus for service graph metrics\n\n## Customizing\n\nThe Alloy configuration can be further customized to add:\n- Additional processors for trace data\n- Filtering based on service names or other attributes\n- Custom dimensions for the service graph metrics (currently using HTTP method)\n- Additional metrics exporters for different backend systems \n"
  },
  {
    "path": "otel-tracing-service-graphs/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\n\nCOPY app.py .\n\nCMD [\"python\", \"app.py\"] "
  },
  {
    "path": "otel-tracing-service-graphs/app/app.py",
    "content": "import os\nimport random\nimport time\nimport uuid\nfrom flask import Flask, request\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\nimport requests\n\n# Configure the tracer\nresource = Resource.create(attributes={\n    SERVICE_NAME: \"trace-demo\"\n})\ntrace.set_tracer_provider(TracerProvider(resource=resource))\n\n# Configure the OTLP gRPC exporter with a hard-coded endpoint.\n# The explicit endpoint argument takes precedence over OTEL_EXPORTER_OTLP_ENDPOINT.\notlp_exporter = OTLPSpanExporter(endpoint=\"http://alloy:4317/v1/traces\", insecure=True)\nspan_processor = BatchSpanProcessor(span_exporter=otlp_exporter, max_export_batch_size=1)\ntrace.get_tracer_provider().add_span_processor(span_processor)\n\n# Create a tracer\ntracer = trace.get_tracer(__name__)\n\n# Create a Flask application\napp = Flask(__name__)\n\n# Instrument Flask\nFlaskInstrumentor().instrument_app(app)\n\n# Instrument requests\nRequestsInstrumentor().instrument()\n\n@app.route('/')\ndef home():\n    return \"\"\"\n    <h1>OpenTelemetry Service Graph Demo</h1>\n    <p>This app demonstrates OpenTelemetry tracing with Grafana Alloy and service graph generation.</p>\n    <ul>\n        <li><a href=\"/simple\">Simple Trace</a></li>\n        <li><a href=\"/nested\">Nested Trace</a></li>\n        <li><a href=\"/error\">Error Trace</a></li>\n        <li><a href=\"/chain\">Chain of Services</a></li>\n        <li><a href=\"/delayed-chain\">Delayed Chain (with Service D having high latency)</a></li>\n        <li><a href=\"/multi-service\">Multi-Service Trace (with different service.name values)</a></li>\n    </ul>\n    
\"\"\"\n\n@app.route('/simple')\ndef simple_trace():\n    with tracer.start_as_current_span(\"simple-operation\") as span:\n        span.set_attribute(\"operation.type\", \"simple\")\n        span.set_attribute(\"operation.value\", random.randint(1, 100))\n        time.sleep(0.1)  # Simulate work\n        return {\"status\": \"ok\", \"message\": \"Simple trace generated\"}\n\n@app.route('/nested')\ndef nested_trace():\n    with tracer.start_as_current_span(\"parent-operation\") as parent:\n        parent.set_attribute(\"operation.type\", \"parent\")\n        time.sleep(0.05)  # Simulate work\n        \n        with tracer.start_as_current_span(\"child-operation-1\") as child1:\n            child1.set_attribute(\"operation.type\", \"child\")\n            child1.set_attribute(\"child.number\", 1)\n            time.sleep(0.05)  # Simulate work\n            \n        with tracer.start_as_current_span(\"child-operation-2\") as child2:\n            child2.set_attribute(\"operation.type\", \"child\")\n            child2.set_attribute(\"child.number\", 2)\n            time.sleep(0.05)  # Simulate work\n            \n            with tracer.start_as_current_span(\"grandchild-operation\") as grandchild:\n                grandchild.set_attribute(\"operation.type\", \"grandchild\")\n                time.sleep(0.05)  # Simulate work\n                \n        return {\"status\": \"ok\", \"message\": \"Nested trace generated\"}\n\n@app.route('/error')\ndef error_trace():\n    with tracer.start_as_current_span(\"error-operation\") as span:\n        span.set_attribute(\"operation.type\", \"error\")\n        try:\n            # Simulate an error\n            result = 1 / 0\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Error trace generated\"}\n\n@app.route('/chain')\ndef chain_trace():\n    with tracer.start_as_current_span(\"chain-root\") 
as span:\n        span.set_attribute(\"operation.step\", \"start\")\n        \n        # Simulate a chain of service calls\n        try:\n            # Call ourselves to simulate microservice calls\n            # In a real world example these would be different services\n            service_b_url = f\"http://localhost:8080/service/b?id={random.randint(1000, 9999)}\"\n            response = requests.get(service_b_url)\n            return {\"status\": \"ok\", \"message\": \"Chain trace generated\", \"data\": response.json()}\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Failed to complete chain\"}\n\n@app.route('/service/b')\ndef service_b():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(f\"service-b-handler\") as span:\n        span.set_attribute(\"service\", \"B\")\n        span.set_attribute(\"request.id\", req_id)\n        time.sleep(0.1)  # Simulate work\n        \n        # Call service C\n        service_c_url = f\"http://localhost:8080/service/c?id={req_id}\"\n        response = requests.get(service_c_url)\n        return {\"status\": \"ok\", \"message\": \"Service B completed\", \"data\": response.json()}\n\n@app.route('/service/c')\ndef service_c():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(f\"service-c-handler\") as span:\n        span.set_attribute(\"service\", \"C\")\n        span.set_attribute(\"request.id\", req_id)\n        time.sleep(0.15)  # Simulate work\n        \n        # Randomly fail sometimes to show error traces\n        if random.random() < 0.2:  # 20% chance of failure\n            span.set_status(trace.StatusCode.ERROR, \"Random failure\")\n            return {\"status\": \"error\", \"message\": \"Service C failed randomly\"}\n        \n        return {\"status\": \"ok\", \"message\": \"Service C completed 
successfully\"}\n\n# New delayed chain implementation\n@app.route('/delayed-chain')\ndef delayed_chain_trace():\n    with tracer.start_as_current_span(\"delayed-chain-root\") as span:\n        span.set_attribute(\"operation.step\", \"start\")\n        span.set_attribute(\"operation.type\", \"delayed-chain\")\n        \n        try:\n            # Start the chain with Service A\n            service_a_url = f\"http://localhost:8080/delayed/service-a?id={random.randint(1000, 9999)}\"\n            response = requests.get(service_a_url)\n            return {\n                \"status\": \"ok\", \n                \"message\": \"Delayed chain trace generated\", \n                \"data\": response.json()\n            }\n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return {\"status\": \"error\", \"message\": \"Failed to complete delayed chain\"}\n\n@app.route('/delayed/service-a')\ndef delayed_service_a():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-a-handler\") as span:\n        span.set_attribute(\"service\", \"A\")\n        span.set_attribute(\"client.service.name\", \"frontend\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.method\", \"GET\")\n        time.sleep(0.1)  # Normal latency\n        \n        # Call service B\n        service_b_url = f\"http://localhost:8080/delayed/service-b?id={req_id}\"\n        response = requests.get(service_b_url)\n        return {\"status\": \"ok\", \"message\": \"Service A completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-b')\ndef delayed_service_b():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-b-handler\") as span:\n        span.set_attribute(\"service\", \"B\")\n        
span.set_attribute(\"client.service.name\", \"service-a\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.method\", \"GET\")\n        time.sleep(0.15)  # Normal latency\n        \n        # Call service C\n        service_c_url = f\"http://localhost:8080/delayed/service-c?id={req_id}\"\n        response = requests.get(service_c_url)\n        return {\"status\": \"ok\", \"message\": \"Service B completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-c')\ndef delayed_service_c():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-c-handler\") as span:\n        span.set_attribute(\"service\", \"C\")\n        span.set_attribute(\"client.service.name\", \"service-b\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.method\", \"GET\")\n        time.sleep(0.2)  # Normal latency\n        \n        # Call the slow service D\n        service_d_url = f\"http://localhost:8080/delayed/service-d?id={req_id}\"\n        response = requests.get(service_d_url)\n        return {\"status\": \"ok\", \"message\": \"Service C completed\", \"data\": response.json()}\n\n@app.route('/delayed/service-d')\ndef delayed_service_d():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-d-handler\") as span:\n        span.set_attribute(\"service\", \"D\")\n        span.set_attribute(\"client.service.name\", \"service-c\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"high\")\n        span.set_attribute(\"latency.category\", \"bottleneck\")\n        span.set_attribute(\"http.method\", \"GET\")\n        \n        # This service consistently has high latency (3-4 seconds)\n        delay = random.uniform(3.0, 4.0)\n        
span.set_attribute(\"latency.seconds\", delay)\n        time.sleep(delay)  # High latency\n        \n        # Call final service E\n        service_e_url = f\"http://localhost:8080/delayed/service-e?id={req_id}\"\n        response = requests.get(service_e_url)\n        return {\"status\": \"ok\", \"message\": \"Service D completed (with delay)\", \"data\": response.json()}\n\n@app.route('/delayed/service-e')\ndef delayed_service_e():\n    req_id = request.args.get('id', 'unknown')\n    with tracer.start_as_current_span(\"service-e-handler\") as span:\n        span.set_attribute(\"service\", \"E\")\n        span.set_attribute(\"client.service.name\", \"service-d\")\n        span.set_attribute(\"request.id\", req_id)\n        span.set_attribute(\"service.latency\", \"normal\")\n        span.set_attribute(\"http.method\", \"GET\")\n        time.sleep(0.1)  # Normal latency\n        \n        return {\"status\": \"ok\", \"message\": \"Service E completed (chain end)\"}\n\n@app.route('/multi-service')\ndef multi_service_trace():\n    transaction_id = generate_multi_service_trace()\n    return {\n        \"status\": \"ok\", \n        \"message\": \"Multi-service trace generated\", \n        \"transaction_id\": transaction_id,\n        \"services\": [\"web-ui\", \"api-gateway\", \"auth-service\", \"user-service\", \"notification-service\", \"db-service\"]\n    }\n# code fixed thanks to @hedss\ndef generate_multi_service_trace():\n    \"\"\"Generate a trace that spans multiple services with true service.name differentiation\"\"\"\n    try:\n        # Create a unique transaction ID for correlating spans\n        transaction_id = str(uuid.uuid4())[:8]\n\n        # Create a custom resource for each service\n        web_ui_resource = Resource.create(attributes={SERVICE_NAME: \"web-ui\"})\n        api_gw_resource = Resource.create(attributes={SERVICE_NAME: \"api-gateway\"})\n        auth_resource = Resource.create(attributes={SERVICE_NAME: \"auth-service\"})\n        
user_resource = Resource.create(attributes={SERVICE_NAME: \"user-service\"})\n        notif_resource = Resource.create(attributes={SERVICE_NAME: \"notification-service\"})\n        db_resource = Resource.create(attributes={SERVICE_NAME: \"db-service\"})\n\n        # Create trace providers with each resource\n        web_ui_provider = TracerProvider(resource=web_ui_resource)\n        api_gw_provider = TracerProvider(resource=api_gw_resource)\n        auth_provider = TracerProvider(resource=auth_resource)\n        user_provider = TracerProvider(resource=user_resource)\n        notif_provider = TracerProvider(resource=notif_resource)\n        db_provider = TracerProvider(resource=db_resource)\n\n        # Connect the providers to the same OTLP exporter via span processors\n        web_ui_provider.add_span_processor(span_processor)\n        api_gw_provider.add_span_processor(span_processor)\n        auth_provider.add_span_processor(span_processor)\n        user_provider.add_span_processor(span_processor)\n        notif_provider.add_span_processor(span_processor)\n        db_provider.add_span_processor(span_processor)\n\n        # Create tracers for each service using their respective providers\n        web_ui_tracer = web_ui_provider.get_tracer(\"web-ui-tracer\")\n        api_gw_tracer = api_gw_provider.get_tracer(\"api-gw-tracer\")\n        auth_tracer = auth_provider.get_tracer(\"auth-tracer\")\n        user_tracer = user_provider.get_tracer(\"user-tracer\")\n        notif_tracer = notif_provider.get_tracer(\"notif-tracer\")\n        db_tracer = db_provider.get_tracer(\"db-tracer\")\n\n        # 1. 
Frontend service (web-ui) - User logs in\n        with web_ui_tracer.start_as_current_span(\"login-page-render\", kind=trace.SpanKind.SERVER) as web_span:\n            web_span.set_attribute(\"component\", \"web-ui\")\n            web_span.set_attribute(\"transaction.id\", transaction_id)\n            web_span.set_attribute(\"user.action\", \"login\")\n            web_span.set_attribute(\"http.method\", \"GET\")\n            web_span.set_attribute(\"http.url\", \"/login\")\n            time.sleep(0.1)\n\n            # 2. Send login request to API Gateway\n            with web_ui_tracer.start_as_current_span(\"api-gateway-request\", kind=trace.SpanKind.CLIENT) as web_client_span:\n                web_client_span.set_attribute(\"component\", \"web-ui\")\n                web_client_span.set_attribute(\"transaction.id\", transaction_id)\n                web_client_span.set_attribute(\"http.method\", \"POST\")\n                web_client_span.set_attribute(\"http.url\", \"/api/v1/login\")\n\n                # API Gateway receives the request\n                with api_gw_tracer.start_as_current_span(\"api-gateway-login-handler\", kind=trace.SpanKind.SERVER) as api_span:\n                    api_span.set_attribute(\"component\", \"api-gateway\")\n                    api_span.set_attribute(\"transaction.id\", transaction_id)\n                    api_span.set_attribute(\"endpoint\", \"/api/v1/login\")\n                    api_span.set_attribute(\"http.method\", \"POST\")\n                    time.sleep(0.15)\n\n                    # 3. 
API Gateway calls Authentication Service\n                    with api_gw_tracer.start_as_current_span(\"auth-service-request\", kind=trace.SpanKind.CLIENT) as api_client_span:\n                        api_client_span.set_attribute(\"component\", \"api-gateway\")\n                        api_client_span.set_attribute(\"transaction.id\", transaction_id)\n                        api_client_span.set_attribute(\"http.method\", \"POST\")\n                        api_client_span.set_attribute(\"http.url\", \"/auth/authenticate\")\n\n                        # Auth service receives the request\n                        with auth_tracer.start_as_current_span(\"authenticate-user\", kind=trace.SpanKind.SERVER) as auth_span:\n                            auth_span.set_attribute(\"component\", \"auth-service\")\n                            auth_span.set_attribute(\"transaction.id\", transaction_id)\n                            auth_span.set_attribute(\"auth.method\", \"password\")\n                            time.sleep(0.2)\n\n                            # 4. 
Auth service calls User Service\n                            with auth_tracer.start_as_current_span(\"user-service-request\", kind=trace.SpanKind.CLIENT) as auth_client_span:\n                                auth_client_span.set_attribute(\"component\", \"auth-service\")\n                                auth_client_span.set_attribute(\"transaction.id\", transaction_id)\n                                auth_client_span.set_attribute(\"http.method\", \"GET\")\n                                auth_client_span.set_attribute(\"http.url\", \"/user/details\")\n\n                                # User service receives the request\n                                with user_tracer.start_as_current_span(\"get-user-details\", kind=trace.SpanKind.SERVER) as user_span:\n                                    user_span.set_attribute(\"component\", \"user-service\")\n                                    user_span.set_attribute(\"transaction.id\", transaction_id)\n                                    user_span.set_attribute(\"user.id\", f\"user_{random.randint(1000, 9999)}\")\n\n                                    # 5. 
User service calls DB Service\n                                    with user_tracer.start_as_current_span(\"db-service-request\", kind=trace.SpanKind.CLIENT) as user_client_span:\n                                        user_client_span.set_attribute(\"component\", \"user-service\")\n                                        user_client_span.set_attribute(\"transaction.id\", transaction_id)\n                                        user_client_span.set_attribute(\"db.operation\", \"SELECT\")\n                                        user_client_span.set_attribute(\"db.table\", \"users\")\n\n                                        # DB service receives the request\n                                        with db_tracer.start_as_current_span(\"db-query\", kind=trace.SpanKind.SERVER) as db_span:\n                                            db_span.set_attribute(\"component\", \"db-service\")\n                                            db_span.set_attribute(\"transaction.id\", transaction_id)\n                                            db_span.set_attribute(\"db.operation\", \"SELECT\")\n                                            db_span.set_attribute(\"db.table\", \"users\")\n\n                                            # Randomly introduce database latency\n                                            if random.random() < 0.3:\n                                                delay = random.uniform(0.5, 1.5)\n                                                db_span.set_attribute(\"db.latency\", delay)\n                                                db_span.set_attribute(\"latency.category\", \"slow-query\")\n                                                time.sleep(delay)\n                                            else:\n                                                time.sleep(0.1)\n\n                    # 6. 
After successful login, send notification\n                    with api_gw_tracer.start_as_current_span(\"notification-service-request\", kind=trace.SpanKind.CLIENT) as notif_client_span:\n                        notif_client_span.set_attribute(\"component\", \"api-gateway\")\n                        notif_client_span.set_attribute(\"transaction.id\", transaction_id)\n                        notif_client_span.set_attribute(\"http.method\", \"POST\")\n                        notif_client_span.set_attribute(\"http.url\", \"/notifications/send\")\n\n                        # Notification service receives the request\n                        with notif_tracer.start_as_current_span(\"send-login-notification\", kind=trace.SpanKind.SERVER) as notif_span:\n                            notif_span.set_attribute(\"component\", \"notification-service\")\n                            notif_span.set_attribute(\"transaction.id\", transaction_id)\n                            notif_span.set_attribute(\"notification.type\", \"login_alert\")\n                            notif_span.set_attribute(\"notification.channel\", random.choice([\"email\", \"sms\", \"push\"]))\n                            time.sleep(0.15)\n\n        return transaction_id\n    except Exception as e:\n        print(f\"Error generating multi-service trace: {e}\")\n        return None\n\nif __name__ == '__main__':\n    app.run(host='0.0.0.0', port=8080) "
  },
  {
    "path": "otel-tracing-service-graphs/app/requirements.txt",
    "content": "flask\nrequests\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests"
  },
  {
    "path": "otel-tracing-service-graphs/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for Service Graph Generation\n#\n# This is the OTel-native equivalent of config.alloy for use with the Alloy OTel Engine.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  batch: {}\n\nconnectors:\n  servicegraph:\n    metrics_flush_interval: 10s\n    dimensions:\n      - service.name\n      - http.method\n    store:\n      max_items: 5000\n      ttl: 30s\n\nexporters:\n  otlphttp/prometheus:\n    endpoint: http://prometheus:9090/api/v1/otlp\n    tls:\n      insecure: true\n\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [servicegraph, otlp/tempo]\n    metrics:\n      receivers: [servicegraph]\n      exporters: [otlphttp/prometheus]\n"
  },
  {
    "path": "otel-tracing-service-graphs/config.alloy",
    "content": "/*\n * Alloy Configuration for OpenTelemetry Trace Collection with Service Graph Generation\n */\n\n// Receive OpenTelemetry traces\notelcol.receiver.otlp \"default\" {\n  http {}\n  grpc {}\n\n  output {\n    traces = [otelcol.processor.batch.default.input]\n  }\n}\n\n// Batch processor to improve performance\notelcol.processor.batch \"default\" {\n  output {\n    traces = [\n      otelcol.connector.servicegraph.default.input,\n      otelcol.exporter.otlp.tempo.input,\n    ]\n  }\n}\n\n// Service Graph Generator \notelcol.connector.servicegraph \"default\" {\n  metrics_flush_interval = \"10s\"\n  dimensions = [\"service.name\", \"http.method\"]\n  \n  // Configure the span store for better pairing\n  store {\n    max_items = 5000\n    ttl = \"30s\"\n  }\n  \n  output {\n    metrics = [otelcol.exporter.otlphttp.prometheus.input]\n  }\n}\n\n// Send service graph metrics to Prometheus via OTLP\notelcol.exporter.otlphttp \"prometheus\" {\n  client {\n    endpoint = \"http://prometheus:9090/api/v1/otlp\"\n    tls {\n      insecure = true\n    }\n  }\n}\n\n// Send traces to Tempo for storage and visualization\notelcol.exporter.otlp \"tempo\" {\n  client {\n    endpoint = \"tempo:4317\"\n    tls {\n      insecure = true\n    }\n  }\n} \n\nlivedebugging {\n  enabled = true\n}"
  },
  {
    "path": "otel-tracing-service-graphs/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the River/HCL config.alloy file.\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n      - 4317:4317      # OTLP gRPC\n      - 4318:4318      # OTLP HTTP\n\n  # Override demo-app endpoint to use standard OTLP gRPC port\n  demo-app:\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo\n"
  },
  {
    "path": "otel-tracing-service-graphs/docker-compose.coda.yml",
    "content": "services:\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    network_mode: host\n    restart: unless-stopped\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:12345\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo\n"
  },
  {
    "path": "otel-tracing-service-graphs/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --enable-feature=exemplar-storage\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  # Tempo for tracing without metrics generation\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp    # tempo\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n    depends_on:\n      - prometheus\n\n  memcached:\n    image: memcached:1.6.40@sha256:572b011ce33954ee809066d8cecbeb3ec98912109ee3be3663a3197425fd81ac\n    container_name: memcached\n    ports:\n      - \"11211:11211\"\n    environment:\n      - MEMCACHED_MAX_MEMORY=64m  # Set the maximum memory usage\n      - MEMCACHED_THREADS=4       # Number of threads to use\n\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n      - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-exploretraces-app/grafana-exploretraces-app-latest.zip;grafana-traces-app\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n    
      editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Alloy for telemetry pipeline and service graph generation\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n\n  # Demo app that generates OpenTelemetry traces\n  demo-app:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:12345\n      - OTEL_SERVICE_NAME=demo-service\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=demo-service,service.version=1.0.0,deployment.environment=demo "
  },
  {
    "path": "otel-tracing-service-graphs/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n\notlp:\n  # Recommended attributes to be promoted to labels.\n  promote_resource_attributes:\n    - service.instance.id\n    - service.name\n    - service.namespace\n    - service.version\n    - cloud.availability_zone\n    - cloud.region\n    - container.name\n    - deployment.environment\n    - deployment.environment.name\n    - k8s.cluster.name\n    - k8s.container.name\n    - k8s.cronjob.name\n    - k8s.daemonset.name\n    - k8s.deployment.name\n    - k8s.job.name\n    - k8s.namespace.name\n    - k8s.pod.name\n    - k8s.replicaset.name\n    - k8s.statefulset.name\n\n\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m\n"
  },
  {
    "path": "otel-tracing-service-graphs/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\n\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search  \n    memcached: \n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h                # maximum duration of a metrics query, increase for local setups\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:                           # this configuration will listen on all ports and protocols that tempo is capable of.\n    jaeger:                            # the receives all come from the OpenTelemetry collector.  more configuration information can\n      protocols:                       # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver\n        thrift_http:                   #\n          endpoint: \"tempo:14268\"      # for a production deployment you should only enable the receivers you need!\n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m               # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally\n\ncompactor:\n  compaction:\n    block_retention: 720h                # overall Tempo trace retention. 
set for demo purposes\n\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local                     # backend configuration to use\n    wal:\n      path: /var/tempo/wal             # where to store the wal locally\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [local-blocks] # enables metrics generator\n      "
  },
  {
    "path": "postgres-monitoring/README.md",
    "content": "# PostgreSQL Monitoring with Grafana Alloy\n\nThis scenario demonstrates how to monitor a PostgreSQL database using Grafana Alloy's built-in `prometheus.exporter.postgres` component. Alloy scrapes PostgreSQL server metrics and forwards them to Prometheus via remote write. Grafana is pre-configured with Prometheus as a datasource so you can explore the collected metrics immediately.\n\n## Prerequisites\n\n- Docker\n- Docker Compose\n- Git\n\n## Getting Started\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/postgres-monitoring\ndocker compose up -d\n```\n\nTo use the centralized image versions from the repo root:\n\n```bash\ncd alloy-scenarios\n./run-example.sh postgres-monitoring\n```\n\n## Access Points\n\n| Service    | URL                        |\n|------------|----------------------------|\n| Grafana    | http://localhost:3000      |\n| Alloy UI   | http://localhost:12345     |\n| Prometheus | http://localhost:9090      |\n\nGrafana is configured with anonymous admin access enabled, so no login is required.\n\n## What to Expect\n\nOnce the stack is running, Alloy connects to the PostgreSQL instance and begins collecting metrics via the `prometheus.exporter.postgres` component. These metrics are scraped every 15 seconds and forwarded to Prometheus.\n\nMetrics you can explore in Grafana include:\n\n- **pg_up** -- Whether the PostgreSQL instance is reachable\n- **pg_stat_database_*/** -- Database-level statistics (transactions committed, rolled back, rows fetched, inserted, updated, deleted, deadlocks, temp files, etc.)\n- **pg_stat_bgwriter_*/** -- Background writer statistics (buffers written, checkpoints, etc.)\n- **pg_settings_*/** -- PostgreSQL server configuration settings exposed as metrics\n- **pg_stat_activity_*/** -- Connection and session activity\n- **pg_locks_*/** -- Lock statistics by mode\n\n### Exploring Metrics\n\n1. Open **Grafana** at http://localhost:3000\n2. 
Go to **Explore** and select the **Prometheus** datasource\n3. Search for metrics starting with `pg_` to browse all available PostgreSQL metrics\n\n### Debugging the Pipeline\n\n1. Open the **Alloy UI** at http://localhost:12345\n2. Navigate to the component graph to see the pipeline: `prometheus.exporter.postgres` -> `prometheus.scrape` -> `prometheus.remote_write`\n3. Use the **Live Debugging** feature (enabled in the config) to inspect data flowing through each component\n\n## Stopping the Scenario\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "postgres-monitoring/config.alloy",
    "content": "// ##############################################\n// #### PostgreSQL Metrics Configuration     ####\n// ##############################################\n\nlivedebugging {\n\tenabled = true\n}\n\n// Expose PostgreSQL metrics using the built-in postgres exporter.\nprometheus.exporter.postgres \"example\" {\n\tdata_source_names = [\"postgresql://alloy:alloy@postgres:5432/alloy?sslmode=disable\"]\n}\n\n// Scrape the postgres exporter targets.\nprometheus.scrape \"postgres\" {\n\ttargets    = prometheus.exporter.postgres.example.targets\n\tforward_to = [prometheus.remote_write.default.receiver]\n\n\tscrape_interval = \"15s\"\n}\n\n// Send metrics to the local Prometheus instance via remote write.\nprometheus.remote_write \"default\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "postgres-monitoring/docker-compose.coda.yml",
    "content": "services:\n  postgres:\n    image: postgres:18@sha256:78481659c47e862334611ccdaf7c369c986b3046da9857112f3b309114a65fb4\n    environment:\n      POSTGRES_USER: alloy\n      POSTGRES_PASSWORD: alloy\n      POSTGRES_DB: alloy\n    ports:\n      - \"5432:5432\"\n    healthcheck:\n      test: [\"CMD-SHELL\", \"pg_isready -U alloy\"]\n      interval: 5s\n      timeout: 5s\n      retries: 5\n"
  },
  {
    "path": "postgres-monitoring/docker-compose.yml",
    "content": "services:\n  postgres:\n    image: postgres:18@sha256:78481659c47e862334611ccdaf7c369c986b3046da9857112f3b309114a65fb4\n    environment:\n      POSTGRES_USER: alloy\n      POSTGRES_PASSWORD: alloy\n      POSTGRES_DB: alloy\n    ports:\n      - \"5432:5432\"\n    healthcheck:\n      test: [\"CMD-SHELL\", \"pg_isready -U alloy\"]\n      interval: 5s\n      timeout: 5s\n      retries: 5\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      postgres:\n        condition: service_healthy\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "postgres-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "rabbitmq-monitoring/README.md",
    "content": "# RabbitMQ Monitoring with Grafana Alloy\n\nThis scenario demonstrates RabbitMQ observability with a single Alloy pipeline:\n\n- **Metrics** - `prometheus.scrape` collects RabbitMQ's built-in `/metrics` endpoint from the `rabbitmq_prometheus` plugin and remote-writes the samples to Prometheus.\n- **Logs** - `loki.source.docker` tails the RabbitMQ container logs from the Docker socket and sends them to Loki.\n\n## Architecture\n\n- **RabbitMQ** - the monitored broker, running the management and Prometheus plugins\n- **loadgen** - a small RabbitMQ PerfTest publisher that creates the durable `alloy-sample` queue and publishes one persistent message per second\n- **Grafana Alloy** - scrapes broker metrics, collects broker container logs, and forwards both signals\n- **Loki / Prometheus / Grafana** - local backends and visualization, with datasources auto-provisioned\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root using centralized image versions\n./run-example.sh rabbitmq-monitoring\n```\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login required)\n- **Alloy UI**: http://localhost:12345\n- **Prometheus**: http://localhost:9090\n- **Loki**: http://localhost:3100\n- **RabbitMQ Management UI**: http://localhost:15672 (`guest` / `guest`)\n- **RabbitMQ Prometheus endpoint**: http://localhost:15692/metrics\n\n## Trying It Out\n\nWithin about 30 seconds, open Grafana Explore and run these queries.\n\n### Metrics\n\n```promql\nrabbitmq_up\n```\n\n```promql\nrabbitmq_queue_messages{queue=\"alloy-sample\"}\n```\n\n```promql\nrabbitmq_channels\n```\n\nThe scenario sets `prometheus.return_per_object_metrics = true` so queue-level labels are visible on `/metrics`.\n\n### Logs\n\n```logql\n{job=\"rabbitmq\"}\n```\n\n```logql\n{job=\"rabbitmq\"} |~ \"accepting AMQP connection|authenticated and granted access\"\n```\n\nRabbitMQ logs connection lifecycle events by default. 
Channel counts are best checked with metrics:\n\n```promql\nrabbitmq_channels\n```\n\n## Key Configuration\n\n- `enabled_plugins` enables `rabbitmq_management` and `rabbitmq_prometheus`.\n- `rabbitmq.conf` sends debug-level console logs to Docker and returns per-object queue metrics from `/metrics`.\n- `config.alloy` keeps the metrics and logs pipelines separate and labels RabbitMQ logs as `job=\"rabbitmq\"`.\n\n## Stopping\n\n```bash\ndocker compose down -v\n```\n"
  },
  {
    "path": "rabbitmq-monitoring/config.alloy",
    "content": "// RabbitMQ Monitoring with Grafana Alloy.\n// Metrics: scrape RabbitMQ's built-in Prometheus endpoint.\n// Logs: collect RabbitMQ container logs from Docker and ship them to Loki.\n\nlivedebugging {\n\tenabled = true\n}\n\nprometheus.scrape \"rabbitmq\" {\n\ttargets = [{\n\t\t__address__ = \"rabbitmq:15692\",\n\t\tjob         = \"rabbitmq\",\n\t}]\n\tforward_to      = [prometheus.remote_write.local.receiver]\n\tscrape_interval = \"15s\"\n}\n\nprometheus.remote_write \"local\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n\ndiscovery.docker \"linux\" {\n\thost = \"unix:///var/run/docker.sock\"\n}\n\ndiscovery.relabel \"rabbitmq_logs\" {\n\ttargets = discovery.docker.linux.targets\n\n\trule {\n\t\tsource_labels = [\"__meta_docker_container_name\"]\n\t\tregex         = \"/rabbitmq-monitoring-rabbitmq\"\n\t\taction        = \"keep\"\n\t}\n\n\trule {\n\t\ttarget_label = \"job\"\n\t\treplacement  = \"rabbitmq\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_docker_container_name\"]\n\t\tregex         = \"/(.*)\"\n\t\ttarget_label  = \"container_name\"\n\t}\n}\n\nloki.source.docker \"rabbitmq\" {\n\thost          = \"unix:///var/run/docker.sock\"\n\ttargets       = discovery.docker.linux.targets\n\trelabel_rules = discovery.relabel.rabbitmq_logs.rules\n\tforward_to    = [loki.write.local.receiver]\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "rabbitmq-monitoring/docker-compose.coda.yml",
    "content": "services:\n  rabbitmq:\n    image: rabbitmq:${RABBITMQ_VERSION:-4.3.0-management}\n    container_name: rabbitmq-monitoring-rabbitmq\n    hostname: rabbitmq\n    ports:\n      - \"5672:5672\"\n      - \"15672:15672\"\n      - \"15692:15692\"\n    environment:\n      - RABBITMQ_DEFAULT_USER=guest\n      - RABBITMQ_DEFAULT_PASS=guest\n    volumes:\n      - ./enabled_plugins:/etc/rabbitmq/enabled_plugins:ro\n      - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro\n    healthcheck:\n      test: [\"CMD\", \"rabbitmq-diagnostics\", \"-q\", \"ping\"]\n      interval: 10s\n      timeout: 5s\n      retries: 12\n\n  loadgen:\n    image: pivotalrabbitmq/perf-test:${RABBITMQ_PERF_TEST_VERSION:-2.24.0}\n    container_name: rabbitmq-monitoring-loadgen\n    command:\n      - --uri\n      - amqp://guest:guest@rabbitmq:5672\n      - --queue\n      - alloy-sample\n      - --producers\n      - \"1\"\n      - --consumers\n      - \"0\"\n      - --rate\n      - \"1\"\n      - --size\n      - \"256\"\n      - --flag\n      - persistent\n      - --id\n      - alloy-rabbitmq-demo\n    depends_on:\n      rabbitmq:\n        condition: service_healthy\n    restart: unless-stopped\n"
  },
  {
    "path": "rabbitmq-monitoring/docker-compose.yml",
    "content": "services:\n  rabbitmq:\n    image: rabbitmq:${RABBITMQ_VERSION:-4.3.0-management}\n    container_name: rabbitmq-monitoring-rabbitmq\n    hostname: rabbitmq\n    ports:\n      - \"5672:5672\"\n      - \"15672:15672\"\n      - \"15692:15692\"\n    environment:\n      - RABBITMQ_DEFAULT_USER=guest\n      - RABBITMQ_DEFAULT_PASS=guest\n    volumes:\n      - ./enabled_plugins:/etc/rabbitmq/enabled_plugins:ro\n      - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro\n    healthcheck:\n      test: [\"CMD\", \"rabbitmq-diagnostics\", \"-q\", \"ping\"]\n      interval: 10s\n      timeout: 5s\n      retries: 12\n\n  loadgen:\n    image: pivotalrabbitmq/perf-test:${RABBITMQ_PERF_TEST_VERSION:-2.24.0}\n    container_name: rabbitmq-monitoring-loadgen\n    command:\n      - --uri\n      - amqp://guest:guest@rabbitmq:5672\n      - --queue\n      - alloy-sample\n      - --producers\n      - \"1\"\n      - --consumers\n      - \"0\"\n      - --rate\n      - \"1\"\n      - --size\n      - \"256\"\n      - --flag\n      - persistent\n      - --id\n      - alloy-rabbitmq-demo\n    depends_on:\n      rabbitmq:\n        condition: service_healthy\n    restart: unless-stopped\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    container_name: rabbitmq-monitoring-alloy\n    ports:\n      - \"12345:12345\"\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - /var/run/docker.sock:/var/run/docker.sock:ro\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      rabbitmq:\n        condition: service_healthy\n      loki:\n        condition: service_started\n      prometheus:\n        condition: service_started\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    container_name: rabbitmq-monitoring-loki\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: 
-config.file=/etc/loki/local-config.yaml\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    container_name: rabbitmq-monitoring-prometheus\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    container_name: rabbitmq-monitoring-grafana\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: false\n          version: 1\n          editable: false\n        - name: Prometheus\n          type: prometheus\n          access: proxy\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "rabbitmq-monitoring/enabled_plugins",
    "content": "[rabbitmq_management,rabbitmq_prometheus].\n"
  },
  {
    "path": "rabbitmq-monitoring/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n    - from: 2020-05-15\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 5m\n"
  },
  {
    "path": "rabbitmq-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "rabbitmq-monitoring/rabbitmq.conf",
    "content": "prometheus.return_per_object_metrics = true\n\nlog.console = true\nlog.console.level = debug\nlog.console.formatter.single_line = on\nlog.file = false\n"
  },
  {
    "path": "redis-monitoring/README.md",
    "content": "# Redis Monitoring with Grafana Alloy\n\nThis scenario demonstrates how to monitor a Redis instance using Grafana Alloy's built-in `prometheus.exporter.redis` component.\n\n## Architecture\n\n- **Redis** - The monitored Redis instance\n- **Grafana Alloy** - Collects Redis metrics via `prometheus.exporter.redis` and remote writes them to Prometheus\n- **Prometheus** - Stores the scraped metrics\n- **Grafana** - Visualizes Redis metrics (auto-provisioned with Prometheus datasource)\n\n## Running\n\n```bash\n# From this directory\ndocker compose up -d\n\n# Or from the repo root using centralized image versions\n./run-example.sh redis-monitoring\n```\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login required)\n- **Alloy UI**: http://localhost:12345\n- **Prometheus**: http://localhost:9090\n\n## Key Metrics\n\nOnce running, you can query Redis metrics in Grafana or Prometheus. Some useful metrics include:\n\n- `redis_up` - Whether Redis is reachable\n- `redis_connected_clients` - Number of connected clients\n- `redis_used_memory_bytes` - Memory usage\n- `redis_commands_total` - Total commands processed\n- `redis_keyspace_hits_total` / `redis_keyspace_misses_total` - Cache hit ratio\n\n## Stopping\n\n```bash\ndocker compose down\n```\n"
  },
  {
    "path": "redis-monitoring/config.alloy",
    "content": "// Redis Monitoring with Grafana Alloy\n// This configuration scrapes Redis metrics using the built-in prometheus.exporter.redis component\n// and remote writes them to Prometheus.\n\nlivedebugging {\n\tenabled = true\n}\n\nprometheus.exporter.redis \"default\" {\n\tredis_addr = \"redis:6379\"\n}\n\nprometheus.scrape \"redis\" {\n\ttargets    = prometheus.exporter.redis.default.targets\n\tforward_to = [prometheus.remote_write.default.receiver]\n}\n\nprometheus.remote_write \"default\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n"
  },
  {
    "path": "redis-monitoring/docker-compose.coda.yml",
    "content": "services:\n  redis:\n    image: redis:8@sha256:0c341492924cad6f5483f9133e43bd6c51ecdecbcadfac5b51657393b6a7936c\n    ports:\n      - \"6379:6379\"\n"
  },
  {
    "path": "redis-monitoring/docker-compose.yml",
    "content": "services:\n  redis:\n    image: redis:8@sha256:0c341492924cad6f5483f9133e43bd6c51ecdecbcadfac5b51657393b6a7936c\n    ports:\n      - \"6379:6379\"\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - redis\n      - prometheus\n"
  },
  {
    "path": "redis-monitoring/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "renovate.json",
    "content": "{\n  \"$schema\": \"https://docs.renovatebot.com/renovate-schema.json\",\n  \"description\": \"Local additive config — extends whatever org-level renovate config the bot is configured with. Tracks the centralized version pins in image-versions.env so they stay current alongside the docker-compose fallback defaults.\",\n  \"customManagers\": [\n    {\n      \"customType\": \"regex\",\n      \"description\": \"Bump every VERSION variable in image-versions.env. Each line is preceded by a `# renovate: datasource=… depName=…` comment that tells the bot what the variable refers to.\",\n      \"managerFilePatterns\": [\n        \"/^image-versions\\\\.env$/\"\n      ],\n      \"matchStrings\": [\n        \"# renovate: datasource=(?<datasource>.+?) depName=(?<depName>.+?)\\\\s+\\\\w+_VERSION=(?<currentValue>.+)\"\n      ]\n    },\n    {\n      \"customType\": \"regex\",\n      \"description\": \"Bump grafana/k8s-monitoring chart version pinned in k8s/*/README.md install commands. The other two charts in each scenario (backend + grafana) are intentionally unpinned (`helm install` resolves latest at run time), so only k8s-monitoring needs tracking.\",\n      \"managerFilePatterns\": [\n        \"/^k8s/.+/README\\\\.md$/\"\n      ],\n      \"matchStrings\": [\n        \"grafana/k8s-monitoring --version \\\"(?<currentValue>[^\\\"]+)\\\"\"\n      ],\n      \"datasourceTemplate\": \"helm\",\n      \"depNameTemplate\": \"k8s-monitoring\",\n      \"registryUrlTemplate\": \"https://grafana.github.io/helm-charts\"\n    },\n    {\n      \"customType\": \"regex\",\n      \"description\": \"Sync ${*_VERSION:-default} fallbacks in every docker-compose file alongside image-versions.env updates. Captures depName from the image reference itself (e.g. `image: nginx/nginx-prometheus-exporter:${NGINX_EXPORTER_VERSION:-1.4.2}` → depName=nginx/nginx-prometheus-exporter, currentValue=1.4.2). 
One rule covers every variable — adding a new VERSION var to image-versions.env requires no change here as long as the compose line follows the convention `image: <depName>:${<NAME>_VERSION:-<value>}`. Renovate's docker-compose manager treats `${VAR}` substitution as a templated reference and won't update inline fallback defaults — without this customManager, the env file moves but the fallbacks drift, breaking `docker compose up` for users who don't pass --env-file.\",\n      \"managerFilePatterns\": [\n        \"/docker-compose(\\\\.coda)?\\\\.ya?ml$/\"\n      ],\n      \"matchStrings\": [\n        \"image:\\\\s*(?<depName>\\\\S+?):\\\\$\\\\{[A-Z_]+_VERSION:-(?<currentValue>[^}]+)\\\\}\"\n      ],\n      \"datasourceTemplate\": \"docker\"\n    }\n  ]\n}\n"
  },
  {
    "path": "routing/README.MD",
    "content": "# Example scenario for grafana alloy routing\n\nSimple example for cases where a singular source [e.g. OCP cluster log forwarder] is sending logs from multiple applications, and you need to route it to different loki instances [or in the example case, same loki instance, but different tenants].\n\nThe example covers an if-else scenario, where looking at the logs contents, the `tenantKey` loki request metadata is populated, for the logs to be stored in the appropriate tenant.\n\nIn this setup, alloy is a single instance that receives logs from multiple OCP clusters [test and prod].\n\nIt checks the origin of the log [via the `hostname` field in the request] and afterwards checks the type of log [via the custom `message.logger` field].\n\n\n\nDiagram:\n```\n\t\t\t\t\t\t\t\t\t- - - Loki tenant test app\n\tOCP - - -\t\t\t\t\t\t|\n\t[test]\t|\t\t\t\t\t\t| - - Loki tenant test audit\n\t\t\t---> Alloy ---> Loki ---\n\t\t\t|\t\t\t\t\t\t| - - Loki tenant prod app\n\tOCP - - - \t\t\t\t\t\t|\n\t[prod]\t\t\t\t\t\t\t- - - Loki tenant prod audit\n```\t\n\n\n## Testing\nThe provided docker compose has a local promtail-alloy-loki-grafana setup to mimic a real env [using promtail to mimic an OCP log forwarder]. Logs are stored in a local minio s3 bucket.\n\nUse `docker compose up` to bring the system up. [note, if minio has errors starting up, create a `.customData` dir]\n\nPut your log entries [with newline endings] inside the `support/promtail/myCustomLog.txt` file. They will be automatically pushed to loki via alloy.\n\nOpen grafana [localhost:3000], login with default admin [`admin/admin`] and go to `Explore`.\n\nYou will have multiple loki datasources; use the query `{job=\"myApp\"}`. \n\nDepending on the log content you put in the custom log file, the log will be stored in the appropriate loki tenant and is fetchable using the correct datasource.\n\n\n## Message examples\n\nTest app message [goes to test app tenant]\n\n`{\"@timestamp\":\"2025-10-31T09:40:25.229103272Z\",\"hostname\":\"tos-worker-002.tos.example.come\",\"level\":\"info\",\"log_source\":\"container\",\"log_type\":\"application\",\"message\":\"{\\\"timestamp\\\":\\\"2025-10-31 10:40:25.229\\\",\\\"level\\\":\\\"INFO\\\",\\\"thread\\\":\\\"http-nio-8080-exec-5\\\",\\\"mdc\\\":{\\\"correlationId\\\":\\\"test\\\",\\\"uniqueTrackingId\\\":\\\"test\\\"},\\\"logger\\\":\\\"app\\\",\\\"message\\\":\\\"[START] msisdn=38763383706\\\",\\\"context\\\":\\\"default\\\"}\"}`\n\nTest audit message [goes to test audit tenant]\n\n`{\"@timestamp\":\"2025-10-31T09:40:25.229103272Z\",\"hostname\":\"tos-worker-002.tos.example.come\",\"level\":\"info\",\"log_source\":\"container\",\"log_type\":\"application\",\"message\":\"{\\\"timestamp\\\":\\\"2025-10-31 10:40:25.229\\\",\\\"level\\\":\\\"INFO\\\",\\\"thread\\\":\\\"http-nio-8080-exec-5\\\",\\\"mdc\\\":{\\\"correlationId\\\":\\\"test\\\",\\\"uniqueTrackingId\\\":\\\"test\\\"},\\\"logger\\\":\\\"audit\\\",\\\"message\\\":\\\"[START] msisdn=38763383706\\\",\\\"context\\\":\\\"default\\\"}\"}`\n\nProd app message [goes to prod app tenant]\n\n`{\"@timestamp\":\"2025-10-31T09:40:25.229103272Z\",\"hostname\":\"pos-worker-001.pos.example.come\",\"level\":\"info\",\"log_source\":\"container\",\"log_type\":\"application\",\"message\":\"{\\\"timestamp\\\":\\\"2025-10-31 10:40:25.229\\\",\\\"level\\\":\\\"INFO\\\",\\\"thread\\\":\\\"http-nio-8080-exec-5\\\",\\\"mdc\\\":{\\\"correlationId\\\":\\\"test\\\",\\\"uniqueTrackingId\\\":\\\"test\\\"},\\\"logger\\\":\\\"app\\\",\\\"message\\\":\\\"[START] msisdn=38763383706\\\",\\\"context\\\":\\\"default\\\"}\"}`\n\nProd audit message [goes to prod audit tenant]\n\n`{\"@timestamp\":\"2025-10-31T09:40:25.229103272Z\",\"hostname\":\"pos-worker-001.pos.example.come\",\"level\":\"info\",\"log_source\":\"container\",\"log_type\":\"application\",\"message\":\"{\\\"timestamp\\\":\\\"2025-10-31 10:40:25.229\\\",\\\"level\\\":\\\"INFO\\\",\\\"thread\\\":\\\"http-nio-8080-exec-5\\\",\\\"mdc\\\":{\\\"correlationId\\\":\\\"test\\\",\\\"uniqueTrackingId\\\":\\\"test\\\"},\\\"logger\\\":\\\"audit\\\",\\\"message\\\":\\\"[START] msisdn=38763383706\\\",\\\"context\\\":\\\"default\\\"}\"}`\n\n"
  },
  {
    "path": "routing/config.alloy",
    "content": "//listen to api requests for incoming logs from OCP\nloki.source.api \"listener\" {\n    http {\n        //listen_address = \"\" //defaults to all/localhost\n        listen_port    = 3005\n    }\n\n\tforward_to = [loki.process.default_values.receiver]\n}\n\n//set a default label, so that all logs that passed thru alloy are marked as such\nloki.process \"default_values\" {\n   stage.static_labels {\n        values = {\n            source = \"grafana-alloy\",\n        }\n    }\n\n\tforward_to = [loki.process.redirect_env.receiver]\n}\n\n//we check the hostname field to see if the source is the test or prod cluster\nloki.process \"redirect_env\" {\t\n\tstage.json {\n\t  expressions = {extractedHostname = \"hostname\"}\n\t}\n\t\n\tstage.labels {\n\t  values = {hostnameLabel = \"extractedHostname\"}\n\t}\n\t\n\tstage.match {\n\t\tpipeline_name = \"Send to test tenants if tos source\"\n\t\tselector = \"{hostnameLabel =~ \\\"tos.*\\\"}\"\n\t\t\n\t\t//default for test\n\t\tstage.tenant {\n\t\t\tvalue = \"test_loki_app\"\n\t\t}\t\t\n\t\t\n\t\t//take main payload\n\t\tstage.json {\n\t\t  expressions = {payload = \"message\"}\n\t\t}\n\t\t\n\t\t//extract logger property from it\n\t\tstage.json {\n\t\t  source = \"payload\"\n\t\t  expressions = {logger = \"logger\"}\n\t\t}\n\t\t\n\t\t//set it as a label, match selector works only with it\n\t\tstage.labels {\n\t\t  values = {loggerLabel = \"logger\"}\n\t\t}\n\t\t\n\t\t//route only 'audit' logger types to audit tenant [rest goes to default - app tenant]\n\t\tstage.match {\n\t\t  pipeline_name = \"Audit log routing\"\n\t\t  selector = \"{loggerLabel = \\\"audit\\\"}\"\n\t\t\n\t\t  stage.tenant {\n\t\t\tvalue = \"test_loki_audit\"\n\t\t  }\t\t\n\t\t}\n\t}\n\t\n\tstage.match {\n\t\tpipeline_name = \"Otherwise it is production - re check the logic above\"\n\t\tselector = \"{hostnameLabel !~ \\\"tos.*\\\"}\"\n\t\t\n\t\t//default value - app\n\t\tstage.tenant {\n\t\t\tvalue = \"loki_app\"\n\t\t}\t\t\n\t\t\t\t\n\t\t//take main payload\n\t\tstage.json {\n\t\t  expressions = {payload = \"message\"}\n\t\t}\n\t\t\n\t\t//extract logger property from it\n\t\tstage.json {\n\t\t  source = \"payload\"\n\t\t  expressions = {logger = \"logger\"}\n\t\t}\n\t\t\n\t\t\n\t\tstage.labels {\n\t\t  values = {loggerLabel = \"logger\"}\n\t\t}\n\t\t\n\t\t//route only 'audit' logger types to audit tenant [rest goes to default - app tenant]\n\t\tstage.match {\n\t\t  pipeline_name = \"Audit log routing\"\n\t\t  selector = \"{loggerLabel = \\\"audit\\\"}\"\n\t\t\n\t\t  stage.tenant {\n\t\t\tvalue = \"loki_audit\"\n\t\t  }\n\t   }\n\t}\n\t\n\tforward_to = [loki.write.loki_default.receiver]\n}\n\nloki.write \"loki_default\" {\n  endpoint {\n    //the bundled docker compose stack exposes loki as loki:3100; change this for other environments\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "routing/docker-compose.yaml",
    "content": "services:\n  minio:\n    image: \"minio/minio:RELEASE.2024-10-29T16-01-48Z@sha256:ebd2af76d40ff25ccc630533615f7ccd55fbe83d629a4b7c7a1b6311c1af3d6c\"\n    restart: \"unless-stopped\"\n    entrypoint:\n      - \"sh\"\n      - \"-euc\"\n      - \"mkdir -p /data/loki && /usr/bin/docker-entrypoint.sh minio server --quiet --address 0.0.0.0:9000 --console-address ':9001' /data\"\n    volumes:\n      - \"./.customData/minio:/data\"\n    environment:\n      - \"MINIO_ROOT_USER=myuser\"\n      - \"MINIO_ROOT_PASSWORD=mypass\"\n    ports:\n      - \"9000:9000\"\n      - \"9001:9001\"\n\n  loki:\n    image: \"grafana/loki:latest@sha256:73e905b51a7f917f7a1075e4be68759df30226e03dcb3cd2213b989cc0dc8eb4\"\n    restart: \"unless-stopped\"\n    command: \"-config.file=/etc/loki/server.yml\"\n    volumes:\n      # The host file is server.yaml; it is mounted under the name the -config.file flag expects.\n      - \"./support/loki/server.yaml:/etc/loki/server.yml\"\n    ports:\n      - \"3100:3100\"\n      - \"7946\"\n    depends_on:\n      - \"minio\"\n\n  grafana:\n    image: \"grafana/grafana:latest@sha256:0f86bada30d65ef9d0183b90c1e2682ac92d53d95da8bed322b984ea78a4a73a\"\n    restart: \"unless-stopped\"\n    user: '0'\n    volumes:\n      - \"./support/grafana/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml\"\n      - \"./.customData/grafana:/var/lib/grafana\"\n    ports:\n      - \"3000:3000\"\n    depends_on:\n      - \"loki\"\n      \n  alloy:\n    image: \"grafana/alloy:latest@sha256:51aeb9d829239345070619dad3edd6873186f913c84f45b365b74574fcb38ec0\"\n    restart: \"unless-stopped\"\n    command: \"run --server.http.listen-addr=0.0.0.0:3000 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\"\n    volumes:\n      - \"./config.alloy:/etc/alloy/config.alloy\"\n    ports:\n      # Host 3005 -> Alloy HTTP server/UI (container port 3000, see --server.http.listen-addr).\n      - \"3005:3000\"\n      # Host 3015 -> loki.source.api listener (container port 3005, see config.alloy).\n      - \"3015:3005\"\n    depends_on:\n      - \"loki\"\n\n###Local app that generates logs\n  promtail:\n    image: \"grafana/promtail:latest@sha256:6cfa64ec432b24a912d640e2edb940eeae2666f61861a66c121d763dd7241381\"\n    volumes:\n      - \"./support/promtail/promtail-config.yml:/etc/promtail/config.yml\"\n      - \"./support/promtail/myCustomLog.txt:/var/log/myCustomLog.txt\"\n    ports:\n      - \"9080:9080\"\n    depends_on: \n      - \"alloy\""
  },
  {
    "path": "routing/support/grafana/datasources.yml",
    "content": "apiVersion: 1\n\ndatasources:\n  - name: \"Loki\"\n    type: \"loki\"\n    access: \"proxy\"\n    orgId: 1\n    url: \"http://loki:3100\"\n    basicAuth: false\n    isDefault: false\n    version: 1\n    editable: false\n    apiVersion: 1\n    uid: \"loki\"\n    jsonData:\n      httpHeaderName1: 'X-Scope-OrgID'\n    secureJsonData:\n      httpHeaderValue1: 'fake'\n\n  - name: \"Loki app\"\n    type: \"loki\"\n    access: \"proxy\"\n    orgId: 1\n    url: \"http://loki:3100\"\n    basicAuth: false\n    isDefault: false\n    version: 1\n    editable: false\n    apiVersion: 1\n    uid: \"lokiApp\"\n    jsonData:\n      httpHeaderName1: 'X-Scope-OrgID'\n    secureJsonData:\n      httpHeaderValue1: 'loki_app'\n  \n  - name: \"Loki audit\"\n    type: \"loki\"\n    access: \"proxy\"\n    orgId: 1\n    url: \"http://loki:3100\"\n    basicAuth: false\n    isDefault: false\n    version: 1\n    editable: false\n    apiVersion: 1\n    uid: \"lokiAudit\"\n    jsonData:\n      httpHeaderName1: 'X-Scope-OrgID'\n    secureJsonData:\n      httpHeaderValue1: 'loki_audit'\n      \n  - name: \"Loki test app\"\n    type: \"loki\"\n    access: \"proxy\"\n    orgId: 1\n    url: \"http://loki:3100\"\n    basicAuth: false\n    isDefault: false\n    version: 1\n    editable: false\n    apiVersion: 1\n    uid: \"lokiTestApp\"\n    jsonData:\n      httpHeaderName1: 'X-Scope-OrgID'\n    secureJsonData:\n      httpHeaderValue1: 'test_loki_app'\n\n  - name: \"Loki test audit\"\n    type: \"loki\"\n    access: \"proxy\"\n    orgId: 1\n    url: \"http://loki:3100\"\n    basicAuth: false\n    isDefault: false\n    version: 1\n    editable: false\n    apiVersion: 1\n    uid: \"lokiTestAudit\"\n    jsonData:\n      httpHeaderName1: 'X-Scope-OrgID'\n    secureJsonData:\n      httpHeaderValue1: 'test_loki_audit'\n\n"
  },
  {
    "path": "routing/support/loki/server.yaml",
    "content": "auth_enabled: true\n\nserver:\n  http_listen_address: 0.0.0.0\n  grpc_listen_address: 0.0.0.0\n  http_listen_port: 3100\n  grpc_listen_port: 9095\n  log_level: \"info\"\n\ncommon:\n  path_prefix: \"/loki/data\"\n  storage:\n    s3:\n      endpoint: \"minio:9000\"\n      insecure: true\n      bucketnames: \"loki\"\n      access_key_id: \"myuser\"\n      secret_access_key: \"mypass\"\n      s3forcepathstyle: true\n\nmemberlist:\n  dead_node_reclaim_time: \"30s\"\n  gossip_to_dead_nodes_time: \"15s\"\n  left_ingesters_timeout: \"30s\"\n  gossip_interval: \"2s\"\n  bind_port: 7946\n  bind_addr:\n    - \"0.0.0.0\"\n  join_members:\n    - \"loki\"\n\ningester:\n  lifecycler:\n    join_after: \"10s\"\n    observe_period: \"5s\"\n    ring:\n      replication_factor: 1\n      kvstore:\n        store: \"memberlist\"\n    final_sleep: \"0s\"\n  chunk_idle_period: \"1m\"\n  wal:\n    enabled: true\n    dir: \"/loki/wal\"\n  max_chunk_age: \"1m\"\n  chunk_retain_period: \"30s\"\n  chunk_encoding: \"snappy\"\n  chunk_target_size: 1.572864e+06\n  chunk_block_size: 262144\n  flush_op_timeout: \"10s\"\n\nschema_config:\n  configs:\n    - from: \"2020-08-01\"\n      store: \"tsdb\"\n      object_store: \"s3\"\n      schema: \"v13\"\n      index:\n        prefix: \"index_\"\n        period: \"24h\"\n\nstorage_config:\n  boltdb_shipper:\n    active_index_directory: \"/tmp/index\"\n    cache_location: \"/tmp/boltdb-cache\"\n\nlimits_config:\n  max_cache_freshness_per_query: \"10m\"\n  reject_old_samples: true\n  reject_old_samples_max_age: \"30m\"\n  split_queries_by_interval: \"15m\"\n  ingestion_rate_mb: 10\n  ingestion_burst_size_mb: 20\n\ntable_manager:\n  retention_deletes_enabled: true\n  retention_period: \"336h\"\n\nquery_range:\n  max_retries: 5\n  align_queries_with_step: true\n  parallelise_shardable_queries: true\n  cache_results: true\n\nfrontend:\n  log_queries_longer_than: \"5s\"\n  compress_responses: true\n  max_outstanding_per_tenant: 
2048\n\nquery_scheduler:\n  max_outstanding_requests_per_tenant: 1024\n\nquerier:\n  query_ingesters_within: \"2h\"\n\ncompactor:\n  working_directory: \"/tmp/compactor\"\n  retention_enabled: true\n  compaction_interval: 30m\n  retention_delete_delay: 1h\n  retention_delete_worker_count: 150"
  },
  {
    "path": "routing/support/promtail/myCustomLog.txt",
    "content": "############################################################################\n#find this in grafana via - {job=\"myApp\"} |= ``\n############################################################################\n############################################################################\n2025-10-15 - MY APP - Started logging by custom means...\n"
  },
  {
    "path": "routing/support/promtail/promtail-config.yml",
    "content": "server:\n  http_listen_port: 9080\n  grpc_listen_port: 0\n\npositions:\n  filename: /tmp/positions.yaml\n\nclients:\n  - url: http://alloy:3005/loki/api/v1/push\n\nscrape_configs:\n  - job_name: logs\n    static_configs:\n    - targets:\n        - localhost\n      labels:\n        job: myApp\n        __path__: /var/log/myCustomLog.txt\n        "
  },
  {
    "path": "run-example.sh",
    "content": "#!/bin/bash\n# Start one example scenario with the centrally pinned image versions.\n# Run from the repository root: ./run-example.sh <example-directory>\nset -euo pipefail\n\n# Usage check\nif [ $# -lt 1 ]; then\n    echo \"Usage: $0 <example-directory>\"\n    echo \"Available examples:\"\n    ls -d */ | grep -v \"k8s\\|img\\|.git\" | tr -d '/'\n    exit 1\nfi\n\nEXAMPLE_DIR=$1\n\n# Check if the example directory exists\nif [ ! -d \"$EXAMPLE_DIR\" ]; then\n    echo \"Error: Example directory '$EXAMPLE_DIR' not found.\"\n    exit 1\nfi\n\n# Check if a docker-compose file exists in the example directory.\n# Some scenarios use .yaml instead of .yml; accept either.\nif [ ! -f \"$EXAMPLE_DIR/docker-compose.yml\" ] && [ ! -f \"$EXAMPLE_DIR/docker-compose.yaml\" ]; then\n    echo \"Error: No docker-compose.yml or docker-compose.yaml found in '$EXAMPLE_DIR'.\"\n    exit 1\nfi\n\n# Locate the image versions file in the current directory. The path is made\n# absolute so it still resolves after the `cd` below, even when the example\n# directory is nested or given as an absolute path.\nENV_FILE=\"$PWD/image-versions.env\"\nif [ ! -f \"$ENV_FILE\" ]; then\n    echo \"Error: image-versions.env file not found.\"\n    exit 1\nfi\n\n# Run docker compose in the example directory with the environment variables\necho \"Starting example: $EXAMPLE_DIR\"\n(cd \"$EXAMPLE_DIR\" && docker compose --env-file \"$ENV_FILE\" up -d)\n\necho \"Example started successfully.\"\necho \"Access Grafana at http://localhost:3000\"\necho \"To stop the example, run: cd $EXAMPLE_DIR && docker compose down\"\n"
  },
  {
    "path": "self-monitoring/README.md",
    "content": "# Self-Monitoring with Grafana Alloy\n\nThis example demonstrates how to configure Grafana Alloy to monitor itself, collecting both its own metrics and logs alongside other Docker containers.\n\n## Prerequisites\n- Docker\n- Docker Compose\n- Git\n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/self-monitoring\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345`. \n\n### Step 4: Access Prometheus UI\nOpen your browser and go to `http://localhost:9090`.\n\n### Step 5: Access Loki\nLoki is available at `http://localhost:3100`.\n\n## What This Demo Shows\n\nThis scenario demonstrates:\n\n- **Metrics Collection**: Using `prometheus.exporter.self` to export Alloy's own internal metrics\n- **Log Collection**: Using `loki.source.docker` to collect logs from all Docker containers, including Alloy itself\n- **Service Discovery**: Automatic discovery of Docker containers with proper labeling\n- **Remote Write**: Sending metrics to Prometheus and logs to Loki\n\n## Key Configuration Elements\n\n### Self-Monitoring Metrics\n\nThe `prometheus.exporter.self` component exposes Alloy's internal metrics:\n- Memory usage\n- CPU utilization\n- Component health\n- Scrape statistics\n\n### Docker Log Collection\n\nThe configuration automatically discovers and collects logs from all Docker containers running on the host, including:\n- Alloy's own logs\n- Prometheus logs\n- Loki logs\n- Any other containers running on the same Docker host\n\n\n"
  },
  {
    "path": "self-monitoring/config.alloy",
    "content": "\n\nprometheus.exporter.self \"integrations_alloy_health\" { }\n\ndiscovery.relabel \"integrations_alloy_health\" {\n\ttargets = prometheus.exporter.self.integrations_alloy_health.targets\n\n\trule {\n\t\ttarget_label = \"instance\"\n\t\treplacement  = constants.hostname\n\t}\n\n\trule {\n\t\ttarget_label = \"container\"\n\t\treplacement  = \"alloy\"\n\t}\n}\n\nprometheus.scrape \"integrations_alloy_health\" {\n\ttargets = array.concat(\n\t\tdiscovery.relabel.integrations_alloy_health.output,\n\t)\n\tforward_to = [prometheus.relabel.integrations_alloy_health.receiver]\n\tjob_name   = \"integrations/alloy\"\n}\n\nprometheus.relabel \"integrations_alloy_health\" {\n\tforward_to = [prometheus.remote_write.default.receiver]\n}\n\nprometheus.remote_write \"default\" {\n\tendpoint {\n\t\turl = \"http://prometheus:9090/api/v1/write\"\n\t}\n}\n\n\n// ###############################\n// #### Logging Configuration ####\n// ###############################\n\n// Discover Docker containers and extract metadata.\ndiscovery.docker \"linux\" {\n  host = \"unix:///var/run/docker.sock\"\n}\n\n// Define a relabeling rule to create a service name from the container name.\ndiscovery.relabel \"logs_integrations_docker\" {\n      targets = []\n  \n  // Extract the docker-compose service name from container names like\n  // /grafana-pathfinder-app-alloy-1 -> alloy\n  rule {\n      source_labels = [\"__meta_docker_container_name\"]\n      regex = \"^/(?:.+-)?([^-]+)-(?:\\\\d+)$\"\n      target_label = \"container\"\n  }\n\n     rule {\n        target_label = \"instance\"\n        replacement  = constants.hostname\n    }\n\n  }\n\n\n// Configure a loki.source.docker component to collect logs from Docker containers.\nloki.source.docker \"default\" {\n  host       = \"unix:///var/run/docker.sock\"\n  targets    = discovery.docker.linux.targets\n  relabel_rules = discovery.relabel.logs_integrations_docker.rules\n  forward_to = [loki.write.local.receiver]\n}\n\nloki.write 
\"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "self-monitoring/docker-compose.yaml",
    "content": "services:\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --enable-feature=native-histograms\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - '3100:3100'\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345 # Alloy HTTP server\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - /var/run/docker.sock:/var/run/docker.sock\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy"
  },
  {
    "path": "self-monitoring/loki-config.yaml",
    "content": "# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\ndistributor:\n  otlp_config:\n    # List of default otlp resource attributes to be picked as index labels\n    # CLI flag: -distributor.otlp.default_resource_attributes_as_index_labels\n    default_resource_attributes_as_index_labels:\n      [\n        service.name service.namespace service.instance.id deployment.environment deployment.environment.name cloud.region cloud.availability_zone k8s.cluster.name k8s.namespace.name k8s.container.name container.name k8s.replicaset.name k8s.deployment.name k8s.statefulset.name k8s.daemonset.name k8s.cronjob.name k8s.job.name,\n      ]\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n    - from: 2020-05-15\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n"
  },
  {
    "path": "snmp/Readme.md",
    "content": "# Monitoring Linux with Alloy\n\nGrafana Alloy can be used to monitor Linux servers and containers. In this guide, we will show you how to deploy Grafana Alloy in a Docker environment to monitor Linux system metrics and logs. The setup consists of:\n* Node Exporter metrics for system performance monitoring\n* System logs collection with Loki\n\n## Prerequisites\n\n* Git - You will need Git to clone the repository.\n* Docker and Docker Compose - This tutorial uses Docker to host Grafana, Loki, Prometheus, and Alloy.\n* Linux environment - Either a Linux host running Docker or a Linux VM.\n\n## About this Demo\n\nThis demo runs Alloy in a container alongside Grafana, Prometheus, and Loki, creating a self-contained monitoring stack. The Alloy container acts as a \"fake Linux server\" to demonstrate monitoring capabilities out of the box.\n\nIn a production environment, you would typically install Alloy directly on each Linux server you want to monitor.\n\n## Step 1: Clone the Repository\n\nClone the repository to your machine:\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/linux\n```\n\n## Step 2: Deploy the Monitoring Stack\n\nUse Docker Compose to deploy Grafana, Loki, Prometheus, and Alloy:\n\n```bash\ndocker-compose up -d\n```\n\nYou can check the status of the containers:\n\n```bash\ndocker ps\n```\n\nGrafana should be running on [http://localhost:3000](http://localhost:3000).\n\n## Step 3: Explore the Monitoring Data\n\nOnce the stack is running, you can explore the collected metrics and logs:\n\n1. Access Grafana at [http://localhost:3000](http://localhost:3000) (default credentials are admin/admin)\n2. 
Import the Node Exporter dashboard to visualize system metrics:\n   - Go to Dashboards → Import\n   - Upload the JSON file from [here](https://grafana.com/api/dashboards/1860/revisions/37/download)\n   - Select the Prometheus data source and click Import\n\nThis community dashboard provides comprehensive system metrics including CPU, memory, disk, and network usage.\n\n## Step 4: Viewing Logs\n\nOpen your browser and go to [http://localhost:3000/a/grafana-lokiexplore-app](http://localhost:3000/a/grafana-lokiexplore-app). This will take you to the Loki explorer in Grafana.\n\n## Deploying on Bare Metal\n\nTo monitor actual Linux servers in production, you would:\n\n1. Install Alloy directly on each Linux server\n\n2. Modify the `config.alloy` file to point to your Prometheus and Loki instances:\n   ```\n   prometheus.remote_write \"local\" {\n     endpoint {\n       url = \"http://localhost:9090/api/v1/write\"\n     }\n   }\n   \n   loki.write \"local\" {\n     endpoint {\n       url = \"http://localhost:3100/loki/api/v1/push\"\n     }\n   }\n   ```\n\n3. Run Alloy as a service:\n   ```bash\n   sudo alloy run /path/to/config.alloy\n   ```\n\n## Configuration Customization\n\nThe included `config.alloy` file sets up:\n\n1. Node Exporter integration to collect system metrics\n2. Log collection from system logs and journal\n3. Relabeling rules to organize metrics and logs\n4. Remote write endpoints for Prometheus and Loki\n\nYou can customize which collectors are enabled/disabled and adjust scrape intervals in the configuration file.\n\n## Troubleshooting\n\nIf you encounter issues:\n\n* Check container logs: `docker-compose logs`\n* Verify Alloy is running: `docker-compose ps`\n* Ensure ports are not conflicting with existing services\n* Review the Alloy configuration in `config.alloy`\n"
  },
  {
    "path": "snmp/config.alloy",
    "content": "// --- Remote Write to Prometheus ---\nprometheus.remote_write \"remote\" {\n  endpoint {\n    url = \"http://prometheus:9090/api/v1/write\"\n  }\n}\n\n// --- SNMP Exporter Configuration ---\nprometheus.exporter.snmp \"snmp_exporter\" {\n    config_file = \"/etc/snmp/snmp.yml\"\n\n    target \"tm\" {\n        address     = \"snmpd\"\n        module      = \"CISCO\"\n        walk_params = \"Cisco\"\n        labels = {\n            \"ilo_node\" = \"switch\",\n        }\n    }\n\n    walk_param \"cisco\" {\n        retries = \"2\"\n        timeout = \"30s\"\n    }\n}\n\n// --- SNMP Scrape Configuration ---\ndiscovery.relabel \"snmp_targets\" {\n  targets = prometheus.exporter.snmp.snmp_exporter.targets\n  rule {\n    target_label = \"job\"\n    replacement  = \"smpt\"\n  }\n}\n\nprometheus.scrape \"snmp_targets\" {\n  scrape_interval = \"30s\"\n  targets         = discovery.relabel.snmp_targets.output\n  forward_to      = [prometheus.remote_write.remote.receiver]\n}\n\n// --- Enable Live Debugging ---\nlivedebugging {}\n"
  },
  {
    "path": "snmp/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n\n  prometheus:\n     image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n     command:\n       - --web.enable-remote-write-receiver\n       - --config.file=/etc/prometheus/prometheus.yml\n     ports:\n      - 9090:9090/tcp\n     volumes:\n        - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         - name: Prometheus\n           type: prometheus\n           orgId: 1\n           url: http://prometheus:9090\n           basicAuth: false\n           isDefault: true\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - ./snmp.yml:/etc/alloy/snmp.yml\n      \n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n"
  },
  {
    "path": "snmp/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\ndistributor:\n  otlp_config:\n    # List of default otlp resource attributes to be picked as index labels\n    # CLI flag: -distributor.otlp.default_resource_attributes_as_index_labels\n      default_resource_attributes_as_index_labels: [service.name service.namespace service.instance.id deployment.environment deployment.environment.name cloud.region cloud.availability_zone k8s.cluster.name k8s.namespace.name k8s.container.name container.name k8s.replicaset.name k8s.deployment.name k8s.statefulset.name k8s.daemonset.name k8s.cronjob.name k8s.job.name]\n\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n"
  },
  {
    "path": "snmp/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "snmp/snmp.yml",
    "content": "modules:\n  CISCO:\n    walk:\n      - 1.4.6.1.4.3.9.9.244.1.2.1.1.7\n    metrics:\n      - name: ifInterface\n        oid: 1.4.6.1.4.3.9.9.244.1.2.1.1.7\n        type: gauge\n        help: A unique value, greater than zero, for each interface\n        indexes:\n          - labelname: ifInterface\n            type: gauge\nauths:\n  public_v1:\n    community: <community>\n    security_level: noAuthNoPriv\n    version: 1\n  public_v2:\n    community: <community>\n    security_level: noAuthNoPriv\n    version: 2\n\n"
  },
  {
    "path": "syslog/README.md",
    "content": "# Syslog Scenario\n\nThis scenario demonstrates how to use ryslog and Alloy to monitor non RFC5424 compliant syslog messages. Alloy by itself does not support non RFC5424 compliant syslog messages. However, we can use rsyslog to convert non RFC5424 compliant syslog messages to RFC5424 compliant syslog messages. \n\n## Running the Demo\n\n### Step 1: Clone the repository\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n### Step 2: Deploy the monitoring stack\n```bash\ncd alloy-scenarios/syslog\ndocker-compose up -d\n```\n\n### Step 3: Access Grafana Alloy UI\nOpen your browser and go to `http://localhost:12345`. \n\n### Step 4: Access Grafana UI\nOpen your browser and go to `http://localhost:3000`.\n\n\n"
  },
  {
    "path": "syslog/config.alloy",
    "content": "\n\nlivedebugging {\n  enabled = true\n}\n\nloki.source.syslog \"local\" {\n  listener {\n    address  = \"0.0.0.0:51893\"\n    labels   = { component = \"loki.source.syslog\", protocol = \"tcp\" }\n  }\n\n  listener {\n    address  = \"0.0.0.0:51898\"\n    protocol = \"udp\"\n    labels   = { component = \"loki.source.syslog\", protocol = \"udp\"}\n  }\n\n  forward_to = [loki.write.local.receiver]\n}\n\nloki.write \"local\" {\n  endpoint {\n    url = \"http://loki:3100/loki/api/v1/push\"\n  }\n}"
  },
  {
    "path": "syslog/docker-compose.coda.yml",
    "content": "services:\n  rsyslog:\n    image: rsyslog/syslog_appliance_alpine:latest@sha256:c0dd7cad9ff3234967ff59879590175b7590e8a5f5621ec49a85aff546b44a3b\n    container_name: rsyslog\n    ports:\n      - \"514:514/udp\"\n      - \"514:514/tcp\"\n    volumes:\n      - ./rsyslog.conf:/etc/rsyslog.conf\n\n  syslog-simulator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: syslog-simulator\n    volumes:\n      - ./syslog_simulator.py:/syslog_simulator.py\n    environment:\n      - SYSLOG_HOST=rsyslog\n      - SYSLOG_PORT=514\n    depends_on:\n      - rsyslog\n    command: [\"python3\", \"/syslog_simulator.py\"]\n"
  },
  {
    "path": "syslog/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Rsyslog service\n  rsyslog:\n    image: rsyslog/syslog_appliance_alpine:latest@sha256:c0dd7cad9ff3234967ff59879590175b7590e8a5f5621ec49a85aff546b44a3b\n    container_name: rsyslog\n    ports:\n      - \"514:514/udp\"     # Standard syslog UDP port\n      - \"514:514/tcp\"     # Standard syslog TCP port (if needed)\n    volumes:\n      - ./rsyslog.conf:/etc/rsyslog.conf    # Custom rsyslog configuration\n    depends_on:\n      - alloy\n\n  # Syslog simulator using a Python script\n  syslog-simulator:\n    image: python:${PYTHON_VERSION:-3.11-slim}\n    container_name: syslog-simulator\n    volumes:\n      - ./syslog_simulator.py:/syslog_simulator.py  # Syslog simulator script\n    environment:\n      - SYSLOG_HOST=rsyslog\n      - SYSLOG_PORT=514\n    depends_on:\n      - rsyslog\n    command: [\"python3\", \"/syslog_simulator.py\"]\n  \n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345\n      - 51893:51893\n      - 51898:51898\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      - ./logs:/tmp/app-logs/\n    command: run --server.http.listen-addr=0.0.0.0:12345 --stability.level=experimental  --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n       
  datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\nvolumes:\n  rsyslog_data:\n"
  },
  {
    "path": "syslog/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true"
  },
  {
    "path": "syslog/rsyslog.conf",
    "content": "# Load necessary modules\nmodule(load=\"imudp\")   # For receiving UDP messages\ninput(type=\"imudp\" port=\"514\")\n\n# TCP endpoint configuration\n*.* action(type=\"omfwd\" target=\"alloy\" port=\"51893\" protocol=\"tcp\" Template=\"RSYSLOG_SyslogProtocol23Format\")"
  },
  {
    "path": "syslog/syslog_simulator.py",
    "content": "import socket\nimport time\nimport os\nimport random\nfrom datetime import datetime\n\n# Get the target host and port from environment variables\nsyslog_host = os.getenv('SYSLOG_HOST', 'localhost')\nsyslog_port = int(os.getenv('SYSLOG_PORT', 514))\n\n# Create a UDP socket\nsock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)\n\n# Define log levels and messages\nlog_levels = [\"INFO\", \"WARNING\", \"ERROR\", \"DEBUG\", \"CRITICAL\"]\nmessages = [\n    \"System started successfully\",\n    \"User login successful\",\n    \"Configuration loaded\",\n    \"Connection to database failed\",\n    \"Data processed successfully\",\n    \"Invalid API request received\",\n    \"Memory usage high\",\n    \"Disk space low\",\n    \"Unknown error occurred\",\n    \"Service restarted\",\n]\n\n# Generate and send syslog messages every few seconds\nwhile True:\n    # Correct timestamp format\n    timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')\n    log_level = random.choice(log_levels)\n    message_text = random.choice(messages)\n    pid = random.randint(100, 999)  # Simulate random process IDs\n    app_name = \"MyApp\"\n    hostname = socket.gethostname()\n    msgid = '-'\n    structured_data = '-'\n    # Include the log level in the message body\n    message_body = f\"{log_level}: {message_text}\"\n    # Correct syslog message format\n    message = f\"<34>1 {timestamp} {hostname} {app_name} {pid} {msgid} {structured_data} {message_body}\"\n    sock.sendto(message.encode(), (syslog_host, syslog_port))\n    print(f\"Sent syslog message to {syslog_host}:{syslog_port} - {message_body}\")\n    time.sleep(random.randint(3, 8))  # Send a message every 3-8 seconds\n\n"
  },
  {
    "path": "systemd-journal/README.md",
    "content": "# systemd journal to Loki — focused filtering recipes\n\nA focused logs-only scenario for shipping a Linux host's systemd journal to Loki, with filtering and label promotion tuned for keeping the index lean and queries fast.\n\n## How this differs from `linux/`\n\n| Aspect | `linux/` (existing) | `systemd-journal/` (this) |\n|---|---|---|\n| Scope | Metrics + journal + flat files (full Linux observability suite) | **Journal only** — focused scenario |\n| Pipeline | Pass-through ingest, all units, all priorities | **Drops noisy units + drops info/debug priorities** |\n| Stack | Prom + Loki + Grafana + node_exporter | **Loki + Grafana only** |\n| Labels promoted | none specifically | `unit`, `priority`, `hostname` |\n| Demo intent | \"monitor a Linux box end-to-end\" | \"show advanced journal filtering recipes\" |\n\nIf you want general-purpose Linux observability, use `linux/`. If you specifically need journal filtering recipes (drop noisy units, drop low-priority entries, label by unit/priority for fast filtering), this scenario is the minimal moving-parts version.\n\n## Linux host required\n\n`loki.source.journal` reads `/var/log/journal` and `/run/log/journal`. **These directories only exist on Linux hosts running systemd**. On macOS or Windows Docker Desktop:\n\n- The bind mounts will resolve to empty directories (Docker creates them silently).\n- Alloy will start cleanly but the source will sit idle with no journal entries.\n- The scenario is functionally a no-op — there's no synthesised journal to fall back to.\n\nTo exercise the scenario fully you need:\n- A Linux host (bare metal, VM, WSL2 with systemd, or a Linux VM on macOS such as OrbStack / Lima / multipass).\n- `systemd` writing journals to `/var/log/journal` (persistent) or `/run/log/journal` (volatile). 
Most distros ship with at least the volatile journal active.\n\n## Running\n\nOn a Linux host:\n\n```bash\ncd systemd-journal\ndocker compose up -d\n```\n\nWait ~10 seconds, then open Grafana.\n\n## Accessing\n\n- **Grafana**: http://localhost:3000 (no login required)\n- **Alloy UI**: http://localhost:12345 — confirm components are healthy and use livedebugging to inspect entries flowing through each stage\n- **Loki API**: http://localhost:3100\n\n## Trying it out\n\nGenerate some journal traffic on the Linux host:\n\n```bash\n# Trigger a notice\nlogger -p user.notice \"test from systemd-journal scenario\"\n\n# Trigger an error\nlogger -p user.err \"this is a test error\"\n\n# Tickle a service unit to produce events\nsudo systemctl restart cron 2>/dev/null || sudo systemctl restart crond\n```\n\nThen in Grafana Explore on Loki:\n\n```logql\n# All journal entries (after filtering)\n{job=\"systemd-journal\"}\n\n# Errors only\n{job=\"systemd-journal\", priority=~\"err|crit|alert|emerg\"}\n\n# A specific unit\n{job=\"systemd-journal\", unit=\"ssh.service\"}\n\n# A specific host (useful when shipping from many)\n{job=\"systemd-journal\", hostname=\"my-server\"}\n\n# All recent NetworkManager events\n{job=\"systemd-journal\", unit=\"NetworkManager.service\"}\n```\n\n## What's filtered out\n\nThe pipeline drops these at the Alloy side:\n\n| Filter | What it drops | Why |\n|---|---|---|\n| `{unit=~\"systemd-logind.service\\|systemd-tmpfiles-clean.service\\|cron.service\"}` | Login session housekeeping, tmpfile cleanup, every cron tick | High-volume, low-signal in dev/ops dashboards |\n| `{priority=~\"info\\|debug\"}` | LOG_INFO and LOG_DEBUG entries | Keep `notice` and above |\n\nTo keep one of these back, edit `stage.match` in `config.alloy` — remove the corresponding entry from the regex.\n\n## Why run Alloy as root\n\nThe Alloy container runs with `user: \"0:0\"`. 
On most Linux distros, `/var/log/journal/*.journal` files are owned by `root:systemd-journal` with mode 0640. Reading them requires either being root or a member of the `systemd-journal` group. Running Alloy as root inside a container with a read-only bind-mount keeps things simple for a demo. In production, prefer running the Alloy native package as a service — it joins the right groups automatically.\n\n## Stopping\n\n```bash\ndocker compose down -v\n```\n\n## Customization ideas\n\n- **Promote more journal fields**: extend the `loki.relabel.journal` block. `__journal__pid` → `pid`, `__journal__exe` → `exe`, `__journal__cmdline` → `cmdline`, etc.\n- **Per-environment unit filters**: maintain different `stage.match` regexes for prod vs dev.\n- **Forward errors only**: add a `stage.match` keeping only `priority=~\"err|crit|alert|emerg\"` if you want a focused error stream.\n- **Multi-host fan-in**: deploy this on every Linux host with the same `loki.write` URL pointing at a central Loki cluster.\n"
  },
  {
    "path": "systemd-journal/config.alloy",
    "content": "// systemd journal → Loki, with filtering recipes.\n//\n// Demonstrates three patterns the broader `linux/` scenario doesn't:\n//   1. Promoting useful journal fields (`unit`, `priority`, `hostname`)\n//      to Loki labels via `loki.relabel`.\n//   2. Dropping noisy systemd units that flood the journal but rarely\n//      carry useful signal.\n//   3. Dropping low-priority entries (info/debug) at ingestion time\n//      to keep Loki cardinality and storage low.\n//\n// Linux-host only — `loki.source.journal` reads /var/log/journal,\n// which doesn't exist on macOS or Windows. See README for details.\n\nlivedebugging { enabled = true }\n\n// Translate the journal's underscore-prefixed metadata into clean\n// Loki label names. The journal exposes a lot of fields; we promote\n// only a few useful ones.\nloki.relabel \"journal\" {\n\tforward_to = []\n\n\trule {\n\t\tsource_labels = [\"__journal__systemd_unit\"]\n\t\ttarget_label  = \"unit\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__journal_priority_keyword\"]\n\t\ttarget_label  = \"priority\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__journal__hostname\"]\n\t\ttarget_label  = \"hostname\"\n\t}\n}\n\nloki.source.journal \"host\" {\n\tpath          = \"/var/log/journal\"\n\tmax_age       = \"12h\"\n\trelabel_rules = loki.relabel.journal.rules\n\tlabels        = { job = \"systemd-journal\" }\n\tforward_to    = [loki.process.journal.receiver]\n}\n\nloki.process \"journal\" {\n\t// Drop high-volume units that rarely carry actionable signal in a\n\t// generic dev/ops dashboard. Tune this list to your environment.\n\tstage.match {\n\t\tselector = `{unit=~\"systemd-logind.service|systemd-tmpfiles-clean.service|cron.service\"}`\n\t\taction   = \"drop\"\n\t}\n\n\t// Drop low-priority entries (info / debug). 
Keep notice and above.\n\t// Adjust if you want to keep info messages.\n\tstage.match {\n\t\tselector = `{priority=~\"info|debug\"}`\n\t\taction   = \"drop\"\n\t}\n\n\tforward_to = [loki.write.local.receiver]\n}\n\nloki.write \"local\" {\n\tendpoint {\n\t\turl = \"http://loki:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "systemd-journal/docker-compose.yml",
    "content": "services:\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100/tcp\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    # Run as root so Alloy can read /var/log/journal — the journal files\n    # are owned by root:systemd-journal with mode 0640 on most distros.\n    user: \"0:0\"\n    ports:\n      - \"12345:12345\"\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n      # Bind-mount the host's journal read-only. On Linux hosts this\n      # exposes the actual systemd journal. On macOS/Windows the path\n      # doesn't exist and Docker creates an empty directory; Alloy\n      # will run but the source will report \"no journal entries\".\n      - /var/log/journal:/var/log/journal:ro\n      - /run/log/journal:/run/log/journal:ro\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - loki\n"
  },
  {
    "path": "systemd-journal/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n    - from: 2020-05-15\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 5m\n"
  },
  {
    "path": "trace-delivery/README.md",
    "content": "# Trace Delivery Demo\n\nThis scenario demonstrates how distributed tracing works using a realistic sofa delivery workflow. It shows the journey of a sofa order from the shop to the customer's house, passing through multiple services.\n\n## Overview\n\nThe demo includes five interconnected services simulating a sofa ordering and delivery process:\n\n1. **Sofa Shop** - Where customers browse sofas and place orders\n2. **Sofa Factory** - Manufactures the ordered sofas with detailed assembly steps\n3. **Global Distribution Center** - Handles global logistics and shipping\n4. **Local Distribution Center** - Manages local delivery logistics\n5. **Customer House** - The final destination for delivery\n\nEach service generates spans as part of a complete trace that follows the sofa from order to delivery. This demo includes three main scenarios:\n\n1. **Successful Delivery** - A complete, happy-path delivery with no issues\n2. **Failed Delivery** - Simulated failures at different points in the delivery process\n3. 
**Latency Issues** - Abnormal delays in one service affecting the entire delivery process\n\n## Architecture\n\n```\n┌────────────┐     ┌──────────────┐     ┌─────────────────────┐     ┌──────────────────┐     ┌────────────────┐\n│  Sofa Shop │────▶│ Sofa Factory │────▶│ Global Distribution │────▶│ Local Distribution│────▶│ Customer House │\n└────────────┘     └──────────────┘     └─────────────────────┘     └──────────────────┘     └────────────────┘\n                                                                            │\n                                                                            │\n                                                                            ▼\n                                                                     ┌────────────┐\n                                                                     │ Sofa Shop  │\n                                                                     └────────────┘\n                                                                     (notification)\n```\n\nAll services are instrumented with OpenTelemetry to generate traces, which are collected by Grafana Alloy and visualized in Grafana via Tempo.\n\n## Demo Features\n\n- **Realistic Business Process**: Simulates a real-world business workflow with multiple services and dependencies\n- **Trace Context Propagation**: Demonstrates how trace context is passed between services\n- **Background Trace Generation**: Automatically generates traces for all scenarios periodically\n- **Nested Spans**: Shows detailed manufacturing steps with nested spans and span events\n- **Bidirectional Communication**: Local Distribution center notifies the Shop when delivery is dispatched\n- **Error Cases**: Shows how errors are recorded and propagated in traces with exceptions\n- **Latency Visualization**: Illustrates how performance bottlenecks appear in traces\n- **Span Events**: Each service adds detailed span events to provide context for operations\n- **Tail 
Sampling**: Demonstrates tail sampling policies that focus on errors, latency issues, and specific order attributes\n- **Service Graph**: Visualizes the connections between services \n\n## Running the Demo\n\n1. Clone the repository:\n   ```\n   git clone https://github.com/grafana/alloy-scenarios.git\n   cd alloy-scenarios\n   ```\n\n2. Navigate to this example directory:\n   ```\n   cd trace-delivery\n   ```\n\n3. Run using Docker Compose:\n   ```\n   docker compose up -d\n   ```\n   \n   Or use the centralized image management:\n   ```\n   cd ..\n   ./run-example.sh trace-delivery\n   ```\n\n4. Access the Sofa Shop at http://localhost:8080\n\n## Demo Scenarios\n\n### 1. Successful Delivery\n\nNavigate to http://localhost:8080/demo/success to trigger a successful delivery flow, which will:\n- Create an order for a Classic Comfort sofa\n- Process it through all stages of the delivery pipeline\n- Show the detailed manufacturing steps with nested spans\n- Have the Local Distribution center notify the Shop of the dispatch\n- Complete delivery successfully\n- Generate a full trace that can be examined in Grafana\n\n### 2. Failed Delivery\n\nNavigate to http://localhost:8080/demo/failure to simulate a failure scenario, which will:\n- Create an order for a Luxury Lounge sofa\n- Simulate a failure at one of the services (factory by default)\n- Record an actual exception in the trace with detailed error information\n- Generate an error trace that will be sampled by the error policy\n\nYou can change where the failure occurs by adding a query parameter:\n- http://localhost:8080/demo/failure?service=sofa-factory\n- http://localhost:8080/demo/failure?service=global-distribution\n- http://localhost:8080/demo/failure?service=local-distribution\n\n### 3. 
Latency Issues\n\nNavigate to http://localhost:8080/demo/latency to simulate a latency scenario, which will:\n- Create an order for a Limited Edition Designer sofa\n- Introduce significant latency in one service (factory by default)\n- Add span events explaining the cause of the latency\n- Demonstrate how tail sampling captures high-latency traces\n\nYou can change where the latency occurs by adding a query parameter:\n- http://localhost:8080/demo/latency?service=sofa-factory\n- http://localhost:8080/demo/latency?service=global-distribution\n- http://localhost:8080/demo/latency?service=local-distribution\n\n## Background Trace Generation\n\nThe demo automatically generates traces in the background to populate your trace data:\n- Successful delivery traces (70% of background traces)\n- Failure scenarios (15% of background traces)\n- Latency scenarios (15% of background traces)\n\nThis helps ensure you have data to analyze without having to manually trigger scenarios.\n\n## Viewing Traces\n\n1. Open Grafana at http://localhost:3000\n2. Navigate to Explore\n3. Select Tempo as the data source\n4. Click on the \"Search\" tab and select filters like:\n   - `delivery.status = \"failed\"` to see failed deliveries\n   - `sofa.model = \"limited-edition\"` to see traces for limited edition sofas\n   - `customer.type = \"vip\"` to see VIP customer orders\n   - `background = true` to see background-generated traces\n   - `scenario = \"delivery-failure\"` to see failure scenarios\n5. 
Or explore the service graph by clicking the \"Service Graph\" tab\n\n## Span Events\n\nEach span in the trace contains detailed events providing context about what's happening:\n- **Manufacturing**: Events for each assembly step like frame construction, spring installation, etc.\n- **Distribution**: Events for package preparation, routing, loading, etc.\n- **Delivery**: Events for delivery dispatched, delivered, etc.\n- **Failure**: Detailed information about what went wrong and where\n- **Latency**: Information about delays and their causes\n\n## Tail Sampling Policies\n\nThis demo configures Grafana Alloy with six tail sampling policies:\n\n1. **Failed Delivery Policy**: Captures all traces with `delivery.status = \"failed\"`\n2. **Error Policy**: Samples traces with errors\n3. **Latency Policy**: Samples traces exceeding 5 seconds in duration\n4. **VIP Customer Policy**: Samples all orders from VIP customers\n5. **Limited Edition Policy**: Samples all orders for limited edition sofas\n6. **Probabilistic Policy**: Samples 20% of all remaining traces\n\nThese policies ensure important traces (errors, performance issues, VIP customers) are retained while still sampling a representative subset of normal traffic.\n\n## Troubleshooting\n\nIf you encounter issues:\n\n1. **Missing services**: Ensure all containers are running with `docker compose ps`\n2. **Network issues**: Check if services can communicate with each other\n3. **Trace data missing**: Verify Alloy and Tempo are configured properly\n4. 
**Service failures**: Check logs with `docker compose logs <service-name>`\n\n## Customizing the Demo\n\nYou can modify the demo in several ways:\n\n- Edit `app.py` to change service behavior, add new features, or adjust timing\n- Modify `config.alloy` to change sampling policies or add new connectors\n- Edit failure and latency probabilities in `app.py` to increase/decrease error rates\n- Add new sofa models or customer types to expand the demo\n\n## Learning from the Demo\n\nThis demo helps you understand:\n\n1. How distributed tracing works across multiple services\n2. How trace context is propagated through HTTP requests\n3. How nested spans create a hierarchical view of operations\n4. How span events provide detailed context about operations\n5. How to use tail sampling to focus on important traces\n6. How to troubleshoot errors and performance issues using traces\n7. How service graphs visualize the relationships between services"
  },
  {
    "path": "trace-delivery/app/Dockerfile",
    "content": "ARG PYTHON_VERSION=3.11-slim@sha256:6d85378d88a19cd4d76079817532d62232be95757cb45945a99fec8e8084b9c2\nFROM python:${PYTHON_VERSION}\n\nWORKDIR /app\n\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\n\nCOPY *.py ./\n\n# We'll pass the service name as an environment variable\nENV SERVICE_PORT=8080\nENV PYTHONUNBUFFERED=1\n\nCMD [\"python\", \"app.py\"] "
  },
  {
    "path": "trace-delivery/app/app.py",
    "content": "import os\nimport random\nimport time\nimport uuid\nimport logging\nimport threading\nfrom flask import Flask, request, jsonify\nimport requests\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.resources import Resource\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\nfrom opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator\n\n# Configure logging\nlogging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')\nlogger = logging.getLogger(__name__)\n\n# Get environment variables\nservice_name = os.environ.get('OTEL_SERVICE_NAME', 'unknown-service')\nservice_port = int(os.environ.get('SERVICE_PORT', '8080'))\n\n# Configure the tracer\nresource = Resource.create()  # Use OTEL_RESOURCE_ATTRIBUTES environment variable\ntrace.set_tracer_provider(TracerProvider(resource=resource))\n\n# Configure the OTLP exporter\notlp_exporter = OTLPSpanExporter()\nspan_processor = BatchSpanProcessor(span_exporter=otlp_exporter)\ntrace.get_tracer_provider().add_span_processor(span_processor)\n\n# Create a tracer\ntracer = trace.get_tracer(__name__)\n\n# Create a propagator for handling trace context\npropagator = TraceContextTextMapPropagator()\n\n# Create Flask application\napp = Flask(__name__)\nFlaskInstrumentor().instrument_app(app)\nRequestsInstrumentor().instrument()\n\n# Furniture models available\nsofa_models = [\n    {\"id\": \"classic-001\", \"name\": \"Classic Comfort\", \"price\": 899.99, \"production_time\": 2},\n    {\"id\": \"modern-002\", \"name\": \"Modern Minimalist\", \"price\": 1299.99, \"production_time\": 3},\n    {\"id\": \"luxury-003\", \"name\": \"Luxury Lounge\", \"price\": 2499.99, 
\"production_time\": 5},\n    {\"id\": \"sectional-004\", \"name\": \"Sectional Supreme\", \"price\": 1899.99, \"production_time\": 4},\n    {\"id\": \"limited-edition\", \"name\": \"Limited Edition Designer\", \"price\": 4999.99, \"production_time\": 7}\n]\n\n# Customer types\ncustomer_types = [\"regular\", \"premium\", \"vip\"]\n\n# Distribution centers\ndistribution_centers = {\n    \"global\": [\"New York\", \"Shanghai\", \"Berlin\", \"Sydney\"],\n    \"local\": [\"North District\", \"South District\", \"East District\", \"West District\"]\n}\n\n# Simulated failures by service\nfailure_scenarios = {\n    \"sofa-factory\": {\"probability\": 0.2, \"message\": \"Production line issue: Unable to complete sofa manufacturing\"},\n    \"global-distribution\": {\"probability\": 0.15, \"message\": \"Item lost in global distribution center\"},\n    \"local-distribution\": {\"probability\": 0.1, \"message\": \"Delivery vehicle breakdown\"}\n}\n\n# Simulated latency scenarios\nlatency_scenarios = {\n    \"sofa-factory\": {\"probability\": 0.1, \"min_delay\": 5, \"max_delay\": 8, \"message\": \"Production backlog causing delays\"},\n    \"global-distribution\": {\"probability\": 0.1, \"min_delay\": 6, \"max_delay\": 10, \"message\": \"Customs inspection delay\"},\n    \"local-distribution\": {\"probability\": 0.1, \"min_delay\": 3, \"max_delay\": 7, \"message\": \"Traffic congestion affecting local delivery\"}\n}\n\n# Generate a unique order ID with a prefix\ndef generate_order_id():\n    return f\"ORD-{uuid.uuid4().hex[:8].upper()}\"\n\n# Select a random item from a list\ndef random_item(items):\n    return random.choice(items)\n\n# Determine if a failure should occur based on probability\ndef should_fail(service_name, order):\n    # Check if this is a failure demo or has a failure scenario tag\n    if order.get(\"demo\") == \"failure\" and order.get(\"failure_service\") == service_name:\n        return True\n    \n    # Check if this is a background failure scenario\n    
if order.get(\"scenario\") == \"delivery-failure\" and order.get(\"failure_service\") == service_name:\n        return True\n    \n    # Regular orders should NOT randomly fail\n    return False\n\n# Add latency if applicable for the service\ndef maybe_add_latency(service_name, span):\n    if service_name in latency_scenarios:\n        if random.random() < latency_scenarios[service_name][\"probability\"]:\n            scenario = latency_scenarios[service_name]\n            delay = random.uniform(scenario[\"min_delay\"], scenario[\"max_delay\"])\n            reason = scenario[\"message\"]\n            span.set_attribute(\"latency.seconds\", delay)\n            span.set_attribute(\"latency.reason\", reason)\n            time.sleep(delay)\n            return (True, delay, reason)\n    return (False, None, None)\n\n# SOFA SHOP SERVICE (entry point)\n@app.route('/')\ndef home():\n    if service_name == \"sofa-shop\":\n        return \"\"\"\n        <h1>Sofa Shop - Trace Delivery Demo</h1>\n        <p>Welcome to our sofa shop! 
Here you can order sofas and track their delivery through our system.</p>\n        <h2>Endpoints:</h2>\n        <ul>\n            <li><a href=\"/catalog\">View Catalog</a></li>\n            <li><a href=\"/order\">Place New Order</a> (random sofa)</li>\n            <li><a href=\"/order-status?order_id=ORD-12345678\">Check Order Status</a> (replace with your order ID)</li>\n        </ul>\n        <h2>Demo Scenarios:</h2>\n        <ul>\n            <li><a href=\"/demo/success\">Successful Delivery Demo</a></li>\n            <li><a href=\"/demo/failure\">Failed Delivery Demo</a></li>\n            <li><a href=\"/demo/latency\">Delivery with Latency Demo</a></li>\n        </ul>\n        \"\"\"\n    else:\n        return f\"<h1>{service_name} service</h1><p>This service is part of the trace delivery demo.</p>\"\n\n# CATALOG ENDPOINT - SHOP SERVICE\n@app.route('/catalog')\ndef catalog():\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    with tracer.start_as_current_span(\"view-catalog\") as span:\n        span.set_attribute(\"action\", \"view-catalog\")\n        return jsonify({\"sofas\": sofa_models})\n\n# ORDER ENDPOINT - SHOP SERVICE\n@app.route('/order')\ndef place_order():\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    with tracer.start_as_current_span(\"place-order\") as span:\n        # Generate order data\n        order_id = generate_order_id()\n        sofa = random_item(sofa_models)\n        customer_type = random_item(customer_types)\n        \n        # Set span attributes\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa[\"id\"])\n        span.set_attribute(\"sofa.name\", sofa[\"name\"])\n        span.set_attribute(\"sofa.price\", sofa[\"price\"])\n        span.set_attribute(\"customer.type\", customer_type)\n        span.set_attribute(\"action\", 
\"place-order\")\n        \n        # Create order\n        order = {\n            \"order_id\": order_id,\n            \"sofa\": sofa,\n            \"customer_type\": customer_type,\n            \"timestamp\": time.time()\n        }\n        \n        logger.info(f\"New order placed: {order_id} for {sofa['name']}\")\n        \n        # Forward to factory for manufacturing\n        try:\n            factory_url = os.environ.get('SERVICE_FACTORY_URL', 'http://sofa-factory:8081')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{factory_url}/manufacture\",\n                json=order,\n                headers=headers\n            )\n            \n            if response.status_code == 200:\n                result = response.json()\n                return jsonify({\n                    \"message\": \"Order placed successfully!\",\n                    \"order_id\": order_id,\n                    \"sofa\": sofa[\"name\"],\n                    \"customer_type\": customer_type,\n                    \"status\": \"manufacturing\"\n                })\n            else:\n                span.set_status(trace.StatusCode.ERROR)\n                return jsonify({\"error\": \"Failed to process order at factory\", \"details\": response.text}), 500\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return jsonify({\"error\": f\"Failed to connect to factory: {str(e)}\"}), 500\n\n# ORDER STATUS ENDPOINT - SHOP SERVICE\n@app.route('/order-status')\ndef check_order_status():\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    order_id = request.args.get('order_id')\n    if not order_id:\n        return jsonify({\"error\": \"No order ID provided\"}), 400\n    \n    with 
tracer.start_as_current_span(\"check-order-status\") as span:\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"action\", \"check-order-status\")\n        \n        # In a real system, we would look up the order status in a database\n        # For this demo, we'll return a random status\n        statuses = [\"manufactured\", \"picked up\", \"in global distribution\", \"in local distribution\", \"out for delivery\", \"delivered\"]\n        status = random_item(statuses)\n        \n        return jsonify({\n            \"order_id\": order_id,\n            \"status\": status,\n            \"last_update\": time.time()\n        })\n\n# DELIVERY NOTIFICATION ENDPOINT - SHOP SERVICE\n@app.route('/delivery-notification', methods=['POST'])\ndef delivery_notification():\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    notification = request.json\n    order_id = notification.get(\"order_id\")\n    notification_type = notification.get(\"notification_type\")\n    delivery_time = notification.get(\"delivery_time\")\n    \n    with tracer.start_as_current_span(\"process-delivery-notification\") as span:\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"notification.type\", notification_type)\n        span.set_attribute(\"action\", \"process-notification\")\n        \n        # Add a span event for processing the notification\n        span.add_event(\"notification_received\", {\n            \"order_id\": order_id,\n            \"notification_type\": notification_type,\n            \"timestamp\": time.time()\n        })\n        \n        # In a real app, we would update the order status in the database\n        # For this demo, we'll just log it\n        logger.info(f\"Notification received: Order {order_id} has been {notification_type} at {delivery_time}\")\n        \n        # Simulate update to database or other processing\n        
time.sleep(0.1)\n        \n        # Add span event for completing notification processing\n        span.add_event(\"notification_processed\", {\n            \"order_id\": order_id,\n            \"success\": True,\n            \"timestamp\": time.time()\n        })\n        \n        return jsonify({\n            \"status\": \"success\",\n            \"message\": f\"Notification for order {order_id} processed successfully\",\n            \"notification_type\": notification_type\n        })\n\n# MANUFACTURE ENDPOINT - FACTORY SERVICE\n@app.route('/manufacture', methods=['POST'])\ndef manufacture():\n    if service_name != \"sofa-factory\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    order = request.json\n    order_id = order.get(\"order_id\")\n    sofa = order.get(\"sofa\", {})\n    is_background = order.get(\"background\", False)\n    \n    with tracer.start_as_current_span(\"manufacture-sofa\") as span:\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa.get(\"id\", \"unknown\"))\n        span.set_attribute(\"sofa.name\", sofa.get(\"name\", \"unknown\"))\n        span.set_attribute(\"action\", \"manufacture\")\n        span.set_attribute(\"background\", is_background)\n        \n        # Add a span event for manufacture start\n        span.add_event(\"manufacture_started\", {\n            \"order_id\": order_id,\n            \"timestamp\": time.time(),\n            \"sofa_model\": sofa.get(\"name\", \"unknown\")\n        })\n        \n        # Check for simulated failure\n        if should_fail(service_name, order):\n            error_message = failure_scenarios[service_name][\"message\"]\n            logger.error(f\"Manufacturing failure for order {order_id}: {error_message}\")\n            span.set_attribute(\"error\", True)\n            span.set_attribute(\"error.message\", error_message)\n            span.set_attribute(\"delivery.status\", \"failed\")\n            
\n            # Add span event for the failure\n            span.add_event(\"manufacture_failed\", {\n                \"error\": error_message,\n                \"timestamp\": time.time()\n            })\n            \n            # Record an actual exception to show in the trace\n            try:\n                raise Exception(f\"Manufacturing process failed: {error_message}\")\n            except Exception as e:\n                span.record_exception(e)\n                span.set_status(trace.StatusCode.ERROR, str(e))\n            \n            return jsonify({\"error\": error_message}), 500\n        \n        # Add latency if applicable\n        latency_result = (False, None, None)\n        if order.get(\"demo\") == \"latency\" and order.get(\"latency_service\") == \"sofa-factory\":\n            # For demo, explicitly add latency\n            delay = random.uniform(5, 8)\n            reason = \"Production backlog causing delays\"\n            span.set_attribute(\"latency.seconds\", delay)\n            span.set_attribute(\"latency.reason\", reason)\n            time.sleep(delay)\n            latency_result = (True, delay, reason)\n        else:\n            # Check for random latency\n            latency_result = maybe_add_latency(service_name, span)\n        \n        # If latency was added, record the event\n        if latency_result[0]:\n            delay = latency_result[1]\n            reason = latency_result[2]\n            span.add_event(\"manufacture_delayed\", {\n                \"delay_seconds\": delay,\n                \"reason\": reason,\n                \"timestamp\": time.time()\n            })\n        \n        # Create nested spans for the assembly process\n        # 1. 
Frame construction\n        with tracer.start_as_current_span(\"frame-construction\") as frame_span:\n            frame_span.set_attribute(\"order.id\", order_id)\n            frame_span.set_attribute(\"assembly.step\", \"frame\")\n            frame_span.set_attribute(\"material\", \"hardwood\")\n            \n            # Simulate work\n            time.sleep(0.2)\n            \n            frame_span.add_event(\"frame_completed\", {\n                \"timestamp\": time.time(),\n                \"quality_check\": \"passed\"\n            })\n        \n        # 2. Spring installation\n        with tracer.start_as_current_span(\"spring-installation\") as spring_span:\n            spring_span.set_attribute(\"order.id\", order_id)\n            spring_span.set_attribute(\"assembly.step\", \"springs\")\n            spring_span.set_attribute(\"spring.count\", 24)\n            \n            # Simulate work\n            time.sleep(0.15)\n            \n            spring_span.add_event(\"springs_installed\", {\n                \"timestamp\": time.time(),\n                \"tension_test\": \"passed\"\n            })\n        \n        # 3. 
Cushion preparation\n        with tracer.start_as_current_span(\"cushion-preparation\") as cushion_span:\n            cushion_span.set_attribute(\"order.id\", order_id)\n            cushion_span.set_attribute(\"assembly.step\", \"cushions\")\n            \n            # Sub-step: foam cutting\n            with tracer.start_as_current_span(\"foam-cutting\") as foam_span:\n                foam_span.set_attribute(\"material\", \"memory foam\")\n                foam_span.set_attribute(\"density\", \"high\")\n                time.sleep(0.1)\n            \n            # Sub-step: fabric cutting\n            with tracer.start_as_current_span(\"fabric-cutting\") as fabric_span:\n                fabric_span.set_attribute(\"material\", \"premium leather\" if sofa.get(\"id\") == \"luxury-003\" else \"fabric\")\n                time.sleep(0.1)\n            \n            # Sub-step: cushion assembly\n            with tracer.start_as_current_span(\"cushion-assembly\") as assembly_span:\n                assembly_span.set_attribute(\"components\", \"foam + fabric + zippers\")\n                time.sleep(0.15)\n            \n            cushion_span.add_event(\"cushions_completed\", {\n                \"timestamp\": time.time()\n            })\n        \n        # 4. 
Final assembly\n        with tracer.start_as_current_span(\"final-assembly\") as final_span:\n            final_span.set_attribute(\"order.id\", order_id)\n            final_span.set_attribute(\"assembly.step\", \"final\")\n            \n            # Simulate work\n            time.sleep(0.25)\n            \n            final_span.add_event(\"assembly_completed\", {\n                \"timestamp\": time.time(),\n                \"inspector\": f\"Inspector #{random.randint(1, 10)}\"\n            })\n        \n        # Simulate manufacturing time (in addition to the assembly steps)\n        production_time = sofa.get(\"production_time\", 3)\n        time.sleep(production_time / 20)  # Scale down for demo purposes\n        \n        # Add event for manufacturing completion\n        span.add_event(\"manufacture_completed\", {\n            \"order_id\": order_id,\n            \"timestamp\": time.time(),\n            \"quality_check\": \"passed\",\n            \"inspector_id\": f\"QA-{random.randint(100, 999)}\"\n        })\n        \n        logger.info(f\"Completed manufacturing for order {order_id}\")\n        \n        # Request pickup from global distribution\n        try:\n            distribution_url = os.environ.get('SERVICE_DISTRIBUTION_URL', 'http://global-distribution:8082')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{distribution_url}/pickup\",\n                json=order,\n                headers=headers\n            )\n            \n            if response.status_code == 200:\n                result = response.json()\n                return jsonify({\n                    \"order_id\": order_id,\n                    \"status\": \"manufactured\",\n                    \"next_step\": \"global distribution\"\n                })\n            else:\n                error_message = f\"Global distribution pickup failed: {response.text}\"\n                
span.set_status(trace.StatusCode.ERROR)\n                span.set_attribute(\"delivery.status\", \"failed\")\n                return jsonify({\"error\": error_message}), 500\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            span.set_attribute(\"delivery.status\", \"failed\")\n            return jsonify({\"error\": f\"Failed to connect to global distribution: {str(e)}\"}), 500\n\n# PICKUP ENDPOINT - GLOBAL DISTRIBUTION SERVICE\n@app.route('/pickup', methods=['POST'])\ndef global_pickup():\n    if service_name != \"global-distribution\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    order = request.json\n    order_id = order.get(\"order_id\")\n    sofa = order.get(\"sofa\", {})\n    \n    with tracer.start_as_current_span(\"global-distribution-pickup\") as span:\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa.get(\"id\", \"unknown\"))\n        distribution_center = random_item(distribution_centers[\"global\"])\n        span.set_attribute(\"distribution.center\", distribution_center)\n        span.set_attribute(\"action\", \"global-pickup\")\n        \n        # Add event for starting the pickup process\n        span.add_event(\"global_pickup_started\", {\n            \"order_id\": order_id,\n            \"distribution_center\": distribution_center,\n            \"timestamp\": time.time()\n        })\n        \n        # Check for simulated failure\n        if should_fail(service_name, order):\n            error_message = failure_scenarios[service_name][\"message\"]\n            logger.error(f\"Global distribution failure for order {order_id}: {error_message}\")\n            span.set_attribute(\"error\", True)\n            span.set_attribute(\"error.message\", error_message)\n            span.set_attribute(\"delivery.status\", \"failed\")\n            \n            # 
Add event for the failure\n            span.add_event(\"global_pickup_failed\", {\n                \"error\": error_message,\n                \"timestamp\": time.time()\n            })\n            \n            # Record an actual exception to show in the trace\n            try:\n                raise Exception(f\"Global distribution failed: {error_message}\")\n            except Exception as e:\n                span.record_exception(e)\n                span.set_status(trace.StatusCode.ERROR, str(e))\n            \n            return jsonify({\"error\": error_message}), 500\n        \n        # Add latency if applicable\n        latency_result = (False, None, None)\n        if order.get(\"demo\") == \"latency\" and order.get(\"latency_service\") == \"global-distribution\":\n            # For demo, explicitly add latency\n            delay = random.uniform(6, 10)\n            reason = \"Customs inspection delay\"\n            span.set_attribute(\"latency.seconds\", delay)\n            span.set_attribute(\"latency.reason\", reason)\n            time.sleep(delay)\n            latency_result = (True, delay, reason)\n        else:\n            # Check for random latency\n            latency_result = maybe_add_latency(service_name, span)\n        \n        # If latency was added, record the event\n        if latency_result[0]:\n            delay = latency_result[1]\n            reason = latency_result[2]\n            span.add_event(\"global_pickup_delayed\", {\n                \"delay_seconds\": delay,\n                \"reason\": reason,\n                \"timestamp\": time.time()\n            })\n        \n        # Create nested spans for logistics operations\n        with tracer.start_as_current_span(\"inventory-processing\") as inventory_span:\n            inventory_span.set_attribute(\"order.id\", order_id)\n            inventory_span.set_attribute(\"operation\", \"inventory\")\n            inventory_span.set_attribute(\"location\", distribution_center)\n           
 \n            # Simulate inventory processing\n            time.sleep(0.1)\n            \n            inventory_span.add_event(\"inventory_processed\", {\n                \"warehouse\": f\"{distribution_center}-{random.randint(1, 5)}\",\n                \"timestamp\": time.time()\n            })\n        \n        with tracer.start_as_current_span(\"global-logistics\") as logistics_span:\n            logistics_span.set_attribute(\"order.id\", order_id)\n            logistics_span.set_attribute(\"operation\", \"logistics\")\n            \n            # Simulate logistics processing\n            time.sleep(0.2)\n            \n            # Select random transport type\n            transport = random.choice([\"air\", \"sea\", \"road\", \"rail\"])\n            logistics_span.set_attribute(\"transport.type\", transport)\n            \n            logistics_span.add_event(\"transport_arranged\", {\n                \"type\": transport,\n                \"carrier\": f\"Carrier-{random.randint(100, 999)}\",\n                \"timestamp\": time.time()\n            })\n        \n        # Simulate processing time\n        time.sleep(0.3)\n        \n        # Add event for successful pickup\n        span.add_event(\"global_pickup_completed\", {\n            \"order_id\": order_id,\n            \"distribution_center\": distribution_center,\n            \"timestamp\": time.time()\n        })\n        \n        logger.info(f\"Global distribution processed order {order_id}\")\n        \n        # Forward to local distribution\n        try:\n            local_url = os.environ.get('SERVICE_LOCAL_URL', 'http://local-distribution:8083')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{local_url}/deliver\",\n                json=order,\n                headers=headers\n            )\n            \n            if response.status_code == 200:\n                result = response.json()\n             
   return jsonify({\n                    \"order_id\": order_id,\n                    \"status\": \"in global distribution\",\n                    \"next_step\": \"local distribution\"\n                })\n            else:\n                error_message = f\"Local distribution handoff failed: {response.text}\"\n                span.set_status(trace.StatusCode.ERROR)\n                span.set_attribute(\"delivery.status\", \"failed\")\n                return jsonify({\"error\": error_message}), 500\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            span.set_attribute(\"delivery.status\", \"failed\")\n            return jsonify({\"error\": f\"Failed to connect to local distribution: {str(e)}\"}), 500\n\n# DELIVER ENDPOINT - LOCAL DISTRIBUTION SERVICE\n@app.route('/deliver', methods=['POST'])\ndef local_deliver():\n    if service_name != \"local-distribution\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    order = request.json\n    order_id = order.get(\"order_id\")\n    sofa = order.get(\"sofa\", {})\n    \n    with tracer.start_as_current_span(\"local-distribution-delivery\") as span:\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa.get(\"id\", \"unknown\"))\n        distribution_center = random_item(distribution_centers[\"local\"])\n        span.set_attribute(\"distribution.center\", distribution_center)\n        span.set_attribute(\"action\", \"local-delivery\")\n        \n        # Add event for starting local delivery\n        span.add_event(\"local_delivery_started\", {\n            \"order_id\": order_id,\n            \"distribution_center\": distribution_center,\n            \"timestamp\": time.time()\n        })\n        \n        # Check for simulated failure\n        if should_fail(service_name, order):\n            error_message = 
failure_scenarios[service_name][\"message\"]\n            logger.error(f\"Local distribution failure for order {order_id}: {error_message}\")\n            span.set_attribute(\"error\", True)\n            span.set_attribute(\"error.message\", error_message)\n            span.set_attribute(\"delivery.status\", \"failed\")\n            \n            # Add event for the failure\n            span.add_event(\"local_delivery_failed\", {\n                \"error\": error_message,\n                \"timestamp\": time.time()\n            })\n            \n            # Record an actual exception to show in the trace\n            try:\n                raise Exception(f\"Local delivery failed: {error_message}\")\n            except Exception as e:\n                span.record_exception(e)\n                span.set_status(trace.StatusCode.ERROR, str(e))\n            \n            return jsonify({\"error\": error_message}), 500\n        \n        # Add latency if applicable\n        latency_result = (False, None, None)\n        if order.get(\"demo\") == \"latency\" and order.get(\"latency_service\") == \"local-distribution\":\n            # For demo, explicitly add latency\n            delay = random.uniform(3, 7)\n            reason = \"Traffic congestion affecting local delivery\"\n            span.set_attribute(\"latency.seconds\", delay)\n            span.set_attribute(\"latency.reason\", reason)\n            time.sleep(delay)\n            latency_result = (True, delay, reason)\n        else:\n            # Check for random latency\n            latency_result = maybe_add_latency(service_name, span)\n        \n        # If latency was added, record the event\n        if latency_result[0]:\n            delay = latency_result[1]\n            reason = latency_result[2]\n            span.add_event(\"local_delivery_delayed\", {\n                \"delay_seconds\": delay,\n                \"reason\": reason,\n                \"timestamp\": time.time()\n            })\n        \n     
   # Create nested spans for local delivery operations\n        with tracer.start_as_current_span(\"package-preparation\") as prep_span:\n            prep_span.set_attribute(\"order.id\", order_id)\n            prep_span.set_attribute(\"operation\", \"package-prep\")\n            \n            # Simulate packaging operations\n            time.sleep(0.15)\n            \n            prep_span.add_event(\"package_prepared\", {\n                \"packaging_type\": \"heavy-duty\",\n                \"timestamp\": time.time()\n            })\n        \n        with tracer.start_as_current_span(\"delivery-route-planning\") as route_span:\n            route_span.set_attribute(\"order.id\", order_id)\n            route_span.set_attribute(\"operation\", \"route-planning\")\n            \n            # Simulate route planning\n            time.sleep(0.15)\n            \n            # Pick random delivery details\n            vehicle = random.choice([\"van\", \"truck\", \"specialized transport\"])\n            route_span.set_attribute(\"delivery.vehicle\", vehicle)\n            driver = f\"Driver-{random.randint(100, 999)}\"\n            route_span.set_attribute(\"delivery.driver\", driver)\n            \n            route_span.add_event(\"route_planned\", {\n                \"vehicle\": vehicle,\n                \"driver\": driver,\n                \"estimated_arrival\": time.time() + 3600,  # 1 hour from now\n                \"timestamp\": time.time()\n            })\n        \n        # Simulate processing time\n        time.sleep(0.4)\n        \n        # Add event for successfully loaded for delivery\n        span.add_event(\"local_delivery_loaded\", {\n            \"order_id\": order_id,\n            \"distribution_center\": distribution_center,\n            \"timestamp\": time.time()\n        })\n        \n        logger.info(f\"Local distribution processed order {order_id}\")\n        \n        # Notify the shop that the order has been dispatched for delivery\n        
with tracer.start_as_current_span(\"notify-shop-delivery-dispatched\") as notify_span:\n            notify_span.set_attribute(\"order.id\", order_id)\n            notify_span.set_attribute(\"action\", \"notify-shop\")\n            \n            # Create the notification\n            notification = {\n                \"order_id\": order_id,\n                \"sofa\": sofa,\n                \"customer_type\": order.get(\"customer_type\", \"regular\"),\n                \"dispatch_time\": time.time(),\n                \"notification_type\": \"delivery_dispatched\",\n                \"vehicle\": vehicle,\n                \"driver\": driver,\n                \"distribution_center\": distribution_center\n            }\n            \n            # Send notification to shop\n            shop_url = \"http://sofa-shop:8080/delivery-notification\"\n            headers = {}\n            propagator.inject(headers)\n            \n            notify_span.add_event(\"sending_notification\", {\n                \"target\": \"sofa-shop\",\n                \"notification_type\": \"delivery_dispatched\",\n                \"timestamp\": time.time()\n            })\n            \n            # Try to send the notification - don't fail the whole delivery if this fails\n            try:\n                requests.post(\n                    shop_url,\n                    json=notification,\n                    headers=headers,\n                    timeout=1  # Short timeout so we don't block if shop is down\n                )\n                notify_span.add_event(\"notification_sent\", {\n                    \"success\": True,\n                    \"timestamp\": time.time()\n                })\n            except Exception as notify_err:\n                logger.warning(f\"Failed to notify shop of dispatch: {str(notify_err)}\")\n                notify_span.record_exception(notify_err)\n                notify_span.set_status(trace.StatusCode.ERROR, str(notify_err))\n                
notify_span.add_event(\"notification_failed\", {\n                    \"success\": False,\n                    \"error\": str(notify_err),\n                    \"timestamp\": time.time()\n                })\n        \n        # Deliver to customer\n        try:\n            customer_url = os.environ.get('SERVICE_CUSTOMER_URL', 'http://customer-house:8084')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{customer_url}/receive\",\n                json=order,\n                headers=headers\n            )\n            \n            if response.status_code == 200:\n                result = response.json()\n                span.add_event(\"local_delivery_completed\", {\n                    \"order_id\": order_id,\n                    \"timestamp\": time.time()\n                })\n                return jsonify({\n                    \"order_id\": order_id,\n                    \"status\": \"out for delivery\",\n                    \"next_step\": \"customer delivery\"\n                })\n            else:\n                error_message = f\"Customer delivery failed: {response.text}\"\n                span.set_status(trace.StatusCode.ERROR)\n                span.set_attribute(\"delivery.status\", \"failed\")\n                return jsonify({\"error\": error_message}), 500\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            span.set_attribute(\"delivery.status\", \"failed\")\n            return jsonify({\"error\": f\"Failed to connect to customer house: {str(e)}\"}), 500\n\n# RECEIVE ENDPOINT - CUSTOMER HOUSE SERVICE\n@app.route('/receive', methods=['POST'])\ndef customer_receive():\n    if service_name != \"customer-house\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    order = request.json\n    order_id = order.get(\"order_id\")\n    
sofa = order.get(\"sofa\", {})\n    customer_type = order.get(\"customer_type\", \"regular\")\n    \n    with tracer.start_as_current_span(\"customer-house-receive\") as span:\n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa.get(\"id\", \"unknown\"))\n        span.set_attribute(\"customer.type\", customer_type)\n        span.set_attribute(\"action\", \"customer-receive\")\n        span.set_attribute(\"delivery.status\", \"delivered\")\n        \n        # Add span event for delivery\n        span.add_event(\"sofa_delivered\", {\n            \"order_id\": order_id,\n            \"timestamp\": time.time(),\n            \"customer_type\": customer_type\n        })\n        \n        # Simulate final delivery\n        time.sleep(0.2)\n        \n        logger.info(f\"Order {order_id} successfully delivered to customer\")\n        \n        # Generate customer satisfaction score - VIP customers are generally more satisfied\n        satisfaction = random.randint(85, 100) if customer_type == \"vip\" else random.randint(70, 95)\n        \n        return jsonify({\n            \"order_id\": order_id,\n            \"status\": \"delivered\",\n            \"delivery_time\": time.time(),\n            \"satisfaction\": satisfaction\n        })\n\n# DEMO ENDPOINTS - SHOP SERVICE\n@app.route('/demo/success')\ndef demo_success():\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    # Set environment variables for other services to not fail\n    os.environ[\"FORCE_SUCCESS\"] = \"true\"\n    \n    with tracer.start_as_current_span(\"demo-success-flow\") as span:\n        # Use a predefined sofa for the demo\n        order_id = generate_order_id()\n        sofa = sofa_models[0]  # Classic sofa\n        customer_type = \"regular\"\n        \n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa[\"id\"])\n        
span.set_attribute(\"sofa.name\", sofa[\"name\"])\n        span.set_attribute(\"customer.type\", customer_type)\n        span.set_attribute(\"demo\", \"success-flow\")\n        \n        # Create order\n        order = {\n            \"order_id\": order_id,\n            \"sofa\": sofa,\n            \"customer_type\": customer_type,\n            \"timestamp\": time.time(),\n            \"demo\": \"success\"\n        }\n        \n        logger.info(f\"Demo success flow initiated: {order_id}\")\n        \n        # Forward to factory for manufacturing\n        try:\n            factory_url = os.environ.get('SERVICE_FACTORY_URL', 'http://sofa-factory:8081')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{factory_url}/manufacture\",\n                json=order,\n                headers=headers\n            )\n            \n            if response.status_code == 200:\n                return jsonify({\n                    \"message\": \"Success demo initiated!\",\n                    \"order_id\": order_id,\n                    \"sofa\": sofa[\"name\"],\n                    \"trace_id\": span.get_span_context().trace_id\n                })\n            else:\n                return jsonify({\"error\": \"Demo failed to start\", \"details\": response.text}), 500\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            return jsonify({\"error\": f\"Demo failed to start: {str(e)}\"}), 500\n\n@app.route('/demo/failure')\ndef demo_failure_endpoint():\n    return demo_failure()\n\ndef demo_failure(failure_service=None, is_background=False):\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    # Set environment variables for this specific demo\n    os.environ[\"FORCE_FAILURE\"] = \"true\"\n    
os.environ[\"FAILURE_SERVICE\"] = failure_service or request.args.get('service', 'sofa-factory')\n    \n    with tracer.start_as_current_span(\"background-failure-scenario\" if is_background else \"demo-failure-flow\") as span:\n        # Use a predefined sofa for the demo\n        order_id = generate_order_id()\n        sofa = sofa_models[2]  # Luxury sofa\n        customer_type = \"premium\"\n        \n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa[\"id\"])\n        span.set_attribute(\"sofa.name\", sofa[\"name\"])\n        span.set_attribute(\"customer.type\", customer_type)\n        span.set_attribute(\"demo\", \"failure-flow\")\n        span.set_attribute(\"background\", is_background)\n        span.set_attribute(\"scenario\", \"delivery-failure\")\n        span.set_attribute(\"failure_service\", os.environ[\"FAILURE_SERVICE\"])\n        \n        # Create order\n        order = {\n            \"order_id\": order_id,\n            \"sofa\": sofa,\n            \"customer_type\": customer_type,\n            \"timestamp\": time.time(),\n            \"demo\": \"failure\",\n            \"background\": is_background,\n            \"scenario\": \"delivery-failure\",\n            \"failure_service\": os.environ[\"FAILURE_SERVICE\"]\n        }\n        \n        logger.info(f\"{'Background' if is_background else 'Demo'} failure flow initiated: {order_id} (failure in {os.environ['FAILURE_SERVICE']})\")\n        \n        # Forward to factory for manufacturing\n        try:\n            factory_url = os.environ.get('SERVICE_FACTORY_URL', 'http://sofa-factory:8081')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{factory_url}/manufacture\",\n                json=order,\n                headers=headers\n            )\n            \n            if is_background:\n                return None\n            else:\n                return 
jsonify({\n                    \"message\": \"Failure demo initiated!\",\n                    \"order_id\": order_id,\n                    \"sofa\": sofa[\"name\"],\n                    \"failure_service\": os.environ[\"FAILURE_SERVICE\"],\n                    \"trace_id\": span.get_span_context().trace_id\n                })\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            if is_background:\n                logger.error(f\"Background demo failed to start: {str(e)}\")\n                return None\n            else:\n                return jsonify({\"error\": f\"Demo failed to start: {str(e)}\"}), 500\n\n@app.route('/demo/latency')\ndef demo_latency_endpoint():\n    return demo_latency()\n\ndef demo_latency(latency_service=None, is_background=False):\n    if service_name != \"sofa-shop\":\n        return jsonify({\"error\": f\"Not available in {service_name}\"}), 404\n    \n    # Set environment variables for this specific demo\n    os.environ[\"FORCE_LATENCY\"] = \"true\"\n    os.environ[\"LATENCY_SERVICE\"] = latency_service or request.args.get('service', 'sofa-factory')\n    \n    with tracer.start_as_current_span(\"background-latency-scenario\" if is_background else \"demo-latency-flow\") as span:\n        # Use a predefined sofa for the demo\n        order_id = generate_order_id()\n        sofa = sofa_models[4]  # Limited edition\n        customer_type = \"vip\"\n        \n        span.set_attribute(\"order.id\", order_id)\n        span.set_attribute(\"sofa.model\", sofa[\"id\"])\n        span.set_attribute(\"sofa.name\", sofa[\"name\"])\n        span.set_attribute(\"customer.type\", customer_type)\n        span.set_attribute(\"demo\", \"latency-flow\")\n        span.set_attribute(\"background\", is_background)\n        span.set_attribute(\"scenario\", \"delivery-latency\")\n        span.set_attribute(\"latency_service\", os.environ[\"LATENCY_SERVICE\"])\n   
     \n        # Create order\n        order = {\n            \"order_id\": order_id,\n            \"sofa\": sofa,\n            \"customer_type\": customer_type,\n            \"timestamp\": time.time(),\n            \"demo\": \"latency\",\n            \"background\": is_background,\n            \"scenario\": \"delivery-latency\",\n            \"latency_service\": os.environ[\"LATENCY_SERVICE\"]\n        }\n        \n        logger.info(f\"{'Background' if is_background else 'Demo'} latency flow initiated: {order_id} (latency in {os.environ['LATENCY_SERVICE']})\")\n        \n        # Forward to factory for manufacturing\n        try:\n            factory_url = os.environ.get('SERVICE_FACTORY_URL', 'http://sofa-factory:8081')\n            headers = {}\n            propagator.inject(headers)\n            \n            response = requests.post(\n                f\"{factory_url}/manufacture\",\n                json=order,\n                headers=headers\n            )\n            \n            if is_background:\n                return None\n            else:\n                return jsonify({\n                    \"message\": \"Latency demo initiated!\",\n                    \"order_id\": order_id,\n                    \"sofa\": sofa[\"name\"],\n                    \"latency_service\": os.environ[\"LATENCY_SERVICE\"],\n                    \"trace_id\": span.get_span_context().trace_id\n                })\n        \n        except Exception as e:\n            span.record_exception(e)\n            span.set_status(trace.StatusCode.ERROR, str(e))\n            if is_background:\n                logger.error(f\"Background demo failed to start: {str(e)}\")\n                return None\n            else:\n                return jsonify({\"error\": f\"Demo failed to start: {str(e)}\"}), 500\n\n# Background trace generation functions\ndef generate_random_trace():\n    \"\"\"Generate a random trace in the background\"\"\"\n    if service_name != \"sofa-shop\":\n        return  # 
Only the shop should generate random traces\n    \n    # Randomly choose between normal order, error scenario, or latency scenario\n    scenario_type = random.choices(\n        [\"normal\", \"error\", \"latency\"], \n        weights=[0.7, 0.15, 0.15], \n        k=1\n    )[0]\n    \n    try:\n        if scenario_type == \"normal\":\n            # Normal order flow\n            order_id = generate_order_id()\n            sofa = random_item(sofa_models)\n            customer_type = random_item(customer_types)\n            \n            with tracer.start_as_current_span(\"background-successful-order\") as span:\n                span.set_attribute(\"order.id\", order_id)\n                span.set_attribute(\"sofa.model\", sofa[\"id\"])\n                span.set_attribute(\"sofa.name\", sofa[\"name\"])\n                span.set_attribute(\"sofa.price\", sofa[\"price\"])\n                span.set_attribute(\"customer.type\", customer_type)\n                span.set_attribute(\"action\", \"place-order\")\n                span.set_attribute(\"background\", True)\n                span.set_attribute(\"scenario\", \"successful-delivery\")\n                \n                # Add a span event for order creation\n                span.add_event(\"order_created\", {\n                    \"order_id\": order_id,\n                    \"timestamp\": time.time(),\n                    \"customer_type\": customer_type,\n                    \"scenario\": \"successful-delivery\"\n                })\n                \n                # Create order\n                order = {\n                    \"order_id\": order_id,\n                    \"sofa\": sofa,\n                    \"customer_type\": customer_type,\n                    \"timestamp\": time.time(),\n                    \"background\": True,\n                    \"scenario\": \"successful-delivery\"\n                }\n                \n                logger.info(f\"Background successful order placed: {order_id} for 
{sofa['name']}\")\n                \n                # Forward to factory for manufacturing\n                factory_url = os.environ.get('SERVICE_FACTORY_URL', 'http://sofa-factory:8081')\n                headers = {}\n                propagator.inject(headers)\n                \n                requests.post(\n                    f\"{factory_url}/manufacture\",\n                    json=order,\n                    headers=headers\n                )\n        \n        elif scenario_type == \"error\":\n            # Error scenario\n            failure_service = random.choice(list(failure_scenarios.keys()))\n            demo_failure(failure_service=failure_service, is_background=True)\n            \n        elif scenario_type == \"latency\":\n            # Latency scenario\n            latency_service = random.choice(list(latency_scenarios.keys()))\n            demo_latency(latency_service=latency_service, is_background=True)\n            \n    except Exception as e:\n        logger.error(f\"Error generating background trace: {str(e)}\")\n\ndef trace_generator_thread():\n    \"\"\"Background thread that generates traces at regular intervals\"\"\"\n    while True:\n        try:\n            # Only generate random traces if we're the sofa-shop service\n            if service_name == \"sofa-shop\":\n                generate_random_trace()\n                \n            # Wait between 10-20 seconds before generating the next trace\n            delay = random.uniform(10, 20)\n            logger.info(f\"Next background trace in {delay:.2f} seconds\")\n            time.sleep(delay)\n        except Exception as e:\n            logger.error(f\"Error in trace generation thread: {e}\")\n            time.sleep(10)  # Wait before retrying\n\nif __name__ == '__main__':\n    logger.info(f\"Starting {service_name} service on port {service_port}\")\n    \n    # Start the background trace generator thread (only for sofa-shop)\n    if service_name == \"sofa-shop\":\n        
trace_thread = threading.Thread(target=trace_generator_thread, daemon=True)\n        trace_thread.start()\n        logger.info(\"Started background trace generator\")\n    \n    app.run(host='0.0.0.0', port=service_port) "
  },
  {
    "path": "trace-delivery/app/requirements.txt",
    "content": "flask\nrequests\nopentelemetry-api\nopentelemetry-sdk\nopentelemetry-exporter-otlp\nopentelemetry-instrumentation-flask\nopentelemetry-instrumentation-requests"
  },
  {
    "path": "trace-delivery/config-otel.yaml",
    "content": "#\n# OTel Collector YAML Configuration for Sofa Delivery Trace Demo\n#\n# This is the OTel-native equivalent of config.alloy for use with the Alloy OTel Engine.\n# Run with: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n#\n\nreceivers:\n  otlp:\n    protocols:\n      grpc:\n        endpoint: 0.0.0.0:4317\n      http:\n        endpoint: 0.0.0.0:4318\n\nprocessors:\n  batch: {}\n\nexporters:\n  otlp/tempo:\n    endpoint: tempo:4317\n    tls:\n      insecure: true\n\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      processors: [batch]\n      exporters: [otlp/tempo]\n"
  },
  {
    "path": "trace-delivery/config.alloy",
    "content": "/*\n * Alloy Configuration for Sofa Delivery Trace Demo\n */\n\n// Receive OpenTelemetry traces\notelcol.receiver.otlp \"default\" {\n  http {}\n  grpc {}\n\n  output {\n    traces = [otelcol.processor.batch.default.input]\n  }\n}\n\n// Batch processor to improve performance\notelcol.processor.batch \"default\" {\n  output {\n    traces = [otelcol.exporter.otlp.tempo.input]\n  }\n}\n\n\n// Send traces to Tempo\notelcol.exporter.otlp \"tempo\" {\n  client {\n    endpoint = \"tempo:4317\"\n    tls {\n      insecure = true\n    }\n  }\n}\n\nlivedebugging {\n  enabled = true\n} "
  },
  {
    "path": "trace-delivery/docker-compose-otel.yml",
    "content": "# OTel Engine Override\n#\n# Uses Alloy's experimental OTel Engine to run a standard OTel Collector YAML config\n# instead of the River/HCL config.alloy file.\n#\n# Usage: docker compose -f docker-compose.yml -f docker-compose-otel.yml up -d\n# Stop:  docker compose -f docker-compose.yml -f docker-compose-otel.yml down\n#\n\nservices:\n  alloy:\n    command: otel --config=/etc/alloy/config-otel.yaml\n    volumes:\n      - ./config-otel.yaml:/etc/alloy/config-otel.yaml\n    ports:\n      - 8888:8888      # OTel Engine HTTP server\n"
  },
  {
    "path": "trace-delivery/docker-compose.coda.yml",
    "content": "services:\n  # Sofa Shop Service\n  sofa-shop:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=sofa-shop\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=sofa-shop,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_FACTORY_URL=http://sofa-factory:8081\n    depends_on:\n      - sofa-factory\n    restart: on-failure\n\n  # Sofa Factory Service\n  sofa-factory:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8081:8081\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=sofa-factory\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=sofa-factory,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8081\n      - SERVICE_DISTRIBUTION_URL=http://global-distribution:8082\n    depends_on:\n      - global-distribution\n    restart: on-failure\n\n  # Global Distribution Service\n  global-distribution:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8082:8082\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=global-distribution\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=global-distribution,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8082\n      - SERVICE_LOCAL_URL=http://local-distribution:8083\n    depends_on:\n      - local-distribution\n    restart: on-failure\n\n  # Local Distribution Service\n  local-distribution:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n 
     - 8083:8083\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=local-distribution\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=local-distribution,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8083\n      - SERVICE_CUSTOMER_URL=http://customer-house:8084\n    depends_on:\n      - customer-house\n    restart: on-failure\n\n  # Customer House Service\n  customer-house:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8084:8084\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=customer-house\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=customer-house,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8084\n    restart: on-failure\n"
  },
  {
    "path": "trace-delivery/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n  # Prometheus for metrics collection\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    command:\n      - --web.enable-remote-write-receiver\n      - --web.enable-otlp-receiver\n      - --enable-feature=native-histograms\n      - --enable-feature=exemplar-storage\n      - --config.file=/etc/prometheus/prometheus.yml\n    ports:\n      - 9090:9090/tcp\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n  memcached:\n    image: memcached:1.6.40@sha256:572b011ce33954ee809066d8cecbeb3ec98912109ee3be3663a3197425fd81ac\n    container_name: memcached\n    ports:\n      - \"11211:11211\"\n    environment:\n      - MEMCACHED_MAX_MEMORY=64m  # Set the maximum memory usage\n      - MEMCACHED_THREADS=4       # Number of threads to use\n\n  # Tempo for tracing\n  tempo:\n    image: grafana/tempo:${GRAFANA_TEMPO_VERSION:-2.10.4}\n    command: [\"-config.file=/etc/tempo.yaml\"]\n    ports:\n      - 3200:3200/tcp    # tempo\n    volumes:\n      - ./tempo-config.yaml:/etc/tempo.yaml\n    depends_on:\n      - prometheus\n      - memcached\n\n  # Grafana for visualization\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n      - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-exploretraces-app/grafana-exploretraces-app-latest.zip;grafana-traces-app\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: false\n          version: 1\n          
editable: false\n        - name: Tempo\n          type: tempo\n          access: proxy\n          orgId: 1\n          url: http://tempo:3200\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n          jsonData:\n            serviceMap:\n              datasourceUid: 'Prometheus'\n            nodeGraph:\n              enabled: true\n        EOF\n        /run.sh\n    depends_on:\n      - prometheus\n      - tempo\n\n  # Alloy for telemetry pipeline\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    ports:\n      - 12345:12345      # Alloy HTTP server\n      - 4317:4317/tcp    # OTLP gRPC (used by our services)\n      - 4318:4318/tcp    # OTLP HTTP (used by our services)\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      - tempo\n      - prometheus\n\n  # Sofa Shop Service\n  sofa-shop:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8080:8080\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=sofa-shop\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=sofa-shop,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_FACTORY_URL=http://sofa-factory:8081\n    depends_on:\n      - alloy\n      - sofa-factory\n    restart: on-failure\n\n  # Sofa Factory Service\n  sofa-factory:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8081:8081\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=sofa-factory\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=sofa-factory,service.version=1.0.0,deployment.environment=delivery-demo\n      - 
SERVICE_PORT=8081\n      - SERVICE_DISTRIBUTION_URL=http://global-distribution:8082\n    depends_on:\n      - alloy\n      - global-distribution\n    restart: on-failure\n\n  # Global Distribution Service\n  global-distribution:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8082:8082\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=global-distribution\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=global-distribution,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8082\n      - SERVICE_LOCAL_URL=http://local-distribution:8083\n    depends_on:\n      - alloy\n      - local-distribution\n    restart: on-failure\n\n  # Local Distribution Service\n  local-distribution:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8083:8083\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=local-distribution\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=local-distribution,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8083\n      - SERVICE_CUSTOMER_URL=http://customer-house:8084\n    depends_on:\n      - alloy\n      - customer-house\n    restart: on-failure\n\n  # Customer House Service\n  customer-house:\n    build:\n      context: ./app\n      dockerfile: Dockerfile\n      args:\n        - PYTHON_VERSION=${PYTHON_VERSION:-3.11-slim}\n    ports:\n      - 8084:8084\n    environment:\n      - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4317\n      - OTEL_SERVICE_NAME=customer-house\n      - OTEL_RESOURCE_ATTRIBUTES=service.name=customer-house,service.version=1.0.0,deployment.environment=delivery-demo\n      - SERVICE_PORT=8084\n    depends_on:\n      - alloy\n    restart: on-failure "
  },
  {
    "path": "trace-delivery/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n\notlp:\n  # Recommended attributes to be promoted to labels.\n  promote_resource_attributes:\n    - service.name\n    - service.namespace\n    - service.version\n    - deployment.environment\n\nstorage:\n  tsdb:\n    out_of_order_time_window: 30m "
  },
  {
    "path": "trace-delivery/tempo-config.yaml",
    "content": "stream_over_http_enabled: true\nserver:\n  http_listen_port: 3200\n  log_level: info\n\n\ncache:\n  background:\n    writeback_goroutines: 5\n  caches:\n  - roles:\n    - frontend-search  \n    memcached: \n      addresses: dns+memcached:11211\n\nquery_frontend:\n  search:\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n    metadata_slo:\n        duration_slo: 5s\n        throughput_bytes_slo: 1.073741824e+09\n  trace_by_id:\n    duration_slo: 100ms\n  metrics:\n    max_duration: 200h                # maximum duration of a metrics query, increase for local setups\n    query_backend_after: 5m\n    duration_slo: 5s\n    throughput_bytes_slo: 1.073741824e+09\n\ndistributor:\n  receivers:                           # this configuration will listen on all ports and protocols that tempo is capable of.\n    jaeger:                            # the receives all come from the OpenTelemetry collector.  more configuration information can\n      protocols:                       # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver\n        thrift_http:                   #\n          endpoint: \"tempo:14268\"      # for a production deployment you should only enable the receivers you need!\n        grpc:\n          endpoint: \"tempo:14250\"\n        thrift_binary:\n          endpoint: \"tempo:6832\"\n        thrift_compact:\n          endpoint: \"tempo:6831\"\n    zipkin:\n      endpoint: \"tempo:9411\"\n    otlp:\n      protocols:\n        grpc:\n          endpoint: \"tempo:4317\"\n        http:\n          endpoint: \"tempo:4318\"\n    opencensus:\n      endpoint: \"tempo:55678\"\n\ningester:\n  max_block_duration: 5m               # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally\n\ncompactor:\n  compaction:\n    block_retention: 720h                # overall Tempo trace retention. 
set for demo purposes\n\nmetrics_generator:\n  registry:\n    external_labels:\n      source: tempo\n      cluster: docker-compose\n  storage:\n    path: /var/tempo/generator/wal\n    remote_write:\n      - url: http://prometheus:9090/api/v1/write\n        send_exemplars: true\n  traces_storage:\n    path: /var/tempo/generator/traces\n  processor:\n    local_blocks:\n      filter_server_spans: false\n      flush_to_storage: true\n\nstorage:\n  trace:\n    backend: local                     # backend configuration to use\n    wal:\n      path: /var/tempo/wal             # where to store the wal locally\n    local:\n      path: /var/tempo/blocks\n\noverrides:\n  defaults:\n    metrics_generator:\n      processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator\n      generate_native_histograms: both\n      "
  },
  {
    "path": "vault-secrets/README.md",
    "content": "# Vault secrets with Grafana Alloy\n\nDemonstrates Alloy's [`remote.vault`](https://grafana.com/docs/alloy/latest/reference/components/remote/remote.vault/) component pulling `prometheus.remote_write` basic_auth credentials from HashiCorp Vault at runtime, and shows that rotating the Vault secret is picked up without restarting Alloy.\n\n## Overview\n\n| Service       | Role                                                                |\n| ------------- | ------------------------------------------------------------------- |\n| `vault`       | HashiCorp Vault in dev mode. Boots, then seeds `secret/alloy/remote-write` from its entrypoint; the healthcheck passes only once that secret is readable. |\n| `nginx-auth`  | Basic-auth reverse proxy in front of Prometheus's remote-write API. |\n| `prometheus`  | Receives remote-writes from Alloy.                                  |\n| `grafana`     | Pre-provisioned with Prometheus as the default datasource.          |\n| `alloy`       | Scrapes its own `/metrics` and remote-writes via `nginx-auth`, with `basic_auth` credentials sourced from Vault. 
|\n\n```\n                                              ┌─────────────┐\n                                  reread 30s  │             │\n                ┌──── remote.vault ◀──────────│    Vault    │\n                │   (auth.token)              │             │\n                ▼                             └─────────────┘\n            ┌────────┐                              ▲\n            │ Alloy  │ scrape self → remote_write   │ vault kv put\n            └────────┘    (basic_auth from Vault)   │ via rotate.sh\n                │                                   │\n                ▼                                   │\n        ┌─────────────────┐   updated htpasswd     │\n        │ nginx-auth      │◀────────────────────────┘\n        │ (basic_auth)    │       via rotate.sh\n        └─────────────────┘\n                │\n                ▼\n          ┌────────────┐\n          │ Prometheus │\n          └────────────┘\n                ▲\n                │\n          ┌────────────┐\n          │  Grafana   │\n          └────────────┘\n```\n\n## Running\n\n```bash\ndocker compose up -d\n# or, from the repo root:\n./run-example.sh vault-secrets\n```\n\n| Service     | URL                                            |\n| ----------- | ---------------------------------------------- |\n| Grafana     | <http://localhost:3000>                        |\n| Alloy UI    | <http://localhost:12345>                       |\n| Prometheus  | <http://localhost:9090>                        |\n| Vault       | <http://localhost:8200> (token: `root-token-for-demo`) |\n| nginx-auth  | <http://localhost:8080> (basic-auth required)  |\n\n## What to expect on a fresh boot\n\n1. Watch nginx accept Alloy's writes:\n\n   ```bash\n   docker compose logs --tail=20 nginx-auth\n   ```\n\n   You should see `200` responses with `user=alloy`.\n\n2. 
Confirm the seeded secret in Vault:\n\n   ```bash\n   docker exec -e VAULT_ADDR=http://127.0.0.1:8200 \\\n     -e VAULT_TOKEN=root-token-for-demo \\\n     vault-secrets-vault vault kv get secret/alloy/remote-write\n   ```\n\n3. Inspect the Alloy pipeline at <http://localhost:12345> — `prometheus.remote_write.via_nginx` should be healthy with no last-error.\n\n4. Verify metrics flowed to Prometheus:\n\n   ```bash\n   curl -s 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result'\n   ```\n\n## Demonstrating credential rotation\n\nThe interesting moment is the `401 → 200` transition: rotating nginx's htpasswd makes Alloy fail auth immediately, then Alloy recovers automatically once the Vault secret is updated and `remote.vault` re-reads (≤ 30 s).\n\n```bash\n# Step 1 — rotate htpasswd, reload nginx. Alloy starts 401-ing.\n./rotate.sh htpasswd hunter2\n\n# Watch nginx logs for 401s with user=-\ndocker compose logs -f nginx-auth\n\n# Step 2 — update Vault to the new value. Alloy catches up within\n# reread_frequency (30s) and goes back to 200 with user=alloy.\n./rotate.sh vault hunter2\n\n# Or do both in one go with a built-in 5s gap to make the 401 window\n# observable:\n./rotate.sh both rotated-password\n```\n\nYou can also rotate Vault directly without the helper:\n\n```bash\ndocker exec -e VAULT_ADDR=http://127.0.0.1:8200 \\\n  -e VAULT_TOKEN=root-token-for-demo \\\n  vault-secrets-vault \\\n  vault kv put secret/alloy/remote-write username=alloy password=hunter2\n```\n\n## Inspecting Vault\n\n```bash\n# Read the current secret\ndocker exec -e VAULT_ADDR=http://127.0.0.1:8200 \\\n  -e VAULT_TOKEN=root-token-for-demo \\\n  vault-secrets-vault vault kv get secret/alloy/remote-write\n\n# Open the UI\nopen http://localhost:8200\n# Token: root-token-for-demo\n```\n\n## Notes and caveats\n\n- **Root token is hardcoded.** `root-token-for-demo` is fine for a demo, never for production. 
The real-world swap-in is `auth.approle` (with a wrapped role-id/secret-id) or `auth.kubernetes` — same component, different `auth.*` block.\n- **`convert.nonsensitive` on `basic_auth.username`.** `remote.vault.creds.data.username` is a `Secret`; `basic_auth.username` expects a plain `string`, so it has to be unwrapped. `basic_auth.password` accepts `Secret` directly, so it doesn't need the conversion. Forgetting `convert.nonsensitive` on the username is the single most common mistake — the error is \"expected string, got secret\" at config load.\n- **nginx is the source of truth for the credential.** If you update Vault but forget to update the htpasswd file, Alloy will 401 forever — that's the deliberate demo property, not a bug.\n- **Vault dev-mode is in-memory.** A `docker compose down` followed by `up` resets the secret to `initial-password`.\n- **Production caveat for the basic-auth path itself:** `Authorization: Basic …` is base64-encoded, not encrypted. In production this hop must be TLS — out of scope for this demo.\n\n## Stopping\n\n```bash\ndocker compose down --remove-orphans\n```\n"
  },
  {
    "path": "vault-secrets/auth/htpasswd",
    "content": "alloy:$2y$05$yXToETJn9D.sOxFM3036b.l2/FkJU1iN2CIuWYAqIIgT7xSMDvJtO\n\n"
  },
  {
    "path": "vault-secrets/config.alloy",
    "content": "// vault-secrets scenario\n//\n// remote.vault pulls remote_write basic_auth credentials from HashiCorp\n// Vault at runtime. reread_frequency makes Alloy pick up rotated values\n// without a restart — see README for the rotation demo.\n\nlivedebugging {\n\tenabled = true\n}\n\nremote.vault \"creds\" {\n\tserver = \"http://vault:8200\"\n\t// path = the KV mount; key = the secret path within that mount.\n\t// Alloy handles the KV v2 /data/ prefix internally.\n\tpath = \"secret\"\n\tkey  = \"alloy/remote-write\"\n\n\treread_frequency = \"30s\"\n\n\tauth.token {\n\t\ttoken = \"root-token-for-demo\"\n\t}\n}\n\nprometheus.exporter.self \"self\" {}\n\nprometheus.scrape \"self\" {\n\ttargets         = prometheus.exporter.self.self.targets\n\tforward_to      = [prometheus.remote_write.via_nginx.receiver]\n\tscrape_interval = \"10s\"\n}\n\nprometheus.remote_write \"via_nginx\" {\n\tendpoint {\n\t\turl = \"http://nginx-auth/api/v1/write\"\n\n\t\tbasic_auth {\n\t\t\tusername = convert.nonsensitive(remote.vault.creds.data.username)\n\t\t\tpassword = remote.vault.creds.data.password\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "vault-secrets/docker-compose.yml",
    "content": "services:\n  vault:\n    image: hashicorp/vault:${VAULT_VERSION:-2.0.0}\n    container_name: vault-secrets-vault\n    ports:\n      - \"8200:8200\"\n    environment:\n      VAULT_ADDR: http://127.0.0.1:8200\n      VAULT_TOKEN: root-token-for-demo\n    # Start dev-mode in the background, wait for readiness, then seed\n    # secret/alloy/remote-write. The wait keeps Vault as PID 1.\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        vault server -dev \\\n          -dev-listen-address=0.0.0.0:8200 \\\n          -dev-root-token-id=root-token-for-demo &\n        VAULT_PID=$$!\n        until vault status >/dev/null 2>&1; do sleep 1; done\n        vault kv put secret/alloy/remote-write \\\n          username=alloy \\\n          password=initial-password\n        echo \"seeded secret/alloy/remote-write\"\n        wait $$VAULT_PID\n    healthcheck:\n      # Pass only once the secret has been seeded — otherwise Alloy may\n      # start before the KV write lands and fail its first reread.\n      test: [\"CMD\", \"sh\", \"-c\", \"vault kv get secret/alloy/remote-write >/dev/null 2>&1\"]\n      interval: 5s\n      timeout: 3s\n      retries: 20\n\n  nginx-auth:\n    image: nginx:${NGINX_VERSION:-1.30-alpine}\n    container_name: vault-secrets-nginx-auth\n    ports:\n      - \"8080:80\"\n    volumes:\n      - ./nginx.conf:/etc/nginx/nginx.conf:ro\n      - ./auth/htpasswd:/etc/nginx/htpasswd:ro\n    depends_on:\n      - prometheus\n\n  prometheus:\n    image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n    container_name: vault-secrets-prometheus\n    command:\n      - --web.enable-remote-write-receiver\n      - --config.file=/etc/prometheus/prometheus.yml\n    volumes:\n      - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n    ports:\n      - \"9090:9090\"\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    container_name: vault-secrets-grafana\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - 
GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Prometheus\n          type: prometheus\n          orgId: 1\n          url: http://prometheus:9090\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n\n  alloy:\n    image: grafana/alloy:${GRAFANA_ALLOY_VERSION:-v1.16.1}\n    container_name: vault-secrets-alloy\n    ports:\n      - \"12345:12345\"\n    volumes:\n      - ./config.alloy:/etc/alloy/config.alloy\n    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy\n    depends_on:\n      vault:\n        condition: service_healthy\n      nginx-auth:\n        condition: service_started\n      prometheus:\n        condition: service_started\n"
  },
  {
    "path": "vault-secrets/nginx.conf",
    "content": "worker_processes 1;\nevents { worker_connections 1024; }\n\nhttp {\n    log_format auth '$remote_addr user=$remote_user [$time_local] '\n                    '\"$request\" $status $body_bytes_sent';\n    access_log /dev/stdout auth;\n    error_log  /dev/stderr warn;\n\n    upstream prom {\n        server prometheus:9090;\n    }\n\n    server {\n        listen 80;\n        server_name _;\n\n        location /api/v1/write {\n            auth_basic           \"alloy-remote-write\";\n            auth_basic_user_file /etc/nginx/htpasswd;\n\n            proxy_pass         http://prom/api/v1/write;\n            proxy_http_version 1.1;\n            proxy_set_header   Host $host;\n            proxy_set_header   X-Forwarded-For $remote_addr;\n        }\n\n        location = /healthz {\n            access_log off;\n            return 200 \"ok\\n\";\n        }\n    }\n}\n"
  },
  {
    "path": "vault-secrets/prom-config.yaml",
    "content": "global:\n  scrape_interval: 15s\n  evaluation_interval: 15s\n"
  },
  {
    "path": "vault-secrets/rotate.sh",
    "content": "#!/usr/bin/env bash\n# Demo helper for the vault-secrets scenario.\n#\n# Usage:\n#   ./rotate.sh htpasswd <new-password>   # update nginx htpasswd + reload\n#   ./rotate.sh vault    <new-password>   # update the Vault secret\n#   ./rotate.sh both     <new-password>   # do both, with a 5s gap so the\n#                                         # 401 window is visible\n\nset -euo pipefail\n\ncmd=${1:-}\npw=${2:-}\n\nif [[ -z \"$cmd\" || -z \"$pw\" ]]; then\n  echo \"usage: rotate.sh htpasswd|vault|both <new-password>\" >&2\n  exit 2\nfi\n\ncd \"$(dirname \"$0\")\"\n\nrotate_htpasswd() {\n  echo \">> generating new bcrypt entry for alloy\"\n  docker run --rm httpd:2.4-alpine htpasswd -nbB -C 5 alloy \"$pw\" \\\n    > auth/htpasswd\n  echo \">> reloading nginx\"\n  docker exec vault-secrets-nginx-auth nginx -s reload\n}\n\nrotate_vault() {\n  echo \">> writing new credentials to Vault\"\n  docker exec \\\n    -e VAULT_ADDR=http://127.0.0.1:8200 \\\n    -e VAULT_TOKEN=root-token-for-demo \\\n    vault-secrets-vault \\\n    vault kv put secret/alloy/remote-write \\\n      username=alloy \\\n      password=\"$pw\"\n}\n\ncase \"$cmd\" in\n  htpasswd) rotate_htpasswd ;;\n  vault)    rotate_vault ;;\n  both)\n    rotate_htpasswd\n    echo \">> nginx flipped; Alloy will 401 until Vault catches up. Sleeping 5s...\"\n    sleep 5\n    rotate_vault\n    ;;\n  *)\n    echo \"unknown command: $cmd\" >&2\n    exit 2\n    ;;\nesac\n"
  },
  {
    "path": "windows/README.md",
    "content": "# Monitoring Windows with Alloy\n\nGrafana Alloy can be used to monitor Windows servers and desktops. In this guide we will show you how to install Grafana Alloy on a Windows machine and how to configure it to monitor the following system attributes:\n* Windows Performance Metrics\n* Windows Event Logs\n\n## Prerequisites\n\n* Git - You will need Git to clone the repository.\n* Docker - In this tutorial we assume you are using Docker desktop for Windows. This is where we host Grafana, Loki and Prometheus. Note that you can also install native Windows versions of Grafana, Loki and Prometheus if you prefer or host them on a Linux server.\n* Windows Server or Desktop - We will be monitoring a Windows machine, so you will need a Windows server or desktop to monitor.\n* Admin access to the Windows machine - You will need admin access to the Windows machine to install the Grafana Alloy and configure it to collect metrics and logs.\n\n## Step 1: Clone the Repository\n\nClone the repository to your Windows machine.\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\n```\n\n## Step 2: Deploy Grafana, Loki and Prometheus\n\nFirst, you need to deploy Grafana, Loki and Prometheus on your Windows machine. 
Within this tutorial, we have included a docker-compose file that will deploy Grafana, Loki and Prometheus on your Windows machine.\n\n```bash\ncd alloy-scenarios/windows\ndocker-compose up -d\n```\n\nYou can check the status of the containers by running the following command:\n\n```bash\ndocker ps\n```\nGrafana should be running on [http://localhost:3000](http://localhost:3000).\n\n## Step 3: Install Grafana Alloy\n\nFollow the instructions in the [Grafana Alloy documentation](https://grafana.com/docs/alloy/latest/set-up/install/windows/) to install Grafana Alloy on your Windows machine.\n\nRecommended steps:\n* Install Grafana Alloy as a Windows service.\n* Use Windows Installer to install Grafana Alloy.\n\nMake sure to also check out the [Grafana Alloy configuration](https://grafana.com/docs/alloy/latest/set-up/configuration/) documentation.\n\nPersonal recommendation: If you would like to see the Alloy UI from a remote machine you need to change the run arguments of the Grafana Alloy service. To do this:\n\n1. Open Registry Editor.\n2. Navigate to `HKEY_LOCAL_MACHINE\\SOFTWARE\\GrafanaLabs\\Alloy`.\n3. Double click on `Arguments`.\n4. Change the contents to the following:\n```\nrun\nC:\\Program Files\\GrafanaLabs\\Alloy\\config.alloy\n--storage.path=C:\\ProgramData\\GrafanaLabs\\Alloy\\data\n--server.http.listen-addr=0.0.0.0:12345\n```\n5. Restart the Grafana Alloy service. (Search for `Services` in the start menu, find `Grafana Alloy`, right click and restart)\n\nYou should now be able to access the Alloy UI from a remote machine by going to `http://<windows-machine-ip>:12345`.\n\n## Step 4: Configure Grafana Alloy to Monitor Windows\n\nNow that you have Grafana Alloy installed, you need to configure it to monitor your Windows machine. Grafana Alloy will currently be running a default configuration file. This needs to be replaced with the `config.alloy` file that is included in the `alloy-scenarios/windows` directory. To do this: \n1. 
Stop the Grafana Alloy service.\n2. Replace the `config.alloy` file in `C:\\Program Files\\GrafanaLabs\\Alloy` with the `config.alloy` file from the `alloy-scenarios/windows` directory.\n3. Start the Grafana Alloy service.\n4. Open your browser and go to `http://localhost:12345` to access the Alloy UI.\n\n## Step 5: Viewing the Windows Performance Metrics and Event Logs\n\nYou will now be able to view the Windows Performance Metrics and Event Logs in Grafana:\n\n* Open your browser and go to [http://localhost:3000/explore/metrics](http://localhost:3000/explore/metrics). This will take you to the metrics explorer in Grafana.\n\n* Open your browser and go to [http://localhost:3000/a/grafana-lokiexplore-app](http://localhost:3000/a/grafana-lokiexplore-app). This will take you to the Loki explorer in Grafana.\n\n\n"
  },
  {
    "path": "windows/config.alloy",
    "content": "// ####################################\n// Windows Server Metrics Configuration\n// ####################################\n\nprometheus.exporter.windows \"default\" {\n  enabled_collectors = [\"cpu\",\"cs\",\"logical_disk\",\"net\",\"os\",\"service\",\"system\", \"memory\", \"scheduled_task\", \"tcp\"]\n}\n\n// Configure a prometheus.scrape component to collect windows metrics.\nprometheus.scrape \"example\" {\n  targets    = prometheus.exporter.windows.default.targets\n  forward_to = [prometheus.remote_write.demo.receiver]\n}\n\nprometheus.remote_write \"demo\" {\n  endpoint {\n    url = \"http://localhost:9090/api/v1/write\"\n  }\n}\n\n// ####################################\n// Windows Server Logs Configuration\n// ####################################\n\nloki.source.windowsevent \"application\"  {\n    eventlog_name = \"Application\"\n    use_incoming_timestamp = true\n    forward_to = [loki.process.endpoint.receiver]\n}\n\nloki.source.windowsevent \"System\"  {\n    eventlog_name = \"System\"\n    use_incoming_timestamp = true\n    forward_to = [loki.process.endpoint.receiver]\n}\n\nloki.process \"endpoint\" {\n  forward_to = [loki.write.endpoint.receiver]\n  stage.json {\n      expressions = {\n          message = \"\",\n          Overwritten = \"\",\n          source = \"\",\n          computer = \"\",\n          eventRecordID = \"\",\n          channel = \"\",\n          component_id = \"\",\n          execution = \"\",\n      }\n  }\n\n  // Extract nested fields from the \"execution\" object (e.g. 
processId, processName).\n  stage.json {\n      source = \"execution\"\n      expressions = {\n          processId = \"\",\n          processName = \"\",\n      }\n  }\n\n  stage.structured_metadata {\n      values = {\n          \"eventRecordID\" = \"\",\n          \"channel\" = \"\",\n          \"component_id\" = \"\",\n          \"execution_processId\" = \"processId\",\n          \"execution_processName\" = \"processName\",\n      }\n  }\n\n  stage.eventlogmessage {\n      source = \"message\"\n      overwrite_existing = true\n  }\n\n  stage.labels {\n      values = {\n          \"service_name\" = \"source\",\n      }\n}\n\nstage.output {\n    source = \"message\"\n}\n\n}\n\n\nloki.write \"endpoint\" {\n    endpoint {\n        url =\"http://localhost:3100/loki/api/v1/push\"\n    }\n}\n\nlivedebugging{}"
  },
  {
    "path": "windows/docker-compose.yml",
    "content": "version: '3.8'\n\nservices:\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - 3100:3100/tcp\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n\n  prometheus:\n     image: prom/prometheus:${PROMETHEUS_VERSION:-v3.11.3}\n     command:\n       - --web.enable-remote-write-receiver\n       - --config.file=/etc/prometheus/prometheus.yml\n     ports:\n      - 9090:9090/tcp\n     volumes:\n        - ./prom-config.yaml:/etc/prometheus/prometheus.yml\n\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - 3000:3000/tcp\n    entrypoint:\n       - sh\n       - -euc\n       - |\n         mkdir -p /etc/grafana/provisioning/datasources\n         cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n         apiVersion: 1\n         datasources:\n         - name: Loki\n           type: loki\n           access: proxy\n           orgId: 1\n           url: http://loki:3100\n           basicAuth: false\n           isDefault: false\n           version: 1\n           editable: false\n         - name: Prometheus\n           type: prometheus\n           orgId: 1\n           url: http://prometheus:9090\n           basicAuth: false\n           isDefault: true\n           version: 1\n           editable: false\n         EOF\n         /run.sh\n\n"
  },
  {
    "path": "windows/loki-config.yaml",
    "content": "\n# This is a complete configuration to deploy Loki backed by the filesystem.\n# The index will be shipped to the storage via tsdb-shipper.\n\nauth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n  - from: 2020-05-15\n    store: tsdb\n    object_store: filesystem\n    schema: v13\n    index:\n      prefix: index_\n      period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\n# Note: We are setting the max chunk age far lower than the default expected value\n# This is due to the fact this scenario is used within the LogCLI demo and we need a short flush time.\n# To show how logcli stats --since 24h '{service_name=\"Delivery World\", package_size=\"Large\"}' works.\ningester:\n  max_chunk_age: 5m # Should be 2 hours"
  },
  {
    "path": "windows/prom-config.yaml",
    "content": "# my global config\nglobal:\n  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.\n  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.\n  # scrape_timeout is set to the global default (10s).\n\n\n"
  },
  {
    "path": "windows-events/README.md",
    "content": "# Windows Security Event Logs with Grafana Alloy\n\nA focused logs-only scenario for shipping the **Windows Security event channel** to Loki, with filtering and field-extraction tuned for SOC-style queries (logon attempts, privilege escalation, account changes).\n\n## How this differs from the [`windows/`](../windows/) scenario\n\n| Aspect | `windows/` (broad) | `windows-events/` (this) |\n|---|---|---|\n| Channels | Application + System + Performance metrics | **Security** only |\n| Processing | Pass-through with basic JSON parsing | **Drops noise event IDs** + extracts security-specific fields as labels |\n| Backend | Loki + Prometheus + Grafana | **Loki + Grafana** (no metrics) |\n| Demo intent | \"ship Windows logs to Loki\" | \"make Security events queryable for SOC use cases\" |\n\nIf you want general-purpose Windows monitoring, use `windows/`. If you specifically care about Security audit events, use this one.\n\n## Prerequisites\n\n- A Windows host (Server or Desktop) with admin access — `loki.source.windowsevent` reads from the Windows Event Log API and only runs on Windows.\n- Docker Desktop for Windows (or any Linux machine you can reach over the network) for the Loki/Grafana backend.\n- Git, to clone the repo.\n\n## Step 1 — Backend (Loki + Grafana)\n\nOn the machine that will host the backend (the Windows host itself, or any Linux machine):\n\n```bash\ngit clone https://github.com/grafana/alloy-scenarios.git\ncd alloy-scenarios/windows-events\ndocker compose up -d\n```\n\nGrafana is on `http://<backend-host>:3000` with the Loki datasource already provisioned.\n\n## Step 2 — Install Alloy on the Windows host\n\nFollow the [Windows install guide](https://grafana.com/docs/alloy/latest/set-up/install/windows/). 
Recommended: Windows Installer + Windows Service.\n\nIf your backend is on a different machine than the Windows host, edit the `loki.write` URL in `config.alloy` from `http://localhost:3100` to `http://<backend-host>:3100`.\n\n## Step 3 — Replace the Alloy config\n\n1. Stop the `Grafana Alloy` Windows service.\n2. Replace `C:\\Program Files\\GrafanaLabs\\Alloy\\config.alloy` with the [`config.alloy`](./config.alloy) from this directory.\n3. Start the service.\n4. Open `http://localhost:12345` to confirm the components load without error.\n\n## Step 4 — Generate Security events\n\nTo see traffic, trigger some auditable actions on the Windows host:\n\n- **Failed logon (4625)**: try to log in with a wrong password from a remote machine, or run `runas /user:fakeuser cmd` and enter a wrong password.\n- **Successful logon (4624)**: log out and back in, or open a new RDP session.\n- **User created (4720)**: `net user testuser P@ssw0rd /add` from an admin shell.\n- **Privilege use (4672)**: any action requiring Administrator elevation.\n\nSome of these only generate events if the corresponding **audit policy** is enabled. Check `auditpol /get /category:*` on the Windows host; enable additional audit policies via `auditpol /set /subcategory:\"<name>\" /success:enable /failure:enable` if needed.\n\n## Step 5 — Query in Grafana\n\n```logql\n# All Security events\n{eventlog_name=\"Security\"}\n\n# Failed logons\n{eventlog_name=\"Security\", event_id=\"4625\"}\n\n# Successful logons by a specific user\n{eventlog_name=\"Security\", event_id=\"4624\", target_user_name=\"alice\"}\n\n# All events affecting a specific user account\n{eventlog_name=\"Security\", target_user_name=\"alice\"}\n\n# Recent privileged-operation events\n{eventlog_name=\"Security\", event_id=~\"4672|4673\"}\n```\n\nThe promoted labels are `event_id`, `subject_user_name`, `target_user_name`, and `logon_type`. 
Other event fields (computer, eventRecordID, channel) are kept as **structured metadata** — queryable with a label filter expression after the stream selector (for example `{eventlog_name=\"Security\"} | computer=\"HOST01\"`) without inflating the label index.\n\n## What's filtered out\n\nThe pipeline drops these event IDs at the Alloy side:\n\n| Event ID | Description | Why dropped |\n|---|---|---|\n| 4658 | Handle to an object was closed | Pairs with 4656/4663; on its own rarely actionable |\n| 4690 | Attempt to duplicate a handle to an object | Audit noise |\n| 4674 | Operation attempted on a privileged object | Fires for routine privileged ops |\n| 5379 | Credential Manager credentials were read | Frequent false-positive in normal use |\n\nIf you want one of these back, edit `stage.match` in `config.alloy` to remove the corresponding ID from the `event_id=~\"…\"` regex.\n\n## Stopping\n\n```bash\ndocker compose down -v\n```\n\nStop the Alloy Windows service separately if you no longer want it running.\n"
  },
  {
    "path": "windows-events/config.alloy",
    "content": "// ###################################################################\n// Windows Security Event Log → Loki, with filtering and field labels\n// ###################################################################\n//\n// Differs from the broader `windows/` scenario in three ways:\n//   1. Security channel only (Application + System are covered there)\n//   2. Drops high-volume audit-noise event IDs that bury real signal\n//   3. Promotes security-specific fields (subject_user_name,\n//      target_user_name, logon_type) to labels for SOC-style queries\n//\n// Run target: a Windows host with Alloy installed natively. The\n// docker-compose.yml in this directory only runs Loki + Grafana;\n// Alloy itself is a Windows service.\n\nlivedebugging {}\n\n// Ingest the Security channel. `use_incoming_timestamp = true` keeps\n// the original event time rather than the time Alloy received it,\n// which matters when replaying historical logs after an Alloy restart.\nloki.source.windowsevent \"security\" {\n\teventlog_name          = \"Security\"\n\tuse_incoming_timestamp = true\n\tforward_to             = [loki.process.security.receiver]\n}\n\nloki.process \"security\" {\n\t// Step 1: parse the windowsevent JSON wrapper.\n\tstage.json {\n\t\texpressions = {\n\t\t\tmessage       = \"\",\n\t\t\teventRecordID = \"\",\n\t\t\tchannel       = \"\",\n\t\t\tcomputer      = \"\",\n\t\t}\n\t}\n\n\t// Step 2: parse the event message (XML/EventData) into top-level\n\t// fields. 
The exact keys depend on event type — `eventlogmessage`\n\t// pulls every named field from the XML/EventData payload.\n\tstage.eventlogmessage {\n\t\tsource             = \"message\"\n\t\toverwrite_existing = true\n\t}\n\n\t// Step 3: drop high-noise event IDs that are rarely useful in a\n\t// SOC dashboard but consume most of the Security log volume:\n\t//   4658 — handle to an object closed\n\t//   4690 — attempt to duplicate a handle to an object\n\t//   4674 — operation attempted on a privileged object\n\t//   5379 — Credential Manager credentials read\n\tstage.match {\n\t\tselector = `{event_id=~\"4658|4690|4674|5379\"}`\n\t\taction   = \"drop\"\n\t}\n\n\t// Step 4: promote useful fields to labels. Indexed labels make\n\t// \"show me all failed logons by username\" queries cheap.\n\tstage.labels {\n\t\tvalues = {\n\t\t\tevent_id          = \"\",\n\t\t\tsubject_user_name = \"\",\n\t\t\ttarget_user_name  = \"\",\n\t\t\tlogon_type        = \"\",\n\t\t}\n\t}\n\n\t// Step 5: keep heavyweight fields out of labels but searchable\n\t// via structured metadata.\n\tstage.structured_metadata {\n\t\tvalues = {\n\t\t\teventRecordID = \"\",\n\t\t\tchannel       = \"\",\n\t\t\tcomputer      = \"\",\n\t\t}\n\t}\n\n\tforward_to = [loki.write.endpoint.receiver]\n}\n\nloki.write \"endpoint\" {\n\tendpoint {\n\t\turl = \"http://localhost:3100/loki/api/v1/push\"\n\t}\n}\n"
  },
  {
    "path": "windows-events/docker-compose.yml",
    "content": "services:\n\n  loki:\n    image: grafana/loki:${GRAFANA_LOKI_VERSION:-3.6.10}\n    ports:\n      - \"3100:3100/tcp\"\n    volumes:\n      - ./loki-config.yaml:/etc/loki/local-config.yaml\n    command: -config.file=/etc/loki/local-config.yaml\n\n  grafana:\n    image: grafana/grafana:${GRAFANA_VERSION:-13.0.1}\n    environment:\n      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin\n      - GF_AUTH_ANONYMOUS_ENABLED=true\n      - GF_AUTH_BASIC_ENABLED=false\n    ports:\n      - \"3000:3000/tcp\"\n    entrypoint:\n      - sh\n      - -euc\n      - |\n        mkdir -p /etc/grafana/provisioning/datasources\n        cat <<EOF > /etc/grafana/provisioning/datasources/ds.yaml\n        apiVersion: 1\n        datasources:\n        - name: Loki\n          type: loki\n          access: proxy\n          orgId: 1\n          url: http://loki:3100\n          basicAuth: false\n          isDefault: true\n          version: 1\n          editable: false\n        EOF\n        /run.sh\n"
  },
  {
    "path": "windows-events/loki-config.yaml",
    "content": "auth_enabled: false\n\nlimits_config:\n  allow_structured_metadata: true\n  volume_enabled: true\n\nserver:\n  http_listen_port: 3100\n\ncommon:\n  ring:\n    instance_addr: 0.0.0.0\n    kvstore:\n      store: inmemory\n  replication_factor: 1\n  path_prefix: /tmp/loki\n\nschema_config:\n  configs:\n    - from: 2020-05-15\n      store: tsdb\n      object_store: filesystem\n      schema: v13\n      index:\n        prefix: index_\n        period: 24h\n\nstorage_config:\n  tsdb_shipper:\n    active_index_directory: /tmp/loki/index\n    cache_location: /tmp/loki/index_cache\n  filesystem:\n    directory: /tmp/loki/chunks\n\npattern_ingester:\n  enabled: true\n\ningester:\n  max_chunk_age: 5m\n"
  }
]