[
  {
    "path": ".github/ai-opt-out",
    "content": "opt-out: true\n"
  },
  {
    "path": ".github/workflows/docker-image.yml",
    "content": "---\nname: docker-image\non:\n  push:\n    tags: ['*']\n    branches: ['master']\n    paths-ignore: ['charts/**']\npermissions:\n  id-token: write\n  contents: read\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: aws-actions/configure-aws-credentials@v4\n        with:\n          aws-region: ${{ vars.RP_AWS_CRED_REGION }}\n          role-to-assume: arn:aws:iam::${{ secrets.RP_AWS_CRED_ACCOUNT_ID }}:role/${{ vars.RP_AWS_CRED_BASE_ROLE_NAME }}${{ github.event.repository.name }}\n      - uses: aws-actions/aws-secretsmanager-get-secrets@v2\n        with:\n          secret-ids: |\n            ,sdlc/prod/github/dockerhub\n          parse-json-secrets: true\n      - uses: actions/checkout@v4\n      - uses: docker/setup-qemu-action@v3\n      - uses: docker/setup-buildx-action@v3\n        with:\n          driver-opts: |\n            image=moby/buildkit:v0.21.1\n            network=host\n      - name: Set build date\n        run: |\n          echo \"BUILT_AT=$(date --rfc-3339=date)\" >> ${GITHUB_ENV}\n      - uses: docker/metadata-action@v5\n        id: docker_meta\n        with:\n          # list of Docker images to use as base name for tags\n          images: |\n            redpandadata/kminion\n            name=public.ecr.aws/l9j0i2e0/kminion,enable=${{ startsWith(github.ref, 'refs/tags/v') }}\n          # generate Docker tags based on the following events/attributes\n          # Semver type is only active on 'push tag' events,\n          # hence no enable condition required\n          tags: |\n            type=sha,prefix={{branch}}-,format=short,enable={{is_default_branch}}\n            type=semver,pattern={{raw}}\n      - uses: docker/login-action@v3\n        with:\n          username: ${{ env.DOCKERHUB_USER }}\n          password: ${{ env.DOCKERHUB_TOKEN }}\n      - uses: aws-actions/configure-aws-credentials@v4\n        if: ${{ startsWith(github.ref, 'refs/tags/v') }}\n        with:\n          aws-region: us-east-1\n          role-to-assume: arn:aws:iam::${{ secrets.RP_AWS_CRED_ACCOUNT_ID }}:role/${{ vars.RP_AWS_CRED_BASE_ROLE_NAME }}${{ github.event.repository.name }}\n      - uses: aws-actions/amazon-ecr-login@v2\n        if: ${{ startsWith(github.ref, 'refs/tags/v') }}\n        with:\n          registry-type: public\n      - uses: docker/build-push-action@v6\n        with:\n          provenance: false\n          push: true\n          platforms: linux/amd64,linux/arm64\n          tags: ${{ steps.docker_meta.outputs.tags }}\n          build-args: |\n            VERSION=${{ fromJSON(steps.docker_meta.outputs.json).labels['org.opencontainers.image.version'] }}\n            BUILT_AT=${{ env.BUILT_AT }}\n            COMMIT=${{ github.sha }}\n          cache-from: type=gha\n          cache-to: type=gha,mode=max\n"
  },
  {
    "path": ".github/workflows/goreleaser.yml",
    "content": "---\nname: goreleaser\non:\n  push:\n    tags: ['*']\njobs:\n  goreleaser:\n    runs-on: ubuntu-latest\n    permissions:\n      contents: write\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          fetch-depth: 0\n      - uses: actions/setup-go@v6\n        with:\n          go-version-file: 'go.mod'\n      - uses: goreleaser/goreleaser-action@v7\n        if: startsWith(github.ref, 'refs/tags/')\n        with:\n          version: latest\n          args: release --clean\n          workdir: .\n        env:\n          CGO_ENABLED: 0\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          GORELEASER_CURRENT_TAG: ${{ github.ref_name }}\n"
  },
  {
    "path": ".gitignore",
    "content": "# Binaries for programs and plugins\n*.exe\n*.exe~\n*.dll\n*.so\n*.dylib\n\n# Test binary, build with `go test -c`\n*.test\n\n# Output of the go coverage tool, specifically when used with LiteIDE\n*.out\nzk-single-kafka-single\nzk-multiple-kafka-multiple\n.vscode\n.idea\n\nconfig\n/kminion\n"
  },
  {
    "path": ".goreleaser.yml",
    "content": "---\nversion: 2\nrelease:\n  name_template: '{{.Version}} / {{time \"2006-01-02\"}}'\n  prerelease: auto\n  mode: append\n  footer: |\n    ## Docker Image\n    Use the following command to pull this release's Docker image:\n    ```sh\n    docker pull redpandadata/kminion:{{ .Tag }}\n    ```\nchangelog:\n  disable: false\n  use: github\n  filters:\n    # Commit messages matching the regexp listed here will be removed from the changelog\n    exclude:\n      - '^docs:'\n      - '^test:'\n      - '^npm:'\n      - '^go.mod:'\n      - '^.github:'\n      - 'Merge branch'\nbuilds:\n  - id: kminion\n    binary: kminion\n    goos:\n      - darwin\n      - linux\n      - windows\n    goarch:\n      - amd64\n      - arm64\n    ldflags:\n      - -s -w -X main.version={{.Version}} -X main.builtAt={{.Date}} -X main.commit={{.Commit}}\nchecksum:\n  name_template: 'checksums.txt'\n"
  },
  {
    "path": "Dockerfile",
    "content": "############################################################\n# Build image\n############################################################\nFROM golang:1.26-alpine AS builder\n\nARG VERSION\nARG BUILT_AT\nARG COMMIT\n\nRUN apk update && apk upgrade --no-cache && apk add --no-cache git ca-certificates && update-ca-certificates\n\nWORKDIR /app\n\nCOPY go.mod .\nCOPY go.sum .\nRUN go mod download\n\nCOPY . .\n\nRUN CGO_ENABLED=0 go build \\\n    -ldflags=\"-w -s \\\n    -X main.version=$VERSION \\\n    -X main.commit=$COMMIT \\\n    -X main.builtAt=$BUILT_AT\" \\\n    -o ./bin/kminion\n\n############################################################\n# Runtime Image\n############################################################\nFROM alpine:3\nCOPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/\nCOPY --from=builder /app/bin/kminion /app/kminion\nRUN addgroup -S redpanda \\\n    && adduser -S redpanda -G redpanda \\\n    && chmod o+rx /app/kminion \\\n    && apk upgrade --no-cache\nUSER redpanda\n\nENTRYPOINT [\"/app/kminion\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2021 CloudHut\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# Redpanda Minion (KMinion) - Prometheus Exporter via Kafka API\n\n[Redpanda](https://www.redpanda.com) Minion is a feature-rich and flexible Prometheus Exporter to monitor your Redpanda or Kafka cluster. \nAll valuable information that are accessible via the Kafka protocol are supposed to be accessible using\nKMinion.\n\n## 🚀 Features\n\n- **Kafka versions:** Supports all Kafka versions v0.11+\n- **Supported SASL mechanisms:** plain, scram-sha-256/512, gssapi/kerberos\n- **TLS support:** TLS is supported, regardless whether you need mTLS, a custom CA, encrypted keys or just the trusted\n  root certs\n- **Consumer Group Lags:** Number of messages a consumer group is lagging behind the latest offset\n- **Log dir sizes:** Metric for log dir sizes either grouped by broker or by topic\n- **Broker info:** Metric for each broker with its address, broker id, controller and rack id\n- **Configurable granularity:** Export metrics (e.g. consumer group lags) either per partition or per topic. Helps to reduce the number of exported metric series.\n- **End to End Monitoring:** Sends messages to its own topic and consumes them, measuring a messages real-world \"roundtrip\" latency. Also provides ack-latency and offset-commit-latency. [More Info](/docs/end-to-end.md)\n- **Configurable targets:** You can configure what topics or groups you'd like to export using regex expressions\n- **Multiple config parsers:** It's possible to configure KMinion using YAML, Environment variables or a mix of both\n\nYou can find a list of all exported metrics here: [/docs/metrics.md](/docs/metrics.md)\n\n## Getting started\n\n### 🐳 Docker image\n\nAll images will be built on each push to master or for every new release. You can find an overview of all available tags\nin our [DockerHub repository](https://hub.docker.com/r/redpandadata/kminion/tags).\n\n```shell\ndocker pull redpandadata/kminion:latest\n```\n\n### ☸ Helm chart\n\nA Helm chart will be maintained as part of Redpanda's [helm-charts](https://github.com/redpanda-data/helm-charts/tree/main/charts/kminion) repository.\n\n### 🔧 Configuration\n\nAll options in KMinion can be configured via YAML or environment variables. Configuring some options via YAML and some\nvia environment variables is also possible. Environment variables take precedence in this case. You can find the\nreference config with additional documentation in [/docs/reference-config.yaml](/docs/reference-config.yaml).\n\nIf you want to use a YAML config file, specify the path to the config file by setting the env variable\n`CONFIG_FILEPATH`.\n\n### 📊 Grafana Dashboards\n\nI uploaded three separate Grafana dashboards that can be used as inspiration in order to create your own dashboards. Please take note that these dashboards might not immediately work for you due to different labeling in your Prometheus config.\n\nCluster Dashboard: https://grafana.com/grafana/dashboards/14012\n\nConsumer Group Dashboard: https://grafana.com/grafana/dashboards/14014\n\nTopic Dashboard: https://grafana.com/grafana/dashboards/14013\n\n<p float=\"left\">\n  <img src=\"/docs/screenshots/kminion-cluster.png\" width=\"250\" />\n  <img src=\"/docs/screenshots/kminion-groups.png\" width=\"250\" /> \n  <img src=\"/docs/screenshots/kminion-topics.png\" width=\"250\" />\n</p>\n\n### ⚡ Testing locally\n\nThis repo contains a docker-compose file that you can run on your machine. It will spin up a Kafka & ZooKeeper cluster\nand starts KMinion on port 8080 which is exposed to your host machine:\n\n```shell\n# 1. 
Clone this repo\n# 2. Browse to the repo's root directory and run:\ndocker-compose up\n```\n\n## Chat with us\n\nWe use Slack to communicate. If you are looking for more interactive discussions or support, you are invited to join\nour Slack server: https://redpanda.com/slack\n\n## License\n\nKMinion is distributed under the [MIT License](https://github.com/cloudhut/kminion/blob/master/LICENSE).\n"
  },
  {
    "path": "charts/README.md",
    "content": "# Helm Chart\n\n⚠️ This chart has been moved to https://github.com/redpanda-data/helm-charts/tree/main/charts/kminion . Please install this chart instead. The existing archives are still being hosted here, to not break existing deployments.\n\n---\n\nThis chart is intentionally very light on input validation. The goal was to offer a flexible Helm chart that allows\nusers to deploy KMinion the way they want to. Therefore it's very flexible at the cost of less input validation, so that\nyou might run into runtime errors for a misconfiguration.\n\nAll available input is documented inside of the [values.yaml](./kminion/values.yaml) file.\n\n## Installing the Helm chart\n\n```shell\nhelm repo add kminion https://raw.githubusercontent.com/cloudhut/kminion/master/charts/archives\nhelm repo update\nhelm install -f values.yaml kminion kminion/kminion\n```\n"
  },
  {
    "path": "charts/archives/.gitkeep",
    "content": ""
  },
  {
    "path": "charts/archives/index.yaml",
    "content": "apiVersion: v1\nentries:\n  kminion:\n  - apiVersion: v2\n    appVersion: v2.2.5\n    created: \"2023-07-03T16:38:22.568312+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 32e2ee36d0b0a045061d4e1490780fef905b4c85d7a23659819c5cb128aaa119\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.12.0.tgz\n    version: 0.12.0\n  - apiVersion: v2\n    appVersion: v2.2.5\n    created: \"2023-07-03T16:38:22.567922+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 8a7be130d57f6f8ead720277b69319ff4dcd364859e80f4750416abe5ed460c3\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.11.3.tgz\n    version: 0.11.3\n  - apiVersion: v2\n    appVersion: v2.2.3\n    created: \"2023-07-03T16:38:22.5675+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 42991a871f58b6d31a9e5b38539eb3d1e9cd35c0097a0fcf63f21f818fa7a999\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.11.2.tgz\n    version: 0.11.2\n  - apiVersion: v2\n    appVersion: v2.2.3\n    created: \"2023-07-03T16:38:22.566877+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 65d7231f1e8ee586bec42bc383b66726d596fe03e0f3183e14b688174a3a8112\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.11.1.tgz\n    version: 0.11.1\n  - apiVersion: v2\n    appVersion: v2.2.0\n    created: \"2023-07-03T16:38:22.575384+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 358bdd509f573049d4bfe77d2edb94c7ad3938f609aea11a8e2c2dc65cca2a9a\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.9.0.tgz\n    version: 0.9.0\n  - apiVersion: v2\n    appVersion: v2.2.0\n    created: \"2023-07-03T16:38:22.574906+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: be8f0047b345d3954fc7c7e7f8953a848c909ef253107d6e77ed747843ddd167\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.8.3.tgz\n    version: 0.8.3\n  - apiVersion: v2\n    appVersion: v2.1.0\n    created: \"2023-07-03T16:38:22.573746+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 888bc665cddc6b6b99af1ce6dd1dea0b107a2e928dff6bfe1c077bc741e20ef7\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.8.2.tgz\n    version: 0.8.2\n  - apiVersion: v2\n    appVersion: v2.1.0\n    created: \"2023-07-03T16:38:22.573271+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: e59c5d5574f162708bf1434c266acbfd9040a89aa7a4abd4a0db70885248e38d\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.8.1.tgz\n    version: 0.8.1\n  - apiVersion: v2\n    appVersion: v2.1.0\n    created: \"2023-07-03T16:38:22.572697+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: f54d8236f8cf03c863b53e077e1647164ffe2a7c34e1cf77101fa3312c589706\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.8.0.tgz\n    version: 0.8.0\n  - apiVersion: v2\n    appVersion: v2.1.0\n    created: \"2023-07-03T16:38:22.572269+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n    
  Kafka\n    digest: 4cc64cd9f78bd55673b00612579157e493020fb76440abbef10fe5152aef9acc\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.7.0.tgz\n    version: 0.7.0\n  - apiVersion: v2\n    appVersion: v2.1.0\n    created: \"2023-07-03T16:38:22.571852+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 0955e04fe9ef4b516fb0d9ed439ae79778ccdffcf817f09099790cb7e183e4d4\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.6.0.tgz\n    version: 0.6.0\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.571391+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: d3eb64d05535e136802538662eef7e9fdfdb3f0b93b6a42dfdcc93ee7deeadbd\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.5.0.tgz\n    version: 0.5.0\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.570618+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 6b4209352d1dffd7873791ee1573dc325eb08d67656b01b430729f45dea4c09a\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.4.0.tgz\n    version: 0.4.0\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.570281+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: c51e3b45791e9fd51f33036916b0d36f7ac695e2fa916a9e99882ea83914ed97\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.3.1.tgz\n    version: 0.3.1\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.569892+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: a2be2dd8a02dc5222ec7386195a0e25b2682a39bbdcf52b60793c171acac7653\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.3.0.tgz\n    version: 0.3.0\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.569445+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 321b6d5ff95ce310d2a3257b3d55f9ced51de99af6519d6d91723d7bdb6456fa\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.2.2.tgz\n    version: 0.2.2\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.569089+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: ed57df27158521a1eb33d215731fcc3248c71b3f36a4a029eb2d3a7b617ca519\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.2.1.tgz\n    version: 0.2.1\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.568694+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 025661ee7cc574ad8dde7a68093a3b614fc92e26dd5dd398fc89d0b5308010e1\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.2.0.tgz\n    version: 0.2.0\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.566269+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: e277e976d864b4bd2e505038dd865a9300486ae8c4323d3f0be40b84df75732b\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.1.3.tgz\n    version: 0.1.3\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: 
\"2023-07-03T16:38:22.565773+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 562937d3613624c55984e51adbc6765e7898d1cf8cc2d7d241b6d671bbc12303\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.1.2.tgz\n    version: 0.1.2\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.562776+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 25e83d7c7cc92a63268d76b13ecc13077758b48be093490f281498a4f55ad3ca\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.1.1.tgz\n    version: 0.1.1\n  - apiVersion: v2\n    appVersion: v2.0.0\n    created: \"2023-07-03T16:38:22.562046+01:00\"\n    description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache\n      Kafka\n    digest: 7c10e9d9957e9752bc6f4b4a1fffb742d88cd57be06bf4f26ff7b5031645ccbd\n    name: kminion\n    type: application\n    urls:\n    - kminion-0.1.0.tgz\n    version: 0.1.0\ngenerated: \"2023-07-03T16:38:22.560328+01:00\"\n"
  },
  {
    "path": "charts/kminion/.helmignore",
    "content": "# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation (prefixed with !). Only one pattern per line.\n.DS_Store\n# Common VCS dirs\n.git/\n.gitignore\n.bzr/\n.bzrignore\n.hg/\n.hgignore\n.svn/\n# Common backup files\n*.swp\n*.bak\n*.tmp\n*.orig\n*~\n# Various IDEs\n.project\n.idea/\n*.tmproj\n.vscode/\n"
  },
  {
    "path": "charts/kminion/Chart.yaml",
    "content": "apiVersion: v2\nname: kminion\ndescription: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka\n\n# A chart can be either an 'application' or a 'library' chart.\n#\n# Application charts are a collection of templates that can be packaged into versioned archives\n# to be deployed.\n#\n# Library charts provide useful utilities or functions for the chart developer. They're included as\n# a dependency of application charts to inject those utilities and functions into the rendering\n# pipeline. Library charts do not define any templates and therefore cannot be deployed.\ntype: application\n\n# This is the chart version. This version number should be incremented each time you make changes\n# to the chart and its templates, including the app version.\n# Versions are expected to follow Semantic Versioning (https://semver.org/)\nversion: 0.12.0\n\n# This is the version number of the application being deployed. This version number should be\n# incremented each time you make changes to the application. Versions are not expected to\n# follow Semantic Versioning. They should reflect the version the application is using.\n# It is recommended to use it with quotes.\nappVersion: \"v2.2.5\"\n"
  },
  {
    "path": "charts/kminion/templates/NOTES.txt",
    "content": "1. Get the application URL by running these commands:\n{{- if .Values.ingress.enabled }}\n     {{- range .Values.ingress.hosts }}\n     http://{{ . }}\n     {{- end }}\n{{- else if contains \"NodePort\" .Values.service.type }}\n  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath=\"{.spec.ports[0].nodePort}\" services {{ include \"kminion.fullname\" . }})\n  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath=\"{.items[0].status.addresses[0].address}\")\n  echo http://$NODE_IP:$NODE_PORT\n{{- else if contains \"LoadBalancer\" .Values.service.type }}\n     NOTE: It may take a few minutes for the LoadBalancer IP to be available.\n           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include \"kminion.fullname\" . }}'\n  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include \"kminion.fullname\" . }} --template \"{{\"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}\"}}\")\n  echo http://$SERVICE_IP:{{ .Values.service.port }}\n{{- else if contains \"ClusterIP\" .Values.service.type }}\n  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l \"app.kubernetes.io/name={{ include \"kminion.name\" . }},app.kubernetes.io/instance={{ .Release.Name }}\" -o jsonpath=\"{.items[0].metadata.name}\")\n  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath=\"{.spec.containers[0].ports[0].containerPort}\")\n  echo \"Visit http://127.0.0.1:8080 to use your application\"\n  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT\n{{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/_helpers.tpl",
    "content": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"kminion.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n\n{{/*\nCreate a default fully qualified app name.\nWe truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).\nIf release name contains chart name it will be used as a full name.\n*/}}\n{{- define \"kminion.fullname\" -}}\n{{- if .Values.fullnameOverride }}\n{{- .Values.fullnameOverride | trunc 63 | trimSuffix \"-\" }}\n{{- else }}\n{{- $name := default .Chart.Name .Values.nameOverride }}\n{{- if contains $name .Release.Name }}\n{{- .Release.Name | trunc 63 | trimSuffix \"-\" }}\n{{- else }}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n{{- end }}\n{{- end }}\n\n{{/*\nCreate chart name and version as used by the chart label.\n*/}}\n{{- define \"kminion.chart\" -}}\n{{- printf \"%s-%s\" .Chart.Name .Chart.Version | replace \"+\" \"_\" | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n\n{{/*\nCommon labels\n*/}}\n{{- define \"kminion.labels\" -}}\nhelm.sh/chart: {{ include \"kminion.chart\" . }}\n{{ include \"kminion.selectorLabels\" . }}\n{{- if .Chart.AppVersion }}\napp.kubernetes.io/version: {{ .Chart.AppVersion | quote }}\n{{- end }}\napp.kubernetes.io/managed-by: {{ .Release.Service }}\n{{- if .Values.customLabels}}\n{{ toYaml .Values.customLabels }}\n{{- end}}\n{{- end }}\n\n{{/*\nSelector labels\n*/}}\n{{- define \"kminion.selectorLabels\" -}}\napp.kubernetes.io/name: {{ include \"kminion.name\" . }}\napp.kubernetes.io/instance: {{ .Release.Name }}\n{{- end }}\n\n{{/*\nCreate the name of the service account to use\n*/}}\n{{- define \"kminion.serviceAccountName\" -}}\n{{- if .Values.serviceAccount.create }}\n{{- default (include \"kminion.fullname\" .) .Values.serviceAccount.name }}\n{{- else }}\n{{- default \"default\" .Values.serviceAccount.name }}\n{{- end }}\n{{- end }}\n\n{{/*\nReturn the appropriate apiVersion for ingress.\n*/}}\n{{- define \"kminion.ingress.apiVersion\" -}}\n{{- if and ($.Capabilities.APIVersions.Has \"networking.k8s.io/v1\") (semverCompare \">= 1.19-0\" .Capabilities.KubeVersion.Version) }}\n{{- print \"networking.k8s.io/v1\" }}\n{{- else if $.Capabilities.APIVersions.Has \"networking.k8s.io/v1beta1\" }}\n{{- print \"networking.k8s.io/v1beta1\" }}\n{{- else }}\n{{- print \"extensions/v1beta1\" }}\n{{- end }}\n{{- end }}\n{{/*\nReturn if ingress is stable.\n*/}}\n{{- define \"kminion.ingress.isStable\" -}}\n{{- eq (include \"kminion.ingress.apiVersion\" .) \"networking.k8s.io/v1\" }}\n{{- end }}\n{{/*\nReturn if ingress supports ingressClassName.\n*/}}\n{{- define \"kminion.ingress.supportsIngressClassName\" -}}\n{{- or (eq (include \"kminion.ingress.isStable\" .) \"true\") (and (eq (include \"kminion.ingress.apiVersion\" .) \"networking.k8s.io/v1beta1\") (semverCompare \">= 1.18-0\" .Capabilities.KubeVersion.Version)) }}\n{{- end }}\n\n{{/*\nReturn if ingress supports pathType.\n*/}}\n{{- define \"kminion.ingress.supportsPathType\" -}}\n{{- or (eq (include \"kminion.ingress.isStable\" .) \"true\") (and (eq (include \"kminion.ingress.apiVersion\" .) 
\"networking.k8s.io/v1beta1\") (semverCompare \">= 1.18-0\" .Capabilities.KubeVersion.Version)) }}\n{{- end }}\n\n{{/*\nReturn the appropriate apiVersion for podDisruptionBudget.\n*/}}\n{{- define \"kminion.podDisruptionBudget.apiVersion\" -}}\n{{- if $.Capabilities.APIVersions.Has \"policy/v1/PodDisruptionBudget\" }}\n{{- print \"policy/v1\" }}\n{{- else }}\n{{- print \"policy/v1beta1\" }}\n{{- end }}\n{{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/configmap.yaml",
    "content": "apiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: {{include \"kminion.fullname\" .}}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4}}\ndata:\n  config.yaml: |\n    {{- toYaml .Values.kminion.config | nindent 4}}\n"
  },
  {
    "path": "charts/kminion/templates/daemonset.yaml",
    "content": "{{- if .Values.daemonset.enabled }}\napiVersion: apps/v1\nkind: DaemonSet\nmetadata:\n  name: {{include \"kminion.fullname\" .}}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4}}\nspec:\n  updateStrategy:\n    type: OnDelete\n  selector:\n    matchLabels:\n      {{- include \"kminion.selectorLabels\" . | nindent 6}}\n  template:\n    metadata:\n      {{- with .Values.podAnnotations}}\n      annotations:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      labels:\n        {{- include \"kminion.selectorLabels\" . | nindent 8}}\n        {{- if .Values.customLabels}}\n        {{toYaml .Values.customLabels | nindent 8}}\n        {{- end}}\n    spec:\n      {{- with .Values.imagePullSecrets}}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      securityContext:\n        {{- toYaml .Values.podSecurityContext | nindent 8}}\n      serviceAccountName: {{ .Values.serviceAccount.name }}\n      volumes:\n        - name: config\n          configMap:\n            name: {{include \"kminion.fullname\" .}}\n        {{- range .Values.deployment.volumes.secrets}}\n        - name: {{.secretName}}\n          secret:\n            secretName: {{.secretName}}\n        {{- end}}\n      containers:\n        - name: {{.Chart.Name}}\n          securityContext:\n            {{- toYaml .Values.securityContext | nindent 12}}\n          image: \"{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}\"\n          imagePullPolicy: {{.Values.image.pullPolicy}}\n          ports:\n            - name: metrics\n              containerPort: {{.Values.service.port}}\n              protocol: TCP\n          env:\n            - name: POD_NAME\n              valueFrom:\n                fieldRef:\n                  fieldPath: metadata.name\n            - name: POD_NAMESPACE\n              valueFrom:\n                fieldRef:\n                  fieldPath: metadata.namespace\n            - name: CONFIG_FILEPATH\n              value: /etc/kminion/config.yaml\n            {{- range .Values.deployment.env.values}}\n            - name: {{.name}}\n              value: {{.value | quote}}\n            {{- end}}\n            {{- range .Values.deployment.env.secretKeyRefs}}\n            - name: {{.name}}\n              valueFrom:\n                secretKeyRef:\n                  name: {{.secretName}}\n                  key: {{.secretKey}}\n            {{- end}}\n            {{- range .Values.deployment.env.configMapKeyRefs}}\n            - name: {{.name}}\n              valueFrom:\n                configMapKeyRef:\n                  name: {{.configMapName}}\n                  key: {{.configMapKey}}\n            {{- end}}\n          volumeMounts:\n            - name: config\n              mountPath: /etc/kminion\n            {{- range .Values.deployment.volumes.secrets}}\n            - name: {{.secretName}}\n              mountPath: {{.mountPath}}\n            {{- end}}\n          resources:\n            {{- toYaml .Values.resources | nindent 12}}\n          livenessProbe:\n            failureThreshold: 3\n            httpGet:\n              path: /ready\n              port: metrics\n              scheme: HTTP\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 1\n          readinessProbe:\n            failureThreshold: 3\n            httpGet:\n              path: /ready\n              port: metrics\n              scheme: HTTP\n 
           periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 1\n      {{- with .Values.affinity}}\n      affinity:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      {{- with .Values.tolerations}}\n      tolerations:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n{{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/deployment.yaml",
    "content": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{include \"kminion.fullname\" .}}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4}}\n    {{- with .Values.deployment.labels}}\n    {{- toYaml . | nindent 4}}\n    {{- end}}\n  {{- with .Values.deployment.annotations}}\n  annotations:\n    {{- toYaml . | nindent 4}}\n  {{- end}}\nspec:\n  {{- if not .Values.autoscaling.enabled}}\n  replicas: {{.Values.replicaCount}}\n  {{- end}}\n  selector:\n    matchLabels:\n      {{- include \"kminion.selectorLabels\" . | nindent 6}}\n  template:\n    metadata:\n      {{- with .Values.podAnnotations}}\n      annotations:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      labels:\n        {{- include \"kminion.selectorLabels\" . | nindent 8}}\n        {{- if .Values.customLabels}}\n        {{toYaml .Values.customLabels | nindent 8}}\n        {{- end}}\n    spec:\n      {{- with .Values.imagePullSecrets}}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      serviceAccountName: {{include \"kminion.serviceAccountName\" .}}\n      securityContext:\n        {{- toYaml .Values.podSecurityContext | nindent 8}}\n      volumes:\n        - name: config\n          configMap:\n            name: {{include \"kminion.fullname\" .}}\n        {{- range .Values.deployment.volumes.secrets}}\n        - name: {{.secretName}}\n          secret:\n            secretName: {{.secretName}}\n        {{- end}}\n        {{- with .Values.deployment.volumes.extra }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n      initContainers:\n      {{- with .Values.deployment.initContainers }}\n      {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: {{.Chart.Name}}\n          securityContext:\n            {{- toYaml .Values.securityContext | nindent 12}}\n          image: \"{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}\"\n          imagePullPolicy: {{.Values.image.pullPolicy}}\n          ports:\n            - name: metrics\n              containerPort: {{.Values.service.port}}\n              protocol: TCP\n          env:\n            - name: CONFIG_FILEPATH\n              value: /etc/kminion/config.yaml\n            {{- range .Values.deployment.env.values}}\n            - name: {{.name}}\n              value: {{.value | quote}}\n            {{- end}}\n            {{- range .Values.deployment.env.secretKeyRefs}}\n            - name: {{.name}}\n              valueFrom:\n                secretKeyRef:\n                  name: {{.secretName}}\n                  key: {{.secretKey}}\n            {{- end}}\n            {{- range .Values.deployment.env.configMapKeyRefs}}\n            - name: {{.name}}\n              valueFrom:\n                configMapKeyRef:\n                  name: {{.configMapName}}\n                  key: {{.configMapKey}}\n            {{- end}}\n          volumeMounts:\n            - name: config\n              mountPath: /etc/kminion\n            {{- range .Values.deployment.volumes.secrets}}\n            - name: {{.secretName}}\n              mountPath: {{.mountPath}}\n            {{- end}}\n          resources:\n            {{- toYaml .Values.resources | nindent 12}}\n          {{- if .Values.deployment.readinessProbe.enabled }}\n          readinessProbe:\n            httpGet:\n              path: /ready\n              port: {{.Values.service.port}}\n            initialDelaySeconds: 10\n          {{- end }}\n        {{- with 
.Values.deployment.extraContainers }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n      {{- with .Values.nodeSelector}}\n      nodeSelector:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      {{- with .Values.affinity}}\n      affinity:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n      {{- with .Values.tolerations}}\n      tolerations:\n        {{- toYaml . | nindent 8}}\n      {{- end}}\n"
  },
  {
    "path": "charts/kminion/templates/hpa.yaml",
    "content": "{{- if .Values.autoscaling.enabled }}\napiVersion: {{ ternary \"autoscaling/v2\" \"autoscaling/v2beta1\" (.Capabilities.APIVersions.Has \"autoscaling/v2\") }}\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"kminion.fullname\" . }}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"kminion.fullname\" . }}\n  minReplicas: {{ .Values.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        {{- if .Capabilities.APIVersions.Has \"autoscaling/v2\" }}\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}\n        {{ else }}\n        targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}\n        {{- end }}\n    {{- end }}\n    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        {{- if .Capabilities.APIVersions.Has \"autoscaling/v2\" }}\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}\n        {{ else }}\n        targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}\n        {{- end }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/ingress.yaml",
    "content": "{{- if .Values.ingress.enabled -}}\n{{- $fullName := include \"kminion.fullname\" . -}}\n{{- $svcPort := .Values.service.port -}}\n{{- $ingressApiIsStable := eq (include \"kminion.ingress.isStable\" .) \"true\" -}}\n{{- $ingressSupportsIngressClassName := eq (include \"kminion.ingress.supportsIngressClassName\" .) \"true\" -}}\n{{- $ingressSupportsPathType := eq (include \"kminion.ingress.supportsPathType\" .) \"true\" -}}\n{{- $fullName := include \"kminion.fullname\" . -}}\n{{- $servicePort := .Values.service.port -}}\n{{- $ingressPath := .Values.ingress.path -}}\n{{- $ingressPathType := .Values.ingress.pathType -}}\n{{- $extraPaths := .Values.ingress.extraPaths -}}\n\napiVersion: {{ include \"kminion.ingress.apiVersion\" . }}\nkind: Ingress\nmetadata:\n  name: {{ $fullName }}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4 }}\n  {{- with .Values.ingress.annotations }}\n  annotations:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\nspec:\n  {{- if and $ingressSupportsIngressClassName .Values.ingress.ingressClassName }}\n  ingressClassName: {{ .Values.ingress.ingressClassName }}\n  {{- end -}}\n  {{- with .Values.ingress.tls }}\n  tls:\n    {{- tpl (toYaml .) $ | nindent 4 }}\n  {{- end }}\n  rules:\n  {{- if .Values.ingress.hosts  }}\n  {{- range .Values.ingress.hosts }}\n    - host: {{ tpl . $ }}\n      http:\n        paths:\n          {{- with $extraPaths }}\n          {{- toYaml . | nindent 10 }}\n          {{- end }}\n          - path: {{ $ingressPath }}\n            {{- if $ingressSupportsPathType }}\n            pathType: {{ $ingressPathType }}\n            {{- end }}\n            backend:\n              {{- if $ingressApiIsStable }}\n              service:\n                name: {{ $fullName }}\n                port:\n                  number: {{ $servicePort }}\n              {{- else }}\n              serviceName: {{ $fullName }}\n              servicePort: {{ $servicePort }}\n              {{- end }}\n  {{- end }}\n  {{- else }}\n    - http:\n        paths:\n          - backend:\n              {{- if $ingressApiIsStable }}\n              service:\n                name: {{ $fullName }}\n                port:\n                  number: {{ $servicePort }}\n              {{- else }}\n              serviceName: {{ $fullName }}\n              servicePort: {{ $servicePort }}\n              {{- end }}\n            {{- with $ingressPath }}\n            path: {{ . }}\n            {{- end }}\n            {{- if $ingressSupportsPathType }}\n            pathType: {{ $ingressPathType }}\n            {{- end }}\n  {{- end -}}\n  {{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/poddisruptionbudget.yaml",
    "content": "{{- if .Values.podDisruptionBudget }}\napiVersion: {{ include \"kminion.podDisruptionBudget.apiVersion\" . }}\nkind: PodDisruptionBudget\nmetadata:\n  name: {{ template \"kminion.fullname\" . }}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4}}\nspec:\n  {{- if .Values.podDisruptionBudget.minAvailable }}\n  minAvailable: {{ .Values.podDisruptionBudget.minAvailable }}\n  {{- end }}\n  {{- if .Values.podDisruptionBudget.maxUnavailable }}\n  maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"kminion.selectorLabels\" . | nindent 6}}\n{{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/service.yaml",
    "content": "apiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"kminion.fullname\" . }}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4 }}\n  {{- if .Values.service.annotations }}\n  annotations:\n    {{- toYaml .Values.service.annotations | nindent 4 }}\n  {{- end }}\nspec:\n  type: {{ .Values.service.type }}\n  ports:\n    - port: {{ .Values.service.port }}\n      targetPort: metrics\n      protocol: TCP\n      name: metrics\n  {{- if .Values.service.extraPorts }}\n    {{- toYaml .Values.service.extraPorts | nindent 4 }}\n  {{- end }}\n  selector:\n    {{- include \"kminion.selectorLabels\" . | nindent 4 }}\n"
  },
  {
    "path": "charts/kminion/templates/serviceaccount.yaml",
    "content": "{{- if .Values.serviceAccount.create -}}\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: {{ include \"kminion.serviceAccountName\" . }}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4 }}\n  {{- with .Values.serviceAccount.annotations }}\n  annotations:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\n{{- end }}\n"
  },
  {
    "path": "charts/kminion/templates/servicemonitor.yaml",
    "content": "{{- if .Values.serviceMonitor.create }}\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: {{include \"kminion.fullname\" .}}\n  namespace: {{ .Release.Namespace | quote }}\n  labels:\n    {{- include \"kminion.labels\" . | nindent 4}}\n    {{- if .Values.serviceMonitor.additionalLabels}}\n    {{toYaml .Values.serviceMonitor.additionalLabels | nindent 4}}\n    {{- end}}\nspec:\n  selector:\n    matchLabels:\n      {{- include \"kminion.labels\" . | nindent 6}}\n  endpoints:\n    - port: metrics\n      path: /metrics\n      honorLabels: {{ .Values.serviceMonitor.honorLabels }}\n      scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}\n      interval: {{ .Values.serviceMonitor.interval }}\n      {{- if .Values.serviceMonitor.relabelings }}\n      relabelings:\n      {{ toYaml .Values.serviceMonitor.relabelings | nindent 6 }}\n      {{- end }}\n  {{- if .Values.serviceMonitor.targetLabels}}\n  targetLabels:\n    {{- toYaml .Values.serviceMonitor.targetLabels | nindent 4}}\n  {{- end}}\n  {{- if .Values.customLabels }}\n  podTargetLabels:\n    {{- (keys .Values.customLabels | sortAlpha) | toYaml | nindent 4 }}\n  {{- end}}\n {{- end }}\n"
  },
  {
    "path": "charts/kminion/values.yaml",
    "content": "# Default values for kminion.\n# This is a YAML-formatted file.\n# Declare variables to be passed into your templates.\n\nreplicaCount: 1\n\nimage:\n  repository: redpandadata/kminion\n  pullPolicy: IfNotPresent\n  # Overrides the image tag whose default is the chart appVersion.\n  tag: \"\"\n\nimagePullSecrets: []\nnameOverride: \"\"\nfullnameOverride: \"\"\n\nserviceAccount:\n  # Specifies whether a service account should be created\n  create: true\n  # Annotations to add to the service account\n  annotations: {}\n  # The name of the service account to use.\n  # If not set and create is true, a name is generated using the fullname template\n  name: \"\"\n\npodAnnotations: {}\n#  prometheus.io/scrape: \"true\"\n#  prometheus.io/port: \"8080\"\n#  prometheus.io/path: \"/metrics\"\n\npodSecurityContext:\n  runAsUser: 99\n  fsGroup: 99\n\n## See `kubectl explain poddisruptionbudget.spec` for more\n## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/\npodDisruptionBudget:\n  maxUnavailable: 1\n  # minAvailable: 1\n\nsecurityContext:\n  allowPrivilegeEscalation: false\n# capabilities:\n#   drop:\n#   - ALL\n# readOnlyRootFilesystem: true\n# runAsNonRoot: true\n# runAsUser: 1000\n\nservice:\n  type: ClusterIP\n  port: 8080 # This port is also used as exposed container port\n  annotations: {} # # Annotations to add to the service\n  extraPorts: [] # when []extraContainers expose additional metrics, make\n                 # discoverable for servicemontors\n    # - port: 8443\n    #   targetPort: 8443\n    #   protocol: TCP\n    #   name: expose-x509-for-ttl-checks\n\n\ningress:\n  enabled: false\n  # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName\n  # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress\n  # ingressClassName: nginx\n  # Values can be templated\n  annotations: {}\n    # kubernetes.io/ingress.class: nginx\n    # kubernetes.io/tls-acme: \"true\"\n  labels: {}\n  path: /\n\n  # pathType is only for k8s >= 1.1=\n  pathType: Prefix\n\n  hosts:\n    - chart-example.local\n\n  ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.\n  extraPaths: []\n  # - path: /*\n  #   backend:\n  #     serviceName: ssl-redirect\n  #     servicePort: use-annotation\n  ## Or for k8s > 1.19\n  # - path: /*\n  #   pathType: Prefix\n  #   backend:\n  #     service:\n  #       name: ssl-redirect\n  #       port:\n  #         name: use-annotation\n\n  tls: []\n  #  - secretName: chart-example-tls\n  #    hosts:\n  #      - chart-example.local\n  # ingressClassName:\n\nresources: {}\n# We usually recommend not to specify default resources and to leave this as a conscious\n# choice for the user. This also increases chances charts run on environments with little\n# resources, such as Minikube. 
If you do want to specify resources, uncomment the following\n# lines, adjust them as necessary, and remove the curly braces after 'resources:'.\n# limits:\n#   cpu: 100m\n#   memory: 128Mi\n# requests:\n#   cpu: 100m\n#   memory: 128Mi\n\nautoscaling:\n  enabled: false\n  minReplicas: 1\n  maxReplicas: 100\n  targetCPUUtilizationPercentage: 80\n  # targetMemoryUtilizationPercentage: 80\n\nnodeSelector: {}\n\ntolerations: []\n\naffinity: {}\n\ncustomLabels: {}\n\nserviceMonitor:\n  create: false\n  additionalLabels: {}\n  honorLabels: false\n  scrapeTimeout: 10s\n  interval: 15s\n  relabelings: []\n  # - sourceLabels: [__meta_kubernetes_pod_label_my_label]\n  #   separator: ;\n  #   regex: (.*)\n  #   targetLabel: my_label\n  #   replacement: $1\n  #   action: replace\n\n# For DaemonSet mode you may set daemonset.enabled to true and replicaCount to 0.\ndaemonset:\n  enabled: false\n\ndeployment:\n  readinessProbe:\n    enabled: true\n\n  labels: {}\n  # Annotations to add to the Deployment resource\n  annotations: {}\n  volumes:\n    # Mount files from Kubernetes secrets into the container\n    secrets: []\n    # - secretName: vault-tls\n    #   mountPath: /vault/tls\n    extra: []\n    # - name: example\n    #   emptyDir: {}\n\n  # If you want to provide specific config settings, like sensitive Kafka credentials, via environment variables you can\n  # do so by making them available here. See the kminion reference config to figure out the expected variable names.\n  env:\n    # Configure environment variables which you want to make available\n    values: []\n    # - name: KAFKA_SASL_MECHANISM\n    #   value: PLAIN\n\n    # Configure environment variables which you want to make available from a config map\n    configMapKeyRefs: []\n    # - name: KAFKA_SASL_USERNAME\n    #   configMapName: kafka-user-team-xy\n    #   key: username\n\n    # Configure environment variables which you want to make available from a secret\n    secretKeyRefs: []\n    # - name: KAFKA_SASL_PASSWORD\n    #   secretName: kafka-credentials-team-xy\n    #   secretKey: password\n\n  # Add additional containers, e.g. for oauth2-proxy\n  extraContainers: {}\n  # Add additional init containers, e.g. for oauth2-proxy\n  initContainers: {}\n\nkminion:\n  # KMinion can be configured using environment variables and/or a YAML config. The YAML contents under config will\n  # end up in a YAML file which will be mounted into the kminion container.\n  # See reference config: https://github.com/cloudhut/kminion/blob/master/docs/reference-config.yaml\n  config: {}\n#    kafka:\n#      brokers: [ ]\n#      clientId: \"kminion\"\n#      rackId: \"\"\n#      tls:\n#        enabled: false\n#        caFilepath: \"\"\n#        certFilepath: \"\"\n#        keyFilepath: \"\"\n#        passphrase: \"\"\n#        insecureSkipTlsVerify: false\n#      sasl:\n#        # Whether SASL authentication will be used\n#        enabled: false\n#        # Username to use for PLAIN or SCRAM mechanism\n#        username: \"\"\n#        # Password to use for PLAIN or SCRAM mechanism\n#        password: \"\"\n#        # Mechanism to use for SASL Authentication. 
Valid values are PLAIN, SCRAM-SHA-256, SCRAM-SHA-512, GSSAPI\n#        mechanism: \"PLAIN\"\n#        # GSSAPI / Kerberos config properties\n#        gssapi:\n#          authType: \"\"\n#          keyTabPath: \"\"\n#          kerberosConfigPath: \"\"\n#          serviceName: \"\"\n#          username: \"\"\n#          password: \"\"\n#          realm: \"\"\n#      # Whether to retry the initial test connection to Kafka. False will exit with code 1 on error,\n#      # while true will retry until success.\n#      retryInitConnection: false\n#\n#    minion:\n#      consumerGroups:\n#        # Enabled specifies whether consumer groups shall be scraped and exported or not.\n#        enabled: true\n#        # Mode specifies whether we export consumer group offsets using the Admin API or by consuming the internal\n#        # __consumer_offsets topic. Both modes have their advantages and disadvantages.\n#        scrapeMode: adminApi # Valid values: adminApi, offsetsTopic\n#        # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and\n#        # you aren't interested in per partition lags you could choose \"topic\" where all partition lags will be summed\n#        # and only topic lags will be exported.\n#        granularity: partition\n#        # AllowedGroups are regex strings of group ids that shall be exported\n#        # You can specify allowed groups by providing literals like \"my-consumergroup-name\" or by providing regex expressions\n#        # like \"/internal-.*/\".\n#        allowedGroups: [ ]\n#        # IgnoredGroups are regex strings of group ids that shall be ignored/skipped when exporting metrics. Ignored groups\n#        # take precedence over allowed groups.\n#        ignoredGroups: [ ]\n#      topics:\n#        # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and\n#        # you aren't interested in per partition metrics you could choose \"topic\".\n#        granularity: partition\n#        # AllowedTopics are regex strings of topic names whose topic metrics shall be exported.\n#        # You can specify allowed topics by providing literals like \"my-topic-name\" or by providing regex expressions\n#        # like \"/internal-.*/\".\n#        allowedTopics: [ ]\n#\n#        # IgnoredTopics are regex strings of topic names that shall be ignored/skipped when exporting metrics. Ignored topics\n#        # take precedence over allowed topics.\n#        ignoredTopics: [ ]\n#        # infoMetric is a configuration object for the kminion_kafka_topic_info metric\n#        infoMetric:\n#          # ConfigKeys are a set of strings of topic configs that you want to have exported as part of the metric\n#          configKeys: [\"cleanup.policy\"]\n#      logDirs:\n#        # Enabled specifies whether log dirs shall be scraped and exported or not. This should be disabled for clusters prior\n#        # to version 1.0.0 as describing log dirs was not supported back then.\n#        enabled: true\n#\n#    exporter:\n#      # Namespace is the prefix for all exported Prometheus metrics\n#      namespace: \"kminion\"\n#      # Host that shall be used to bind the HTTP server on\n#      host: \"\"\n#      # Port that shall be used to bind the HTTP server on\n#      port: 8080\n#\n#    logger:\n#      # Level is a logging priority. Higher levels are more important. Valid values are: debug, info, warn, error, fatal, panic\n#      level: info\n"
  },
  {
    "path": "config.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n\n\t\"github.com/cloudhut/kminion/v2/kafka\"\n\t\"github.com/cloudhut/kminion/v2/logging\"\n\t\"github.com/cloudhut/kminion/v2/minion\"\n\t\"github.com/cloudhut/kminion/v2/prometheus\"\n\t\"github.com/knadh/koanf\"\n\t\"github.com/knadh/koanf/parsers/yaml\"\n\t\"github.com/knadh/koanf/providers/env\"\n\t\"github.com/knadh/koanf/providers/file\"\n\t\"github.com/mitchellh/mapstructure\"\n\t\"go.uber.org/zap\"\n)\n\ntype Config struct {\n\tKafka    kafka.Config      `koanf:\"kafka\"`\n\tMinion   minion.Config     `koanf:\"minion\"`\n\tExporter prometheus.Config `koanf:\"exporter\"`\n\tLogger   logging.Config    `koanf:\"logger\"`\n}\n\nfunc (c *Config) SetDefaults() {\n\tc.Kafka.SetDefaults()\n\tc.Minion.SetDefaults()\n\tc.Exporter.SetDefaults()\n\tc.Logger.SetDefaults()\n}\n\nfunc (c *Config) Validate() error {\n\terr := c.Kafka.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate kafka config: %w\", err)\n\t}\n\n\terr = c.Minion.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate minion config: %w\", err)\n\t}\n\n\terr = c.Logger.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate logger config: %w\", err)\n\t}\n\n\treturn nil\n}\n\nfunc newConfig(logger *zap.Logger) (Config, error) {\n\tk := koanf.New(\".\")\n\tvar cfg Config\n\tcfg.SetDefaults()\n\n\t// 1. Check if a config filepath is set via flags. If there is one we'll try to load the file using a YAML Parser\n\tenvKey := \"CONFIG_FILEPATH\"\n\tconfigFilepath := os.Getenv(envKey)\n\tif configFilepath == \"\" {\n\t\tlogger.Info(\"the env variable '\" + envKey + \"' is not set, therefore no YAML config will be loaded\")\n\t} else {\n\t\terr := k.Load(file.Provider(configFilepath), yaml.Parser())\n\t\tif err != nil {\n\t\t\treturn Config{}, fmt.Errorf(\"failed to parse YAML config: %w\", err)\n\t\t}\n\t}\n\n\t// We could unmarshal the loaded koanf input after loading both providers, however we want to unmarshal the YAML\n\t// config with `ErrorUnused` set to true, but unmarshal environment variables with `ErrorUnused` set to false (default).\n\t// Rationale: Orchestrators like Kubernetes inject unrelated environment variables, which we still want to allow.\n\terr := k.UnmarshalWithConf(\"\", &cfg, koanf.UnmarshalConf{\n\t\tTag:       \"\",\n\t\tFlatPaths: false,\n\t\tDecoderConfig: &mapstructure.DecoderConfig{\n\t\t\tDecodeHook: mapstructure.ComposeDecodeHookFunc(\n\t\t\t\tmapstructure.StringToTimeDurationHookFunc()),\n\t\t\tMetadata:         nil,\n\t\t\tResult:           &cfg,\n\t\t\tWeaklyTypedInput: true,\n\t\t\tErrorUnused:      true,\n\t\t},\n\t})\n\tif err != nil {\n\t\treturn Config{}, err\n\t}\n\n\terr = k.Load(env.ProviderWithValue(\"\", \".\", func(s string, v string) (string, interface{}) {\n\t\t// key := strings.Replace(strings.ToLower(s), \"_\", \".\", -1)\n\t\tkey := strings.Replace(strings.ToLower(s), \"_\", \".\", -1)\n\t\t// Check to exist if we have a configuration option already and see if it's a slice\n\t\t// If there is a comma in the value, split the value into a slice by the comma.\n\t\tif strings.Contains(v, \",\") {\n\t\t\treturn key, strings.Split(v, \",\")\n\t\t}\n\n\t\t// Otherwise return the new key with the unaltered value\n\t\treturn key, v\n\t}), nil)\n\tif err != nil {\n\t\treturn Config{}, err\n\t}\n\n\terr = k.Unmarshal(\"\", &cfg)\n\tif err != nil {\n\t\treturn Config{}, err\n\t}\n\n\terr = cfg.Validate()\n\tif err != nil {\n\t\treturn Config{}, 
fmt.Errorf(\"failed to validate config: %w\", err)\n\t}\n\n\treturn cfg, nil\n}\n"
  },
  {
    "path": "docker-compose.yml",
    "content": "---\nversion: '2.1'\n\nservices:\n\n  zookeeper:\n    image: confluentinc/cp-zookeeper:latest\n    ports:\n      - 2181:2181\n    environment:\n      ZOOKEEPER_CLIENT_PORT: 2181\n      ZOOKEEPER_TICK_TIME: 2000\n    container_name: zookeeper\n    hostname: zookeeper\n\n  kafka:\n    image: confluentinc/cp-kafka:latest\n    hostname: kafka\n    container_name: kafka\n    depends_on:\n      - zookeeper\n    ports:\n      - 9092:9092\n    environment:\n      KAFKA_BROKER_ID: 1\n      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181\n      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092\n      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT\n      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT\n      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1\n      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1\n      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1\n\n  kafka-minion:\n    build:\n      context: .\n      dockerfile: ./Dockerfile\n    hostname: kafka-minion\n    container_name: kafka-minion\n    depends_on:\n      - zookeeper\n      - kafka\n    ports:\n      - 8080:8080\n    environment:\n      KAFKA_BROKERS: kafka:29092\n    restart: unless-stopped"
  },
  {
    "path": "docs/end-to-end.md",
    "content": "# End-To-End Monitoring\n\nThis page describes the end-to-end monitoring feature in KMinion, how it works, and what metrics it provides.\n\n## Motivation\n\n> What is the issue? Why did we build this feature?\n\nWe can monitor metrics like CPU usage, free disk space, or even consumer group lag. However, these metrics don't give us\na good idea of the performance characteristics an actual, real-world, client experiences when connected to the cluster.\n\nWith the \"classic\" metrics lots of questions go unanswered:\n\n- Can a client produce messages to the cluster?\n- Can clients produce & consume messages as well as commit group offsets with an acceptable latency?\n- Is the cluster in a healthy state from a client's perspective?\n\n## Approach & Implementation\n\n> How do we solve those issues? How does the feature work?\n\nThe most reliably way to get real-world performance and availability metrics is to actually run a producer/consumer\nourselves. This is exactly what the end-to-end monitoring feature does!\n\n## High Level Overview\n\nIn order to determine if the cluster is fully operational, and it's performance is within acceptable limits, KMinion\ncontinuously produces and consumes messages to/from the cluster. That way we can measure things like ack-latency,\ncommit-latency, and roundtrip-time.\n\nKMinion creates and manages its own topic for the end-to-end test messages. The name of the topic can be configured.\n\n**The first step** is to create a message and send it to the cluster.\n\n- Every produced message is added to an internal tracker, so we can recognize messages being \"lost\". A message is\n  considered lost if it doesn't arrive back at the consumer within the configured time span.\n\n**The second step** is to continuously consume the topic.\n\n- As each message arrives, we calculate its roundtrip time (time from the point the message was created, until KMinion\n  received it again)\n- Consumer group offsets are committed periodically, while also recording the time each commit takes.\n\n### Topic Management\n\nThe topic KMinion uses, is created and managed completely automatically (the topic name can be configured though).\n\nKMinion continuously checks the topic and fixes issues/imbalances automatically:\n\n- Add partitions to the topic, so it has at least as many partitions as there are brokers.\n- Will reassign partitions to ensure every broker leads at least one partition, and that all partitions' replicas are\n  distributed evenly across the brokers. KMinion tries to assign partitionIDs to brokers that have the same broker id.\n\n### Consumer Group Management\n\nOn startup each KMinion instance generates a unique identifier (UUID) that is used to create its own consumer group. It\nincorporates the shared prefix from the config.\n\nThat is necessary because:\n\n- Offsets must not be shared among multiple instances.\n- Each instance must always consume **all** partitions of the topic.\n\nThe instances' UUID is also embedded in every message, so each instance can easily filter out messages it didn't\nproduce. That's why it is perfectly fine to run multiple KMinion instances against the same cluster, using the same\ntopic.\n\nKMinion also monitors and deletes consumer groups that use it's configured prefix. 
That way, when an instance\nexits/restarts, previous consumer groups will be cleaned up quickly (check happens every 20s).\n\n## Available Metrics\n\nThe end-to-end monitoring feature exports the following metrics.\n\n### Counters\n\n| Name | Description |\n| --- | --- |\n| `kminion_end_to_end_messages_produced_total ` | Messages KMinion *tried* to send |\n| `kminion_end_to_end_messages_received_total ` | Number of messages received (only counts those that match, i.e. that this instance actually produced itself) |\n| `kminion_end_to_end_offset_commits_total` | Number of successful offset commits |\n| `kminion_end_to_end_messages_lost_total` Number of messages that have been produced successfully but not received within the configured SLA duration |\n| `kminion_end_to_end_messages_produced_failed_total` Number of messages failed to produce to Kafka because of a timeout or failure |\n| `kminion_end_to_end_offset_commits_total` Counts how many times kminions end-to-end test has committed offsets |\n\n### Histograms\n\n| Name | Description |\n| --- | --- |\n| `kminion_end_to_end_produce_latency_seconds ` | Duration until the cluster acknowledged a message.  |\n| `kminion_end_to_end_offset_commit_latency_seconds` Time kafka took to respond to kminion's offset commit |\n| `kminion_end_to_end_roundtrip_latency_seconds ` | Duration from creation of a message, until it was received/consumed again. |\n\n### Gauges\n| Name | Description |\n| --- | --- |\n| `kminion_end_to_end_messages_produced_in_flight` Number of messages that kminion's end-to-end test produced but has not received an answer for yet |\n\n## Config Properties\n\nAll config properties related to this feature are located in `minion.endToEnd`.\n\n```yaml\n  endToEnd:\n    enabled: true\n    probeInterval: 800ms # how often to send end-to-end test messages\n    topicManagement:\n      # You can disable topic management, without disabling the testing feature.\n      # Only makes sense if you have multiple kminion instances, and for some reason only want one of them to create/configure the topic.\n      # It is strongly recommended to leave this enabled.\n      enabled: true\n\n      # Name of the topic kminion uses to send its test messages\n      # You do *not* need to change this if you are running multiple kminion instances on the same cluster.\n      # Different instances are perfectly fine with sharing the same topic!\n      name: kminion-end-to-end\n\n      # How often kminion checks its topic to validate configuration, partition count, and partition assignments\n      reconciliationInterval: 10m\n\n      # Useful for monitoring the performance of acks (if >1 this is best combined with 'producer.requiredAcks' set to 'all')\n      replicationFactor: 1\n\n      # Rarely makes sense to change this, but maybe if you want some sort of cheap load test?\n      partitionsPerBroker: 1\n\n    producer:\n      # This defines the maximum time to wait for an ack response after producing a message,\n      # and the upper bound for histogram buckets in \"produce_latency_seconds\"\n      ackSla: 5s\n      # Can be to \"all\" (default) so kafka only reports an end-to-end test message as acknowledged if\n      # the message was written to all in-sync replicas of the partition.\n      # Or can be set to \"leader\" to only require to have written the message to its log.\n      requiredAcks: all\n\n    consumer:\n      # Prefix kminion uses when creating its consumer groups. 
Current kminion instance id will be appended automatically\n      groupIdPrefix: kminion-end-to-end\n\n      # Whether KMinion should try to delete empty consumer groups with the same prefix. This can be used if you want\n      # KMinion to cleanup it's old consumer groups. It should only be used if you use a unique prefix for KMinion.\n      deleteStaleConsumerGroups: false\n\n      # Defines the time limit beyond which a message is considered \"lost\" (failed the roundtrip),\n      # also used as the upper bound for histogram buckets in \"roundtrip_latency\"\n      roundtripSla: 20s\n\n      # Maximum time an offset commit is allowed to take before considering it failed,\n      # also used as the upper bound for histogram buckets in \"commit_latency_seconds\"\n      commitSla: 10s\n```\n\n"
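\n## Example Alerting Rule\n\nAs a usage sketch (not shipped with KMinion), the following Prometheus alerting rule fires when the end-to-end probe counts lost messages. The exact threshold and `for` duration are placeholders you should tune for your environment:\n\n```yaml\ngroups:\n  - name: kminion-end-to-end\n    rules:\n      - alert: KafkaEndToEndMessagesLost\n        # Fires if any probe message was counted as lost within the last 5 minutes\n        expr: sum(increase(kminion_end_to_end_messages_lost_total[5m])) > 0\n        for: 5m\n        labels:\n          severity: warning\n        annotations:\n          summary: \"KMinion end-to-end probe lost messages within the last 5 minutes\"\n```\n"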
  },
  {
    "path": "docs/metrics.md",
    "content": "# Exported Metrics\n\nThis document lists all exported metrics in an exemplary way.\n\n## Exporter Metrics\n\n```\n# HELP kminion_exporter_up Build info about this Prometheus Exporter. Gauge value is 0 if one or more scrapes have failed.\n# TYPE kminion_exporter_up gauge\nkminion_exporter_up{version=\"sha-0ab0dcdf862f7a34b06998cd2d980148e048151a\"} 1\n\n# HELP kminion_exporter_offset_consumer_records_consumed_total The number of offset records that have been consumed by the internal offset consumer\n# TYPE kminion_exporter_offset_consumer_records_consumed_total counter\nkminion_exporter_offset_consumer_records_consumed_total 5.058244883e+09\n```\n\n## Kafka Metrics\n\n### General / Cluster Metrics\n\n```\n# HELP kminion_kafka_broker_info Kafka broker information\n# TYPE kminion_kafka_broker_info gauge\nkminion_kafka_broker_info{address=\"broker-9.analytics-prod.kafka.cloudhut.dev\",broker_id=\"9\",is_controller=\"false\",port=\"9092\",rack_id=\"europe-west1-b\"} 1\n\n# HELP kminion_kafka_cluster_info Kafka cluster information\n# TYPE kminion_kafka_cluster_info gauge\nkminion_kafka_cluster_info{broker_count=\"12\",cluster_id=\"UYZJg8bhT_6SxhsdaQZEQ\",cluster_version=\"v2.6\",controller_id=\"6\"} 1\n```\n\n### Log Dir Metrics\n\n```\n# HELP kminion_kafka_broker_log_dir_size_total_bytes The summed size in bytes of all log dirs for a given broker\n# TYPE kminion_kafka_broker_log_dir_size_total_bytes gauge\nkminion_kafka_broker_log_dir_size_total_bytes{address=\"broker-9.analytics-prod.kafka.cloudhut.dev\",broker_id=\"9\",port=\"9092\",rack_id=\"europe-west1-b\"} 8.32654935115e+11\n\n# HELP kminion_kafka_topic_log_dir_size_total_bytes The summed size in bytes of partitions for a given topic. This includes the used space for replica partitions.\n# TYPE kminion_kafka_topic_log_dir_size_total_bytes gauge\nkminion_kafka_topic_log_dir_size_total_bytes{topic_name=\"__consumer_offsets\"} 9.026554258e+09\n```\n\n### Topic & Partition Metrics\n\n```\n# HELP kminion_kafka_topic_info Info labels for a given topic\n# TYPE kminion_kafka_topic_info gauge\nkminion_kafka_topic_info{cleanup_policy=\"compact\",partition_count=\"1\",replication_factor=\"1\",topic_name=\"_confluent-ksql-default__command_topic\"} 1\n\n# HELP kminion_kafka_topic_partition_low_water_mark Partition Low Water Mark\n# TYPE kminion_kafka_topic_partition_low_water_mark gauge\nkminion_kafka_topic_partition_low_water_mark{partition_id=\"0\",topic_name=\"__consumer_offsets\"} 0\n\n# HELP kminion_kafka_topic_low_water_mark_sum Sum of all the topic's partition low water marks\n# TYPE kminion_kafka_topic_low_water_mark_sum gauge\nkminion_kafka_topic_low_water_mark_sum{topic_name=\"__consumer_offsets\"} 0\n\n# HELP kminion_kafka_topic_partition_high_water_mark Partition High Water Mark\n# TYPE kminion_kafka_topic_partition_high_water_mark gauge\nkminion_kafka_topic_partition_high_water_mark{partition_id=\"0\",topic_name=\"__consumer_offsets\"} 2.04952001e+08\n\n# HELP kminion_kafka_topic_high_water_mark_sum Sum of all the topic's partition high water marks\n# TYPE kminion_kafka_topic_high_water_mark_sum gauge\nkminion_kafka_topic_high_water_mark_sum{topic_name=\"__consumer_offsets\"} 1.512023846873e+12\n```\n\n### Consumer Group Metrics\n\n```\n# HELP kminion_kafka_consumer_group_info Consumer Group info metrics. 
It will report 1 if the group is in the stable state, otherwise 0.\n# TYPE kminion_kafka_consumer_group_info gauge\nkminion_kafka_consumer_group_info{coordinator_id=\"0\",group_id=\"bigquery-sink\",protocol=\"range\",protocol_type=\"consumer\",state=\"Stable\"} 1\n\n# HELP kminion_kafka_consumer_group_members Consumer Group member count metrics. It will report the number of members in the consumer group\n# TYPE kminion_kafka_consumer_group_members gauge\nkminion_kafka_consumer_group_members{group_id=\"bigquery-sink\"} 2\n\n# HELP kminion_kafka_consumer_group_empty_members Consumer Group Empty Members. It will report the number of members in the consumer group with no partition assigned\n# TYPE kminion_kafka_consumer_group_empty_members gauge\nkminion_kafka_consumer_group_empty_members{group_id=\"bigquery-sink\"} 1\n\n# HELP kminion_kafka_consumer_group_topic_members Consumer Group topic member count metrics. It will report the number of members in the consumer group assigned on a given topic\n# TYPE kminion_kafka_consumer_group_topic_members gauge\nkminion_kafka_consumer_group_topic_members{group_id=\"bigquery-sink\",topic_name=\"shop-activity\"} 4\n\n# HELP kminion_kafka_consumer_group_topic_assigned_partitions Consumer Group topic partitions count metrics. It will report the number of partitions assigned in the consumer group for a given topic\n# TYPE kminion_kafka_consumer_group_topic_assigned_partitions gauge\nkminion_kafka_consumer_group_topic_assigned_partitions{group_id=\"bigquery-sink\",topic_name=\"shop-activity\"} 32\n\n# HELP kminion_kafka_consumer_group_topic_offset_sum The sum of all committed group offsets across all partitions in a topic\n# TYPE kminion_kafka_consumer_group_topic_offset_sum gauge\nkminion_kafka_consumer_group_topic_offset_sum{group_id=\"bigquery-sink\",topic_name=\"shop-activity\"} 4.259513e+06\n\n# HELP kminion_kafka_consumer_group_topic_partition_lag The number of messages a consumer group is lagging behind the latest offset of a partition\n# TYPE kminion_kafka_consumer_group_topic_partition_lag gauge\nkminion_kafka_consumer_group_topic_partition_lag{group_id=\"bigquery-sink\",partition_id=\"10\",topic_name=\"shop-activity\"} 147481\n\n# HELP kminion_kafka_consumer_group_topic_lag The number of messages a consumer group is lagging behind across all partitions in a topic\n# TYPE kminion_kafka_consumer_group_topic_lag gauge\nkminion_kafka_consumer_group_topic_lag{group_id=\"bigquery-sink\",topic_name=\"shop-activity\"} 147481\n\n# HELP kminion_kafka_consumer_group_offset_commits_total The number of offsets committed by a group\n# TYPE kminion_kafka_consumer_group_offset_commits_total counter\nkminion_kafka_consumer_group_offset_commits_total{group_id=\"bigquery-sink\"} 1098\n```\n\n### End-to-End Metrics\n\n```\n# HELP kminion_end_to_end_messages_produced_total Number of messages that kminion's end-to-end test has tried to send to kafka\n# TYPE kminion_end_to_end_messages_produced_total counter\nkminion_end_to_end_messages_produced_total 384\n\n# HELP kminion_end_to_end_offset_commits_total Counts how many times kminions end-to-end test has committed messages\n# TYPE kminion_end_to_end_offset_commits_total counter\nkminion_end_to_end_offset_commits_total 18\n\n# HELP kminion_end_to_end_messages_received_total Number of *matching* messages kminion received. Every roundtrip message has a minionID (randomly generated on startup) and a timestamp. 
Kminion only considers a message a match if it arrives within the configured roundtrip SLA (and it matches the minionID)\n# TYPE kminion_end_to_end_messages_received_total counter\nkminion_end_to_end_messages_received_total 383\n\n# HELP kminion_end_to_end_produce_latency_seconds Time until we received an ack for a produced message\n# TYPE kminion_end_to_end_produce_latency_seconds histogram\nkminion_end_to_end_produce_latency_seconds_bucket{partitionId=\"0\",le=\"0.005\"} 0\n\n# HELP kminion_end_to_end_offset_commit_latency_seconds Time kafka took to respond to kminion's offset commit\n# TYPE kminion_end_to_end_offset_commit_latency_seconds histogram\nkminion_end_to_end_offset_commit_latency_seconds_bucket{groupCoordinatorBrokerId=\"0\",le=\"0.005\"} 0\n\n# HELP kminion_end_to_end_roundtrip_latency_seconds Time it took between sending (producing) and receiving (consuming) a message\n# TYPE kminion_end_to_end_roundtrip_latency_seconds histogram\nkminion_end_to_end_roundtrip_latency_seconds_bucket{partitionId=\"0\",le=\"0.005\"} 0\n\n# HELP kminion_end_to_end_messages_lost_total Number of messages that have been produced successfully but not received within the configured SLA duration\n# TYPE kminion_end_to_end_messages_lost_total counter\nkminion_end_to_end_messages_lost_total{partition_id=\"0\"} 0\n\n# HELP kminion_end_to_end_messages_produced_failed_total Number of messages failed to produce to Kafka because of a timeout or failure\n# TYPE kminion_end_to_end_messages_produced_failed_total counter\nkminion_end_to_end_messages_produced_failed_total{partition_id=\"0\"} 0\n\n# HELP kminion_end_to_end_messages_produced_in_flight Number of messages that kminion's end-to-end test produced but has not received an answer for yet\n# TYPE kminion_end_to_end_messages_produced_in_flight gauge\nkminion_end_to_end_messages_produced_in_flight{partition_id=\"0\"} 0\n```\n
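\n## Example Scrape Config\n\nKMinion serves these metrics over HTTP on the exporter port (8080 by default, see `exporter.port`). A minimal Prometheus scrape config, assuming Prometheus can reach the instance under the hostname `kminion`, could look like this:\n\n```yaml\nscrape_configs:\n  - job_name: kminion\n    # The hostname is an assumption; point this at wherever KMinion is reachable\n    static_configs:\n      - targets: ['kminion:8080']\n```\n"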
  },
  {
    "path": "docs/reference-config.yaml",
    "content": "#####################################################################################\n# This file documents all the available config options and it's default values.\n#\n# All config options can be configured via environment variables as well.\n# If you specify both the env variable and yaml option for the same configuration\n# the environment variable will take precedence. If you want to use a YAML config\n# file, specify the path to the config file by setting the env variable\n# CONFIG_FILEPATH.\n#\n# The env variable name is auto generated by upper casing everything and adding\n# an underscore for each indentation/level. Some examples:\n# kafka.rackId => KAFKA_RACKID\n# kafka.tls.caFilepath => KAFKA_TLS_CAFILEPATH\n# minion.consumerGroups.allowedGroups => MINION_CONSUMERGROUPS_ALLOWEDGROUPS\n#\n# Env variables that expect array values can be provided by separting them using\n# a comma: KAFKA_BROKERS = \"broker1:9092,broker2:9092,broker3:9092\"\n#####################################################################################\n\nlogger:\n  # Valid values are: debug, info, warn, error, fatal, panic\n  level: info\n\nkafka:\n  brokers: [ ]\n  clientId: \"kminion\"\n  rackId: \"\"\n  tls:\n    enabled: false\n    caFilepath: \"\"\n    certFilepath: \"\"\n    keyFilepath: \"\"\n    # base64 encoded tls CA, cannot be set if 'caFilepath' is set\n    ca: \"\"\n    # base64 encoded tls cert, cannot be set if 'certFilepath' is set\n    cert: \"\"\n    # base64 encoded tls key, cannot be set if 'keyFilepath' is set\n    key: \"\"\n    passphrase: \"\"\n    insecureSkipTlsVerify: false\n\n  sasl:\n    # Whether or not SASL authentication will be used for authentication\n    enabled: false\n    # Username to use for PLAIN or SCRAM mechanism\n    username: \"\"\n    # Password to use for PLAIN or SCRAM mechanism\n    password: \"\"\n    # Mechanism to use for SASL Authentication. Valid values are PLAIN, SCRAM-SHA-256, SCRAM-SHA-512, GSSAPI, OAUTHBEARER\n    mechanism: \"PLAIN\"\n    # GSSAPI / Kerberos config properties\n    gssapi:\n      # Required. One of USER_AUTH or KEYTAB_AUTH\n      authType: \"\"\n      keyTabPath: \"\"\n      kerberosConfigPath: \"\"\n      serviceName: \"\"\n      username: \"\"\n      password: \"\"\n      realm: \"\"\n      enableFast: true\n    # OAUTHBEARER config properties\n    oauth:\n      tokenEndpoint: \"\"\n      clientId: \"\"\n      clientSecret: \"\"\n      scope: \"\"\n\nminion:\n  consumerGroups:\n    # Enabled specifies whether consumer groups shall be scraped and exported or not.\n    enabled: true\n    # Mode specifies whether we export consumer group offsets using the Admin API or by consuming the internal\n    # __consumer_offsets topic. Both modes have their advantages and disadvantages.\n    # * adminApi:\n    #     - Useful for managed kafka clusters that do not provide access to the offsets topic.\n    # * offsetsTopic\n    #     - Enables kminion_kafka_consumer_group_offset_commits_total metrics.\n    #     - Processing the offsetsTopic requires slightly more memory and cpu than using the adminApi. The amount depends on the\n    #       size and throughput of the offsets topic.\n    scrapeMode: adminApi # Valid values: adminApi, offsetsTopic\n    # Granularity can be per topic or per partition. 
If you want to reduce the number of exported metric series and\n    # you aren't interested in per partition lags you could choose \"topic\" where all partition lags will be summed\n    # and only topic lags will be exported.\n    granularity: partition\n    # AllowedGroups are regex strings of group ids that shall be exported\n    # You can specify allowed groups by providing literals like \"my-consumergroup-name\" or by providing regex expressions\n    # like \"/internal-.*/\".\n    allowedGroups: [ \".*\" ]\n    # IgnoredGroups are regex strings of group ids that shall be ignored/skipped when exporting metrics. Ignored groups\n    # take precedence over allowed groups.\n    ignoredGroups: [ ]\n  topics:\n    # Enabled can be set to false in order to disable collecting any topic metrics.\n    enabled: true\n    # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and\n    # you aren't interested in per partition metrics you could choose \"topic\".\n    granularity: partition\n    # AllowedTopics are regex strings of topic names whose metrics shall be exported.\n    # You can specify allowed topics by providing literals like \"my-topic-name\" or by providing regex expressions\n    # like \"/internal-.*/\".\n    allowedTopics: [ \".*\" ]\n    # IgnoredTopics are regex strings of topic names that shall be ignored/skipped when exporting metrics. Ignored topics\n    # take precedence over allowed topics.\n    ignoredTopics: [ ]\n    # infoMetric is a configuration object for the kminion_kafka_topic_info metric\n    infoMetric:\n      # ConfigKeys is a set of topic config keys that shall be exported as part of the metric\n      configKeys: [ \"cleanup.policy\" ]\n  logDirs:\n    # Enabled specifies whether log dirs shall be scraped and exported or not. This should be disabled for clusters prior\n    # to version 1.0.0 as describing log dirs was not supported back then.\n    enabled: true\n\n  # EndToEnd Metrics\n  # When enabled, kminion creates a topic which it produces to and consumes from, to measure various advanced metrics. 
See docs for more info\n  endToEnd:\n    enabled: false\n    # How often to send end-to-end test messages\n    probeInterval: 100ms\n    topicManagement:\n      # Controls whether kminion should create, validate, and alter the end-to-end topic.\n      #\n      # When enabled (true):\n      # - kminion creates the topic if it doesn't exist\n      # - Validates partition count and replica assignments on startup and periodically (based on reconciliationInterval)\n      # - Alters the topic (reassigns partitions, adds partitions) to ensure one partition leader per broker\n      # - Fails on startup if topic alteration operations fail\n      #\n      # When disabled (false):\n      # - kminion will NOT create the topic if it doesn't exist (startup will fail)\n      # - Will NOT validate or alter an existing topic (accepts current partition layout as-is)\n      # - Logs warnings if the topic configuration is suboptimal\n      # - Continues end-to-end testing with whatever partition layout exists\n      #\n      # Use disabled mode on managed Kafka platforms that restrict partition reassignment operations.\n      # In this mode, you must manually create the topic with appropriate partition distribution\n      # before starting kminion.\n      enabled: true\n\n      # Name of the topic kminion uses to send its test messages\n      # You do *not* need to change this if you are running multiple kminion instances on the same cluster.\n      # Different instances are perfectly fine with sharing the same topic!\n      name: kminion-end-to-end\n\n      # How often kminion checks its topic to validate configuration, partition count, and partition assignments\n      reconciliationInterval: 10m\n\n      # Depending on the desired monitoring (e.g. you want to alert on broker failure vs. cluster that is not writable)\n      # you will most commonly choose a replication factor of 1 or 3.\n      replicationFactor: 1\n\n      # Rarely makes sense to change this, but maybe if you want some sort of cheap load test?\n      # By default (1) every broker gets one partition\n      partitionsPerBroker: 1\n\n      # Whether kminion should issue AlterPartitionAssignments requests to rebalance partition\n      # leaders across brokers. Disable this when running against Kafka-compatible clusters that\n      # reject AlterPartitionAssignments (e.g. Redpanda with autobalancer enabled, Confluent Cloud).\n      # Topic creation and partition count management remain active regardless of this setting.\n      rebalancePartitions: true\n\n    producer:\n      # This defines:\n      # - Maximum time to wait for an ack response after producing a message\n      # - Upper bound for histogram buckets in \"produce_latency_seconds\"\n      ackSla: 5s\n      # Can be set to \"all\" (default) so kafka only reports an end-to-end test message as acknowledged if\n      # the message was written to all in-sync replicas of the partition.\n      # Or can be set to \"leader\" to only require the partition leader to have written the message to its log.\n      requiredAcks: all\n\n    consumer:\n      # Prefix kminion uses when creating its consumer groups. Current kminion instance id will be appended automatically\n      groupIdPrefix: kminion-end-to-end\n\n      # Whether KMinion should try to delete empty consumer groups with the same prefix. This can be used if you want\n      # KMinion to clean up its old consumer groups. 
It should only be used if you use a unique prefix for KMinion.\n      deleteStaleConsumerGroups: false\n\n      # This defines:\n      # - Upper bound for histogram buckets in \"roundtrip_latency\"\n      # - Time limit beyond which a message is considered \"lost\" (failed the roundtrip)\n      roundtripSla: 20s\n\n      # - Upper bound for histogram buckets in \"commit_latency_seconds\"\n      # - Maximum time an offset commit is allowed to take before considering it failed\n      commitSla: 10s\n\nexporter:\n  # Namespace is the prefix for all exported Prometheus metrics\n  namespace: \"kminion\"\n  # Host that shall be used to bind the HTTP server on\n  host: \"\"\n  # Port that shall be used to bind the HTTP server on\n  port: 8080\n"
  },
  {
    "path": "e2e/client_hooks.go",
    "content": "package e2e\n\nimport (\n\t\"net\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\n// in e2e we only use client hooks for logging connect/disconnect messages\ntype clientHooks struct {\n\tlogger *zap.Logger\n\n\tlastCoordinatorUpdate time.Time\n\tcurrentCoordinator    *atomic.Value // kgo.BrokerMetadata\n}\n\nfunc newEndToEndClientHooks(logger *zap.Logger) *clientHooks {\n\treturn &clientHooks{\n\t\tlogger:             logger.Named(\"e2e_hooks\"),\n\t\tcurrentCoordinator: &atomic.Value{},\n\t}\n}\n\nfunc (c *clientHooks) OnBrokerConnect(meta kgo.BrokerMetadata, dialDur time.Duration, _ net.Conn, err error) {\n\tif err != nil {\n\t\tc.logger.Error(\"kafka connection failed\", zap.String(\"broker_host\", meta.Host), zap.Int32(\"broker_id\", meta.NodeID), zap.Error(err))\n\t\treturn\n\t}\n\tc.logger.Debug(\"kafka connection succeeded\",\n\t\tzap.String(\"host\", meta.Host), zap.Int32(\"broker_id\", meta.NodeID),\n\t\tzap.Int64(\"dial_duration_ms\", dialDur.Milliseconds()))\n}\n\nfunc (c *clientHooks) OnDisconnect(meta kgo.BrokerMetadata, _ net.Conn) {\n\tc.logger.Warn(\"kafka broker disconnected\", zap.Int32(\"broker_id\", meta.NodeID),\n\t\tzap.String(\"host\", meta.Host))\n}\n\n// OnBrokerWrite is passed the broker metadata, the key for the request that\n// was written, the number of bytes written, how long the request\n// waited before being written, how long it took to write the request,\n// and any error.\n//\n// The bytes written does not count any tls overhead.\n// OnWrite is called after a write to a broker.\n//\n// OnWrite(meta BrokerMetadata, key int16, bytesWritten int, writeWait, timeToWrite time.Duration, err error)\nfunc (c *clientHooks) OnBrokerWrite(meta kgo.BrokerMetadata, key int16, bytesWritten int, writeWait, timeToWrite time.Duration, err error) {\n\tkeyName := kmsg.NameForKey(key)\n\tif keyName != \"OffsetCommit\" {\n\t\treturn\n\t}\n\n\t// c.logger.Info(\"hooks onWrite\",\n\t// \tzap.Duration(\"timeToWrite\", timeToWrite),\n\t// \tzap.NamedError(\"err\", err))\n}\n\n// OnBrokerRead is passed the broker metadata, the key for the response that\n// was read, the number of bytes read, how long the Client waited\n// before reading the response, how long it took to read the response,\n// and any error.\n//\n// The bytes written does not count any tls overhead.\n// OnRead is called after a read from a broker.\n// OnRead(meta BrokerMetadata, key int16, bytesRead int, readWait, timeToRead time.Duration, err error)\nfunc (c *clientHooks) OnBrokerRead(meta kgo.BrokerMetadata, key int16, bytesRead int, readWait, timeToRead time.Duration, err error) {\n\tconsumerGroupMsgKeys := []int16{\n\t\t(&kmsg.OffsetCommitResponse{}).Key(),\n\t\t(&kmsg.JoinGroupResponse{}).Key(),\n\t\t(&kmsg.HeartbeatResponse{}).Key(),\n\t\t(&kmsg.SyncGroupResponse{}).Key(),\n\t}\n\n\tisMessageFromGroupCoordinator := isInArray(key, consumerGroupMsgKeys)\n\tif !isMessageFromGroupCoordinator {\n\t\treturn\n\t}\n\n\tif err == nil {\n\t\tc.currentCoordinator.Store(meta)\n\t\tc.lastCoordinatorUpdate = time.Now()\n\t}\n}\n"
  },
  {
    "path": "e2e/config.go",
    "content": "package e2e\n\nimport (\n\t\"fmt\"\n\t\"time\"\n)\n\ntype Config struct {\n\tEnabled         bool                   `koanf:\"enabled\"`\n\tTopicManagement EndToEndTopicConfig    `koanf:\"topicManagement\"`\n\tProbeInterval   time.Duration          `koanf:\"probeInterval\"`\n\tProducer        EndToEndProducerConfig `koanf:\"producer\"`\n\tConsumer        EndToEndConsumerConfig `koanf:\"consumer\"`\n}\n\nfunc (c *Config) SetDefaults() {\n\tc.Enabled = false\n\tc.ProbeInterval = 100 * time.Millisecond\n\tc.TopicManagement.SetDefaults()\n\tc.Producer.SetDefaults()\n\tc.Consumer.SetDefaults()\n}\n\nfunc (c *Config) Validate() error {\n\n\tif !c.Enabled {\n\t\treturn nil\n\t}\n\n\t// If the timeduration is 0s or 0ms or its variation of zero, it will be parsed as 0\n\tif c.ProbeInterval == 0 {\n\t\treturn fmt.Errorf(\"failed to validate probeInterval config, the duration can't be zero\")\n\t}\n\n\terr := c.TopicManagement.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate topicManagement config: %w\", err)\n\t}\n\n\t_, err = time.ParseDuration(c.ProbeInterval.String())\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to parse '%s' to time.Duration: %v\", c.ProbeInterval.String(), err)\n\t}\n\n\terr = c.Producer.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate producer config: %w\", err)\n\t}\n\n\terr = c.Consumer.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate consumer config: %w\", err)\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "e2e/config_consumer.go",
    "content": "package e2e\n\nimport (\n\t\"fmt\"\n\t\"time\"\n)\n\ntype EndToEndConsumerConfig struct {\n\tGroupIdPrefix             string `koanf:\"groupIdPrefix\"`\n\tDeleteStaleConsumerGroups bool   `koanf:\"deleteStaleConsumerGroups\"`\n\n\t// RoundtripSLA is the time duration from the moment where we try to produce until the moment where we consumed\n\t// the message. Therefore this should always be higher than the produceTimeout / SLA.\n\tRoundtripSla time.Duration `koanf:\"roundtripSla\"`\n\tCommitSla    time.Duration `koanf:\"commitSla\"`\n}\n\nfunc (c *EndToEndConsumerConfig) SetDefaults() {\n\tc.GroupIdPrefix = \"kminion-end-to-end\"\n\tc.DeleteStaleConsumerGroups = false\n\tc.RoundtripSla = 20 * time.Second\n\tc.CommitSla = 5 * time.Second\n}\n\nfunc (c *EndToEndConsumerConfig) Validate() error {\n\tif len(c.GroupIdPrefix) < 3 {\n\t\treturn fmt.Errorf(\"kminion prefix should be at least 3 characters long\")\n\t}\n\n\tif c.RoundtripSla <= 0 {\n\t\treturn fmt.Errorf(\"consumer.roundtripSla must be greater than zero\")\n\t}\n\n\tif c.CommitSla <= 0 {\n\t\treturn fmt.Errorf(\"consumer.commitSla must be greater than zero\")\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "e2e/config_producer.go",
    "content": "package e2e\n\nimport (\n\t\"fmt\"\n\t\"time\"\n)\n\ntype EndToEndProducerConfig struct {\n\tAckSla       time.Duration `koanf:\"ackSla\"`\n\tRequiredAcks string        `koanf:\"requiredAcks\"`\n}\n\nfunc (c *EndToEndProducerConfig) SetDefaults() {\n\tc.AckSla = 5 * time.Second\n\tc.RequiredAcks = \"all\"\n}\n\nfunc (c *EndToEndProducerConfig) Validate() error {\n\n\tif c.RequiredAcks != \"all\" && c.RequiredAcks != \"leader\" {\n\t\treturn fmt.Errorf(\"producer.requiredAcks must be 'all' or 'leader\")\n\t}\n\n\tif c.AckSla <= 0 {\n\t\treturn fmt.Errorf(\"producer.ackSla must be greater than zero\")\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "e2e/config_topic.go",
    "content": "package e2e\n\nimport (\n\t\"fmt\"\n\t\"time\"\n)\n\ntype EndToEndTopicConfig struct {\n\tEnabled                bool          `koanf:\"enabled\"`\n\tName                   string        `koanf:\"name\"`\n\tReplicationFactor      int           `koanf:\"replicationFactor\"`\n\tPartitionsPerBroker    int           `koanf:\"partitionsPerBroker\"`\n\tReconciliationInterval time.Duration `koanf:\"reconciliationInterval\"`\n\t// RebalancePartitions controls whether kminion will issue AlterPartitionAssignments\n\t// requests to rebalance partition leaders across brokers. Set to false when running\n\t// against Kafka-compatible clusters (e.g. Redpanda with autobalancer enabled, Confluent\n\t// Cloud) that reject AlterPartitionAssignments. Topic creation and partition count\n\t// management remain active regardless of this setting.\n\tRebalancePartitions bool `koanf:\"rebalancePartitions\"`\n}\n\nfunc (c *EndToEndTopicConfig) SetDefaults() {\n\tc.Enabled = true\n\tc.Name = \"kminion-end-to-end\"\n\tc.ReplicationFactor = 1\n\tc.PartitionsPerBroker = 1\n\tc.ReconciliationInterval = 10 * time.Minute\n\tc.RebalancePartitions = true\n}\n\nfunc (c *EndToEndTopicConfig) Validate() error {\n\n\tif c.ReplicationFactor < 1 {\n\t\treturn fmt.Errorf(\"failed to parse replicationFactor, it should be more than 1, retrieved value %v\", c.ReplicationFactor)\n\t}\n\n\tif c.PartitionsPerBroker < 1 {\n\t\treturn fmt.Errorf(\"failed to parse partitionsPerBroker, it should be more than 1, retrieved value %v\", c.PartitionsPerBroker)\n\t}\n\n\t// If the timeduration is 0s or 0ms or its variation of zero, it will be parsed as 0\n\tif c.ReconciliationInterval == 0 {\n\t\treturn fmt.Errorf(\"failed to validate topic.ReconciliationInterval config, the duration can't be zero\")\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "e2e/config_topic_test.go",
    "content": "package e2e\n\nimport (\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/assert\"\n\t\"github.com/stretchr/testify/require\"\n\t\"go.uber.org/zap\"\n)\n\nfunc TestEndToEndTopicConfig_SetDefaults(t *testing.T) {\n\tvar cfg EndToEndTopicConfig\n\tcfg.SetDefaults()\n\n\tassert.True(t, cfg.Enabled)\n\tassert.Equal(t, \"kminion-end-to-end\", cfg.Name)\n\tassert.Equal(t, 1, cfg.ReplicationFactor)\n\tassert.Equal(t, 1, cfg.PartitionsPerBroker)\n\tassert.Equal(t, 10*time.Minute, cfg.ReconciliationInterval)\n\tassert.True(t, cfg.RebalancePartitions, \"RebalancePartitions should default to true for backward compatibility\")\n}\n\nfunc TestEndToEndTopicConfig_Validate(t *testing.T) {\n\ttests := []struct {\n\t\tname    string\n\t\tcfg     EndToEndTopicConfig\n\t\twantErr bool\n\t}{\n\t\t{\n\t\t\tname: \"valid config with rebalance enabled\",\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tEnabled:                true,\n\t\t\t\tName:                   \"test-topic\",\n\t\t\t\tReplicationFactor:      3,\n\t\t\t\tPartitionsPerBroker:    1,\n\t\t\t\tReconciliationInterval: 10 * time.Minute,\n\t\t\t\tRebalancePartitions:    true,\n\t\t\t},\n\t\t\twantErr: false,\n\t\t},\n\t\t{\n\t\t\tname: \"valid config with rebalance disabled\",\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tEnabled:                true,\n\t\t\t\tName:                   \"test-topic\",\n\t\t\t\tReplicationFactor:      3,\n\t\t\t\tPartitionsPerBroker:    1,\n\t\t\t\tReconciliationInterval: 10 * time.Minute,\n\t\t\t\tRebalancePartitions:    false,\n\t\t\t},\n\t\t\twantErr: false,\n\t\t},\n\t\t{\n\t\t\tname: \"invalid replication factor\",\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:      0,\n\t\t\t\tPartitionsPerBroker:    1,\n\t\t\t\tReconciliationInterval: 10 * time.Minute,\n\t\t\t\tRebalancePartitions:    false,\n\t\t\t},\n\t\t\twantErr: true,\n\t\t},\n\t\t{\n\t\t\tname: \"invalid partitions per broker\",\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:      1,\n\t\t\t\tPartitionsPerBroker:    0,\n\t\t\t\tReconciliationInterval: 10 * time.Minute,\n\t\t\t\tRebalancePartitions:    false,\n\t\t\t},\n\t\t\twantErr: true,\n\t\t},\n\t\t{\n\t\t\tname: \"zero reconciliation interval\",\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:      1,\n\t\t\t\tPartitionsPerBroker:    1,\n\t\t\t\tReconciliationInterval: 0,\n\t\t\t\tRebalancePartitions:    false,\n\t\t\t},\n\t\t\twantErr: true,\n\t\t},\n\t}\n\n\tfor _, tt := range tests {\n\t\tt.Run(tt.name, func(t *testing.T) {\n\t\t\terr := tt.cfg.Validate()\n\t\t\tif tt.wantErr {\n\t\t\t\trequire.Error(t, err)\n\t\t\t} else {\n\t\t\t\trequire.NoError(t, err)\n\t\t\t}\n\t\t})\n\t}\n}\n\n// TestPartitionPlanner_RebalancePartitionsDisabled verifies that when\n// RebalancePartitions is false the planner still produces a valid plan\n// (the caller in topic.go is responsible for not executing reassignments),\n// and that partition creation assignments are still generated normally.\nfunc TestPartitionPlanner_RebalancePartitionsDisabled(t *testing.T) {\n\t// Three brokers, topic already exists with suboptimal leader distribution:\n\t// all partitions led by broker 0.\n\tmeta := buildMeta(\"e2e\",\n\t\tmap[int32]string{0: \"\", 1: \"\", 2: \"\"},\n\t\t[][]int32{\n\t\t\t{0, 1, 2},\n\t\t\t{0, 2, 1},\n\t\t\t{0, 1, 2},\n\t\t},\n\t)\n\n\tcfg := EndToEndTopicConfig{\n\t\tEnabled:                true,\n\t\tName:                   \"e2e\",\n\t\tReplicationFactor:      3,\n\t\tPartitionsPerBroker:    1,\n\t\tReconciliationInterval: 10 * 
time.Minute,\n\t\tRebalancePartitions:    false,\n\t}\n\n\tplanner := NewPartitionPlanner(cfg, zap.NewNop())\n\tplan, err := planner.Plan(meta)\n\trequire.NoError(t, err)\n\n\t// The planner should still detect that reassignments are needed —\n\t// it's the caller's responsibility to skip executing them.\n\tassert.NotEmpty(t, plan.Reassignments, \"planner should detect reassignments are needed\")\n\tfor _, ra := range plan.Reassignments {\n\t\tassertNoDuplicates(t, ra.Replicas)\n\t\tassert.Len(t, ra.Replicas, cfg.ReplicationFactor)\n\t}\n\n\t// No new partitions should be created (3 brokers × 1 per broker = 3 already exist).\n\tassert.Empty(t, plan.CreateAssignments)\n\tassert.Equal(t, 3, plan.FinalPartitionCount)\n}\n\n// TestPartitionPlanner_RebalancePartitionsDisabled_Creates verifies that when\n// RebalancePartitions is false and new partitions need to be created, Phase 3\n// uses actual current leaders (not predicted leaders from staged reassignments)\n// to pick the preferred leader for new partitions.\nfunc TestPartitionPlanner_RebalancePartitionsDisabled_Creates(t *testing.T) {\n\t// 4 brokers, 3 partitions all led by broker 0.\n\t// PartitionsPerBroker=1 means desired = 4, so Phase 3 must create 1.\n\tmeta := buildMeta(\"e2e\",\n\t\tmap[int32]string{0: \"\", 1: \"\", 2: \"\", 3: \"\"},\n\t\t[][]int32{\n\t\t\t{0, 1, 2},\n\t\t\t{0, 2, 3},\n\t\t\t{0, 1, 3},\n\t\t},\n\t)\n\n\tcfg := EndToEndTopicConfig{\n\t\tEnabled:                true,\n\t\tName:                   \"e2e\",\n\t\tReplicationFactor:      3,\n\t\tPartitionsPerBroker:    1,\n\t\tReconciliationInterval: 10 * time.Minute,\n\t\tRebalancePartitions:    false,\n\t}\n\n\tplanner := NewPartitionPlanner(cfg, zap.NewNop())\n\tplan, err := planner.Plan(meta)\n\trequire.NoError(t, err)\n\n\t// Phase 3 should create exactly 1 partition (4 desired - 3 existing).\n\trequire.Len(t, plan.CreateAssignments, 1)\n\n\t// The new partition's preferred leader should NOT be broker 0,\n\t// because actual state shows broker 0 already leads 3 partitions.\n\t// With rebalancePartitions=false, Phase 3 counts from actual leaders,\n\t// so it should pick one of the under-represented brokers (1, 2, or 3).\n\tnewLeader := plan.CreateAssignments[0].Replicas[0]\n\tassert.NotEqual(t, int32(0), newLeader,\n\t\t\"new partition should not be led by broker 0 (already leads 3 partitions in actual state)\")\n}\n"
  },
  {
    "path": "e2e/consumer.go",
    "content": "package e2e\n\nimport (\n\t\"context\"\n\t\"encoding/json\"\n\t\"strconv\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\nfunc (s *Service) startConsumeMessages(ctx context.Context, initializedCh chan<- bool) {\n\tclient := s.client\n\n\ts.logger.Info(\"starting to consume end-to-end topic\",\n\t\tzap.String(\"topic_name\", s.config.TopicManagement.Name),\n\t\tzap.String(\"group_id\", s.groupId))\n\n\tisInitialized := false\n\tfor {\n\t\tfetches := client.PollFetches(ctx)\n\t\tif !isInitialized {\n\t\t\tisInitialized = true\n\t\t\tinitializedCh <- true\n\t\t\tclose(initializedCh)\n\t\t}\n\n\t\t// Log all errors and continue afterwards as we might get errors and still have some fetch results\n\t\terrors := fetches.Errors()\n\t\tfor _, err := range errors {\n\t\t\ts.logger.Error(\"kafka fetch error\",\n\t\t\t\tzap.String(\"topic\", err.Topic),\n\t\t\t\tzap.Int32(\"partition\", err.Partition),\n\t\t\t\tzap.Error(err.Err))\n\t\t}\n\n\t\tfetches.EachRecord(s.processMessage)\n\t}\n}\n\nfunc (s *Service) commitOffsets(ctx context.Context) {\n\tclient := s.client\n\tuncommittedOffset := client.UncommittedOffsets()\n\tif uncommittedOffset == nil {\n\t\treturn\n\t}\n\n\tstartCommitTimestamp := time.Now()\n\n\tchildCtx, cancel := context.WithTimeout(ctx, s.config.Consumer.CommitSla)\n\tclient.CommitOffsets(childCtx, uncommittedOffset, func(_ *kgo.Client, req *kmsg.OffsetCommitRequest, r *kmsg.OffsetCommitResponse, err error) {\n\t\tcancel()\n\n\t\tcoordinator := s.clientHooks.currentCoordinator.Load().(kgo.BrokerMetadata)\n\t\tcoordinatorID := strconv.Itoa(int(coordinator.NodeID))\n\n\t\tlatency := time.Since(startCommitTimestamp)\n\t\ts.offsetCommitLatency.WithLabelValues(coordinatorID).Observe(latency.Seconds())\n\t\ts.offsetCommitsTotal.WithLabelValues(coordinatorID).Inc()\n\t\t// We do this to ensure that a series with that coordinator id is initialized\n\t\ts.offsetCommitsTotal.WithLabelValues(coordinatorID).Add(0)\n\n\t\t// If we have at least one error in our commit response we want to report it as an error with an appropriate\n\t\t// reason as label.\n\t\tif errCode := s.logCommitErrors(r, err); errCode != \"\" {\n\t\t\ts.offsetCommitsFailedTotal.WithLabelValues(coordinatorID, errCode).Inc()\n\t\t\treturn\n\t\t}\n\t})\n}\n\n// processMessage:\n// - deserializes the message\n// - checks if it is from us, or from another kminion process running somewhere else\n// - hands it off to the service, which then reports metrics on it\nfunc (s *Service) processMessage(record *kgo.Record) {\n\tif record.Value == nil {\n\t\t// Init messages have nil values - we want to skip these. They are only used to make sure a consumer is ready.\n\t\treturn\n\t}\n\n\tvar msg EndToEndMessage\n\tif jerr := json.Unmarshal(record.Value, &msg); jerr != nil {\n\t\ts.logger.Error(\"failed to unmarshal message value\", zap.Error(jerr))\n\t\treturn // maybe older version\n\t}\n\n\tif msg.MinionID != s.minionID {\n\t\treturn // not from us\n\t}\n\n\t// restore partition, which is not serialized\n\tmsg.partition = int(record.Partition)\n\ts.messageTracker.onMessageArrived(&msg)\n}\n"
  },
  {
    "path": "e2e/endtoend_message.go",
    "content": "package e2e\n\nimport \"time\"\n\nconst (\n\t_ = iota\n\tEndToEndMessageStateCreated\n\tEndToEndMessageStateProducedSuccessfully\n)\n\ntype EndToEndMessage struct {\n\tMinionID  string `json:\"minionID\"`     // unique for each running kminion instance\n\tMessageID string `json:\"messageID\"`    // unique for each message\n\tTimestamp int64  `json:\"createdUtcNs\"` // when the message was created, unix nanoseconds\n\n\t// The following properties are only used within the message tracker\n\tpartition      int\n\tstate          int\n\tproduceLatency float64\n}\n\nfunc (m *EndToEndMessage) creationTime() time.Time {\n\treturn time.Unix(0, m.Timestamp)\n}\n"
  },
  {
    "path": "e2e/group_tracker.go",
    "content": "package e2e\n\nimport (\n\t\"context\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\nconst (\n\toldGroupCheckInterval = 5 * time.Second  // how often to check for old kminion groups\n\toldGroupMaxAge        = 20 * time.Second // maximum age after which an old group should be deleted\n)\n\n// groupTracker keeps checking for empty consumerGroups matching the kminion prefix.\n// When a group was seen empty for some time, we delete it.\n// Why?\n// Whenever a kminion instance starts up it creates a consumer-group for itself in order to not \"collide\" with other kminion instances.\n// When an instance restarts (for whatever reason), it creates a new group again, so we'd end up with a lot of unused groups.\ntype groupTracker struct {\n\tcfg                    Config\n\tlogger                 *zap.Logger\n\tclient                 *kgo.Client          // kafka client\n\tgroupId                string               // our own groupId\n\tpotentiallyEmptyGroups map[string]time.Time // groupName -> utc timestamp when the group was first seen\n}\n\nfunc newGroupTracker(cfg Config, logger *zap.Logger, client *kgo.Client, groupID string) *groupTracker {\n\treturn &groupTracker{\n\t\tcfg:                    cfg,\n\t\tlogger:                 logger.Named(\"group_tracker\"),\n\t\tclient:                 client,\n\t\tgroupId:                groupID,\n\t\tpotentiallyEmptyGroups: make(map[string]time.Time),\n\t}\n}\n\nfunc (g *groupTracker) start(ctx context.Context) {\n\tg.logger.Debug(\"starting group tracker\")\n\n\tdeleteOldGroupsTicker := time.NewTicker(oldGroupCheckInterval)\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\tg.logger.Debug(\"stopping group tracker, context was cancelled\")\n\t\t\treturn\n\t\tcase <-deleteOldGroupsTicker.C:\n\t\t\tchildCtx, cancel := context.WithTimeout(ctx, 10*time.Second)\n\t\t\terr := g.checkAndDeleteOldConsumerGroups(childCtx)\n\t\t\tif err != nil {\n\t\t\t\tg.logger.Error(\"failed to check for old consumer groups: %w\", zap.Error(err))\n\t\t\t}\n\t\t\tcancel()\n\t\t}\n\t}\n}\n\nfunc (g *groupTracker) checkAndDeleteOldConsumerGroups(ctx context.Context) error {\n\tgroupsRq := kmsg.NewListGroupsRequest()\n\tgroupsRq.StatesFilter = []string{\"Empty\"}\n\n\tg.logger.Debug(\"checking for stale kminion consumer groups\")\n\n\tshardedResponse := g.client.RequestSharded(ctx, &groupsRq)\n\n\t// find groups that start with the kminion prefix\n\tmatchingGroups := make([]string, 0)\n\tfor _, shard := range shardedResponse {\n\t\tif shard.Err != nil {\n\t\t\tg.logger.Error(\"error in response to ListGroupsRequest\", zap.Int32(\"broker_id\", shard.Meta.NodeID), zap.Error(shard.Err))\n\t\t\tcontinue\n\t\t}\n\n\t\tr, ok := shard.Resp.(*kmsg.ListGroupsResponse)\n\t\tif !ok {\n\t\t\tg.logger.Error(\"cannot cast responseShard.Resp to kmsg.ListGroupsResponse\")\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, group := range r.Groups {\n\t\t\tname := group.Group\n\n\t\t\tif name == g.groupId {\n\t\t\t\tcontinue // skip our own consumer group\n\t\t\t}\n\n\t\t\tif strings.HasPrefix(name, g.cfg.Consumer.GroupIdPrefix) {\n\t\t\t\tmatchingGroups = append(matchingGroups, name)\n\t\t\t}\n\t\t}\n\t}\n\n\t// save new (previously unseen) groups to tracker\n\tg.logger.Debug(\"checked for stale consumer groups\", zap.Int(\"found_groups\", len(matchingGroups)), zap.Strings(\"groups\", matchingGroups))\n\tfor _, name := range matchingGroups {\n\t\t_, exists := 
g.potentiallyEmptyGroups[name]\n\t\tif !exists {\n\t\t\t// add it with the current timestamp\n\t\t\tg.potentiallyEmptyGroups[name] = time.Now()\n\t\t\tg.logger.Debug(\"found new empty kminion group, adding it to the tracker\", zap.String(\"group\", name))\n\t\t}\n\t}\n\n\t// go through saved groups:\n\t// - don't track the ones we don't see anymore (because they got deleted or are not empty anymore)\n\t// - mark the ones that are too old (have been observed as empty for too long)\n\tgroupsToDelete := make([]string, 0)\n\tfor name, firstSeen := range g.potentiallyEmptyGroups {\n\t\texists, _ := containsStr(matchingGroups, name)\n\t\tif exists {\n\t\t\t// still there, check age and maybe delete it\n\t\t\tage := time.Since(firstSeen)\n\t\t\tif age > oldGroupMaxAge {\n\t\t\t\t// group was unused for too long, delete it\n\t\t\t\tgroupsToDelete = append(groupsToDelete, name)\n\t\t\t\tdelete(g.potentiallyEmptyGroups, name)\n\t\t\t}\n\t\t} else {\n\t\t\t// does not exist anymore, it must have been deleted, or is in use now (no longer empty)\n\t\t\t// don't track it anymore\n\t\t\tdelete(g.potentiallyEmptyGroups, name)\n\t\t}\n\t}\n\n\t// actually delete the groups we've decided to delete\n\tif len(groupsToDelete) == 0 {\n\t\treturn nil\n\t}\n\n\tdeleteRq := kmsg.NewDeleteGroupsRequest()\n\tdeleteRq.Groups = groupsToDelete\n\tdeleteResp := g.client.RequestSharded(ctx, &deleteRq)\n\n\t// check the delete responses for errors;\n\t// if we get a not authorized error, future delete attempts will most likely fail as well\n\tfoundNotAuthorizedError := false\n\tdeletedGroups := make([]string, 0)\n\tfor _, shard := range deleteResp {\n\t\tif shard.Err != nil {\n\t\t\tg.logger.Error(\"sharded consumer group delete request failed\", zap.Error(shard.Err))\n\t\t\tcontinue\n\t\t}\n\n\t\tresp, ok := shard.Resp.(*kmsg.DeleteGroupsResponse)\n\t\tif !ok {\n\t\t\tg.logger.Error(\"failed to cast shard response to DeleteGroupsResponse\", zap.String(\"shard_host\", shard.Meta.Host), zap.Int32(\"broker_id\", shard.Meta.NodeID))\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, groupResp := range resp.Groups {\n\t\t\terr := kerr.ErrorForCode(groupResp.ErrorCode)\n\t\t\tif err != nil {\n\t\t\t\tg.logger.Error(\"failed to delete consumer group\", zap.String(\"shard\", shard.Meta.Host), zap.Int32(\"broker_id\", shard.Meta.NodeID), zap.String(\"group\", groupResp.Group), zap.Error(err))\n\n\t\t\t\tif groupResp.ErrorCode == kerr.GroupAuthorizationFailed.Code {\n\t\t\t\t\tfoundNotAuthorizedError = true\n\t\t\t\t}\n\n\t\t\t} else {\n\t\t\t\tdeletedGroups = append(deletedGroups, groupResp.Group)\n\t\t\t}\n\t\t}\n\t}\n\tg.logger.Info(\"deleted old consumer groups\", zap.Strings(\"deleted_groups\", deletedGroups))\n\n\tif foundNotAuthorizedError {\n\t\tg.logger.Info(\"cannot delete old kminion consumer groups since one of the last delete results had a 'GroupAuthorizationFailed' error\")\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "e2e/message_tracker.go",
    "content": "package e2e\n\nimport (\n\t\"fmt\"\n\t\"strconv\"\n\t\"time\"\n\n\t\"github.com/jellydator/ttlcache/v2\"\n\n\t\"go.uber.org/zap\"\n)\n\n// messageTracker keeps track of the messages' lifetime\n//\n// When we successfully send a mesasge, it will be added to this tracker.\n// Later, when we receive the message back in the consumer, the message is marked as completed and removed from the tracker.\n// If the message does not arrive within the configured `consumer.roundtripSla`, it is counted as lost. Messages that\n// failed to be produced will not be\n// considered as lost message.\n//\n// We use a dedicated counter to track messages that couldn't be  produced to Kafka.\ntype messageTracker struct {\n\tsvc    *Service\n\tlogger *zap.Logger\n\tcache  *ttlcache.Cache\n}\n\nfunc newMessageTracker(svc *Service) *messageTracker {\n\tdefaultExpirationDuration := svc.config.Consumer.RoundtripSla\n\tcache := ttlcache.NewCache()\n\tcache.SetTTL(defaultExpirationDuration)\n\n\tt := &messageTracker{\n\t\tsvc:    svc,\n\t\tlogger: svc.logger.Named(\"message_tracker\"),\n\t\tcache:  cache,\n\t}\n\tt.cache.SetExpirationReasonCallback(func(key string, reason ttlcache.EvictionReason, value interface{}) {\n\t\tt.onMessageExpired(key, reason, value.(*EndToEndMessage))\n\t})\n\n\treturn t\n}\n\nfunc (t *messageTracker) addToTracker(msg *EndToEndMessage) {\n\tt.cache.Set(msg.MessageID, msg)\n}\n\n// updateItemIfExists only updates a message if it still exists in the cache. The remaining time to live will not\n// be refreshed.\n// If it doesn't exist an ttlcache.ErrNotFound error will be returned.\nfunc (t *messageTracker) updateItemIfExists(msg *EndToEndMessage) error {\n\t_, ttl, err := t.cache.GetWithTTL(msg.MessageID)\n\tif err != nil {\n\t\tif err == ttlcache.ErrNotFound {\n\t\t\treturn err\n\t\t}\n\t\tpanic(err)\n\t}\n\n\t// Because the returned TTL is set to the original TTL duration (and not the remaining TTL) we have to calculate\n\t// the remaining TTL now as we want to updat the existing cache item without changing the remaining time to live.\n\texpiryTimestamp := msg.creationTime().Add(ttl)\n\tremainingTTL := expiryTimestamp.Sub(time.Now())\n\tif remainingTTL < 0 {\n\t\t// This entry should have been deleted already. Race condition.\n\t\treturn ttlcache.ErrNotFound\n\t}\n\n\terr = t.cache.SetWithTTL(msg.MessageID, msg, remainingTTL)\n\tif err != nil {\n\t\tpanic(err)\n\t}\n\n\treturn nil\n}\n\n// removeFromTracker removes an entry from the cache. If the key does not exist it will return an ttlcache.ErrNotFound error.\nfunc (t *messageTracker) removeFromTracker(messageID string) error {\n\treturn t.cache.Remove(messageID)\n}\n\nfunc (t *messageTracker) onMessageArrived(arrivedMessage *EndToEndMessage) {\n\tcm, err := t.cache.Get(arrivedMessage.MessageID)\n\tif err != nil {\n\t\tif err == ttlcache.ErrNotFound {\n\t\t\t// message expired and was removed from the cache\n\t\t\t// it arrived too late, nothing to do here...\n\t\t\treturn\n\t\t} else {\n\t\t\tpanic(fmt.Errorf(\"failed to get message from cache: %w\", err))\n\t\t}\n\t}\n\n\tmsg := cm.(*EndToEndMessage)\n\n\texpireTime := msg.creationTime().Add(t.svc.config.Consumer.RoundtripSla)\n\tisExpired := time.Now().Before(expireTime)\n\tlatency := time.Now().Sub(msg.creationTime())\n\n\tif !isExpired {\n\t\t// Message arrived late, but was still in cache. We don't increment the lost counter here because eventually\n\t\t// it will be evicted from the cache. 
This case should only pop up if the sla time is exceeded, but if the\n\t\t// item has not been evicted from the cache yet.\n\t\tt.logger.Info(\"message arrived late, will be marked as a lost message\",\n\t\t\tzap.Int64(\"delay_ms\", latency.Milliseconds()),\n\t\t\tzap.String(\"id\", msg.MessageID))\n\t\treturn\n\t}\n\n\t// message arrived early enough\n\tpID := strconv.Itoa(msg.partition)\n\tt.svc.messagesReceived.WithLabelValues(pID).Inc()\n\tt.svc.roundtripLatency.WithLabelValues(pID).Observe(latency.Seconds())\n\n\t// Remove message from cache, so that we don't track it any longer and won't mark it as lost when the entry expires.\n\tt.cache.Remove(msg.MessageID)\n}\n\nfunc (t *messageTracker) onMessageExpired(_ string, reason ttlcache.EvictionReason, value interface{}) {\n\tif reason == ttlcache.Removed {\n\t\t// We are not interested in messages that have been removed by us!\n\t\treturn\n\t}\n\n\tmsg := value.(*EndToEndMessage)\n\n\tcreated := msg.creationTime()\n\tage := time.Since(created)\n\tt.svc.lostMessages.WithLabelValues(strconv.Itoa(msg.partition)).Inc()\n\n\tt.logger.Debug(\"message expired/lost\",\n\t\tzap.Int64(\"age_ms\", age.Milliseconds()),\n\t\tzap.Int(\"partition\", msg.partition),\n\t\tzap.String(\"message_id\", msg.MessageID),\n\t\tzap.Bool(\"successfully_produced\", msg.state == EndToEndMessageStateProducedSuccessfully),\n\t\tzap.Float64(\"produce_latency_seconds\", msg.produceLatency),\n\t)\n}\n"
  },
  {
    "path": "e2e/partition_planner.go",
    "content": "package e2e\n\nimport (\n\t\"fmt\"\n\t\"sort\"\n\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\n// PartitionPlanner is a thin orchestrator around the three planning phases. It wires in\n// configuration, logging, and the replica-selection strategy. The planning phases are:\n//  1. Fix replication factor and rack diversity on existing partitions.\n//  2. Ensure every broker is the preferred leader of at least 1 partition.\n//  3. Ensure total partition count meets the configured lower bound.\n//\n// The probe topic is created to continuously test end-to-end availability by\n// producing and consuming records per partition. The planner enforces:\n//\n//   - Correct replication factor (RF == configured RF) and no duplicate brokers\n//     within a single partition's replica set.\n//   - Rack awareness: maximize the number of unique racks per partition (bounded\n//     by min(RF, #racks)). This reduces the blast radius of a rack failure.\n//   - Sufficient partition count: >= max(current, #brokers, partitionsPerBroker*#brokers).\n//   - Leader coverage: every broker must be the preferred leader (replicas[0]) of\n//     at least one partition, so a per-broker failure is observable.\n//   - Minimal movement: prefer to fix RF and rack issues first, then rotate/swap\n//     leaders to fill gaps, and only create partitions when necessary.\ntype PartitionPlanner struct {\n\tcfg    EndToEndTopicConfig\n\tlogger *zap.Logger\n\tsel    ReplicaSelector\n}\n\n// NewPartitionPlanner constructs a Planner with the given config & logger. The replica\n// selector is chosen in Plan() once we have ClusterState ready.\nfunc NewPartitionPlanner(cfg EndToEndTopicConfig, logger *zap.Logger) *PartitionPlanner {\n\treturn &PartitionPlanner{cfg: cfg, logger: logger}\n}\n\n// Plan produces an in-memory plan (reassignments + creations) for the probe\n// topic based on current cluster metadata. See the package header for the\n// invariants we enforce.\nfunc (p *PartitionPlanner) Plan(meta *kmsg.MetadataResponse) (*Plan, error) {\n\tif meta == nil || len(meta.Topics) == 0 {\n\t\treturn nil, fmt.Errorf(\"metadata response has no topics\")\n\t}\n\tif len(meta.Brokers) == 0 {\n\t\treturn nil, fmt.Errorf(\"metadata response has no brokers\")\n\t}\n\tif p.cfg.ReplicationFactor > len(meta.Brokers) {\n\t\treturn nil, fmt.Errorf(\"replication factor %d exceeds available brokers %d\", p.cfg.ReplicationFactor, len(meta.Brokers))\n\t}\n\n\t// Build state required for the planning\n\tstate := BuildState(meta)\n\tdesired := ComputeDesired(state, p.cfg)\n\ttracker := NewLoadTracker(state)\n\tselector := NewRackAwareSelector(state, tracker)\n\tp.sel = selector\n\n\tb := NewPlanBuilder(state, desired, tracker, p.cfg.RebalancePartitions)\n\n\t// Phase 1: normalize RF and racks (low movement first)\n\t// Grow/trim replicas to configured RF and re-pick to maximize unique racks\n\t// per partition. 
We avoid moving the leader when possible.\n\tfixReplicationAndRack(b, selector, p.cfg.ReplicationFactor)\n\n\t// Phase 2: ensure each broker is preferred leader for >= 1 partition\n\t// This guarantees probe coverage: if a broker dies, some partition leader is\n\t// unavailable and the probe trips.\n\tensureLeaderCoverage(b, selector)\n\n\t// Phase 3: ensure desired partition count\n\t// Enforce >= max(current, #brokers, partitionsPerBroker * #brokers).\n\tensurePartitionCount(b, selector)\n\n\t// Build final plan and log the changes\n\tplan := b.Build()\n\n\t// Log with appropriate level based on whether changes are needed\n\ttotalChanges := len(plan.Reassignments) + len(plan.CreateAssignments)\n\tif totalChanges == 0 {\n\t\tp.logger.Info(\"e2e probe topic partition leadership and replica distribution check completed - optimal\",\n\t\t\tzap.String(\"topic\", state.TopicName),\n\t\t\tzap.Int(\"brokers\", len(state.Brokers)),\n\t\t\tzap.Int(\"partitions\", len(state.Partitions)),\n\t\t\tzap.String(\"status\", \"optimal\"),\n\t\t)\n\t} else {\n\t\tp.logger.Info(\"plan to change partition leadership and replica placements on e2e topic has been prepared\",\n\t\t\tzap.String(\"topic\", state.TopicName),\n\t\t\tzap.Int(\"brokers\", len(state.Brokers)),\n\t\t\tzap.Int(\"cur_partitions\", len(state.Partitions)),\n\t\t\tzap.Int(\"final_partitions\", plan.FinalPartitionCount),\n\t\t\tzap.Int(\"reassignments\", len(plan.Reassignments)),\n\t\t\tzap.Int(\"creates\", len(plan.CreateAssignments)),\n\t\t\tzap.Int(\"total_changes\", totalChanges),\n\t\t)\n\t}\n\treturn plan, nil\n}\n\n// -----------------------------------------------------------------------------\n// Planning phases\n// -----------------------------------------------------------------------------\n\n// PlanBuilder holds a predictive view of partition -> replicas after applying\n// staged operations. We never mutate ClusterState.Partitions; instead, we write\n// new assignments into PlanBuilder.view and record high-level operations to\n// produce Kafka requests at the end (see Plan.ToRequests).\n//\n// This keeps the planning phases simple and side-effect free.\ntype PlanBuilder struct {\n\tstate   ClusterState\n\tdesired Desired\n\ttracker *LoadTracker\n\n\t// rebalancePartitions indicates whether reassignments will actually be executed.\n\t// When false, Phase 3 uses actual current leaders instead of predicted leaders\n\t// from the view, since reassignments won't be applied.\n\trebalancePartitions bool\n\n\t// view is our predictive map: partitionID -> replicas (preferred leader at idx 0)\n\tview map[int32][]int32\n\n\treassignments []Reassignment     // staged reassignments for existing partitions\n\tcreations     []CreateAssignment // staged creations of new partitions\n}\n\n// Reassignment captures a single partition’s new replica list.\n//\n// The order of Replicas matters: index 0 will become the preferred leader after\n// reassignment completes on the broker side.\ntype Reassignment struct {\n\tPartition int32\n\tReplicas  []int32\n}\n\n// CreateAssignment captures the replica list for a *new* partition that will be\n// appended to the topic during CreatePartitions.\n//\n// The order of Replicas matters: index 0 is the preferred leader for the new\n// partition.\ntype CreateAssignment struct {\n\tReplicas []int32\n}\n\n// Plan is the final, immutable result of planning. 
It can be turned into Kafka\n// requests via ToRequests.\n//\n// FinalPartitionCount is the topic’s partition count after applying creations.\n// (Reassignments do not change the count.)\ntype Plan struct {\n\tReassignments       []Reassignment\n\tCreateAssignments   []CreateAssignment\n\tFinalPartitionCount int\n}\n\n// NewPlanBuilder initializes a predictive view by cloning the current\n// partition->replicas map. We avoid accidental mutation by copying slices.\nfunc NewPlanBuilder(state ClusterState, desired Desired, tracker *LoadTracker, rebalancePartitions bool) *PlanBuilder {\n\tview := make(map[int32][]int32, len(state.Partitions))\n\tfor pid, p := range state.Partitions {\n\t\tview[pid] = append([]int32(nil), p.Replicas...)\n\t}\n\treturn &PlanBuilder{state: state, desired: desired, tracker: tracker, rebalancePartitions: rebalancePartitions, view: view}\n}\n\n// Build freezes the current staged operations into a Plan. We compute the final\n// partition count as current + number of creates.\nfunc (b *PlanBuilder) Build() *Plan {\n\treturn &Plan{\n\t\tReassignments:       b.reassignments,\n\t\tCreateAssignments:   b.creations,\n\t\tFinalPartitionCount: len(b.state.Partitions) + len(b.creations),\n\t}\n}\n\n// CommitReassignment records a reassignment and updates the predictive view.\nfunc (b *PlanBuilder) CommitReassignment(pid int32, reps []int32) {\n\tb.reassignments = append(b.reassignments, Reassignment{Partition: pid, Replicas: reps})\n\tb.view[pid] = reps\n}\n\n// CommitCreate records a new-partition assignment. The final partition count is\n// computed when building the Plan.\nfunc (b *PlanBuilder) CommitCreate(reps []int32) {\n\tb.creations = append(b.creations, CreateAssignment{Replicas: reps})\n}\n\n// fixReplicationAndRack enforces configured RF on each existing partition\n// (growing or shrinking as needed) and re-picks replicas when rack diversity can\n// be improved. 
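For instance, with a configured RF of 2, a partition whose two\n// replicas share one rack is re-picked when a second rack exists, and a\n// partition on [2 1 3] is shrunk to two replicas. 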
We keep the actual current leader (falling back to replicas[0] when it is\n// unknown) when shrinking.\nfunc fixReplicationAndRack(b *PlanBuilder, sel ReplicaSelector, rf int) {\n\t// Sort partition IDs for deterministic iteration\n\tpids := make([]int32, 0, len(b.view))\n\tfor pid := range b.view {\n\t\tpids = append(pids, pid)\n\t}\n\tsort.Slice(pids, func(i, j int) bool { return pids[i] < pids[j] })\n\n\tfor _, pid := range pids {\n\t\treplicas := b.view[pid]\n\t\tdesiredRF := rf\n\t\tnewReplicas := replicas\n\n\t\tswitch {\n\t\t// Grow: re-pick the full set based on the current actual leader.\n\t\tcase len(replicas) < desiredRF:\n\t\t\t// Use the actual current leader from the partition metadata\n\t\t\tcurrentPartition, exists := b.state.Partitions[pid]\n\t\t\tpreferredLeader := replicas[0] // fallback to preferred leader\n\t\t\tif exists && currentPartition.Leader != -1 {\n\t\t\t\tpreferredLeader = currentPartition.Leader\n\t\t\t}\n\t\t\tnewReplicas = sel.ChooseReplicas(preferredLeader, desiredRF)\n\n\t\t// Shrink: keep leader; then pick remaining replicas preferring\n\t\t// new racks, then lower load, then lower broker ID.\n\t\tcase len(replicas) > desiredRF:\n\t\t\tnewReplicas = shrinkPreservingLeader(b, pid, replicas, desiredRF)\n\n\t\t// Same RF: if rack diversity can be improved, re-pick.\n\t\tdefault:\n\t\t\tif violatesRackDiversity(replicas, b.state.NumRacks, b.state.Brokers) {\n\t\t\t\t// Use the actual current leader from the partition metadata\n\t\t\t\tcurrentPartition, exists := b.state.Partitions[pid]\n\t\t\t\tpreferredLeader := replicas[0] // fallback to preferred leader\n\t\t\t\tif exists && currentPartition.Leader != -1 {\n\t\t\t\t\tpreferredLeader = currentPartition.Leader\n\t\t\t\t}\n\t\t\t\tnewReplicas = sel.ChooseReplicas(preferredLeader, desiredRF)\n\t\t\t}\n\t\t}\n\n\t\tif !equalInt32s(newReplicas, replicas) {\n\t\t\tb.CommitReassignment(pid, newReplicas)\n\t\t}\n\t}\n}\n\n// shrinkPreservingLeader returns a replica set of size rf that keeps the\n// current leader and greedily prefers candidates that add a new rack;\n// among equals, chooses lower load, then lower broker ID.\nfunc shrinkPreservingLeader(b *PlanBuilder, pid int32, replicas []int32, rf int) []int32 {\n\t// Use the actual current leader from the partition metadata, not replicas[0]\n\tcurrentPartition, exists := b.state.Partitions[pid]\n\tleader := replicas[0] // fallback to preferred leader if no current leader found\n\tif exists && currentPartition.Leader != -1 {\n\t\tleader = currentPartition.Leader\n\t}\n\tkeep := []int32{leader}\n\n\tseen := map[string]struct{}{\n\t\tb.state.Brokers[leader].Rack: {},\n\t}\n\n\ttype cand struct {\n\t\tid   int32\n\t\track string\n\t\tload int\n\t}\n\n\t// Build the candidate pool from non-leader replicas. The actual leader may\n\t// sit anywhere in the list, so skip it explicitly to avoid adding it twice.\n\tpool := make([]cand, 0, len(replicas)-1)\n\tfor _, id := range replicas[1:] {\n\t\tif id == leader {\n\t\t\tcontinue\n\t\t}\n\t\tpool = append(pool, cand{\n\t\t\tid:   id,\n\t\t\track: b.state.Brokers[id].Rack,\n\t\t\tload: b.tracker.Load(id).Replicas,\n\t\t})\n\t}\n\n\t// Greedy selection with dynamic \"seen racks\".\n\tfor len(keep) < rf && len(pool) > 0 {\n\t\tbest := 0\n\t\tfor i := 1; i < len(pool); i++ {\n\t\t\ta, b2 := pool[i], pool[best]\n\t\t\t_, aSeen := seen[a.rack]\n\t\t\t_, bSeen := seen[b2.rack]\n\n\t\t\tswitch {\n\t\t\t// Prefer a candidate that adds a new rack.\n\t\t\tcase aSeen != bSeen:\n\t\t\t\tif !aSeen && bSeen {\n\t\t\t\t\tbest = i\n\t\t\t\t}\n\t\t\t// Then prefer lower load.\n\t\t\tcase a.load != b2.load:\n\t\t\t\tif a.load < b2.load {\n\t\t\t\t\tbest = i\n\t\t\t\t}\n\t\t\t// Then prefer lower 
broker ID (stable tie-breaker).\n\t\t\tcase a.id < b2.id:\n\t\t\t\tbest = i\n\t\t\t}\n\t\t}\n\n\t\tchosen := pool[best]\n\t\tkeep = append(keep, chosen.id)\n\t\tseen[chosen.rack] = struct{}{}\n\n\t\t// Remove chosen from pool (swap-delete).\n\t\tpool[best] = pool[len(pool)-1]\n\t\tpool = pool[:len(pool)-1]\n\t}\n\n\treturn keep\n}\n\n// ensureLeaderCoverage guarantees that each broker becomes preferred leader for\n// at least one partition. We try the cheapest options first:\n//  1. If the broker already hosts a replica of some partition where it is not\n//     leader, rotate it to index 0 (no RF change, minimal movement).\n//  2. Otherwise, replace a non-unique-rack replica in some donor partition and\n//     rotate the target broker to index 0.\n//  3. If neither is possible, create a new partition led by the target broker.\nfunc ensureLeaderCoverage(b *PlanBuilder, sel ReplicaSelector) {\n\t// Guard: if actual leaders already cover all brokers, skip preferred leader rebalancing\n\tactualLeaders := make(map[int32][]int32, len(b.state.BrokerIDs))\n\tfor _, id := range b.state.BrokerIDs {\n\t\tactualLeaders[id] = nil\n\t}\n\tfor pid, part := range b.state.Partitions {\n\t\tif part.Leader != -1 {\n\t\t\tactualLeaders[part.Leader] = append(actualLeaders[part.Leader], pid)\n\t\t}\n\t}\n\tif len(brokersMissingLeadership(b.state.BrokerIDs, actualLeaders)) == 0 {\n\t\treturn // Actual coverage is perfect - no need to rebalance preferred leaders\n\t}\n\n\t// Build \"leadersByBroker\": broker -> list of partition IDs it currently leads (preferred).\n\tleadersByBroker := indexLeaders(b.state.BrokerIDs, b.view)\n\n\t// Brokers that currently lead zero partitions (preferred).\n\t// However, if a broker already has actual leadership (even if not preferred),\n\t// we can skip it to minimize unnecessary reassignments.\n\tmissing := []int32{}\n\tfor _, broker := range brokersMissingLeadership(b.state.BrokerIDs, leadersByBroker) {\n\t\t// Skip if this broker already has actual leadership\n\t\tif len(actualLeaders[broker]) > 0 {\n\t\t\tcontinue\n\t\t}\n\t\tmissing = append(missing, broker)\n\t}\n\tif len(missing) == 0 {\n\t\treturn\n\t}\n\n\t// Local helpers that both perform the action and update leadersByBroker.\n\trotateIfReplica := func(target int32, donors []int32) bool {\n\t\tfor _, donor := range donors {\n\t\t\t// Collect candidate partitions where target is already a replica\n\t\t\tpids := append([]int32(nil), leadersByBroker[donor]...)\n\n\t\t\t// Sort with preference: partitions where the donor is the ACTUAL leader first.\n\t\t\t// This ensures we're actually freeing up leadership from the donor, rather than\n\t\t\t// rotating a partition where the donor is only the preferred leader.\n\t\t\t// Then by partition ID for determinism.\n\t\t\tsort.Slice(pids, func(i, j int) bool {\n\t\t\t\tpi, pj := pids[i], pids[j]\n\n\t\t\t\t// Prefer partitions where the donor is the actual leader\n\t\t\t\tiDonorIsActual := b.state.Partitions[pi].Leader == donor\n\t\t\t\tjDonorIsActual := b.state.Partitions[pj].Leader == donor\n\n\t\t\t\tif iDonorIsActual != jDonorIsActual {\n\t\t\t\t\treturn iDonorIsActual\n\t\t\t\t}\n\t\t\t\t// Then by partition ID for stability\n\t\t\t\treturn pi < pj\n\t\t\t})\n\n\t\t\tfor _, pid := range pids {\n\t\t\t\treps := b.view[pid]\n\t\t\t\tif !contains(reps, target) {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tnewReps := putFirst(reps, target) // make target the leader\n\t\t\t\tb.CommitReassignment(pid, newReps)\n\n\t\t\t\t// Update bookkeeping: pid moves from donor to 
target.\n\t\t\t\tleadersByBroker[donor] = remove(leadersByBroker[donor], pid)\n\t\t\t\tleadersByBroker[target] = append(leadersByBroker[target], pid)\n\t\t\t\treturn true\n\t\t\t}\n\t\t}\n\t\treturn false\n\t}\n\n\treplaceDuplicateAndRotate := func(target int32, donors []int32) bool {\n\t\tfor _, donor := range donors {\n\t\t\t// Sort partition IDs for deterministic iteration\n\t\t\tpids := append([]int32(nil), leadersByBroker[donor]...)\n\t\t\tsort.Slice(pids, func(i, j int) bool { return pids[i] < pids[j] })\n\t\t\tfor _, pid := range pids {\n\t\t\t\treps := b.view[pid]\n\t\t\t\tif contains(reps, target) {\n\t\t\t\t\tcontinue // covered by rotate path above\n\t\t\t\t}\n\t\t\t\tidx := victimIndex(reps, b.state.Brokers)\n\t\t\t\tif idx < 0 {\n\t\t\t\t\tcontinue // no safe replica to swap (unique racks already)\n\t\t\t\t}\n\t\t\t\tnewReps := append([]int32{}, reps...)\n\t\t\t\tnewReps[idx] = target\n\t\t\t\tnewReps = putFirst(newReps, target)\n\t\t\t\tb.CommitReassignment(pid, newReps)\n\n\t\t\t\tleadersByBroker[donor] = remove(leadersByBroker[donor], pid)\n\t\t\t\tleadersByBroker[target] = append(leadersByBroker[target], pid)\n\t\t\t\treturn true\n\t\t\t}\n\t\t}\n\t\treturn false\n\t}\n\n\tfor _, target := range missing {\n\t\t// Donors: brokers leading more partitions than their soft target,\n\t\t// sorted by largest surplus first (tie-breaker: smaller broker ID).\n\t\tdonors := donorBrokers(b.state.BrokerIDs, leadersByBroker, b.desired.TargetLeaders)\n\n\t\t// 1) Cheapest: rotate target to lead where it already is a replica.\n\t\tif rotateIfReplica(target, donors) {\n\t\t\tcontinue\n\t\t}\n\t\t// 2) Next-cheapest: replace a duplicate-rack replica, then rotate.\n\t\tif replaceDuplicateAndRotate(target, donors) {\n\t\t\tcontinue\n\t\t}\n\t\t// 3) Last resort: create a new partition led by target.\n\t\treps := sel.ChooseReplicas(target, b.desired.RF)\n\t\tb.CommitCreate(reps)\n\n\t\t// Track a synthetic partition ID so counts stay consistent within this loop.\n\t\tnewPID := int32(len(b.state.Partitions) + len(b.creations) - 1)\n\t\tleadersByBroker[target] = append(leadersByBroker[target], newPID)\n\t}\n}\n\n// ensurePartitionCount adds partitions until we reach DesiredPartitions.\n//\n// Leader selection strategy per new partition:\n//  1. Prefer a broker still below its soft target (TargetLeaders). Among those,\n//     pick the one closest to its target (smallest positive gap).\n//  2. 
If all brokers are at/above target, pick the broker that currently leads\n//     the fewest partitions (stable tie-breaker via leastLoadedLeader).\nfunc ensurePartitionCount(b *PlanBuilder, sel ReplicaSelector) {\n\tdesiredTotal := b.desired.DesiredPartitions\n\ttotal := len(b.state.Partitions) + len(b.creations)\n\tif total >= desiredTotal || len(b.state.BrokerIDs) == 0 {\n\t\treturn\n\t}\n\n\t// Count current leaders per broker.\n\tleaderCount := make(map[int32]int, len(b.state.BrokerIDs))\n\tif b.rebalancePartitions {\n\t\t// Use predictive view (reassignments will be applied)\n\t\tfor _, reps := range b.view {\n\t\t\tif len(reps) > 0 {\n\t\t\t\tleaderCount[reps[0]]++\n\t\t\t}\n\t\t}\n\t} else {\n\t\t// Use actual current leaders (reassignments won't be applied)\n\t\tfor _, p := range b.state.Partitions {\n\t\t\tif p.Leader != -1 {\n\t\t\t\tleaderCount[p.Leader]++\n\t\t\t}\n\t\t}\n\t}\n\t// Always include leaders from staged creates (Phase 2 fallback creates are always executed)\n\tfor _, ca := range b.creations {\n\t\tif len(ca.Replicas) > 0 {\n\t\t\tleaderCount[ca.Replicas[0]]++\n\t\t}\n\t}\n\t// Make sure every broker has an entry in leaderCount.\n\tfor _, id := range b.state.BrokerIDs {\n\t\tif _, ok := leaderCount[id]; !ok {\n\t\t\tleaderCount[id] = 0\n\t\t}\n\t}\n\n\tfor total < desiredTotal {\n\t\t// Pick a preferred leader for the new partition.\n\t\tpreferred := pickLeader(b.state.BrokerIDs, leaderCount, b.desired.TargetLeaders)\n\n\t\t// Materialize replicas and commit the create.\n\t\treps := sel.ChooseReplicas(preferred, b.desired.RF)\n\t\tb.CommitCreate(reps)\n\n\t\tleaderCount[preferred]++\n\t\ttotal++\n\t}\n}\n\n// pickLeader returns the broker to prefer as the new partition's leader.\n//\n// If any brokers are still below their soft target, it returns the one with the\n// smallest positive remaining gap (# of missing leaderships for partitions).\n// Otherwise, it returns the least-loaded leader.\nfunc pickLeader(brokerIDs []int32, leaderCount map[int32]int, target map[int32]int) int32 {\n\tvar (\n\t\tchosen  int32\n\t\tbestGap int // smallest positive gap seen so far\n\t\tfound   bool\n\t)\n\n\tfor _, id := range brokerIDs {\n\t\tgap := target[id] - leaderCount[id] // how many leaders this broker still “should” get\n\t\tif gap <= 0 {\n\t\t\tcontinue\n\t\t}\n\t\tif !found || gap < bestGap {\n\t\t\tchosen = id\n\t\t\tbestGap = gap\n\t\t\tfound = true\n\t\t}\n\t}\n\tif found {\n\t\treturn chosen\n\t}\n\t// Everyone at/above target: fall back to least leaders.\n\treturn leastLoadedLeader(leaderCount, brokerIDs)\n}\n\n// -----------------------------------------------------------------------------\n// State & Desired\n// -----------------------------------------------------------------------------\n\n// Broker is an immutable snapshot of a broker’s ID and rack label at plan time.\n// An empty rack value means the broker did not advertise one.\n//\n// We never mutate this during planning; it mirrors the Metadata response.\ntype Broker struct {\n\tID   int32\n\tRack string // empty string if unknown\n}\n\n// Partition is an immutable snapshot of a partition's current replica list.\n//\n// The order of Replicas matters: index 0 is the *preferred leader* for this\n// partition, i.e., where the controller will attempt to place leadership after\n// changes. 
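For example, Replicas=[3 1 2]\n// with Leader=1 means broker 3 is the preferred leader while broker 1 holds\n// leadership right now. 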
The Leader field contains the *actual current leader* as reported\n// by Kafka metadata, which may differ from the preferred leader (replicas[0]).\n//\n// We do not mutate these structs; instead we work with a predictive \"view\"\n// inside PlanBuilder (below).\ntype Partition struct {\n\tID       int32\n\tLeader   int32   // actual current leader from metadata\n\tReplicas []int32 // order matters; index 0 is preferred leader\n}\n\n// ClusterState is a convenience wrapper around the metadata we actually need in\n// this planner. It is deliberately small to keep the code readable.\n//\n// - TopicName: the probe topic name (for logging and request building)\n// - Brokers: map of broker id -> Broker\n// - BrokerIDs: sorted slice of broker IDs for deterministic iteration\n// - ByRack: rack -> broker IDs, to reason about rack diversity\n// - NumRacks: number of distinct racks (empty rack counts as one)\n// - Partitions: map of partition id -> Partition snapshot\n//\n// None of the members are mutated after construction.\ntype ClusterState struct {\n\tTopicName  string\n\tBrokers    map[int32]Broker\n\tBrokerIDs  []int32 // sorted\n\tByRack     map[string][]int32\n\tNumRacks   int\n\tPartitions map[int32]Partition\n}\n\n// BuildState converts Metadata response to ClusterState and copies slices so\n// planning cannot accidentally mutate the input.\nfunc BuildState(meta *kmsg.MetadataResponse) ClusterState {\n\tbrokers := make(map[int32]Broker, len(meta.Brokers))\n\tbyRack := make(map[string][]int32)\n\tvar ids []int32\n\tfor _, b := range meta.Brokers {\n\t\track := \"\"\n\t\tif b.Rack != nil {\n\t\t\track = *b.Rack\n\t\t}\n\t\tbrokers[b.NodeID] = Broker{ID: b.NodeID, Rack: rack}\n\t\tbyRack[rack] = append(byRack[rack], b.NodeID)\n\t\tids = append(ids, b.NodeID)\n\t}\n\tsort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })\n\n\tparts := make(map[int32]Partition)\n\tfor _, p := range meta.Topics[0].Partitions {\n\t\trep := append([]int32(nil), p.Replicas...)\n\t\tparts[p.Partition] = Partition{ID: p.Partition, Leader: p.Leader, Replicas: rep}\n\t}\n\n\treturn ClusterState{\n\t\tTopicName:  pointerStrToStr(meta.Topics[0].Topic),\n\t\tBrokers:    brokers,\n\t\tBrokerIDs:  ids,\n\t\tByRack:     byRack,\n\t\tNumRacks:   len(byRack),\n\t\tPartitions: parts,\n\t}\n}\n\ntype Desired struct {\n\tRF                int\n\tDesiredPartitions int\n\tTargetLeaders     map[int32]int // per broker (even split)\n}\n\n// ComputeDesired derives the minimal partition count and a soft target for\n// preferred leader distribution. We require at least one partition per broker so\n// everyone can lead, and we honor PartitionsPerBroker as an additional lower\n// bound.\nfunc ComputeDesired(state ClusterState, cfg EndToEndTopicConfig) Desired {\n\tperBroker := cfg.PartitionsPerBroker\n\tif perBroker < 1 {\n\t\tperBroker = 1\n\t}\n\tcur := len(state.Partitions)\n\tdesiredPartitions := max(cur, perBroker*len(state.BrokerIDs))\n\ttarget := evenSplit(desiredPartitions, state.BrokerIDs)\n\treturn Desired{RF: cfg.ReplicationFactor, DesiredPartitions: desiredPartitions, TargetLeaders: target}\n}\n\n// -----------------------------------------------------------------------------\n// Replica selection (rack-aware) & load tracking\n// -----------------------------------------------------------------------------\n\n// Load captures, per broker, how many times it appears as a replica and as a\n// preferred leader across the predictive view of the topic. 
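For example, a broker that\n// appears in three replica sets and leads one of them carries\n// Load{Replicas: 3, Leaders: 1}. 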
We use this to\n// bias selection towards less-loaded brokers to avoid hot spots.\n//\n// Note: this is a transient view local to a single planning run. Nothing here\n// is persisted and no attempt is made to perfectly balance replicas across the\n// cluster—only to avoid obviously uneven choices.\ntype Load struct {\n\tReplicas int\n\tLeaders  int\n}\n\n// LoadTracker is a tiny helper holding a per-broker Load map with convenience\n// methods for reading and incrementing counts while the plan is being\n// constructed.\n//\n// Why not compute loads on the fly? We do that initially (from current\n// assignments) and then update incrementally while choosing replicas for new or\n// changed partitions to keep subsequent choices informed by earlier ones.\n// Keeping it explicit in a struct makes the intent obvious and testing easier.\ntype LoadTracker struct {\n\tl map[int32]Load\n}\n\n// NewLoadTracker builds initial loads from the current assignments in the\n// metadata (before any staged changes). The caller updates loads as it makes\n// predictive choices so the next decision can see the latest picture.\nfunc NewLoadTracker(state ClusterState) *LoadTracker {\n\tl := make(map[int32]Load, len(state.BrokerIDs))\n\tfor _, id := range state.BrokerIDs {\n\t\tl[id] = Load{}\n\t}\n\tfor _, p := range state.Partitions {\n\t\tfor _, r := range p.Replicas {\n\t\t\tld := l[r]\n\t\t\tld.Replicas++\n\t\t\tl[r] = ld\n\t\t}\n\t\t// Count the actual current leader separately\n\t\tif p.Leader != -1 { // -1 indicates no leader (error state)\n\t\t\tld := l[p.Leader]\n\t\t\tld.Leaders++\n\t\t\tl[p.Leader] = ld\n\t\t}\n\t}\n\treturn &LoadTracker{l: l}\n}\n\n// AddReplica increments replica and (optionally) leader counts for a broker in\n// the predictive view. Call this after the planner decides to place a replica\n// (e.g., in ChooseReplicas).\nfunc (t *LoadTracker) AddReplica(id int32, leader bool) {\n\tld := t.l[id]\n\tld.Replicas++\n\tif leader {\n\t\tld.Leaders++\n\t}\n\tt.l[id] = ld\n}\n\n// Load returns the current transient load counters for a broker.\nfunc (t *LoadTracker) Load(id int32) Load { return t.l[id] }\n\n// ReplicaSelector abstracts the heuristic used to pick a concrete replica set\n// for a given (preferred) leader and RF. Keeping this as an interface makes it\n// trivial to swap strategy in tests if you’d like to assert specific behaviors.\n// The production strategy we use is RackAwareSelector.\ntype ReplicaSelector interface {\n\tChooseReplicas(preferredLeader int32, rf int) []int32\n}\n\n// RackAwareSelector is a simple greedy strategy that tries to:\n//  1. Always include the requested preferred leader at index 0.\n//  2. Maximize rack diversity by preferring brokers on new racks first.\n//  3. Among candidates on equally novel racks, prefer lower replica load.\n//  4. Use broker ID as a final tiebreaker for determinism.\n//\n// After selecting, we update the transient LoadTracker so subsequent decisions\n// are informed by this choice.\n//\n// This is intentionally not perfect or global-optimal—just a pragmatic heuristic\n// that produces good, stable results for the probe topic.\ntype RackAwareSelector struct {\n\tstate ClusterState\n\tloads *LoadTracker\n}\n\n// NewRackAwareSelector constructs the default selection strategy.\nfunc NewRackAwareSelector(state ClusterState, loads *LoadTracker) *RackAwareSelector {\n\treturn &RackAwareSelector{state: state, loads: loads}\n}\n\n// ChooseReplicas returns an ordered replica list of length rf where index 0 is\n// the preferred leader. 
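As an illustrative example: with brokers 1 and 2\n// on rack \"a\", broker 3 on rack \"b\", and equal loads, ChooseReplicas(1, 2)\n// yields [1 3], because broker 3 adds a new rack. 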
Candidates are scored by new rack first, then lower\n// load, then lower broker ID.\nfunc (s *RackAwareSelector) ChooseReplicas(preferredLeader int32, rf int) []int32 {\n\trf = min(rf, len(s.state.BrokerIDs))\n\tres := make([]int32, 0, rf)\n\tres = append(res, preferredLeader)\n\tseen := map[int32]struct{}{preferredLeader: {}}\n\tusedRack := map[string]struct{}{s.state.Brokers[preferredLeader].Rack: {}}\n\ttype cand struct {\n\t\tid   int32\n\t\track string\n\t\tload int\n\t}\n\tbuild := func() []cand {\n\t\tout := make([]cand, 0, len(s.state.BrokerIDs))\n\t\tfor _, id := range s.state.BrokerIDs {\n\t\t\tif _, ok := seen[id]; ok {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tout = append(out, cand{id: id, rack: s.state.Brokers[id].Rack, load: s.loads.Load(id).Replicas})\n\t\t}\n\t\tsort.Slice(out, func(i, j int) bool {\n\t\t\t_, iu := usedRack[out[i].rack]\n\t\t\t_, ju := usedRack[out[j].rack]\n\t\t\tif iu != ju {\n\t\t\t\treturn !iu && ju\n\t\t\t}\n\t\t\tif out[i].load != out[j].load {\n\t\t\t\treturn out[i].load < out[j].load\n\t\t\t}\n\t\t\treturn out[i].id < out[j].id\n\t\t})\n\t\treturn out\n\t}\n\tfor len(res) < rf {\n\t\tcands := build()\n\t\tif len(cands) == 0 {\n\t\t\tbreak\n\t\t}\n\t\tc := cands[0]\n\t\tres = append(res, c.id)\n\t\tseen[c.id] = struct{}{}\n\t\tusedRack[c.rack] = struct{}{}\n\t}\n\t// update transient loads\n\tfor i, id := range res {\n\t\ts.loads.AddReplica(id, i == 0)\n\t}\n\treturn res\n}\n\n// -----------------------------------------------------------------------------\n// Plan -> Kafka requests\n// -----------------------------------------------------------------------------\n\n// ToRequests converts a Plan to Kafka admin requests. Either result may be nil\n// if the plan contains no operations of that type.\n//\n// rebalancePartitions controls whether explicit replica assignments are included\n// in the CreatePartitions request. Set it to false for Redpanda Cloud, which\n// disallows explicit partition assignments via the Kafka API and returns\n// INVALID_REQUEST when they are present.\nfunc (p *Plan) ToRequests(topic string, rebalancePartitions bool) (*kmsg.AlterPartitionAssignmentsRequest, *kmsg.CreatePartitionsRequest) {\n\tvar alter *kmsg.AlterPartitionAssignmentsRequest\n\tvar create *kmsg.CreatePartitionsRequest\n\n\tif len(p.Reassignments) > 0 {\n\t\tr := kmsg.NewAlterPartitionAssignmentsRequest()\n\t\tt := kmsg.NewAlterPartitionAssignmentsRequestTopic()\n\t\tt.Topic = topic\n\t\tfor _, ra := range p.Reassignments {\n\t\t\tpr := kmsg.NewAlterPartitionAssignmentsRequestTopicPartition()\n\t\t\tpr.Partition = ra.Partition\n\t\t\tpr.Replicas = append([]int32(nil), ra.Replicas...)\n\t\t\tt.Partitions = append(t.Partitions, pr)\n\t\t}\n\t\tr.Topics = []kmsg.AlterPartitionAssignmentsRequestTopic{t}\n\t\talter = &r\n\t}\n\n\tif len(p.CreateAssignments) > 0 {\n\t\tr := kmsg.NewCreatePartitionsRequest()\n\t\tt := kmsg.NewCreatePartitionsRequestTopic()\n\t\tt.Topic = topic\n\t\tt.Count = int32(p.FinalPartitionCount)\n\t\t// Redpanda Cloud disallows explicit partition assignments via the Kafka API\n\t\t// (returning INVALID_REQUEST), the same restriction that applies to\n\t\t// AlterPartitionAssignments. 
Omit the Assignment list and let the broker\n\t\t// auto-place new partitions when rebalancing is disabled.\n\t\tif rebalancePartitions {\n\t\t\tfor _, ca := range p.CreateAssignments {\n\t\t\t\tta := kmsg.NewCreatePartitionsRequestTopicAssignment()\n\t\t\t\tta.Replicas = append([]int32(nil), ca.Replicas...)\n\t\t\t\tt.Assignment = append(t.Assignment, ta)\n\t\t\t}\n\t\t}\n\t\tr.Topics = []kmsg.CreatePartitionsRequestTopic{t}\n\t\tcreate = &r\n\t}\n\treturn alter, create\n}\n\n// -----------------------------------------------------------------------------\n// Utils\n// -----------------------------------------------------------------------------\n\n// indexLeaders builds broker -> list of partition IDs it currently leads (from view).\nfunc indexLeaders(brokerIDs []int32, view map[int32][]int32) map[int32][]int32 {\n\tm := make(map[int32][]int32, len(brokerIDs))\n\tfor _, id := range brokerIDs {\n\t\tm[id] = nil\n\t}\n\tfor pid, reps := range view {\n\t\tif len(reps) > 0 {\n\t\t\tm[reps[0]] = append(m[reps[0]], pid)\n\t\t}\n\t}\n\treturn m\n}\n\n// brokersMissingLeadership returns brokers that lead zero partitions.\nfunc brokersMissingLeadership(brokerIDs []int32, leadersByBroker map[int32][]int32) []int32 {\n\tvar out []int32\n\tfor _, id := range brokerIDs {\n\t\tif len(leadersByBroker[id]) == 0 {\n\t\t\tout = append(out, id)\n\t\t}\n\t}\n\treturn out\n}\n\n// donorBrokers returns brokers that currently lead more than their soft target,\n// sorted by largest surplus first; ties broken by broker ID ascending.\nfunc donorBrokers(brokerIDs []int32, leadersByBroker map[int32][]int32, target map[int32]int) []int32 {\n\tvar donors []int32\n\tfor _, id := range brokerIDs {\n\t\tif len(leadersByBroker[id]) > target[id] {\n\t\t\tdonors = append(donors, id)\n\t\t}\n\t}\n\tsort.Slice(donors, func(i, j int) bool {\n\t\tsurplusI := len(leadersByBroker[donors[i]]) - target[donors[i]]\n\t\tsurplusJ := len(leadersByBroker[donors[j]]) - target[donors[j]]\n\t\tif surplusI != surplusJ {\n\t\t\treturn surplusI > surplusJ\n\t\t}\n\t\treturn donors[i] < donors[j]\n\t})\n\treturn donors\n}\n\n// violatesRackDiversity returns true if a partition’s replicas do not use as\n// many unique racks as they could (bounded by min(len(reps), numRacks)).\nfunc violatesRackDiversity(reps []int32, numRacks int, brokers map[int32]Broker) bool {\n\tif len(reps) <= 1 {\n\t\treturn false\n\t}\n\tseen := map[string]struct{}{}\n\tfor _, r := range reps {\n\t\tseen[brokers[r].Rack] = struct{}{}\n\t}\n\tmaxUnique := min(len(reps), numRacks)\n\treturn len(seen) < maxUnique\n}\n\n// victimIndex returns the index of a replica that sits on a rack appearing more\n// than once within the replica set (i.e., a duplicate-rack candidate). This is\n// used when we need to swap in a new broker to keep/restore diversity. If none\n// exists, we fall back to the last replica (a stable, simple choice that avoids\n// touching the leader at index 0).\nfunc victimIndex(reps []int32, brokers map[int32]Broker) int {\n\trc := map[string]int{}\n\tfor _, r := range reps {\n\t\trc[brokers[r].Rack]++\n\t}\n\tfor i, r := range reps {\n\t\tif rc[brokers[r].Rack] > 1 {\n\t\t\treturn i\n\t\t}\n\t}\n\treturn len(reps) - 1 // fallback: last (never the leader)\n}\n\n// remove filters v out of the slice in place, preserving the relative order of\n// the remaining elements. 
Used for maintaining\n// the leaders map in ensureLeaderCoverage.\nfunc remove(xs []int32, v int32) []int32 {\n\tout := xs[:0]\n\tfor _, x := range xs {\n\t\tif x != v {\n\t\t\tout = append(out, x)\n\t\t}\n\t}\n\treturn out\n}\n\n// putFirst moves id to index 0 while preserving the relative order of the\n// remaining elements. This models changing the preferred leader.\nfunc putFirst(reps []int32, id int32) []int32 {\n\tout := make([]int32, 0, len(reps))\n\tout = append(out, id)\n\tfor _, r := range reps {\n\t\tif r != id {\n\t\t\tout = append(out, r)\n\t\t}\n\t}\n\treturn out\n}\n\n// contains reports whether v is present in xs.\nfunc contains(xs []int32, v int32) bool {\n\tfor _, x := range xs {\n\t\tif x == v {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\n// evenSplit returns a soft target leader count per broker such that totals sum\n// to n. The remainder (+1) is assigned to the lowest broker IDs for stability.\nfunc evenSplit(n int, ids []int32) map[int32]int {\n\tm := make(map[int32]int, len(ids))\n\tif len(ids) == 0 {\n\t\treturn m\n\t}\n\tbase := n / len(ids)\n\trem := n % len(ids)\n\tfor i, id := range ids {\n\t\tm[id] = base\n\t\tif i < rem {\n\t\t\tm[id]++\n\t\t}\n\t}\n\treturn m\n}\n\n// leastLoadedLeader returns the broker with the smallest number of preferred\n// leader assignments (ties broken by smaller broker ID).\nfunc leastLoadedLeader(leaders map[int32]int, ids []int32) int32 {\n\tbest, bestCnt := ids[0], 1<<30\n\tfor _, id := range ids {\n\t\tif leaders[id] < bestCnt || (leaders[id] == bestCnt && id < best) {\n\t\t\tbestCnt, best = leaders[id], id\n\t\t}\n\t}\n\treturn best\n}\n\nfunc equalInt32s(a, b []int32) bool {\n\tif len(a) != len(b) {\n\t\treturn false\n\t}\n\tfor i := range a {\n\t\tif a[i] != b[i] {\n\t\t\treturn false\n\t\t}\n\t}\n\treturn true\n}\n"
  },
  {
    "path": "e2e/partition_planner_test.go",
    "content": "package e2e\n\nimport (\n\t\"sort\"\n\t\"testing\"\n\n\t\"github.com/stretchr/testify/assert\"\n\t\"github.com/stretchr/testify/require\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\n// buildMeta constructs a MetadataResponse for tests.\n// brokers: brokerID -> rack label (\"\" means no rack/unknown).\n// partitions: list of replica lists where index 0 is the preferred leader.\nfunc buildMeta(topic string, brokers map[int32]string, partitions [][]int32) *kmsg.MetadataResponse {\n\t// Brokers\n\tbs := make([]kmsg.MetadataResponseBroker, 0, len(brokers))\n\tids := make([]int32, 0, len(brokers))\n\tfor id := range brokers {\n\t\tids = append(ids, id)\n\t}\n\tsort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })\n\tfor _, id := range ids {\n\t\track := brokers[id] // copy for address stability\n\t\tbs = append(bs, kmsg.MetadataResponseBroker{\n\t\t\tNodeID: id,\n\t\t\tRack:   &rack, // empty string is allowed and treated as one \"rack\" bucket by planner\n\t\t})\n\t}\n\n\t// Partitions\n\tps := make([]kmsg.MetadataResponseTopicPartition, 0, len(partitions))\n\tfor i, reps := range partitions {\n\t\tcp := append([]int32(nil), reps...)\n\t\tleader := int32(-1)\n\t\tif len(reps) > 0 {\n\t\t\tleader = reps[0] // In tests, assume preferred leader is actual leader\n\t\t}\n\t\tps = append(ps, kmsg.MetadataResponseTopicPartition{\n\t\t\tPartition: int32(i),\n\t\t\tLeader:    leader,\n\t\t\tReplicas:  cp,\n\t\t})\n\t}\n\n\treturn &kmsg.MetadataResponse{\n\t\tBrokers: bs,\n\t\tTopics: []kmsg.MetadataResponseTopic{\n\t\t\t{\n\t\t\t\tTopic:      kmsg.StringPtr(topic),\n\t\t\t\tPartitions: ps,\n\t\t\t},\n\t\t},\n\t}\n}\n\n// applyPlan returns the final predictive assignments after applying the plan\n// to the given metadata snapshot (without mutating meta).\nfunc applyPlan(meta *kmsg.MetadataResponse, plan *Plan) map[int32][]int32 {\n\tfinal := map[int32][]int32{}\n\tfor _, p := range meta.Topics[0].Partitions {\n\t\tfinal[p.Partition] = append([]int32(nil), p.Replicas...)\n\t}\n\tfor _, ra := range plan.Reassignments {\n\t\tfinal[ra.Partition] = append([]int32(nil), ra.Replicas...)\n\t}\n\tnextID := int32(len(meta.Topics[0].Partitions))\n\tfor _, ca := range plan.CreateAssignments {\n\t\tfinal[nextID] = append([]int32(nil), ca.Replicas...)\n\t\tnextID++\n\t}\n\treturn final\n}\n\nfunc countLeaders(assigns map[int32][]int32) map[int32]int {\n\tm := map[int32]int{}\n\tfor _, reps := range assigns {\n\t\tif len(reps) > 0 {\n\t\t\tm[reps[0]]++\n\t\t}\n\t}\n\treturn m\n}\n\nfunc assertNoDuplicates(t *testing.T, reps []int32) {\n\tt.Helper()\n\tseen := map[int32]struct{}{}\n\tfor _, r := range reps {\n\t\t_, dup := seen[r]\n\t\tassert.Falsef(t, dup, \"duplicate broker in replica set: %v\", reps)\n\t\tseen[r] = struct{}{}\n\t}\n}\n\nfunc max(a, b int) int {\n\tif a > b {\n\t\treturn a\n\t}\n\treturn b\n}\n\n// --- tests -----------------------------------------------------------------\n\nfunc TestPartitionPlanner_Plan(t *testing.T) {\n\ttype tc struct {\n\t\tname       string\n\t\tbrokers    map[int32]string    // brokerID -> rack (\"\" for no rack)\n\t\tpartitions [][]int32           // ordered replicas (index 0 = preferred leader)\n\t\tcfg        EndToEndTopicConfig // uses ReplicationFactor & PartitionsPerBroker\n\t\tcheck      func(t *testing.T, meta *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32)\n\t}\n\n\ttests := []tc{\n\t\t{\n\t\t\tname:       \"single broker creates one partition\",\n\t\t\tbrokers:    map[int32]string{1: 
\"\"},\n\t\t\tpartitions: nil, // empty topic\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   1,\n\t\t\t\tPartitionsPerBroker: 1,\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\trequire.NotNil(t, plan)\n\t\t\t\tassert.Equal(t, 1, len(plan.CreateAssignments), \"should create exactly one partition\")\n\t\t\t\tassert.Equal(t, 1, plan.FinalPartitionCount)\n\n\t\t\t\tfor pid, reps := range final {\n\t\t\t\t\tassert.Lenf(t, reps, 1, \"pid %d must have RF=1\", pid)\n\t\t\t\t}\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.Equal(t, 1, leaders[1], \"broker 1 should lead one partition\")\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"three brokers, no racks, RF grows to 3; ensure coverage and count\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"\", 2: \"\", 3: \"\",\n\t\t\t},\n\t\t\t// start with 2 partitions at RF=2 both led by broker with id 1 (skewed)\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t\t{1, 2},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   3,\n\t\t\t\tPartitionsPerBroker: 1,\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\tassert.Equal(t, 3, plan.FinalPartitionCount, \"desired should be max(cur=2, ppb*brokers=3)=3\")\n\n\t\t\t\tfor pid, reps := range final {\n\t\t\t\t\tassert.Lenf(t, reps, 3, \"pid %d must have RF=3\", pid)\n\t\t\t\t\tassertNoDuplicates(t, reps)\n\t\t\t\t}\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[1], 1)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[2], 1)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[3], 1)\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"rack diversity improves on same-RF partition (2 racks, RF=2)\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"a\", 2: \"a\", 3: \"b\",\n\t\t\t},\n\t\t\t// single partition with both replicas on rack \"a\" -> re-pick to include \"b\"\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 1, // desired total becomes 3, but p0 should be improved first\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, meta *kmsg.MetadataResponse, _ *Plan, final map[int32][]int32) {\n\t\t\t\treps := final[0]\n\t\t\t\trequire.Len(t, reps, 2)\n\t\t\t\tassertNoDuplicates(t, reps)\n\n\t\t\t\tstate := BuildState(meta)\n\t\t\t\tassert.False(t, violatesRackDiversity(reps, state.NumRacks, state.Brokers), \"p0 should span both racks a/b\")\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"shrink RF preserves current leader\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"\", 2: \"\", 3: \"\",\n\t\t\t},\n\t\t\t// RF=3 currently, leader is 2; configured RF=2 -> leader must remain 2\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{2, 1, 3},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 1,\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, _ *Plan, final map[int32][]int32) {\n\t\t\t\treps := final[0]\n\t\t\t\trequire.Len(t, reps, 2)\n\t\t\t\tassert.Equal(t, int32(2), reps[0], \"leader 2 should be preserved after shrink\")\n\t\t\t\tassertNoDuplicates(t, reps)\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"rotate-if-replica covers missing brokers without extra swaps\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"\", 2: \"\", 3: \"\",\n\t\t\t},\n\t\t\t// 2 partitions, both led by 1; 2 and 3 are replicas only in separate partitions.\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t\t{1, 
3},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 1, // desired total 3 -> one create expected\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\tassert.Equal(t, 1, len(plan.CreateAssignments), \"one create due to partition target\")\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[1], 1)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[2], 1)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[3], 1)\n\t\t\t\tfor pid, reps := range final {\n\t\t\t\t\tassert.Lenf(t, reps, 2, \"pid %d must have RF=2\", pid)\n\t\t\t\t\tassertNoDuplicates(t, reps)\n\t\t\t\t}\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"replace-duplicate-and-rotate when target broker is not a replica anywhere\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"a\", 2: \"a\", 3: \"b\",\n\t\t\t},\n\t\t\t// 2 partitions both {1,2}; broker 3 is nowhere; donors exist (1 leads 2),\n\t\t\t// and duplicates exist (rack \"a\" twice) so we can safely swap in 3 and rotate.\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t\t{1, 2},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 1, // desired total 3; coverage for 3 should be via swap+rotate\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, _ *Plan, final map[int32][]int32) {\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[3], 1, \"broker 3 should lead at least one partition\")\n\t\t\t\tfor pid, reps := range final {\n\t\t\t\t\tassert.Lenf(t, reps, 2, \"pid %d RF=2\", pid)\n\t\t\t\t\tassertNoDuplicates(t, reps)\n\t\t\t\t}\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"even split leaders when scaling partitions (ppb=2)\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"\", 2: \"\", 3: \"\",\n\t\t\t},\n\t\t\t// Start with 3 partitions, each broker already leads one\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t\t{2, 1},\n\t\t\t\t{3, 1},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 2, // desired = 2 * 3 = 6\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\tassert.Equal(t, 6, plan.FinalPartitionCount)\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.Equal(t, 2, leaders[1])\n\t\t\t\tassert.Equal(t, 2, leaders[2])\n\t\t\t\tassert.Equal(t, 2, leaders[3])\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"one defined rack for all brokers -> no diversity churn; no ops needed\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"a\", 2: \"a\", 3: \"a\",\n\t\t\t},\n\t\t\t// Already meets RF and coverage and desired count == 3\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t\t{2, 3},\n\t\t\t\t{3, 1},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 1,\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, meta *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\tassert.Equal(t, 0, len(plan.Reassignments), \"no reassignments expected\")\n\t\t\t\tassert.Equal(t, 0, len(plan.CreateAssignments), \"no creates expected\")\n\t\t\t\tstate := BuildState(meta)\n\t\t\t\t// Diversity can't be improved (NumRacks=1)\n\t\t\t\tfor pid, reps := range final {\n\t\t\t\t\tassert.Falsef(t, violatesRackDiversity(reps, state.NumRacks, state.Brokers), \"pid %d should not violate with NumRacks=1\", pid)\n\t\t\t\t}\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"broker present but leading nothing 
(restarting?) gets coverage (via swap or create)\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"a\", 2: \"b\", 3: \"c\",\n\t\t\t},\n\t\t\t// Two partitions led by 1 and 2; broker 3 has no replicas/leadership.\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2},\n\t\t\t\t{2, 1},\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 1, // desired = max(2, 3)=3 -> at least one create or a swap+rotate + create for count\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\tassert.Equal(t, 3, plan.FinalPartitionCount)\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[1], 1)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[2], 1)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[3], 1)\n\t\t\t\tfor pid, reps := range final {\n\t\t\t\t\tassert.Lenf(t, reps, 2, \"pid %d RF=2\", pid)\n\t\t\t\t\tassertNoDuplicates(t, reps)\n\t\t\t\t}\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"phase 3 accounts for leaders created in phase 2 - no over-assignment\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"a\", 2: \"b\", 3: \"c\", 4: \"d\",\n\t\t\t},\n\t\t\t// Start with 2 partitions, both led by broker 1\n\t\t\t// Brokers 2,3,4 have no leadership -> phase 2 will create partitions for them\n\t\t\t// Then phase 3 should NOT over-assign broker 1 when creating additional partitions\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2}, // broker 1 leads\n\t\t\t\t{1, 3}, // broker 1 leads (over-represented)\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   2,\n\t\t\t\tPartitionsPerBroker: 2, // 4*2=8 total desired, currently have 2, so need 6 more\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\t// Should create 6 new partitions (3 for coverage in phase 2, 3 more for count in phase 3)\n\t\t\t\tassert.Equal(t, 8, plan.FinalPartitionCount, \"should reach desired partition count of 8\")\n\t\t\t\tassert.Equal(t, 6, len(plan.CreateAssignments), \"should create 6 new partitions\")\n\n\t\t\t\tleaders := countLeaders(final)\n\n\t\t\t\t// Each broker should lead exactly 2 partitions (8 total / 4 brokers = 2 each)\n\t\t\t\t// This test will fail if phase 3 doesn't account for leaders created in phase 2\n\t\t\t\t// because it will see broker 1 as leading 2 partitions and think it needs 0 more,\n\t\t\t\t// while it actually leads 2 from existing + potentially more from phase 2\n\t\t\t\tfor brokerID := int32(1); brokerID <= 4; brokerID++ {\n\t\t\t\t\tassert.Equal(t, 2, leaders[brokerID],\n\t\t\t\t\t\t\"broker %d should lead exactly 2 partitions (even distribution), but leads %d\",\n\t\t\t\t\t\tbrokerID, leaders[brokerID])\n\t\t\t\t}\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tname: \"uses actual leader not preferred leader when they differ\",\n\t\t\tbrokers: map[int32]string{\n\t\t\t\t1: \"a\", 2: \"b\", 3: \"c\",\n\t\t\t},\n\t\t\t// This case simulates when the actual leader differs from preferred leader (replicas[0])\n\t\t\t// We'll manually construct metadata where leader != replicas[0]\n\t\t\tpartitions: [][]int32{\n\t\t\t\t{1, 2, 3}, // preferred leader is 1, but we'll set actual leader to 2 in buildMetaWithLeader\n\t\t\t\t{2, 3, 1}, // preferred leader is 2, but we'll set actual leader to 3 in buildMetaWithLeader\n\t\t\t},\n\t\t\tcfg: EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:      3,\n\t\t\t\tPartitionsPerBroker:    1, // 3*1=3 total desired, have 2, need 1 more\n\t\t\t\tRebalancePartitions:    
true,\n\t\t\t},\n\t\t\tcheck: func(t *testing.T, meta *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) {\n\t\t\t\t// With our manually set leaders (2, 3), broker 1 has no leadership\n\t\t\t\t// The planner should recognize this and either rotate leadership to broker 1\n\t\t\t\t// or create a new partition led by broker 1\n\t\t\t\tleaders := countLeaders(final)\n\t\t\t\tassert.GreaterOrEqual(t, leaders[1], 1, \"broker 1 should lead at least one partition\")\n\t\t\t\tassert.GreaterOrEqual(t, leaders[2], 1, \"broker 2 should lead at least one partition\")\n\t\t\t\tassert.GreaterOrEqual(t, leaders[3], 1, \"broker 3 should lead at least one partition\")\n\t\t\t},\n\t\t},\n\t}\n\n\tfor _, tt := range tests {\n\t\tt.Run(tt.name, func(t *testing.T) {\n\t\t\tvar meta *kmsg.MetadataResponse\n\t\t\tif tt.name == \"uses actual leader not preferred leader when they differ\" {\n\t\t\t\t// Special case: manually set leaders to differ from preferred leaders\n\t\t\t\tmeta = buildMeta(\"probe\", tt.brokers, tt.partitions)\n\t\t\t\t// Override the leaders: partition 0 actual leader = 2, partition 1 actual leader = 3\n\t\t\t\tmeta.Topics[0].Partitions[0].Leader = 2 // preferred is 1, actual is 2\n\t\t\t\tmeta.Topics[0].Partitions[1].Leader = 3 // preferred is 2, actual is 3\n\t\t\t} else {\n\t\t\t\tmeta = buildMeta(\"probe\", tt.brokers, tt.partitions)\n\t\t\t}\n\n\t\t\tplanner := NewPartitionPlanner(tt.cfg, zap.NewNop())\n\t\t\tplan, err := planner.Plan(meta)\n\t\t\trequire.NoError(t, err, \"Plan() should not error\")\n\t\t\trequire.NotNil(t, plan, \"Plan() returned nil plan\")\n\n\t\t\t// Sanity: final >= max(current, ppb*brokers)\n\t\t\texpectedMin := max(len(meta.Topics[0].Partitions), tt.cfg.PartitionsPerBroker*len(tt.brokers))\n\t\t\tassert.GreaterOrEqual(t, plan.FinalPartitionCount, expectedMin, \"final partition count must meet lower bound\")\n\n\t\t\t// Apply and enforce universal invariants.\n\t\t\tfinal := applyPlan(meta, plan)\n\t\t\tfor pid, reps := range final {\n\t\t\t\tassert.Lenf(t, reps, tt.cfg.ReplicationFactor, \"pid %d RF mismatch\", pid)\n\t\t\t\tassertNoDuplicates(t, reps)\n\t\t\t}\n\n\t\t\t// Scenario-specific checks.\n\t\t\ttt.check(t, meta, plan, final)\n\t\t})\n\t}\n}\n\nfunc TestPartitionPlanner_Plan_Deterministic(t *testing.T) {\n\t// Test that the same input produces identical plans across multiple runs\n\tbrokers := map[int32]string{\n\t\t10: \"rack1\", 20: \"rack2\", 30: \"rack3\", 40: \"rack1\", 50: \"rack2\", 60: \"rack3\",\n\t}\n\t// Many partitions with suboptimal RF to force fixReplicationAndRack to iterate over map\n\tpartitions := [][]int32{\n\t\t{10}, {20}, {30}, {40}, {50}, {60}, // RF=1, needs growth to 3\n\t\t{10, 20}, {20, 30}, {30, 40}, {40, 50}, // RF=2, needs growth to 3\n\t\t{50, 60, 10}, {60, 10, 20}, {10, 30, 40}, // RF=3, may need rack fixes\n\t}\n\tcfg := EndToEndTopicConfig{\n\t\tReplicationFactor:   3,\n\t\tPartitionsPerBroker: 3, // 6*3=18 total desired, have 13, need 5 more\n\t}\n\n\tmeta := buildMeta(\"probe\", brokers, partitions)\n\tvar plans []*Plan\n\n\t// Run the same plan many times to increase chance of hitting different map iteration orders\n\tfor i := 0; i < 10; i++ {\n\t\tplanner := NewPartitionPlanner(cfg, zap.NewNop())\n\t\tplan, err := planner.Plan(meta)\n\t\trequire.NoError(t, err, \"Plan() should not error on run %d\", i)\n\t\trequire.NotNil(t, plan, \"Plan() returned nil plan on run %d\", i)\n\t\tplans = append(plans, plan)\n\t}\n\n\t// All plans should be identical\n\tfirstPlan := plans[0]\n\tfor i := 1; i < len(plans); i++ 
{\n\t\tassert.Equal(t, len(firstPlan.Reassignments), len(plans[i].Reassignments),\n\t\t\t\"run %d: reassignment count should be identical\", i)\n\t\tassert.Equal(t, len(firstPlan.CreateAssignments), len(plans[i].CreateAssignments),\n\t\t\t\"run %d: create count should be identical\", i)\n\t\tassert.Equal(t, firstPlan.FinalPartitionCount, plans[i].FinalPartitionCount,\n\t\t\t\"run %d: final partition count should be identical\", i)\n\n\t\t// Build maps for comparison\n\t\treassign1 := make(map[int32][]int32)\n\t\tfor _, r := range firstPlan.Reassignments {\n\t\t\treassign1[r.Partition] = r.Replicas\n\t\t}\n\t\treassign2 := make(map[int32][]int32)\n\t\tfor _, r := range plans[i].Reassignments {\n\t\t\treassign2[r.Partition] = r.Replicas\n\t\t}\n\t\tassert.Equal(t, reassign1, reassign2, \"run %d: reassignments should be identical\", i)\n\n\t\t// Create assignments order matters for determinism\n\t\tassert.Equal(t, firstPlan.CreateAssignments, plans[i].CreateAssignments,\n\t\t\t\"run %d: create assignments should be identical\", i)\n\t}\n}\n\n// TestActualLeaderCoverageSkipsPreferredRebalancing tests the fix for the bug where\n// ensureLeaderCoverage would trigger unnecessary reassignments when actual leader\n// coverage was perfect but preferred leader coverage was unbalanced.\nfunc TestActualLeaderCoverageSkipsPreferredRebalancing(t *testing.T) {\n\t// Simulate the exact scenario from the bug report:\n\t// - All brokers in same rack (no rack diversity benefit possible)\n\t// - Actual leaders perfectly distributed: broker 0→p1, broker 1→p0, broker 2→p2\n\t// - But preferred leaders (replicas[0]) unbalanced: broker 1→p0&p2, broker 0→p1, broker 2→none\n\tbrokers := map[int32]string{\n\t\t0: \"europe-west1-b\", 1: \"europe-west1-b\", 2: \"europe-west1-b\",\n\t}\n\n\t// Build metadata with specific replica assignments matching the bug report\n\tmeta := buildMeta(\"probe\", brokers, [][]int32{\n\t\t{1, 2, 0}, // partition 0: preferred leader = 1\n\t\t{0, 1, 2}, // partition 1: preferred leader = 0\n\t\t{1, 2, 0}, // partition 2: preferred leader = 1\n\t})\n\n\t// Override actual leaders to match the bug report scenario\n\tmeta.Topics[0].Partitions[0].Leader = 1 // p0: preferred=1, actual=1 (same)\n\tmeta.Topics[0].Partitions[1].Leader = 0 // p1: preferred=0, actual=0 (same)\n\tmeta.Topics[0].Partitions[2].Leader = 2 // p2: preferred=1, actual=2 (DIFFERENT!)\n\n\tcfg := EndToEndTopicConfig{\n\t\tReplicationFactor:   3,\n\t\tPartitionsPerBroker: 1, // 3*1=3 total desired, have 3, perfect\n\t}\n\n\tplanner := NewPartitionPlanner(cfg, zap.NewNop())\n\tplan, err := planner.Plan(meta)\n\trequire.NoError(t, err, \"Plan() should not error\")\n\trequire.NotNil(t, plan, \"Plan() returned nil plan\")\n\n\t// This is the key assertion: should have ZERO reassignments because:\n\t// 1. All brokers in same rack → no rack diversity violations possible\n\t// 2. Actual leader coverage already perfect (each broker leads exactly 1 partition)\n\t// 3. 
RF and partition count already correct\n\tassert.Equal(t, 0, len(plan.Reassignments), \"should have no reassignments when actual coverage is perfect\")\n\tassert.Equal(t, 0, len(plan.CreateAssignments), \"should have no creates when partition count is perfect\")\n\tassert.Equal(t, 3, plan.FinalPartitionCount, \"should maintain existing partition count\")\n\n\t// Verify the fix: no changes to replica assignments\n\tfinal := applyPlan(meta, plan)\n\tassert.Equal(t, []int32{1, 2, 0}, final[0], \"partition 0 replicas should be unchanged\")\n\tassert.Equal(t, []int32{0, 1, 2}, final[1], \"partition 1 replicas should be unchanged\")\n\tassert.Equal(t, []int32{1, 2, 0}, final[2], \"partition 2 replicas should be unchanged\")\n\n\t// Verify all partitions still have correct RF and no duplicates\n\tfor pid, reps := range final {\n\t\tassert.Lenf(t, reps, 3, \"pid %d must have RF=3\", pid)\n\t\tassertNoDuplicates(t, reps)\n\t}\n}\n\n// TestPlan_ToRequests_RebalancePartitions verifies that ToRequests omits explicit\n// replica assignments from the CreatePartitions request when RebalancePartitions\n// is false, but still sets the correct total Count.\nfunc TestPlan_ToRequests_RebalancePartitions(t *testing.T) {\n\tbrokers := map[int32]string{1: \"a\", 2: \"b\", 3: \"c\", 4: \"d\", 5: \"e\", 6: \"f\"}\n\t// Topic currently has 3 partitions; 6 brokers → planner will want 6 total.\n\tpartitions := [][]int32{\n\t\t{1, 2, 3},\n\t\t{2, 3, 4},\n\t\t{3, 4, 5},\n\t}\n\tmeta := buildMeta(\"probe\", brokers, partitions)\n\n\tt.Run(\"rebalancePartitions=true includes explicit assignments\", func(t *testing.T) {\n\t\tcfg := EndToEndTopicConfig{\n\t\t\tReplicationFactor:   3,\n\t\t\tPartitionsPerBroker: 1,\n\t\t\tRebalancePartitions: true,\n\t\t}\n\t\tplan, err := NewPartitionPlanner(cfg, zap.NewNop()).Plan(meta)\n\t\trequire.NoError(t, err)\n\t\t_, createReq := plan.ToRequests(\"probe\", true)\n\t\trequire.NotNil(t, createReq, \"should have a CreatePartitions request\")\n\n\t\ttopic := createReq.Topics[0]\n\t\tassert.Equal(t, int32(plan.FinalPartitionCount), topic.Count)\n\t\tassert.NotEmpty(t, topic.Assignment, \"assignments must be present when rebalancePartitions=true\")\n\t\tassert.Equal(t, len(plan.CreateAssignments), len(topic.Assignment),\n\t\t\t\"one assignment entry per new partition\")\n\t})\n\n\tt.Run(\"rebalancePartitions=false omits assignments\", func(t *testing.T) {\n\t\tcfg := EndToEndTopicConfig{\n\t\t\tReplicationFactor:   3,\n\t\t\tPartitionsPerBroker: 1,\n\t\t\tRebalancePartitions: false,\n\t\t}\n\t\tplan, err := NewPartitionPlanner(cfg, zap.NewNop()).Plan(meta)\n\t\trequire.NoError(t, err)\n\t\t// Planner should still compute create assignments (used for logging / count),\n\t\t// but ToRequests must NOT include them in the wire request.\n\t\trequire.NotEmpty(t, plan.CreateAssignments, \"planner should still compute assignments for count tracking\")\n\n\t\t_, createReq := plan.ToRequests(\"probe\", false)\n\t\trequire.NotNil(t, createReq, \"should still produce a CreatePartitions request\")\n\n\t\ttopic := createReq.Topics[0]\n\t\tassert.Equal(t, int32(plan.FinalPartitionCount), topic.Count,\n\t\t\t\"Count must reflect the desired total even without explicit assignments\")\n\t\tassert.Empty(t, topic.Assignment,\n\t\t\t\"assignments must be absent when rebalancePartitions=false\")\n\t})\n\n\tt.Run(\"no creates needed produces nil create request regardless of flag\", func(t *testing.T) {\n\t\t// Already-optimal topic: 3 brokers, 3 partitions, each broker leads one.\n\t\toptMeta := 
buildMeta(\"probe\",\n\t\t\tmap[int32]string{1: \"\", 2: \"\", 3: \"\"},\n\t\t\t[][]int32{{1, 2, 3}, {2, 3, 1}, {3, 1, 2}},\n\t\t)\n\t\tfor _, rebalance := range []bool{true, false} {\n\t\t\tcfg := EndToEndTopicConfig{\n\t\t\t\tReplicationFactor:   3,\n\t\t\t\tPartitionsPerBroker: 1,\n\t\t\t\tRebalancePartitions: rebalance,\n\t\t\t}\n\t\t\tplan, err := NewPartitionPlanner(cfg, zap.NewNop()).Plan(optMeta)\n\t\t\trequire.NoError(t, err)\n\t\t\tassert.Empty(t, plan.CreateAssignments)\n\t\t\t_, createReq := plan.ToRequests(\"probe\", rebalance)\n\t\t\tassert.Nil(t, createReq, \"no CreatePartitions request when nothing to create (rebalance=%v)\", rebalance)\n\t\t}\n\t})\n}\n\nfunc TestMinimalReassignmentsWhenActualLeadersDivergeFromPreferred(t *testing.T) {\n\t// Scenario: all partitions have same preferred leader (broker 0), but actual\n\t// leaders are distributed. Algorithm should recognize brokers with actual\n\t// leadership and only fix gaps, not realign everything to preferred.\n\tbrokers := map[int32]string{\n\t\t0: \"rack-a\", 1: \"rack-b\", 2: \"rack-c\",\n\t}\n\n\tmeta := buildMeta(\"_redpanda_e2e_probe\", brokers, [][]int32{\n\t\t{0, 1, 2}, // partition 0: preferred leader = 0\n\t\t{0, 1, 2}, // partition 1: preferred leader = 0\n\t\t{0, 1, 2}, // partition 2: preferred leader = 0\n\t})\n\n\t// Set actual leaders to show divergence from preferred\n\tmeta.Topics[0].Partitions[0].Leader = 0 // p0: preferred=0, actual=0 (match)\n\tmeta.Topics[0].Partitions[1].Leader = 1 // p1: preferred=0, actual=1 (DIVERGED)\n\tmeta.Topics[0].Partitions[2].Leader = 0 // p2: preferred=0, actual=0 (match)\n\n\tcfg := EndToEndTopicConfig{\n\t\tReplicationFactor:   3,\n\t\tPartitionsPerBroker: 1,\n\t}\n\n\tplanner := NewPartitionPlanner(cfg, zap.NewNop())\n\tplan, err := planner.Plan(meta)\n\trequire.NoError(t, err)\n\trequire.NotNil(t, plan)\n\n\t// Key assertion: should have ONLY ONE reassignment\n\t// Broker 1 already has actual leadership (p1), even though preferred leader of\n\t// p1 is broker 0. Only broker 2 is missing from actual leadership, so we only\n\t// need to fix that one gap.\n\tassert.Equal(t, 1, len(plan.Reassignments), \"should need only ONE reassignment since broker 1 already has actual leadership\")\n\tassert.Equal(t, 0, len(plan.CreateAssignments), \"should have no creates\")\n\n\t// Verify the reassignment gives broker 2 preferred leadership\n\trequire.Len(t, plan.Reassignments, 1)\n\treassignment := plan.Reassignments[0]\n\tassert.Equal(t, int32(2), reassignment.Replicas[0], \"reassignment should give broker 2 preferred leadership\")\n\n\t// Verify all partitions still have correct RF and no duplicates\n\tfinal := applyPlan(meta, plan)\n\tfor pid, reps := range final {\n\t\tassert.Lenf(t, reps, 3, \"pid %d must have RF=3\", pid)\n\t\tassertNoDuplicates(t, reps)\n\t}\n}\n"
  },
  {
    "path": "e2e/producer.go",
    "content": "package e2e\n\nimport (\n\t\"context\"\n\t\"encoding/json\"\n\t\"strconv\"\n\t\"time\"\n\n\t\"github.com/google/uuid\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"go.uber.org/zap\"\n)\n\n// produceMessagesToAllPartitions sends an EndToEndMessage to every partition on the given topic\nfunc (s *Service) produceMessagesToAllPartitions(ctx context.Context) {\n\tfor i := 0; i < s.partitionCount; i++ {\n\t\ts.produceMessage(ctx, i)\n\t}\n}\n\n// produceMessage produces an end to end record to a single given partition. If it succeeds producing the record\n// it will add it to the message tracker. If producing fails a message will be logged and the respective metrics\n// will be incremented.\nfunc (s *Service) produceMessage(ctx context.Context, partition int) {\n\ttopicName := s.config.TopicManagement.Name\n\trecord, msg := createEndToEndRecord(s.minionID, topicName, partition)\n\n\tstartTime := time.Now()\n\n\t// This childCtx will ensure that we will abort our efforts to produce (including retries) when we exceed\n\t// the SLA for producers.\n\tchildCtx, cancel := context.WithTimeout(ctx, s.config.Producer.AckSla+2*time.Second)\n\n\tpID := strconv.Itoa(partition)\n\ts.messagesProducedInFlight.WithLabelValues(pID).Inc()\n\ts.messageTracker.addToTracker(msg)\n\ts.client.TryProduce(childCtx, record, func(r *kgo.Record, err error) {\n\t\tdefer cancel()\n\t\tackDuration := time.Since(startTime)\n\t\ts.messagesProducedInFlight.WithLabelValues(pID).Dec()\n\t\ts.messagesProducedTotal.WithLabelValues(pID).Inc()\n\t\t// We add 0 in order to ensure that the \"failed\" metric series for that partition id are initialized as well.\n\t\ts.messagesProducedFailed.WithLabelValues(pID).Add(0)\n\t\ts.lostMessages.WithLabelValues(pID).Add(0)\n\n\t\tif err != nil {\n\t\t\ts.messagesProducedFailed.WithLabelValues(pID).Inc()\n\t\t\t_ = s.messageTracker.removeFromTracker(msg.MessageID)\n\n\t\t\ts.logger.Info(\"failed to produce message to end-to-end topic\",\n\t\t\t\tzap.String(\"topic_name\", r.Topic),\n\t\t\t\tzap.Int32(\"partition\", r.Partition),\n\t\t\t\tzap.Error(err))\n\t\t\treturn\n\t\t} else {\n\t\t\t// Update the message's state. 
If this message expires and is marked as successfully produced we will\n\t\t// report this as a lost message, which would indicate that the producer was told that the message got\n\t\t// produced successfully, but it got lost somewhere.\n\t\t// We need to use updateItemIfExists() because it's possible that the message has already been consumed\n\t\t// before we have received the message here (because we were awaiting the produce ack).\n\t\tmsg.state = EndToEndMessageStateProducedSuccessfully\n\t\tmsg.produceLatency = ackDuration.Seconds()\n\n\t\t// TODO: Enable again as soon as https://github.com/ReneKroon/ttlcache/issues/60 is fixed\n\t\t// Because we cannot update cache items in an atomic fashion we currently can't use this method\n\t\t// as this would cause a race condition which ends up in records being reported as lost/expired.\n\t\t// s.messageTracker.updateItemIfExists(msg)\n\n\t\ts.produceLatency.WithLabelValues(pID).Observe(ackDuration.Seconds())\n\t})\n}\n\nfunc createEndToEndRecord(minionID string, topicName string, partition int) (*kgo.Record, *EndToEndMessage) {\n\tmessage := &EndToEndMessage{\n\t\tMinionID:  minionID,\n\t\tMessageID: uuid.NewString(),\n\t\tTimestamp: time.Now().UnixNano(),\n\n\t\tpartition: partition,\n\t\tstate:     EndToEndMessageStateCreated,\n\t}\n\n\tmjson, err := json.Marshal(message)\n\tif err != nil {\n\t\t// Should never happen since the struct is so simple,\n\t\t// but if it does, something is completely broken anyway\n\t\tpanic(\"cannot serialize EndToEndMessage\")\n\t}\n\n\trecord := &kgo.Record{\n\t\tTopic:     topicName,\n\t\tValue:     mjson,\n\t\tPartition: int32(partition), // we set the partition for producing so the manual partitioner can make use of it\n\t}\n\n\treturn record, message\n}\n"
  },
  {
    "path": "e2e/service.go",
    "content": "package e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/google/uuid\"\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"go.uber.org/zap\"\n\n\t\"github.com/cloudhut/kminion/v2/kafka\"\n)\n\ntype Service struct {\n\t// General\n\tconfig Config\n\tlogger *zap.Logger\n\n\tkafkaSvc *kafka.Service // creates kafka client for us\n\tclient   *kgo.Client\n\n\t// Service\n\tminionID       string          // unique identifier, reported in metrics, in case multiple instances run at the same time\n\tgroupId        string          // our own consumer group\n\tgroupTracker   *groupTracker   // tracks consumer groups starting with the kminion prefix and deletes them if they are unused for some time\n\tmessageTracker *messageTracker // tracks successfully produced messages,\n\tclientHooks    *clientHooks    // logs broker events, tracks the coordinator (i.e. which broker last responded to our offset commit)\n\tpartitionCount int             // number of partitions of our test topic, used to send messages to all partitions\n\n\t// Metrics\n\tmessagesProducedInFlight *prometheus.GaugeVec\n\tmessagesProducedTotal    *prometheus.CounterVec\n\tmessagesProducedFailed   *prometheus.CounterVec\n\tmessagesReceived         *prometheus.CounterVec\n\toffsetCommitsTotal       *prometheus.CounterVec\n\toffsetCommitsFailedTotal *prometheus.CounterVec\n\tlostMessages             *prometheus.CounterVec\n\n\tproduceLatency      *prometheus.HistogramVec\n\troundtripLatency    *prometheus.HistogramVec\n\toffsetCommitLatency *prometheus.HistogramVec\n}\n\n// NewService creates a new instance of the e2e monitoring service (wow)\nfunc NewService(ctx context.Context, cfg Config, logger *zap.Logger, kafkaSvc *kafka.Service, promRegisterer prometheus.Registerer) (*Service, error) {\n\tminionID := uuid.NewString()\n\tgroupID := fmt.Sprintf(\"%v-%v\", cfg.Consumer.GroupIdPrefix, minionID)\n\n\t// Producer options\n\tkgoOpts := []kgo.Opt{\n\t\tkgo.ProduceRequestTimeout(3 * time.Second),\n\t\tkgo.RecordRetries(3),\n\t\t// We use the manual partitioner so that the records' partition id will be used as target partition\n\t\tkgo.RecordPartitioner(kgo.ManualPartitioner()),\n\t}\n\tif cfg.Producer.RequiredAcks == \"all\" {\n\t\tkgoOpts = append(kgoOpts, kgo.RequiredAcks(kgo.AllISRAcks()))\n\t} else {\n\t\tkgoOpts = append(kgoOpts, kgo.RequiredAcks(kgo.LeaderAck()))\n\t\tkgoOpts = append(kgoOpts, kgo.DisableIdempotentWrite())\n\t}\n\n\t// Consumer configs\n\tkgoOpts = append(kgoOpts,\n\t\tkgo.ConsumerGroup(groupID),\n\t\tkgo.ConsumeTopics(cfg.TopicManagement.Name),\n\t\tkgo.Balancers(kgo.CooperativeStickyBalancer()),\n\t\tkgo.DisableAutoCommit(),\n\t\tkgo.ConsumeResetOffset(kgo.NewOffset().AtEnd()),\n\t)\n\n\t// Prepare hooks\n\thooks := newEndToEndClientHooks(logger)\n\tkgoOpts = append(kgoOpts, kgo.WithHooks(hooks))\n\n\t// Create kafka service and check if client can successfully connect to Kafka cluster\n\tlogger.Info(\"connecting to Kafka seed brokers, trying to fetch cluster metadata\",\n\t\tzap.String(\"seed_brokers\", strings.Join(kafkaSvc.Brokers(), \",\")))\n\tclient, err := kafkaSvc.CreateAndTestClient(ctx, logger, kgoOpts)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to create kafka client for e2e: %w\", err)\n\t}\n\tlogger.Info(\"successfully connected to kafka cluster\")\n\n\tsvc := &Service{\n\t\tconfig:   cfg,\n\t\tlogger:   logger.Named(\"e2e\"),\n\t\tkafkaSvc: kafkaSvc,\n\t\tclient:   client,\n\n\t\tminionID:  
  minionID,\n\t\tgroupId:     groupID,\n\t\tclientHooks: hooks,\n\t}\n\n\tsvc.groupTracker = newGroupTracker(cfg, logger, client, groupID)\n\tsvc.messageTracker = newMessageTracker(svc)\n\n\tmakeCounterVec := func(name string, labelNames []string, help string) *prometheus.CounterVec {\n\t\tcv := prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\t\tSubsystem: \"end_to_end\",\n\t\t\tName:      name,\n\t\t\tHelp:      help,\n\t\t}, labelNames)\n\t\tpromRegisterer.MustRegister(cv)\n\t\treturn cv\n\t}\n\tmakeGaugeVec := func(name string, labelNames []string, help string) *prometheus.GaugeVec {\n\t\tgv := prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\t\tSubsystem: \"end_to_end\",\n\t\t\tName:      name,\n\t\t\tHelp:      help,\n\t\t}, labelNames)\n\t\tpromRegisterer.MustRegister(gv)\n\t\treturn gv\n\t}\n\tmakeHistogramVec := func(name string, maxLatency time.Duration, labelNames []string, help string) *prometheus.HistogramVec {\n\t\thv := prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\t\tSubsystem: \"end_to_end\",\n\t\t\tName:      name,\n\t\t\tHelp:      help,\n\t\t\tBuckets:   createHistogramBuckets(maxLatency),\n\t\t}, labelNames)\n\t\tpromRegisterer.MustRegister(hv)\n\t\treturn hv\n\t}\n\n\t// Low-level info\n\t// Users can construct alerts like \"can't produce messages\" themselves from those\n\tsvc.messagesProducedInFlight = makeGaugeVec(\"messages_produced_in_flight\", []string{\"partition_id\"}, \"Number of messages that kminion's end-to-end test produced but has not received an answer for yet\")\n\tsvc.messagesProducedTotal = makeCounterVec(\"messages_produced_total\", []string{\"partition_id\"}, \"Number of all messages produced to Kafka. This counter will be incremented when we receive a response (failure/timeout or success) from Kafka\")\n\tsvc.messagesProducedFailed = makeCounterVec(\"messages_produced_failed_total\", []string{\"partition_id\"}, \"Number of messages failed to produce to Kafka because of a timeout or failure\")\n\tsvc.messagesReceived = makeCounterVec(\"messages_received_total\", []string{\"partition_id\"}, \"Number of *matching* messages kminion received. Every roundtrip message has a minionID (randomly generated on startup) and a timestamp. 
Kminion only considers a message a match if it arrives within the configured roundtrip SLA (and it matches the minionID)\")\n\tsvc.offsetCommitsTotal = makeCounterVec(\"offset_commits_total\", []string{\"coordinator_id\"}, \"Counts how many times kminion's end-to-end test has committed offsets\")\n\tsvc.offsetCommitsFailedTotal = makeCounterVec(\"offset_commits_failed_total\", []string{\"coordinator_id\", \"reason\"}, \"Number of offset commits that returned an error or timed out\")\n\tsvc.lostMessages = makeCounterVec(\"messages_lost_total\", []string{\"partition_id\"}, \"Number of messages that have been produced successfully but not received within the configured SLA duration\")\n\n\t// Latency Histograms\n\t// More detailed info about how long stuff took\n\t// Since histograms also have an 'infinite' bucket, they can be used to detect small hiccups and \"lost\" messages\n\tsvc.produceLatency = makeHistogramVec(\"produce_latency_seconds\", cfg.Producer.AckSla, []string{\"partition_id\"}, \"Time until we received an ack for a produced message\")\n\tsvc.roundtripLatency = makeHistogramVec(\"roundtrip_latency_seconds\", cfg.Consumer.RoundtripSla, []string{\"partition_id\"}, \"Time it took between sending (producing) and receiving (consuming) a message\")\n\tsvc.offsetCommitLatency = makeHistogramVec(\"offset_commit_latency_seconds\", cfg.Consumer.CommitSla, []string{\"coordinator_id\"}, \"Time kafka took to respond to kminion's offset commit\")\n\n\treturn svc, nil\n}\n\n// Start starts the service (wow)\nfunc (s *Service) Start(ctx context.Context) error {\n\t// Ensure topic exists and is configured correctly\n\tif err := s.validateManagementTopic(ctx); err != nil {\n\t\treturn fmt.Errorf(\"could not validate end-to-end topic: %w\", err)\n\t}\n\n\t// finally start everything else (producing, consuming, continuous validation, consumer group tracking)\n\tgo s.startReconciliation(ctx)\n\n\t// Start consumer and wait until we've received a response for the first poll\n\t// which would indicate that the consumer is ready. Only once the consumer is\n\t// ready do we want to start the e2e producer to ensure that we will not miss\n\t// messages because the consumer wasn't ready. However, if this initialization\n\t// does not succeed within 30s we have to assume that something is wrong on the\n\t// consuming or producing side. 
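In that case the init loop below keeps producing probe\n\t// records to every partition until the deadline fires. 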
KMinion is supposed to report these kinds of\n\t// issues and therefore this should not block KMinion from starting.\n\tinitCh := make(chan bool, 1)\n\ts.logger.Info(\"initializing consumer and waiting until it has received the first record batch\")\n\tgo s.startConsumeMessages(ctx, initCh)\n\n\t// Produce an init message until the consumer has received at least one fetch\n\tinitTicker := time.NewTicker(1 * time.Second)\n\tisInitialized := false\n\n\t// We send a first message immediately, but we'll keep sending more messages later\n\t// since the consumers start at the latest offset and may have missed this message.\n\tinitCtx, cancel := context.WithTimeout(ctx, 30*time.Second)\n\tdefer cancel()\n\ts.sendInitMessage(initCtx, s.client, s.config.TopicManagement.Name)\n\n\tfor !isInitialized {\n\t\tselect {\n\t\tcase <-initTicker.C:\n\t\t\ts.sendInitMessage(initCtx, s.client, s.config.TopicManagement.Name)\n\t\tcase <-initCh:\n\t\t\tisInitialized = true\n\t\t\ts.logger.Info(\"consumer has been successfully initialized\")\n\t\tcase <-initCtx.Done():\n\t\t\t// At this point we just assume the consumers are running fine.\n\t\t\t// The entire cluster may be down, or producing may be failing.\n\t\t\ts.logger.Warn(\"initializing the consumers timed out, proceeding with the startup\")\n\t\t\tisInitialized = true\n\t\tcase <-ctx.Done():\n\t\t\treturn nil\n\t\t}\n\t}\n\tgo s.startOffsetCommits(ctx)\n\tgo s.startProducer(ctx)\n\n\t// keep track of groups, delete old unused groups\n\tif s.config.Consumer.DeleteStaleConsumerGroups {\n\t\tgo s.groupTracker.start(ctx)\n\t}\n\n\treturn nil\n}\n\nfunc (s *Service) sendInitMessage(ctx context.Context, client *kgo.Client, topicName string) {\n\t// Try to produce one record into each partition. This is important because\n\t// one or more partitions may be offline, while others may still be writable.\n\tfor i := 0; i < s.partitionCount; i++ {\n\t\tclient.TryProduce(ctx, &kgo.Record{\n\t\t\tKey:       []byte(\"init-message\"),\n\t\t\tValue:     nil,\n\t\t\tTopic:     topicName,\n\t\t\tPartition: int32(i),\n\t\t}, nil)\n\t}\n}\n\nfunc (s *Service) startReconciliation(ctx context.Context) {\n\tif !s.config.TopicManagement.Enabled {\n\t\treturn\n\t}\n\n\tvalidateTopicTicker := time.NewTicker(s.config.TopicManagement.ReconciliationInterval)\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\treturn\n\t\tcase <-validateTopicTicker.C:\n\t\t\terr := s.validateManagementTopic(ctx)\n\t\t\tif err != nil {\n\t\t\t\ts.logger.Error(\"failed to validate end-to-end topic\", zap.Error(err))\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunc (s *Service) startProducer(ctx context.Context) {\n\tproduceTicker := time.NewTicker(s.config.ProbeInterval)\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\treturn\n\t\tcase <-produceTicker.C:\n\t\t\ts.produceMessagesToAllPartitions(ctx)\n\t\t}\n\t}\n}\n\nfunc (s *Service) startOffsetCommits(ctx context.Context) {\n\tcommitTicker := time.NewTicker(5 * time.Second)\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\treturn\n\t\tcase <-commitTicker.C:\n\t\t\ts.commitOffsets(ctx)\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "e2e/topic.go",
    "content": "package e2e\n\nimport (\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"sort\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\n// Check our end-to-end test topic and adapt accordingly if something does not match our expectations.\n// - does it exist?\n//\n// - is it configured correctly?\n//   - does it have enough partitions?\n//   - is the replicationFactor correct?\n//\n// - are assignments good?\n//   - is each broker leading at least one partition?\n//   - are replicas distributed correctly?\nfunc (s *Service) validateManagementTopic(ctx context.Context) error {\n\ts.logger.Debug(\"validating end-to-end topic...\")\n\n\tmeta, err := s.getTopicMetadata(ctx)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"validateManagementTopic cannot get metadata of e2e topic: %w\", err)\n\t}\n\n\ttypedErr := kerr.TypedErrorForCode(meta.Topics[0].ErrorCode)\n\ttopicExists := false\n\tswitch {\n\tcase typedErr == nil:\n\t\ttopicExists = true\n\tcase errors.Is(typedErr, kerr.UnknownTopicOrPartition):\n\t\t// UnknownTopicOrPartition (Error code 3) means that the topic does not exist.\n\t\t// When the topic doesn't exist, continue to create it further down in the code.\n\t\ttopicExists = false\n\tdefault:\n\t\t// If the topic (possibly) exists, but there's an error, then this should result in a fail\n\t\treturn fmt.Errorf(\"failed to get metadata for end-to-end topic: %w\", err)\n\t}\n\n\t// Create topic if it doesn't exist\n\tif !topicExists {\n\t\tif !s.config.TopicManagement.Enabled {\n\t\t\treturn fmt.Errorf(\"the configured end to end topic does not exist. The topic will not be created \" +\n\t\t\t\t\"because topic management is disabled\")\n\t\t}\n\n\t\tif err = s.createManagementTopic(ctx, meta); err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\t// Topic was just created with optimal assignments from the partition planner.\n\t\t// We can skip the validation/planning phase since the topic already has the correct\n\t\t// partition count and optimal replica assignments. 
We only need to update our\n\t\t// internal partition count tracking for KMinion's e2e monitoring operations.\n\t\treturn s.updatePartitionCount(ctx)\n\t}\n\n\t// If topic management is disabled, skip validation and alteration of the existing topic.\n\t// This allows kminion to work on managed Kafka platforms (e.g., Confluent Cloud) that\n\t// block partition reassignment operations.\n\tif !s.config.TopicManagement.Enabled {\n\t\ttopicMeta := meta.Topics[0]\n\t\tbrokerIDs := make([]int32, len(meta.Brokers))\n\t\tfor i, broker := range meta.Brokers {\n\t\t\tbrokerIDs[i] = broker.NodeID\n\t\t}\n\n\t\ts.logger.Info(\"topic management is disabled, skipping validation and alteration of existing topic\",\n\t\t\tzap.String(\"topic\", s.config.TopicManagement.Name),\n\t\t\tzap.Int(\"current_partitions\", len(topicMeta.Partitions)),\n\t\t\tzap.Int(\"replication_factor\", len(topicMeta.Partitions[0].Replicas)))\n\n\t\t// Log warnings if the topic configuration differs from expectations\n\t\texpectedPartitions := s.config.TopicManagement.PartitionsPerBroker * len(brokerIDs)\n\t\tif len(topicMeta.Partitions) != expectedPartitions {\n\t\t\ts.logger.Warn(\"topic partition count differs from expected configuration\",\n\t\t\t\tzap.Int(\"current_partitions\", len(topicMeta.Partitions)),\n\t\t\t\tzap.Int(\"expected_partitions\", expectedPartitions),\n\t\t\t\tzap.Int(\"brokers\", len(brokerIDs)),\n\t\t\t\tzap.Int(\"partitions_per_broker_config\", s.config.TopicManagement.PartitionsPerBroker),\n\t\t\t\tzap.String(\"reason\", \"topic management is disabled, will not alter\"))\n\t\t}\n\n\t\t// Check if each broker is leading at least one partition\n\t\tleaderCounts := make(map[int32]int)\n\t\tfor _, partition := range topicMeta.Partitions {\n\t\t\tleaderCounts[partition.Leader]++\n\t\t}\n\t\tbrokersWithoutLeader := []int32{}\n\t\tfor _, brokerID := range brokerIDs {\n\t\t\tif leaderCounts[brokerID] == 0 {\n\t\t\t\tbrokersWithoutLeader = append(brokersWithoutLeader, brokerID)\n\t\t\t}\n\t\t}\n\t\tif len(brokersWithoutLeader) > 0 {\n\t\t\ts.logger.Warn(\"some brokers are not leading any partitions on the e2e topic\",\n\t\t\t\tzap.Int32s(\"brokers_without_leader\", brokersWithoutLeader),\n\t\t\t\tzap.String(\"reason\", \"topic management is disabled, will not alter\"),\n\t\t\t\tzap.String(\"impact\", \"end-to-end monitoring may not cover all brokers\"))\n\t\t}\n\n\t\treturn s.updatePartitionCount(ctx)\n\t}\n\n\t// Topic already exists - use partition planner to validate and potentially fix assignments\n\tplanner := NewPartitionPlanner(s.config.TopicManagement, s.logger)\n\tplan, err := planner.Plan(meta)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to create partition plan: %w\", err)\n\t}\n\n\t// Convert the plan to Kafka requests\n\ttopicName := pointerStrToStr(meta.Topics[0].Topic)\n\talterReq, createReq := plan.ToRequests(topicName, s.config.TopicManagement.RebalancePartitions)\n\n\tif s.config.TopicManagement.RebalancePartitions {\n\t\tif len(plan.Reassignments) > 0 {\n\t\t\ts.logPlannedReassignments(meta, plan, topicName)\n\t\t}\n\t\terr = s.executeAlterPartitionAssignments(ctx, alterReq)\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"failed to alter partition assignments: %w\", err)\n\t\t}\n\t} else if len(plan.Reassignments) > 0 {\n\t\ts.logger.Info(\"skipping partition reassignment because rebalancePartitions is disabled\",\n\t\t\tzap.String(\"topic\", topicName),\n\t\t\tzap.Int(\"skipped_reassignments\", len(plan.Reassignments)),\n\t\t)\n\t}\n\n\tif len(plan.CreateAssignments) > 0 
{\n\t\ts.logPlannedCreations(meta, plan, topicName, s.config.TopicManagement.RebalancePartitions)\n\t}\n\terr = s.executeCreatePartitions(ctx, createReq)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to create partitions: %w\", err)\n\t}\n\n\treturn s.updatePartitionCount(ctx)\n}\n\n// updatePartitionCount retrieves metadata to inform kminion about the updated\n// partition count of its e2e topic. It must be updated after topic validation\n// because the validation process may lead to the creation of new partitions.\n// This can occur when new brokers are added to the cluster.\nfunc (s *Service) updatePartitionCount(ctx context.Context) error {\n\tretryTicker := time.NewTicker(1 * time.Second)\n\tdefer retryTicker.Stop()\n\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\treturn ctx.Err()\n\t\tcase <-retryTicker.C:\n\t\t\tmeta, err := s.getTopicMetadata(ctx)\n\t\t\tif err != nil {\n\t\t\t\treturn fmt.Errorf(\"could not get topic metadata while updating partition count: %w\", err)\n\t\t\t}\n\n\t\t\ttypedErr := kerr.TypedErrorForCode(meta.Topics[0].ErrorCode)\n\t\t\tif typedErr == nil {\n\t\t\t\ts.partitionCount = len(meta.Topics[0].Partitions)\n\t\t\t\ts.logger.Debug(\"updatePartitionCount: successfully updated partition count\", zap.Int(\"partition_count\", s.partitionCount))\n\t\t\t\treturn nil\n\t\t\t}\n\t\t\tif !errors.Is(typedErr, kerr.UnknownTopicOrPartition) {\n\t\t\t\treturn fmt.Errorf(\"unexpected error while updating partition count: %w\", typedErr)\n\t\t\t}\n\t\t\ts.logger.Warn(\"updatePartitionCount: received UNKNOWN_TOPIC_OR_PARTITION error, possibly due to timing issue. Retrying...\")\n\t\t\t// The UNKNOWN_TOPIC_OR_PARTITION error occurs occasionally even though the topic is created\n\t\t\t// in the validateManagementTopic function. It appears to be a timing issue where the topic metadata\n\t\t\t// is not immediately available after creation. 
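Kafka propagates metadata between brokers\n\t\t\t// asynchronously, so a Metadata request may briefly trail a successful topic creation. 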
In practice, waiting for a short period and then retrying\n\t\t\t// the operation resolves the issue.\n\t\t}\n\t}\n}\n\nfunc (s *Service) executeCreatePartitions(ctx context.Context, req *kmsg.CreatePartitionsRequest) error {\n\tif req == nil {\n\t\treturn nil\n\t}\n\n\tres, err := req.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\tfor _, topic := range res.Topics {\n\t\terr := kerr.ErrorForCode(topic.ErrorCode)\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"inner Kafka error: %w\", err)\n\t\t}\n\t}\n\n\treturn nil\n}\n\nfunc (s *Service) executeAlterPartitionAssignments(ctx context.Context, req *kmsg.AlterPartitionAssignmentsRequest) error {\n\tif req == nil {\n\t\treturn nil\n\t}\n\n\tres, err := req.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\ttypedErr := kerr.TypedErrorForCode(res.ErrorCode)\n\tif typedErr != nil {\n\t\ts.logger.Error(\"alter partition assignments: failed to alter partition assignments\", zap.Any(\"request_topics\", req.Topics))\n\t\treturn fmt.Errorf(\"inner Kafka error: %w\", typedErr)\n\t}\n\tfor _, topic := range res.Topics {\n\t\tfor _, partition := range topic.Partitions {\n\t\t\ttypedErr = kerr.TypedErrorForCode(partition.ErrorCode)\n\t\t\tif typedErr != nil {\n\t\t\t\treturn fmt.Errorf(\"inner Kafka partition error on partition '%v': %w\", partition.Partition, typedErr)\n\t\t\t}\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// logPlannedReassignments logs current partition state and planned reassignment details.\nfunc (s *Service) logPlannedReassignments(meta *kmsg.MetadataResponse, plan *Plan, topicName string) {\n\ttopicMeta := meta.Topics[0]\n\n\t// Log current partition state\n\ts.logger.Info(\"current partition assignments for e2e topic\",\n\t\tzap.String(\"topic\", topicName),\n\t\tzap.Int(\"current_partitions\", len(topicMeta.Partitions)),\n\t\tzap.Int(\"brokers_available\", len(meta.Brokers)),\n\t)\n\n\t// Log each current partition assignment (sorted by partition ID)\n\tsortedPartitions := make([]kmsg.MetadataResponseTopicPartition, len(topicMeta.Partitions))\n\tcopy(sortedPartitions, topicMeta.Partitions)\n\tsort.Slice(sortedPartitions, func(i, j int) bool {\n\t\treturn sortedPartitions[i].Partition < sortedPartitions[j].Partition\n\t})\n\n\tfor _, partition := range sortedPartitions {\n\t\ts.logger.Info(\"current partition assignment\",\n\t\t\tzap.String(\"topic\", topicName),\n\t\t\tzap.Int32(\"partition\", partition.Partition),\n\t\t\tzap.Int32s(\"replicas\", partition.Replicas),\n\t\t\tzap.Int32(\"leader\", partition.Leader),\n\t\t)\n\t}\n\n\ts.logger.Info(\"planned partition reassignments\",\n\t\tzap.String(\"topic\", topicName),\n\t\tzap.Int(\"reassignment_count\", len(plan.Reassignments)),\n\t)\n\n\t// Sort reassignments by partition ID for consistent logging\n\tsortedReassignments := make([]Reassignment, len(plan.Reassignments))\n\tcopy(sortedReassignments, plan.Reassignments)\n\tsort.Slice(sortedReassignments, func(i, j int) bool {\n\t\treturn sortedReassignments[i].Partition < sortedReassignments[j].Partition\n\t})\n\n\tfor _, reassignment := range sortedReassignments {\n\t\t// Find current assignment for this partition\n\t\tvar currentReplicas []int32\n\t\tvar currentLeader int32 = -1\n\t\tfor _, partition := range topicMeta.Partitions {\n\t\t\tif partition.Partition == reassignment.Partition {\n\t\t\t\tcurrentReplicas = partition.Replicas\n\t\t\t\tcurrentLeader = partition.Leader\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\n\t\ts.logger.Info(\"partition reassignment\",\n\t\t\tzap.String(\"topic\", 
topicName),\n\t\t\tzap.Int32(\"partition\", reassignment.Partition),\n\t\t\tzap.Int32s(\"current_replicas\", currentReplicas),\n\t\t\tzap.Int32s(\"new_replicas\", reassignment.Replicas),\n\t\t\tzap.Int32(\"current_leader\", currentLeader),\n\t\t\tzap.Int32(\"new_leader\", reassignment.Replicas[0]),\n\t\t)\n\t}\n}\n\n// logPlannedCreations logs planned partition creation details.\nfunc (s *Service) logPlannedCreations(meta *kmsg.MetadataResponse, plan *Plan, topicName string, rebalancePartitions bool) {\n\ttopicMeta := meta.Topics[0]\n\n\ts.logger.Info(\"planned partition creations\",\n\t\tzap.String(\"topic\", topicName),\n\t\tzap.Int(\"creation_count\", len(plan.CreateAssignments)),\n\t\tzap.Int(\"current_partitions\", len(topicMeta.Partitions)),\n\t\tzap.Int(\"final_partitions\", plan.FinalPartitionCount),\n\t)\n\n\tif !rebalancePartitions {\n\t\ts.logger.Info(\"partition assignments will be auto-placed by broker (rebalancePartitions is disabled)\",\n\t\t\tzap.String(\"topic\", topicName),\n\t\t)\n\t\treturn\n\t}\n\n\tnextPartitionID := int32(len(topicMeta.Partitions))\n\tfor i, creation := range plan.CreateAssignments {\n\t\ts.logger.Info(\"new partition creation\",\n\t\t\tzap.String(\"topic\", topicName),\n\t\t\tzap.Int32(\"new_partition\", nextPartitionID+int32(i)),\n\t\t\tzap.Int32s(\"replicas\", creation.Replicas),\n\t\t\tzap.Int32(\"leader\", creation.Replicas[0]),\n\t\t)\n\t}\n}\n\nfunc (s *Service) createManagementTopic(ctx context.Context, allMeta *kmsg.MetadataResponse) error {\n\ttopicCfg := s.config.TopicManagement\n\tbrokerCount := len(allMeta.Brokers)\n\ttotalPartitions := brokerCount * topicCfg.PartitionsPerBroker\n\n\ts.logger.Info(\"e2e topic does not exist, creating it...\",\n\t\tzap.String(\"topic_name\", topicCfg.Name),\n\t\tzap.Int(\"partitions_per_broker\", topicCfg.PartitionsPerBroker),\n\t\tzap.Int(\"replication_factor\", topicCfg.ReplicationFactor),\n\t\tzap.Int(\"broker_count\", brokerCount),\n\t\tzap.Int(\"total_partitions\", totalPartitions),\n\t)\n\n\t// Use partition planner to determine optimal assignments for the new topic.\n\t// The metadata already contains broker info, and since the topic doesn't exist,\n\t// meta.Topics[0].Partitions will be empty, which is exactly what we want.\n\tplanner := NewPartitionPlanner(topicCfg, s.logger)\n\tplan, err := planner.Plan(allMeta)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to create partition plan for new topic: %w\", err)\n\t}\n\n\t// Create topic with specific replica assignments from the planner\n\ttopic := kmsg.NewCreateTopicsRequestTopic()\n\ttopic.Topic = topicCfg.Name\n\ttopic.NumPartitions = -1     // Must be -1 when using ReplicaAssignment\n\ttopic.ReplicationFactor = -1 // Must be -1 when using ReplicaAssignment\n\ttopic.Configs = createTopicConfig(topicCfg)\n\n\t// Convert planner's CreateAssignments to Kafka's ReplicaAssignment format\n\tfor i, assignment := range plan.CreateAssignments {\n\t\treplica := kmsg.NewCreateTopicsRequestTopicReplicaAssignment()\n\t\treplica.Partition = int32(i)\n\t\treplica.Replicas = append([]int32(nil), assignment.Replicas...)\n\t\ttopic.ReplicaAssignment = append(topic.ReplicaAssignment, replica)\n\t}\n\n\treq := kmsg.NewCreateTopicsRequest()\n\treq.Topics = []kmsg.CreateTopicsRequestTopic{topic}\n\n\tres, err := req.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to create e2e topic: %w\", err)\n\t}\n\tif len(res.Topics) > 0 {\n\t\terr := kerr.ErrorForCode(res.Topics[0].ErrorCode)\n\t\tif err != nil {\n\t\t\treturn 
fmt.Errorf(\"failed to create e2e topic: %w\", err)\n\t\t}\n\t}\n\n\treturn nil\n}\n\nfunc (s *Service) getTopicMetadata(ctx context.Context) (*kmsg.MetadataResponse, error) {\n\ttopicReq := kmsg.NewMetadataRequestTopic()\n\ttopicName := s.config.TopicManagement.Name\n\ttopicReq.Topic = &topicName\n\n\treq := kmsg.NewMetadataRequest()\n\treq.Topics = []kmsg.MetadataRequestTopic{topicReq}\n\n\treturn req.RequestWith(ctx, s.client)\n}\n\nfunc (s *Service) getTopicsConfigs(ctx context.Context, configNames []string) (*kmsg.DescribeConfigsResponse, error) {\n\treq := kmsg.NewDescribeConfigsRequest()\n\treq.IncludeDocumentation = false\n\treq.IncludeSynonyms = false\n\treq.Resources = []kmsg.DescribeConfigsRequestResource{\n\t\t{\n\t\t\tResourceType: kmsg.ConfigResourceTypeTopic,\n\t\t\tResourceName: s.config.TopicManagement.Name,\n\t\t\tConfigNames:  configNames,\n\t\t},\n\t}\n\n\treturn req.RequestWith(ctx, s.client)\n}\n\nfunc createTopicConfig(cfgTopic EndToEndTopicConfig) []kmsg.CreateTopicsRequestTopicConfig {\n\n\ttopicConfig := func(name string, value interface{}) kmsg.CreateTopicsRequestTopicConfig {\n\t\tprop := kmsg.NewCreateTopicsRequestTopicConfig()\n\t\tprop.Name = name\n\t\tvalStr := fmt.Sprintf(\"%v\", value)\n\t\tprop.Value = &valStr\n\t\treturn prop\n\t}\n\n\tminISR := 1\n\tif cfgTopic.ReplicationFactor >= 3 {\n\t\t// Only with 3+ replicas does it make sense to require acks from 2 brokers\n\t\t// todo: think about if we should change how 'producer.requiredAcks' works.\n\t\t//       we probably don't even need this configured on the topic directly...\n\t\tminISR = 2\n\t}\n\n\t// Even though kminion's end-to-end feature actually does not require any\n\t// real persistence beyond a few minutes; it might be good to keep messages\n\t// around a bit for debugging.\n\treturn []kmsg.CreateTopicsRequestTopicConfig{\n\t\ttopicConfig(\"cleanup.policy\", \"delete\"),\n\t\ttopicConfig(\"segment.ms\", (time.Hour * 12).Milliseconds()),   // new segment every 12h\n\t\ttopicConfig(\"retention.ms\", (time.Hour * 24).Milliseconds()), // discard segments older than 24h\n\t\ttopicConfig(\"min.insync.replicas\", minISR),\n\t}\n}\n"
  },
  {
    "path": "e2e/utils.go",
    "content": "package e2e\n\nimport (\n\t\"context\"\n\t\"math\"\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\n// createHistogramBuckets creates the buckets for the histogram based on the number of desired buckets (10) and the\n// upper bucket size.\nfunc createHistogramBuckets(maxLatency time.Duration) []float64 {\n\t// Since this is an exponential bucket we need to take Log base2 or binary as the upper bound\n\t// Divide by 10 for the argument because the base is counted as 20ms and we want to normalize it as base 2 instead of 20\n\t// +2 because it starts at 5ms or 0.005 sec, to account 5ms and 10ms before it goes to the base which in this case is 0.02 sec or 20ms\n\t// and another +1 to account for decimal points on int parsing\n\tlatencyCount := math.Logb(float64(maxLatency.Milliseconds() / 10))\n\tcount := int(latencyCount) + 3\n\tbucket := prometheus.ExponentialBuckets(0.005, 2, count)\n\n\treturn bucket\n}\n\nfunc containsStr(ar []string, x string) (bool, int) {\n\tfor i, item := range ar {\n\t\tif item == x {\n\t\t\treturn true, i\n\t\t}\n\t}\n\treturn false, -1\n}\n\n// logCommitErrors logs all errors in commit response and returns a well formatted error code if there was one\nfunc (s *Service) logCommitErrors(r *kmsg.OffsetCommitResponse, err error) string {\n\tif err != nil {\n\t\tif err == context.DeadlineExceeded {\n\t\t\ts.logger.Warn(\"offset commit failed because SLA has been exceeded\")\n\t\t\treturn \"OFFSET_COMMIT_SLA_EXCEEDED\"\n\t\t}\n\n\t\ts.logger.Warn(\"offset commit failed\", zap.Error(err))\n\t\treturn \"RESPONSE_ERROR\"\n\t}\n\n\tlastErrCode := \"\"\n\tfor _, t := range r.Topics {\n\t\tfor _, p := range t.Partitions {\n\t\t\ttypedErr := kerr.TypedErrorForCode(p.ErrorCode)\n\t\t\tif typedErr == nil {\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\ts.logger.Warn(\"error committing partition offset\",\n\t\t\t\tzap.String(\"topic\", t.Topic),\n\t\t\t\tzap.Int32(\"partition_id\", p.Partition),\n\t\t\t\tzap.Error(typedErr),\n\t\t\t)\n\t\t\tlastErrCode = typedErr.Message\n\t\t}\n\t}\n\n\treturn lastErrCode\n}\n\n// brokerMetadataByBrokerID returns a map of all broker metadata keyed by their BrokerID\nfunc brokerMetadataByBrokerID(meta []kmsg.MetadataResponseBroker) map[int32]kmsg.MetadataResponseBroker {\n\tres := make(map[int32]kmsg.MetadataResponseBroker)\n\tfor _, broker := range meta {\n\t\tres[broker.NodeID] = broker\n\t}\n\treturn res\n}\n\n// brokerMetadataByRackID returns a map of all broker metadata keyed by their Rack identifier\nfunc brokerMetadataByRackID(meta []kmsg.MetadataResponseBroker) map[string][]kmsg.MetadataResponseBroker {\n\tres := make(map[string][]kmsg.MetadataResponseBroker)\n\tfor _, broker := range meta {\n\t\trackID := \"\"\n\t\tif broker.Rack != nil {\n\t\t\trackID = *broker.Rack\n\t\t}\n\t\tres[rackID] = append(res[rackID], broker)\n\t}\n\treturn res\n}\n\nfunc pointerStrToStr(str *string) string {\n\tif str == nil {\n\t\treturn \"\"\n\t}\n\treturn *str\n}\n\nfunc safeUnwrap(err error) string {\n\tif err == nil {\n\t\treturn \"<nil>\"\n\t}\n\treturn err.Error()\n}\n\nfunc isInArray(num int16, arr []int16) bool {\n\tfor _, n := range arr {\n\t\tif num == n {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n"
  },
  {
    "path": "go.mod",
    "content": "module github.com/cloudhut/kminion/v2\n\ngo 1.26\n\nrequire (\n\tgithub.com/google/uuid v1.6.0\n\tgithub.com/jcmturner/gokrb5/v8 v8.4.4\n\tgithub.com/jellydator/ttlcache/v2 v2.11.1\n\tgithub.com/knadh/koanf v1.5.0\n\tgithub.com/mitchellh/mapstructure v1.5.0\n\tgithub.com/orcaman/concurrent-map v1.0.0\n\tgithub.com/pkg/errors v0.9.1\n\tgithub.com/prometheus/client_golang v1.23.2\n\tgithub.com/stretchr/testify v1.11.1\n\tgithub.com/twmb/franz-go v1.20.6\n\tgithub.com/twmb/franz-go/pkg/kadm v1.17.1\n\tgithub.com/twmb/franz-go/pkg/kmsg v1.12.0\n\tgithub.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0\n\tgo.uber.org/atomic v1.11.0\n\tgo.uber.org/zap v1.27.1\n\tgolang.org/x/sync v0.19.0\n)\n\nrequire (\n\tgithub.com/beorn7/perks v1.0.1 // indirect\n\tgithub.com/cespare/xxhash/v2 v2.3.0 // indirect\n\tgithub.com/davecgh/go-spew v1.1.1 // indirect\n\tgithub.com/fsnotify/fsnotify v1.9.0 // indirect\n\tgithub.com/hashicorp/go-uuid v1.0.3 // indirect\n\tgithub.com/jcmturner/aescts/v2 v2.0.0 // indirect\n\tgithub.com/jcmturner/dnsutils/v2 v2.0.0 // indirect\n\tgithub.com/jcmturner/gofork v1.7.6 // indirect\n\tgithub.com/jcmturner/rpc/v2 v2.0.3 // indirect\n\tgithub.com/klauspost/compress v1.18.3 // indirect\n\tgithub.com/mitchellh/copystructure v1.2.0 // indirect\n\tgithub.com/mitchellh/reflectwalk v1.0.2 // indirect\n\tgithub.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect\n\tgithub.com/pelletier/go-toml v1.9.1 // indirect\n\tgithub.com/pierrec/lz4/v4 v4.1.25 // indirect\n\tgithub.com/pmezard/go-difflib v1.0.0 // indirect\n\tgithub.com/prometheus/client_model v0.6.2 // indirect\n\tgithub.com/prometheus/common v0.67.5 // indirect\n\tgithub.com/prometheus/procfs v0.19.2 // indirect\n\tgo.uber.org/multierr v1.11.0 // indirect\n\tgo.yaml.in/yaml/v2 v2.4.3 // indirect\n\tgolang.org/x/crypto v0.47.0 // indirect\n\tgolang.org/x/net v0.49.0 // indirect\n\tgolang.org/x/sys v0.40.0 // indirect\n\tgoogle.golang.org/protobuf v1.36.11 // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n)\n"
  },
  {
    "path": "go.sum",
    "content": "cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ncloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ngithub.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=\ngithub.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=\ngithub.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=\ngithub.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=\ngithub.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=\ngithub.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=\ngithub.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=\ngithub.com/aws/aws-sdk-go-v2 v1.9.2/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4=\ngithub.com/aws/aws-sdk-go-v2/config v1.8.3/go.mod h1:4AEiLtAb8kLs7vgw2ZV3p2VZ1+hBavOc84hqxVNpCyw=\ngithub.com/aws/aws-sdk-go-v2/credentials v1.4.3/go.mod h1:FNNC6nQZQUuyhq5aE5c7ata8o9e4ECGmS4lAXC7o1mQ=\ngithub.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.6.0/go.mod h1:gqlclDEZp4aqJOancXK6TN24aKhT0W0Ae9MHk3wzTMM=\ngithub.com/aws/aws-sdk-go-v2/internal/ini v1.2.4/go.mod h1:ZcBrrI3zBKlhGFNYWvju0I3TR93I7YIgAfy82Fh4lcQ=\ngithub.com/aws/aws-sdk-go-v2/service/appconfig v1.4.2/go.mod h1:FZ3HkCe+b10uFZZkFdvf98LHW21k49W8o8J366lqVKY=\ngithub.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.3.2/go.mod h1:72HRZDLMtmVQiLG2tLfQcaWLCssELvGl+Zf2WVxMmR8=\ngithub.com/aws/aws-sdk-go-v2/service/sso v1.4.2/go.mod h1:NBvT9R1MEF+Ud6ApJKM0G+IkPchKS7p7c2YPKwHmBOk=\ngithub.com/aws/aws-sdk-go-v2/service/sts v1.7.2/go.mod h1:8EzeIqfWt2wWT4rJVu3f21TfrhJ8AEMzVybRNSb/b4g=\ngithub.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E=\ngithub.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=\ngithub.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=\ngithub.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=\ngithub.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=\ngithub.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=\ngithub.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=\ngithub.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=\ngithub.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=\ngithub.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=\ngithub.com/cncf/udpa/go 
v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=\ngithub.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=\ngithub.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=\ngithub.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=\ngithub.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=\ngithub.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=\ngithub.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=\ngithub.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=\ngithub.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=\ngithub.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=\ngithub.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=\ngithub.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=\ngithub.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=\ngithub.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=\ngithub.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=\ngithub.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=\ngithub.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=\ngithub.com/go-ldap/ldap v3.0.2+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc=\ngithub.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=\ngithub.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=\ngithub.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=\ngithub.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=\ngithub.com/go-test/deep v1.0.2-0.20181118220953-042da051cf31/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=\ngithub.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=\ngithub.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=\ngithub.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=\ngithub.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=\ngithub.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=\ngithub.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=\ngithub.com/golang/protobuf v1.4.0-rc.1/go.mod 
h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=\ngithub.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=\ngithub.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=\ngithub.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=\ngithub.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=\ngithub.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=\ngithub.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=\ngithub.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=\ngithub.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=\ngithub.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=\ngithub.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=\ngithub.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=\ngithub.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=\ngithub.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=\ngithub.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=\ngithub.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=\ngithub.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=\ngithub.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=\ngithub.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=\ngithub.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=\ngithub.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=\ngithub.com/hashicorp/consul/api v1.13.0/go.mod h1:ZlVrynguJKcYr54zGaDbaL3fOvKC9m72FhPvA8T35KQ=\ngithub.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms=\ngithub.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=\ngithub.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI=\ngithub.com/hashicorp/go-hclog v0.8.0/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=\ngithub.com/hashicorp/go-hclog 
v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=\ngithub.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=\ngithub.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=\ngithub.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=\ngithub.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=\ngithub.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY=\ngithub.com/hashicorp/go-retryablehttp v0.5.4/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=\ngithub.com/hashicorp/go-rootcerts v1.0.1/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=\ngithub.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=\ngithub.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=\ngithub.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A=\ngithub.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=\ngithub.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=\ngithub.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=\ngithub.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=\ngithub.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=\ngithub.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=\ngithub.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=\ngithub.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=\ngithub.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=\ngithub.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q=\ngithub.com/hashicorp/vault/sdk v0.1.13/go.mod h1:B+hVj7TpuQY1Y/GPbCpffmgd+tSEwvhkWnjtSYCaS2M=\ngithub.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=\ngithub.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=\ngithub.com/hjson/hjson-go/v4 v4.0.0 h1:wlm6IYYqHjOdXH1gHev4VoXCaW20HdQAGCxdOEEg2cs=\ngithub.com/hjson/hjson-go/v4 v4.0.0/go.mod h1:KaYt3bTw3zhBjYqnXkYywcYctk0A2nxeEFTse3rH13E=\ngithub.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=\ngithub.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=\ngithub.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=\ngithub.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM=\ngithub.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg=\ngithub.com/jcmturner/gofork v1.7.6/go.mod 
h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo=\ngithub.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o=\ngithub.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg=\ngithub.com/jcmturner/gokrb5/v8 v8.4.3/go.mod h1:dqRwJGXznQrzw6cWmyo6kH+E7jksEQG/CyVWsJEsJO0=\ngithub.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8=\ngithub.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs=\ngithub.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY=\ngithub.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc=\ngithub.com/jellydator/ttlcache/v2 v2.11.1 h1:AZGME43Eh2Vv3giG6GeqeLeFXxwxn1/qHItqWZl6U64=\ngithub.com/jellydator/ttlcache/v2 v2.11.1/go.mod h1:RtE5Snf0/57e+2cLWFYWCCsLas2Hy3c5Z4n14XmSvTI=\ngithub.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=\ngithub.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=\ngithub.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc=\ngithub.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg=\ngithub.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=\ngithub.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=\ngithub.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=\ngithub.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=\ngithub.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=\ngithub.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=\ngithub.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=\ngithub.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=\ngithub.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=\ngithub.com/knadh/koanf v1.5.0 h1:q2TSd/3Pyc/5yP9ldIrSdIz26MCcyNQzW0pEAugLPNs=\ngithub.com/knadh/koanf v1.5.0/go.mod h1:Hgyjp4y8v44hpZtPzs7JZfRAW5AhN7KfZcwv1RYggDs=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=\ngithub.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=\ngithub.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=\ngithub.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=\ngithub.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=\ngithub.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=\ngithub.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=\ngithub.com/kylelemons/godebug 
v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=\ngithub.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=\ngithub.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=\ngithub.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=\ngithub.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=\ngithub.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=\ngithub.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=\ngithub.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=\ngithub.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=\ngithub.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=\ngithub.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=\ngithub.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=\ngithub.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=\ngithub.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=\ngithub.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=\ngithub.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=\ngithub.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=\ngithub.com/mitchellh/go-testing-interface v0.0.0-20171004221916-a61a99592b77/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI=\ngithub.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI=\ngithub.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=\ngithub.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=\ngithub.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=\ngithub.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=\ngithub.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=\ngithub.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=\ngithub.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=\ngithub.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=\ngithub.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=\ngithub.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=\ngithub.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod 
h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/npillmayer/nestext v0.1.3/go.mod h1:h2lrijH8jpicr25dFY+oAJLyzlya6jhnuG+zWp9L0Uk=\ngithub.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA=\ngithub.com/orcaman/concurrent-map v1.0.0 h1:I/2A2XPCb4IuQWcQhBhSwGfiuybl/J0ev9HDbW65HOY=\ngithub.com/orcaman/concurrent-map v1.0.0/go.mod h1:Lu3tH6HLW3feq74c2GC+jIMS/K2CFcDWnWD9XkenwhI=\ngithub.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE=\ngithub.com/pelletier/go-toml v1.9.1 h1:a6qW1EVNZWH9WGI6CsYdD8WAylkoXBS5yv0XHlh17Tc=\ngithub.com/pelletier/go-toml v1.9.1/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=\ngithub.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=\ngithub.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=\ngithub.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0=\ngithub.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4=\ngithub.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=\ngithub.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=\ngithub.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=\ngithub.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=\ngithub.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=\ngithub.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=\ngithub.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=\ngithub.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=\ngithub.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=\ngithub.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=\ngithub.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=\ngithub.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=\ngithub.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=\ngithub.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=\ngithub.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=\ngithub.com/prometheus/common v0.67.5 
h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=\ngithub.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=\ngithub.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=\ngithub.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=\ngithub.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=\ngithub.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=\ngithub.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws=\ngithub.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=\ngithub.com/rhnvrm/simples3 v0.6.1/go.mod h1:Y+3vYm2V7Y4VijFoJHHTrja6OgPrJ2cBti8dPGkC3sA=\ngithub.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=\ngithub.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=\ngithub.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=\ngithub.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=\ngithub.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=\ngithub.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc=\ngithub.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=\ngithub.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=\ngithub.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=\ngithub.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=\ngithub.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=\ngithub.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=\ngithub.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=\ngithub.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=\ngithub.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=\ngithub.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=\ngithub.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=\ngithub.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=\ngithub.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=\ngithub.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=\ngithub.com/twmb/franz-go v1.7.0/go.mod h1:PMze0jNfNghhih2XHbkmTFykbMF5sJqmNJB31DOOzro=\ngithub.com/twmb/franz-go v1.20.6 h1:TpQTt4QcixJ1cHEmQGPOERvTzo99s8jAutmS7rbSD6w=\ngithub.com/twmb/franz-go v1.20.6/go.mod h1:u+FzH2sInp7b9HNVv2cZN8AxdXy6y/AQ1Bkptu4c0FM=\ngithub.com/twmb/franz-go/pkg/kadm v1.17.1 
h1:Bt02Y/RLgnFO2NP2HVP1kd2TFtGRiJZx+fSArjZDtpw=\ngithub.com/twmb/franz-go/pkg/kadm v1.17.1/go.mod h1:s4duQmrDbloVW9QTMXhs6mViTepze7JLG43xwPcAeTg=\ngithub.com/twmb/franz-go/pkg/kmsg v1.2.0/go.mod h1:SxG/xJKhgPu25SamAq0rrucfp7lbzCpEXOC+vH/ELrY=\ngithub.com/twmb/franz-go/pkg/kmsg v1.12.0 h1:CbatD7ers1KzDNgJqPbKOq0Bz/WLBdsTH75wgzeVaPc=\ngithub.com/twmb/franz-go/pkg/kmsg v1.12.0/go.mod h1:+DPt4NC8RmI6hqb8G09+3giKObE6uD2Eya6CfqBpeJY=\ngithub.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0 h1:alKdbddkPw3rDh+AwmUEwh6HNYgTvDSFIe/GWYRR9RM=\ngithub.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0/go.mod h1:k8BoBjyUbFj34f0rRbn+Ky12sZFAPbmShrg0karAIMo=\ngithub.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=\ngithub.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=\ngo.etcd.io/etcd/api/v3 v3.5.4/go.mod h1:5GB2vv4A4AOn3yk7MftYGHkUfGtDHnEraIjym4dYz5A=\ngo.etcd.io/etcd/client/pkg/v3 v3.5.4/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=\ngo.etcd.io/etcd/client/v3 v3.5.4/go.mod h1:ZaRkVgBZC+L+dLCjTcF1hRXpgZXQPOvnA/Ak/gq3kiY=\ngo.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=\ngo.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=\ngo.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=\ngo.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=\ngo.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=\ngo.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=\ngo.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=\ngo.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=\ngo.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=\ngo.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=\ngo.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc=\ngo.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=\ngo.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=\ngo.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=\ngolang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=\ngolang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=\ngolang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=\ngolang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=\ngolang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=\ngolang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8=\ngolang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A=\ngolang.org/x/exp 
v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=\ngolang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=\ngolang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=\ngolang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=\ngolang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=\ngolang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=\ngolang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=\ngolang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=\ngolang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=\ngolang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=\ngolang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=\ngolang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=\ngolang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=\ngolang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=\ngolang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=\ngolang.org/x/net v0.0.0-20220725212005-46097bf591d3/go.mod h1:AaygXjzTFtRAg2ttMY5RMuhpJ3cNnI0XpyFJD1iQRSM=\ngolang.org/x/net v0.0.0-20220812174116-3211cb980234/go.mod 
h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=\ngolang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=\ngolang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=\ngolang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o=\ngolang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8=\ngolang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=\ngolang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=\ngolang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=\ngolang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190129075346-302c3dd5f1cc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=\ngolang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=\ngolang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=\ngolang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=\ngolang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=\ngolang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=\ngolang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=\ngolang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=\ngolang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=\ngolang.org/x/tools 
v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=\ngolang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=\ngolang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=\ngolang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.0.0-20210112230658-8b4aab62c064/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=\ngolang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=\ngolang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngoogle.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=\ngoogle.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=\ngoogle.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=\ngoogle.golang.org/genproto v0.0.0-20190404172233-64821d5d2107/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=\ngoogle.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=\ngoogle.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=\ngoogle.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=\ngoogle.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=\ngoogle.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=\ngoogle.golang.org/grpc v1.22.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=\ngoogle.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=\ngoogle.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=\ngoogle.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=\ngoogle.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0=\ngoogle.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=\ngoogle.golang.org/protobuf 
v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=\ngoogle.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=\ngoogle.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=\ngoogle.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=\ngoogle.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=\ngoogle.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=\ngoogle.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=\ngoogle.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=\ngoogle.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=\ngoogle.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=\ngopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=\ngopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d/go.mod h1:cuepJuh7vyXfUyUwEgHQXw849cJrilpS5NeIjOWESAw=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=\ngopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI=\ngopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=\ngopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\nhonnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=\nhonnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=\nsigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=\n"
  },
  {
    "path": "kafka/client_config_helper.go",
    "content": "package kafka\n\nimport (\n\t\"context\"\n\t\"crypto/tls\"\n\t\"crypto/x509\"\n\t\"encoding/pem\"\n\t\"fmt\"\n\t\"io/ioutil\"\n\t\"net\"\n\t\"time\"\n\n\t\"github.com/jcmturner/gokrb5/v8/client\"\n\t\"github.com/jcmturner/gokrb5/v8/keytab\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/sasl\"\n\t\"github.com/twmb/franz-go/pkg/sasl/kerberos\"\n\t\"github.com/twmb/franz-go/pkg/sasl/oauth\"\n\t\"github.com/twmb/franz-go/pkg/sasl/plain\"\n\t\"github.com/twmb/franz-go/pkg/sasl/scram\"\n\t\"go.uber.org/zap\"\n\n\tkrbconfig \"github.com/jcmturner/gokrb5/v8/config\"\n)\n\n// NewKgoConfig creates a new Config for the Kafka Client as exposed by the franz-go library.\n// If TLS certificates can't be read an error will be returned.\n// logger is only used to print warnings about TLS.\nfunc NewKgoConfig(cfg Config, logger *zap.Logger) ([]kgo.Opt, error) {\n\topts := []kgo.Opt{\n\t\tkgo.SeedBrokers(cfg.Brokers...),\n\t\tkgo.ClientID(cfg.ClientID),\n\t\tkgo.FetchMaxBytes(5 * 1000 * 1000), // 5MB\n\t\tkgo.MaxConcurrentFetches(10),\n\t\t// Allow metadata to be refreshed more often than 5s (default) if needed.\n\t\t// That will mitigate issues with unknown partitions shortly after creating\n\t\t// them.\n\t\tkgo.MetadataMinAge(time.Second),\n\t}\n\n\t// Create Logger\n\tkgoLogger := KgoZapLogger{\n\t\tlogger: logger.Sugar(),\n\t}\n\topts = append(opts, kgo.WithLogger(kgoLogger))\n\n\t// Add Rack Awareness if configured\n\tif cfg.RackID != \"\" {\n\t\topts = append(opts, kgo.Rack(cfg.RackID))\n\t}\n\n\t// Configure SASL\n\tif cfg.SASL.Enabled {\n\t\t// SASL Plain\n\t\tif cfg.SASL.Mechanism == \"PLAIN\" {\n\t\t\tmechanism := plain.Auth{\n\t\t\t\tUser: cfg.SASL.Username,\n\t\t\t\tPass: cfg.SASL.Password,\n\t\t\t}.AsMechanism()\n\t\t\topts = append(opts, kgo.SASL(mechanism))\n\t\t}\n\n\t\t// SASL SCRAM\n\t\tif cfg.SASL.Mechanism == \"SCRAM-SHA-256\" || cfg.SASL.Mechanism == \"SCRAM-SHA-512\" {\n\t\t\tvar mechanism sasl.Mechanism\n\t\t\tscramAuth := scram.Auth{\n\t\t\t\tUser: cfg.SASL.Username,\n\t\t\t\tPass: cfg.SASL.Password,\n\t\t\t}\n\t\t\tif cfg.SASL.Mechanism == \"SCRAM-SHA-256\" {\n\t\t\t\tmechanism = scramAuth.AsSha256Mechanism()\n\t\t\t}\n\t\t\tif cfg.SASL.Mechanism == \"SCRAM-SHA-512\" {\n\t\t\t\tmechanism = scramAuth.AsSha512Mechanism()\n\t\t\t}\n\t\t\topts = append(opts, kgo.SASL(mechanism))\n\t\t}\n\n\t\t// Kerberos\n\t\tif cfg.SASL.Mechanism == \"GSSAPI\" {\n\t\t\tvar krbClient *client.Client\n\n\t\t\tkerbCfg, err := krbconfig.Load(cfg.SASL.GSSAPI.KerberosConfigPath)\n\t\t\tif err != nil {\n\t\t\t\treturn nil, fmt.Errorf(\"failed to create kerberos config from specified config filepath: %w\", err)\n\t\t\t}\n\n\t\t\tswitch cfg.SASL.GSSAPI.AuthType {\n\t\t\tcase \"USER_AUTH:\":\n\t\t\t\tkrbClient = client.NewWithPassword(\n\t\t\t\t\tcfg.SASL.GSSAPI.Username,\n\t\t\t\t\tcfg.SASL.GSSAPI.Realm,\n\t\t\t\t\tcfg.SASL.GSSAPI.Password,\n\t\t\t\t\tkerbCfg,\n\t\t\t\t\tclient.DisablePAFXFAST(!cfg.SASL.GSSAPI.EnableFast))\n\t\t\tcase \"KEYTAB_AUTH\":\n\t\t\t\tktb, err := keytab.Load(cfg.SASL.GSSAPI.KeyTabPath)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn nil, fmt.Errorf(\"failed to load keytab: %w\", err)\n\t\t\t\t}\n\t\t\t\tkrbClient = client.NewWithKeytab(\n\t\t\t\t\tcfg.SASL.GSSAPI.Username,\n\t\t\t\t\tcfg.SASL.GSSAPI.Realm,\n\t\t\t\t\tktb,\n\t\t\t\t\tkerbCfg,\n\t\t\t\t\tclient.DisablePAFXFAST(!cfg.SASL.GSSAPI.EnableFast))\n\t\t\t}\n\t\t\tif krbClient == nil {\n\t\t\t\treturn nil, fmt.Errorf(\"kafka.sasl.gssapi.authType must be one of USER_AUTH or 
KEYTAB_AUTH\")\n\t\t\t}\n\t\t\tkerberosMechanism := kerberos.Auth{\n\t\t\t\tClient:           krbClient,\n\t\t\t\tService:          cfg.SASL.GSSAPI.ServiceName,\n\t\t\t\tPersistAfterAuth: true,\n\t\t\t}.AsMechanism()\n\t\t\topts = append(opts, kgo.SASL(kerberosMechanism))\n\t\t}\n\n\t\t// OAuthBearer\n\t\tif cfg.SASL.Mechanism == \"OAUTHBEARER\" {\n\t\t\tmechanism := oauth.Oauth(func(ctx context.Context) (oauth.Auth, error) {\n\t\t\t\ttoken, err := cfg.SASL.OAuthBearer.getToken(ctx)\n\t\t\t\treturn oauth.Auth{\n\t\t\t\t\tZid:   cfg.SASL.OAuthBearer.ClientID,\n\t\t\t\t\tToken: token,\n\t\t\t\t}, err\n\t\t\t})\n\t\t\topts = append(opts, kgo.SASL(mechanism))\n\t\t}\n\t}\n\n\t// Configure TLS\n\tvar caCertPool *x509.CertPool\n\tif cfg.TLS.Enabled {\n\t\t// Root CA\n\t\tif cfg.TLS.CaFilepath != \"\" || len(cfg.TLS.Ca) > 0 {\n\t\t\tca := []byte(cfg.TLS.Ca)\n\t\t\tif cfg.TLS.CaFilepath != \"\" {\n\t\t\t\tcaBytes, err := ioutil.ReadFile(cfg.TLS.CaFilepath)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn nil, fmt.Errorf(\"failed to load ca cert: %w\", err)\n\t\t\t\t}\n\t\t\t\tca = caBytes\n\t\t\t}\n\t\t\tcaCertPool = x509.NewCertPool()\n\t\t\tisSuccessful := caCertPool.AppendCertsFromPEM(ca)\n\t\t\tif !isSuccessful {\n\t\t\t\tlogger.Warn(\"failed to append ca file to cert pool, is this a valid PEM format?\")\n\t\t\t}\n\t\t}\n\n\t\t// If configured load TLS cert & key - Mutual TLS\n\t\tvar certificates []tls.Certificate\n\t\thasCertFile := cfg.TLS.CertFilepath != \"\" || len(cfg.TLS.Cert) > 0\n\t\thasKeyFile := cfg.TLS.KeyFilepath != \"\" || len(cfg.TLS.Key) > 0\n\t\tif hasCertFile || hasKeyFile {\n\t\t\tcert := []byte(cfg.TLS.Cert)\n\t\t\tprivateKey := []byte(cfg.TLS.Key)\n\t\t\t// 1. Read certificates\n\t\t\tif cfg.TLS.CertFilepath != \"\" {\n\t\t\t\tcertBytes, err := ioutil.ReadFile(cfg.TLS.CertFilepath)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn nil, fmt.Errorf(\"failed to TLS certificate: %w\", err)\n\t\t\t\t}\n\t\t\t\tcert = certBytes\n\t\t\t}\n\n\t\t\tif cfg.TLS.KeyFilepath != \"\" {\n\t\t\t\tkeyBytes, err := ioutil.ReadFile(cfg.TLS.KeyFilepath)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn nil, fmt.Errorf(\"failed to read TLS key: %w\", err)\n\t\t\t\t}\n\t\t\t\tprivateKey = keyBytes\n\t\t\t}\n\n\t\t\t// 2. Check if private key needs to be decrypted. Decrypt it if passphrase is given, otherwise return error\n\t\t\tpemBlock, _ := pem.Decode(privateKey)\n\t\t\tif pemBlock == nil {\n\t\t\t\treturn nil, fmt.Errorf(\"no valid private key found\")\n\t\t\t}\n\n\t\t\tif x509.IsEncryptedPEMBlock(pemBlock) {\n\t\t\t\tdecryptedKey, err := x509.DecryptPEMBlock(pemBlock, []byte(cfg.TLS.Passphrase))\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn nil, fmt.Errorf(\"private key is encrypted, but could not decrypt it: %s\", err)\n\t\t\t\t}\n\t\t\t\t// If private key was encrypted we can overwrite the original contents now with the decrypted version\n\t\t\t\tprivateKey = pem.EncodeToMemory(&pem.Block{Type: pemBlock.Type, Bytes: decryptedKey})\n\t\t\t}\n\t\t\ttlsCert, err := tls.X509KeyPair(cert, privateKey)\n\t\t\tif err != nil {\n\t\t\t\treturn nil, fmt.Errorf(\"cannot parse pem: %s\", err)\n\t\t\t}\n\t\t\tcertificates = []tls.Certificate{tlsCert}\n\t\t}\n\n\t\ttlsDialer := &tls.Dialer{\n\t\t\tNetDialer: &net.Dialer{Timeout: 10 * time.Second},\n\t\t\tConfig: &tls.Config{\n\t\t\t\tInsecureSkipVerify: cfg.TLS.InsecureSkipTLSVerify,\n\t\t\t\tCertificates:       certificates,\n\t\t\t\tRootCAs:            caCertPool,\n\t\t\t},\n\t\t}\n\t\topts = append(opts, kgo.Dialer(tlsDialer.DialContext))\n\t}\n\n\treturn opts, nil\n}\n"
  },
  {
    "path": "kafka/client_logger.go",
    "content": "package kafka\n\nimport (\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"go.uber.org/zap\"\n)\n\ntype KgoZapLogger struct {\n\tlogger *zap.SugaredLogger\n}\n\n// Level Implements kgo.Logger interface. It returns the log level to log at.\n// We pin this to debug as the zap logger decides what to actually send to the output stream.\nfunc (k KgoZapLogger) Level() kgo.LogLevel {\n\treturn kgo.LogLevelDebug\n}\n\n// Log implements kgo.Logger interface\nfunc (k KgoZapLogger) Log(level kgo.LogLevel, msg string, keyvals ...interface{}) {\n\tswitch level {\n\tcase kgo.LogLevelDebug:\n\t\tk.logger.Debugw(msg, keyvals...)\n\tcase kgo.LogLevelInfo:\n\t\tk.logger.Infow(msg, keyvals...)\n\tcase kgo.LogLevelWarn:\n\t\tk.logger.Warnw(msg, keyvals...)\n\tcase kgo.LogLevelError:\n\t\tk.logger.Errorw(msg, keyvals...)\n\t}\n}\n"
  },
  {
    "path": "kafka/config.go",
    "content": "package kafka\n\nimport \"fmt\"\n\ntype Config struct {\n\t// General\n\tBrokers  []string `koanf:\"brokers\"`\n\tClientID string   `koanf:\"clientId\"`\n\tRackID   string   `koanf:\"rackId\"`\n\n\tTLS  TLSConfig  `koanf:\"tls\"`\n\tSASL SASLConfig `koanf:\"sasl\"`\n\n\tRetryInitConnection bool `koanf:\"retryInitConnection\"`\n}\n\nfunc (c *Config) SetDefaults() {\n\tc.ClientID = \"kminion\"\n\n\tc.TLS.SetDefaults()\n\tc.SASL.SetDefaults()\n}\n\nfunc (c *Config) Validate() error {\n\tif len(c.Brokers) == 0 {\n\t\treturn fmt.Errorf(\"no seed brokers specified, at least one must be configured\")\n\t}\n\n\terr := c.TLS.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate TLS config: %w\", err)\n\t}\n\n\terr = c.SASL.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate SASL config: %w\", err)\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "kafka/config_sasl.go",
    "content": "package kafka\n\nimport \"fmt\"\n\nconst (\n\tSASLMechanismPlain       = \"PLAIN\"\n\tSASLMechanismScramSHA256 = \"SCRAM-SHA-256\"\n\tSASLMechanismScramSHA512 = \"SCRAM-SHA-512\"\n\tSASLMechanismGSSAPI      = \"GSSAPI\"\n\tSASLMechanismOAuthBearer = \"OAUTHBEARER\"\n)\n\n// SASLConfig for Kafka Client\ntype SASLConfig struct {\n\tEnabled   bool   `koanf:\"enabled\"`\n\tUsername  string `koanf:\"username\"`\n\tPassword  string `koanf:\"password\"`\n\tMechanism string `koanf:\"mechanism\"`\n\n\t// SASL Mechanisms that require more configuration than username & password\n\tGSSAPI      SASLGSSAPIConfig  `koanf:\"gssapi\"`\n\tOAuthBearer OAuthBearerConfig `koanf:\"oauth\"`\n}\n\n// SetDefaults for SASL Config\nfunc (c *SASLConfig) SetDefaults() {\n\tc.Enabled = false\n\tc.Mechanism = SASLMechanismPlain\n\tc.GSSAPI.SetDefaults()\n}\n\n// Validate SASL config input\nfunc (c *SASLConfig) Validate() error {\n\tif !c.Enabled {\n\t\treturn nil\n\t}\n\n\tswitch c.Mechanism {\n\tcase SASLMechanismPlain, SASLMechanismScramSHA256, SASLMechanismScramSHA512, SASLMechanismGSSAPI:\n\t\t// Valid and supported\n\tcase SASLMechanismOAuthBearer:\n\t\treturn c.OAuthBearer.Validate()\n\tdefault:\n\t\treturn fmt.Errorf(\"given sasl mechanism '%v' is invalid\", c.Mechanism)\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "kafka/config_sasl_gssapi.go",
    "content": "package kafka\n\n// SASLGSSAPIConfig represents the Kafka Kerberos config\ntype SASLGSSAPIConfig struct {\n\tAuthType           string `koanf:\"authType\"`\n\tKeyTabPath         string `koanf:\"keyTabPath\"`\n\tKerberosConfigPath string `koanf:\"kerberosConfigPath\"`\n\tServiceName        string `koanf:\"serviceName\"`\n\tUsername           string `koanf:\"username\"`\n\tPassword           string `koanf:\"password\"`\n\tRealm              string `koanf:\"realm\"`\n\n\t// EnableFAST enables FAST, which is a pre-authentication framework for Kerberos.\n\t// It includes a mechanism for tunneling pre-authentication exchanges using armoured KDC messages.\n\t// FAST provides increased resistance to passive password guessing attacks.\n\tEnableFast bool `koanf:\"enableFast\"`\n}\n\nfunc (s *SASLGSSAPIConfig) SetDefaults() {\n\ts.EnableFast = true\n}\n"
  },
  {
    "path": "kafka/config_sasl_oauthbearer.go",
    "content": "package kafka\n\nimport (\n\t\"context\"\n\t\"encoding/base64\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"net/http\"\n\t\"net/url\"\n\t\"strings\"\n)\n\ntype OAuthBearerConfig struct {\n\tTokenEndpoint string `koanf:\"tokenEndpoint\"`\n\tClientID      string `koanf:\"clientId\"`\n\tClientSecret  string `koanf:\"clientSecret\"`\n\tScope         string `koanf:\"scope\"`\n}\n\nfunc (c *OAuthBearerConfig) Validate() error {\n\tif c.TokenEndpoint == \"\" {\n\t\treturn fmt.Errorf(\"OAuthBearer token endpoint is not specified\")\n\t}\n\tif c.ClientID == \"\" || c.ClientSecret == \"\" {\n\t\treturn fmt.Errorf(\"OAuthBearer client credentials are not specified\")\n\t}\n\treturn nil\n}\n\n// same as AcquireToken in Console https://github.com/redpanda-data/console/blob/master/backend/pkg/config/kafka_sasl_oauth.go#L56\nfunc (c *OAuthBearerConfig) getToken(ctx context.Context) (string, error) {\n\tauthHeaderValue := base64.StdEncoding.EncodeToString([]byte(c.ClientID + \":\" + c.ClientSecret))\n\n\tqueryParams := url.Values{\n\t\t\"grant_type\": []string{\"client_credentials\"},\n\t\t\"scope\":      []string{c.Scope},\n\t}\n\n\treq, err := http.NewRequestWithContext(ctx, \"POST\", c.TokenEndpoint, strings.NewReader(queryParams.Encode()))\n\tif err != nil {\n\t\treturn \"\", fmt.Errorf(\"failed to create HTTP request: %w\", err)\n\t}\n\n\treq.URL.RawQuery = queryParams.Encode()\n\n\treq.Header.Set(\"Authorization\", \"Basic \"+authHeaderValue)\n\treq.Header.Set(\"Content-Type\", \"application/x-www-form-urlencoded\")\n\n\tclient := &http.Client{}\n\n\tresp, err := client.Do(req)\n\tif err != nil {\n\t\treturn \"\", fmt.Errorf(\"HTTP request failed: %w\", err)\n\t}\n\tdefer resp.Body.Close()\n\n\tif resp.StatusCode != http.StatusOK {\n\t\treturn \"\", fmt.Errorf(\"token request failed with status code %d\", resp.StatusCode)\n\t}\n\n\tvar tokenResponse map[string]interface{}\n\tdecoder := json.NewDecoder(resp.Body)\n\tif err := decoder.Decode(&tokenResponse); err != nil {\n\t\treturn \"\", fmt.Errorf(\"failed to parse token response: %w\", err)\n\t}\n\n\taccessToken, ok := tokenResponse[\"access_token\"].(string)\n\tif !ok {\n\t\treturn \"\", fmt.Errorf(\"access_token not found in token response\")\n\t}\n\n\treturn accessToken, nil\n}\n"
  },
  {
    "path": "kafka/config_tls.go",
    "content": "package kafka\n\nimport \"fmt\"\n\n// TLSConfig to connect to Kafka via TLS\ntype TLSConfig struct {\n\tEnabled               bool   `koanf:\"enabled\"`\n\tCaFilepath            string `koanf:\"caFilepath\"`\n\tCertFilepath          string `koanf:\"certFilepath\"`\n\tKeyFilepath           string `koanf:\"keyFilepath\"`\n\tCa                    string `koanf:\"ca\"`\n\tCert                  string `koanf:\"cert\"`\n\tKey                   string `koanf:\"key\"`\n\tPassphrase            string `koanf:\"passphrase\"`\n\tInsecureSkipTLSVerify bool   `koanf:\"insecureSkipTlsVerify\"`\n}\n\nfunc (c *TLSConfig) SetDefaults() {\n\tc.Enabled = false\n}\n\nfunc (c *TLSConfig) Validate() error {\n\tif len(c.CaFilepath) > 0 && len(c.Ca) > 0 {\n\t\treturn fmt.Errorf(\"config keys 'caFilepath' and 'ca' are both set. only one can be used at the same time\")\n\t}\n\tif len(c.CertFilepath) > 0 && len(c.Cert) > 0 {\n\t\treturn fmt.Errorf(\"config keys 'certFilepath' and 'cert' are both set. only one can be used at the same time\")\n\t}\n\n\tif len(c.KeyFilepath) > 0 && len(c.Key) > 0 {\n\t\treturn fmt.Errorf(\"config keys 'keyFilepath' and 'key' are both set. only one can be used at the same time\")\n\t}\n\treturn nil\n}\n"
  },
  {
    "path": "kafka/service.go",
    "content": "package kafka\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"github.com/twmb/franz-go/pkg/kversion\"\n\t\"go.uber.org/zap\"\n)\n\ntype Service struct {\n\tcfg    Config\n\tlogger *zap.Logger\n}\n\nfunc NewService(cfg Config, logger *zap.Logger) *Service {\n\treturn &Service{\n\t\tcfg:    cfg,\n\t\tlogger: logger.Named(\"kafka_service\"),\n\t}\n}\n\n// CreateAndTestClient creates a client with the services default settings\n// logger: will be used to log connections, errors, warnings about tls config, ...\nfunc (s *Service) CreateAndTestClient(ctx context.Context, l *zap.Logger, opts []kgo.Opt) (*kgo.Client, error) {\n\tlogger := l.Named(\"kgo_client\")\n\t// Config with default options\n\tkgoOpts, err := NewKgoConfig(s.cfg, logger)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to create a valid kafka Client config: %w\", err)\n\t}\n\t// Append user (the service calling this method) provided options\n\tkgoOpts = append(kgoOpts, opts...)\n\n\t// Create kafka client\n\tclient, err := kgo.NewClient(kgoOpts...)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to create kafka Client: %w\", err)\n\t}\n\n\t// Test connection\n\tfor {\n\t\terr = s.testConnection(client, ctx)\n\t\tif err == nil {\n\t\t\tbreak\n\t\t}\n\n\t\tif !s.cfg.RetryInitConnection {\n\t\t\treturn nil, fmt.Errorf(\"failed to test connectivity to Kafka cluster %w\", err)\n\t\t}\n\n\t\tlogger.Warn(\"failed to test connectivity to Kafka cluster, retrying in 5 seconds\", zap.Error(err))\n\t\ttime.Sleep(time.Second * 5)\n\t}\n\n\treturn client, nil\n}\n\n// Brokers returns list of brokers this service is connecting to\nfunc (s *Service) Brokers() []string {\n\treturn s.cfg.Brokers\n}\n\n// testConnection tries to fetch Broker metadata and prints some information if connection succeeds. An error will be\n// returned if connecting fails.\nfunc (s *Service) testConnection(client *kgo.Client, ctx context.Context) error {\n\tconnectCtx, cancel := context.WithTimeout(ctx, 15*time.Second)\n\tdefer cancel()\n\n\treq := kmsg.MetadataRequest{\n\t\tTopics: nil,\n\t}\n\tres, err := req.RequestWith(connectCtx, client)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to request metadata: %w\", err)\n\t}\n\n\t// Request versions in order to guess Kafka Cluster version\n\tversionsReq := kmsg.NewApiVersionsRequest()\n\tversionsRes, err := versionsReq.RequestWith(connectCtx, client)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to request api versions: %w\", err)\n\t}\n\terr = kerr.ErrorForCode(versionsRes.ErrorCode)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to request api versions. Inner kafka error: %w\", err)\n\t}\n\tversions := kversion.FromApiVersionsResponse(versionsRes)\n\n\ts.logger.Debug(\"successfully connected to kafka cluster\",\n\t\tzap.Int(\"advertised_broker_count\", len(res.Brokers)),\n\t\tzap.Int(\"topic_count\", len(res.Topics)),\n\t\tzap.Int32(\"controller_id\", res.ControllerID),\n\t\tzap.String(\"kafka_version\", versions.VersionGuess()))\n\n\treturn nil\n}\n"
  },
  {
    "path": "logging/config.go",
    "content": "package logging\n\nimport (\n\t\"fmt\"\n\t\"go.uber.org/zap\"\n)\n\ntype Config struct {\n\tLevel string `koanf:\"level\"`\n}\n\nfunc (c *Config) SetDefaults() {\n\tc.Level = \"info\"\n}\n\nfunc (c *Config) Validate() error {\n\tlevel := zap.NewAtomicLevel()\n\terr := level.UnmarshalText([]byte(c.Level))\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to parse logger level: %w\", err)\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "logging/logger.go",
    "content": "package logging\n\nimport (\n\t\"os\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promauto\"\n\t\"go.uber.org/zap/zapcore\"\n\n\t\"go.uber.org/zap\"\n)\n\n// NewLogger creates a preconfigured global logger and configures the global zap logger\nfunc NewLogger(cfg Config, metricsNamespace string) *zap.Logger {\n\tencoderCfg := zap.NewProductionEncoderConfig()\n\tencoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder\n\n\t// Parse log level text to zap.LogLevel. Error check isn't required because the input is already validated.\n\tlevel := zap.NewAtomicLevel()\n\t_ = level.UnmarshalText([]byte(cfg.Level))\n\n\tcore := zapcore.NewCore(\n\t\tzapcore.NewJSONEncoder(encoderCfg),\n\t\tzapcore.Lock(os.Stdout),\n\t\tlevel,\n\t)\n\tcore = zapcore.RegisterHooks(core, prometheusHook(metricsNamespace))\n\tlogger := zap.New(core)\n\tzap.ReplaceGlobals(logger)\n\n\treturn logger\n}\n\n// prometheusHook is a hook for the zap library which exposes Prometheus counters for various log levels.\nfunc prometheusHook(metricsNamespace string) func(zapcore.Entry) error {\n\tmessageCounterVec := promauto.NewCounterVec(prometheus.CounterOpts{\n\t\tNamespace: metricsNamespace,\n\t\tName:      \"log_messages_total\",\n\t\tHelp:      \"Total number of log messages by log level emitted by KMinion.\",\n\t}, []string{\"level\"})\n\n\t// Initialize counters for all supported log levels so that they expose 0 for each level on startup\n\tsupportedLevels := []zapcore.Level{\n\t\tzapcore.DebugLevel,\n\t\tzapcore.InfoLevel,\n\t\tzapcore.WarnLevel,\n\t\tzapcore.ErrorLevel,\n\t\tzapcore.FatalLevel,\n\t\tzapcore.PanicLevel,\n\t}\n\tfor _, level := range supportedLevels {\n\t\tmessageCounterVec.WithLabelValues(level.String())\n\t}\n\n\treturn func(entry zapcore.Entry) error {\n\t\tmessageCounterVec.WithLabelValues(entry.Level.String()).Inc()\n\t\treturn nil\n\t}\n}\n"
  },
  {
    "path": "main.go",
    "content": "package main\n\nimport (\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"net\"\n\t\"net/http\"\n\t\"os\"\n\t\"os/signal\"\n\t\"strconv\"\n\n\t\"github.com/cloudhut/kminion/v2/e2e\"\n\t\"github.com/cloudhut/kminion/v2/kafka\"\n\t\"github.com/cloudhut/kminion/v2/logging\"\n\t\"github.com/cloudhut/kminion/v2/minion\"\n\t\"github.com/cloudhut/kminion/v2/prometheus\"\n\tpromclient \"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n\t\"go.uber.org/zap\"\n)\n\nvar (\n\t// ------------------------------------------------------------------------\n\t// Below parameters are set at build time using ldflags.\n\t// ------------------------------------------------------------------------\n\n\t// version is KMinion's SemVer version (for example: v1.0.0).\n\tversion = \"development\"\n\t// builtAt is a string that represent a human-readable date when the binary was built.\n\tbuiltAt = \"N/A\"\n\t// commit is a string that represents the last git commit for this build.\n\tcommit = \"N/A\"\n)\n\nfunc main() {\n\tstartupLogger, err := zap.NewProduction()\n\tif err != nil {\n\t\tpanic(fmt.Errorf(\"failed to create startup logger: %w\", err))\n\t}\n\n\tcfg, err := newConfig(startupLogger)\n\tif err != nil {\n\t\tstartupLogger.Fatal(\"failed to parse config\", zap.Error(err))\n\t}\n\n\tlogger := logging.NewLogger(cfg.Logger, cfg.Exporter.Namespace).Named(\"main\")\n\tif err != nil {\n\t\tstartupLogger.Fatal(\"failed to create new logger\", zap.Error(err))\n\t}\n\n\tlogger.Info(\"started kminion\", zap.String(\"version\", version), zap.String(\"built_at\", builtAt))\n\n\t// Setup context that stops when the application receives an interrupt signal\n\tctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)\n\tdefer stop()\n\n\twrappedRegisterer := promclient.WrapRegistererWithPrefix(cfg.Exporter.Namespace+\"_\", promclient.DefaultRegisterer)\n\n\t// Create kafka service\n\tkafkaSvc := kafka.NewService(cfg.Kafka, logger)\n\n\t// Create minion service\n\t// Prometheus exporter only talks to the minion service which\n\t// issues all the requests to Kafka and wraps the interface accordingly.\n\tminionSvc, err := minion.NewService(cfg.Minion, logger, kafkaSvc, cfg.Exporter.Namespace, ctx)\n\tif err != nil {\n\t\tlogger.Fatal(\"failed to setup minion service\", zap.Error(err))\n\t}\n\n\terr = minionSvc.Start(ctx)\n\tif err != nil {\n\t\tlogger.Fatal(\"failed to start minion service\", zap.Error(err))\n\t}\n\n\t// Create end to end testing service\n\tif cfg.Minion.EndToEnd.Enabled {\n\t\te2eService, err := e2e.NewService(\n\t\t\tctx,\n\t\t\tcfg.Minion.EndToEnd,\n\t\t\tlogger,\n\t\t\tkafkaSvc,\n\t\t\twrappedRegisterer,\n\t\t)\n\t\tif err != nil {\n\t\t\tlogger.Fatal(\"failed to create end-to-end monitoring service: %w\", zap.Error(err))\n\t\t}\n\n\t\tif err = e2eService.Start(ctx); err != nil {\n\t\t\tlogger.Fatal(\"failed to start end-to-end monitoring service\", zap.Error(err))\n\t\t}\n\t}\n\n\t// The Prometheus exporter that implements the Prometheus collector interface\n\texporter, err := prometheus.NewExporter(cfg.Exporter, logger, minionSvc)\n\tif err != nil {\n\t\tlogger.Fatal(\"failed to setup prometheus exporter\", 
zap.Error(err))\n\t}\n\texporter.InitializeMetrics()\n\n\tpromclient.MustRegister(exporter)\n\thttp.Handle(\"/metrics\",\n\t\tpromhttp.InstrumentMetricHandler(\n\t\t\tpromclient.DefaultRegisterer,\n\t\t\tpromhttp.HandlerFor(\n\t\t\t\tpromclient.DefaultGatherer,\n\t\t\t\tpromhttp.HandlerOpts{},\n\t\t\t),\n\t\t),\n\t)\n\thttp.Handle(\"/ready\", minionSvc.HandleIsReady())\n\n\t// Start HTTP server\n\taddress := net.JoinHostPort(cfg.Exporter.Host, strconv.Itoa(cfg.Exporter.Port))\n\tsrv := &http.Server{Addr: address}\n\tgo func() {\n\t\t<-ctx.Done()\n\t\tif err := srv.Shutdown(context.Background()); err != nil {\n\t\t\tlogger.Error(\"error stopping HTTP server\", zap.Error(err))\n\t\t\tos.Exit(1)\n\t\t}\n\t}()\n\tlogger.Info(\"listening on address\", zap.String(\"listen_address\", address))\n\tif err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {\n\t\tlogger.Error(\"error starting HTTP server\", zap.Error(err))\n\t\tos.Exit(1)\n\t}\n\n\tlogger.Info(\"kminion stopped\")\n}\n"
  },
  {
    "path": "minion/client_hooks.go",
    "content": "package minion\n\nimport (\n\t\"net\"\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promauto\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"go.uber.org/zap\"\n)\n\n// clientHooks implements the various hook interfaces from the franz-go (kafka) library. We can use these hooks to\n// log additional information, collect Prometheus metrics and similar.\ntype clientHooks struct {\n\tlogger *zap.Logger\n\n\trequestSentCount prometheus.Counter\n\tbytesSent        prometheus.Counter\n\n\trequestsReceivedCount prometheus.Counter\n\tbytesReceived         prometheus.Counter\n}\n\nfunc newMinionClientHooks(logger *zap.Logger, metricsNamespace string) *clientHooks {\n\trequestSentCount := promauto.NewCounter(prometheus.CounterOpts{\n\t\tNamespace: metricsNamespace,\n\t\tSubsystem: \"kafka\",\n\t\tName:      \"requests_sent_total\"})\n\tbytesSent := promauto.NewCounter(prometheus.CounterOpts{\n\t\tNamespace: metricsNamespace,\n\t\tSubsystem: \"kafka\",\n\t\tName:      \"sent_bytes\",\n\t})\n\n\trequestsReceivedCount := promauto.NewCounter(prometheus.CounterOpts{\n\t\tNamespace: metricsNamespace,\n\t\tSubsystem: \"kafka\",\n\t\tName:      \"requests_received_total\"})\n\tbytesReceived := promauto.NewCounter(prometheus.CounterOpts{\n\t\tNamespace: metricsNamespace,\n\t\tSubsystem: \"kafka\",\n\t\tName:      \"received_bytes\",\n\t})\n\n\treturn &clientHooks{\n\t\tlogger: logger,\n\n\t\trequestSentCount: requestSentCount,\n\t\tbytesSent:        bytesSent,\n\n\t\trequestsReceivedCount: requestsReceivedCount,\n\t\tbytesReceived:         bytesReceived,\n\t}\n}\n\nfunc (c clientHooks) OnBrokerConnect(meta kgo.BrokerMetadata, dialDur time.Duration, _ net.Conn, err error) {\n\tif err != nil {\n\t\tc.logger.Debug(\"kafka connection failed\", zap.String(\"broker_host\", meta.Host), zap.Error(err))\n\t\treturn\n\t}\n\tc.logger.Debug(\"kafka connection succeeded\",\n\t\tzap.String(\"host\", meta.Host),\n\t\tzap.Duration(\"dial_duration\", dialDur))\n}\n\nfunc (c clientHooks) OnBrokerDisconnect(meta kgo.BrokerMetadata, _ net.Conn) {\n\tc.logger.Debug(\"kafka broker disconnected\",\n\t\tzap.String(\"host\", meta.Host))\n}\n\n// OnBrokerRead is passed the broker metadata, the key for the response that\n// was read, the number of bytes read, how long the Client waited\n// before reading the response, how long it took to read the response,\n// and any error.\n//\n// The bytes written does not count any tls overhead.\n// OnRead is called after a read from a broker.\nfunc (c clientHooks) OnBrokerRead(_ kgo.BrokerMetadata, _ int16, bytesRead int, _, _ time.Duration, _ error) {\n\tc.requestsReceivedCount.Inc()\n\tc.bytesReceived.Add(float64(bytesRead))\n}\n\n// OnBrokerWrite is passed the broker metadata, the key for the request that\n// was written, the number of bytes written, how long the request\n// waited before being written, how long it took to write the request,\n// and any error.\n//\n// The bytes written does not count any tls overhead.\n// OnWrite is called after a write to a broker.\nfunc (c clientHooks) OnBrokerWrite(_ kgo.BrokerMetadata, _ int16, bytesWritten int, _, _ time.Duration, _ error) {\n\tc.requestSentCount.Inc()\n\tc.bytesSent.Add(float64(bytesWritten))\n}\n"
  },
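  {
    "path": "_examples/client_hooks/main.go",
    "content": "// Command client_hooks is an illustrative sketch, not part of the upstream\n// tree: it demonstrates the franz-go hook mechanism that client_hooks.go\n// builds on. Any type implementing hook interfaces such as\n// kgo.HookBrokerWrite and kgo.HookBrokerRead can be passed to kgo.WithHooks\n// and is then invoked on every broker interaction, which is where KMinion\n// increments its sent/received counters. The broker address is a placeholder.\npackage main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"log\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n)\n\ntype byteCounter struct {\n\tsent     atomic.Int64\n\treceived atomic.Int64\n}\n\n// OnBrokerWrite implements kgo.HookBrokerWrite; same signature as in client_hooks.go.\nfunc (b *byteCounter) OnBrokerWrite(_ kgo.BrokerMetadata, _ int16, bytesWritten int, _, _ time.Duration, _ error) {\n\tb.sent.Add(int64(bytesWritten))\n}\n\n// OnBrokerRead implements kgo.HookBrokerRead.\nfunc (b *byteCounter) OnBrokerRead(_ kgo.BrokerMetadata, _ int16, bytesRead int, _, _ time.Duration, _ error) {\n\tb.received.Add(int64(bytesRead))\n}\n\nfunc main() {\n\tcounter := &byteCounter{}\n\tclient, err := kgo.NewClient(\n\t\tkgo.SeedBrokers(\"localhost:9092\"),\n\t\tkgo.WithHooks(counter),\n\t)\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\tdefer client.Close()\n\n\t// Any request drives the hooks; Ping issues a simple broker request.\n\tif err := client.Ping(context.Background()); err != nil {\n\t\tlog.Fatal(err)\n\t}\n\tfmt.Println(\"bytes sent:\", counter.sent.Load(), \"bytes received:\", counter.received.Load())\n}\n"
  },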
  {
    "path": "minion/config.go",
    "content": "package minion\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/cloudhut/kminion/v2/e2e\"\n)\n\ntype Config struct {\n\tConsumerGroups ConsumerGroupConfig `koanf:\"consumerGroups\"`\n\tTopics         TopicConfig         `koanf:\"topics\"`\n\tLogDirs        LogDirsConfig       `koanf:\"logDirs\"`\n\tEndToEnd       e2e.Config          `koanf:\"endToEnd\"`\n}\n\nfunc (c *Config) SetDefaults() {\n\tc.ConsumerGroups.SetDefaults()\n\tc.Topics.SetDefaults()\n\tc.LogDirs.SetDefaults()\n\tc.EndToEnd.SetDefaults()\n}\n\nfunc (c *Config) Validate() error {\n\terr := c.ConsumerGroups.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to consumer group config: %w\", err)\n\t}\n\n\terr = c.Topics.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate topic config: %w\", err)\n\t}\n\n\terr = c.LogDirs.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate log dirs config: %w\", err)\n\t}\n\n\terr = c.EndToEnd.Validate()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to validate endToEnd config: %w\", err)\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "minion/config_consumer_group.go",
    "content": "package minion\n\nimport (\n\t\"fmt\"\n)\n\nconst (\n\tConsumerGroupScrapeModeOffsetsTopic string = \"offsetsTopic\"\n\tConsumerGroupScrapeModeAdminAPI     string = \"adminApi\"\n\n\tConsumerGroupGranularityTopic     string = \"topic\"\n\tConsumerGroupGranularityPartition string = \"partition\"\n)\n\ntype ConsumerGroupConfig struct {\n\t// Enabled specifies whether consumer groups shall be scraped and exported or not.\n\tEnabled bool `koanf:\"enabled\"`\n\n\t// Mode specifies whether we export consumer group offsets using the Admin API or by consuming the internal\n\t// __consumer_offsets topic.\n\tScrapeMode string `koanf:\"scrapeMode\"`\n\n\t// Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and\n\t// you aren't interested in per partition lags you could choose \"topic\" where all partition lags will be summed\n\t// and only topic lags will be exported.\n\tGranularity string `koanf:\"granularity\"`\n\n\t// AllowedGroups are regex strings of group ids that shall be exported\n\tAllowedGroupIDs []string `koanf:\"allowedGroups\"`\n\n\t// IgnoredGroups are regex strings of group ids that shall be ignored/skipped when exporting metrics. Ignored groups\n\t// take precedence over allowed groups.\n\tIgnoredGroupIDs []string `koanf:\"ignoredGroups\"`\n}\n\nfunc (c *ConsumerGroupConfig) SetDefaults() {\n\tc.Enabled = true\n\tc.ScrapeMode = ConsumerGroupScrapeModeAdminAPI\n\tc.Granularity = ConsumerGroupGranularityPartition\n\tc.AllowedGroupIDs = []string{\"/.*/\"}\n}\n\nfunc (c *ConsumerGroupConfig) Validate() error {\n\tswitch c.ScrapeMode {\n\tcase ConsumerGroupScrapeModeOffsetsTopic, ConsumerGroupScrapeModeAdminAPI:\n\tdefault:\n\t\treturn fmt.Errorf(\"invalid scrape mode '%v' specified. Valid modes are '%v' or '%v'\",\n\t\t\tc.ScrapeMode,\n\t\t\tConsumerGroupScrapeModeOffsetsTopic,\n\t\t\tConsumerGroupScrapeModeAdminAPI)\n\t}\n\n\tswitch c.Granularity {\n\tcase ConsumerGroupGranularityTopic, ConsumerGroupGranularityPartition:\n\tdefault:\n\t\treturn fmt.Errorf(\"invalid consumer group granularity '%v' specified. Valid modes are '%v' or '%v'\",\n\t\t\tc.Granularity,\n\t\t\tConsumerGroupGranularityTopic,\n\t\t\tConsumerGroupGranularityPartition)\n\t}\n\n\t// Check if all group strings are valid regex or literals\n\tfor _, groupID := range c.AllowedGroupIDs {\n\t\t_, err := compileRegex(groupID)\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"allowed group string '%v' is not valid regex\", groupID)\n\t\t}\n\t}\n\n\tfor _, groupID := range c.IgnoredGroupIDs {\n\t\t_, err := compileRegex(groupID)\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"ignored group string '%v' is not valid regex\", groupID)\n\t\t}\n\t}\n\n\treturn nil\n}\n"
  },
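  {
    "path": "_examples/consumer_group_config/main.go",
    "content": "// Command consumer_group_config is an illustrative sketch, not part of the\n// upstream tree: it shows how ConsumerGroupConfig defaults and validation\n// behave. SetDefaults enables scraping via the Admin API at partition\n// granularity and allows every group; Validate rejects unknown scrape modes\n// and malformed filter expressions.\npackage main\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/cloudhut/kminion/v2/minion\"\n)\n\nfunc main() {\n\tvar cfg minion.ConsumerGroupConfig\n\tcfg.SetDefaults()\n\tfmt.Println(cfg.ScrapeMode, cfg.Granularity, cfg.AllowedGroupIDs) // adminApi partition [/.*/]\n\n\t// Slash-wrapped entries are regexes, everything else matches literally.\n\t// Ignored groups take precedence over allowed groups.\n\tcfg.AllowedGroupIDs = []string{\"/^payments-.*/\", \"orders-consumer\"}\n\tcfg.IgnoredGroupIDs = []string{\"/^payments-test-.*/\"}\n\tif err := cfg.Validate(); err != nil {\n\t\tfmt.Println(\"unexpected:\", err)\n\t}\n\n\tcfg.ScrapeMode = \"bogus\"\n\tfmt.Println(cfg.Validate()) // invalid scrape mode 'bogus' ...\n}\n"
  },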
  {
    "path": "minion/config_log_dirs.go",
    "content": "package minion\n\ntype LogDirsConfig struct {\n\t// Enabled specifies whether log dirs shall be scraped and exported or not. This should be disabled for clusters prior\n\t// to version 1.0.0 as describing log dirs was not supported back then.\n\tEnabled bool `koanf:\"enabled\"`\n}\n\n// Validate if provided LogDirsConfig is valid.\nfunc (c *LogDirsConfig) Validate() error {\n\treturn nil\n}\n\n// SetDefaults for topic config\nfunc (c *LogDirsConfig) SetDefaults() {\n\tc.Enabled = true\n}\n"
  },
  {
    "path": "minion/config_topic_config.go",
    "content": "package minion\n\nimport (\n\t\"fmt\"\n)\n\nconst (\n\tTopicGranularityTopic     string = \"topic\"\n\tTopicGranularityPartition string = \"partition\"\n)\n\ntype TopicConfig struct {\n\t// Enabled can be set to false in order to not collect any topic metrics at all.\n\tEnabled bool `koanf:\"enabled\"`\n\n\t// Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and\n\t// you aren't interested in per partition metrics you could choose \"topic\".\n\tGranularity string `koanf:\"granularity\"`\n\n\t// AllowedTopics are regex strings of topic names whose topic metrics that shall be exported.\n\tAllowedTopics []string `koanf:\"allowedTopics\"`\n\n\t// IgnoredTopics are regex strings of topic names that shall be ignored/skipped when exporting metrics. Ignored topics\n\t// take precedence over allowed topics.\n\tIgnoredTopics []string `koanf:\"ignoredTopics\"`\n\n\t// InfoMetric configures how the kafka_topic_info metric is populated\n\tInfoMetric InfoMetricConfig `koanf:\"infoMetric\"`\n}\n\ntype InfoMetricConfig struct {\n\t// ConfigKeys configures optional topic configuration keys that should be exported\n\t// as prometheus metric labels.\n\t// By default only \"cleanup.policy\" is exported\n\tConfigKeys []string `koanf:\"configKeys\"`\n}\n\n// Validate if provided TopicConfig is valid.\nfunc (c *TopicConfig) Validate() error {\n\tswitch c.Granularity {\n\tcase TopicGranularityPartition, TopicGranularityTopic:\n\tdefault:\n\t\treturn fmt.Errorf(\"given granularity '%v' is invalid\", c.Granularity)\n\t}\n\n\t// Check whether each provided string is valid regex\n\tfor _, topic := range c.AllowedTopics {\n\t\t_, err := compileRegex(topic)\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"allowed topic string '%v' is not valid regex\", topic)\n\t\t}\n\t}\n\n\tfor _, topic := range c.IgnoredTopics {\n\t\t_, err := compileRegex(topic)\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"ignored topic string '%v' is not valid regex\", topic)\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// SetDefaults for topic config\nfunc (c *TopicConfig) SetDefaults() {\n\tc.Enabled = true\n\tc.Granularity = TopicGranularityPartition\n\tc.AllowedTopics = []string{\"/.*/\"}\n\tc.InfoMetric = InfoMetricConfig{ConfigKeys: []string{\"cleanup.policy\"}}\n}\n"
  },
  {
    "path": "minion/consumer_group_offsets.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"sync\"\n\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n\t\"golang.org/x/sync/errgroup\"\n)\n\n// ListAllConsumerGroupOffsetsInternal returns a map from the in memory storage. The map value is the offset commit\n// value and is grouped by group id, topic, partition id as keys of the nested maps.\nfunc (s *Service) ListAllConsumerGroupOffsetsInternal() map[string]map[string]map[int32]OffsetCommit {\n\treturn s.storage.getGroupOffsets()\n}\n\n// ListAllConsumerGroupOffsetsAdminAPI return all consumer group offsets using Kafka's Admin API.\nfunc (s *Service) ListAllConsumerGroupOffsetsAdminAPI(ctx context.Context) (map[string]*kmsg.OffsetFetchResponse, error) {\n\tgroupsRes, err := s.listConsumerGroupsCached(ctx)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to list groupsRes: %w\", err)\n\t}\n\tgroupIDs := make([]string, len(groupsRes.Groups))\n\tfor i, group := range groupsRes.Groups {\n\t\tgroupIDs[i] = group.Group\n\t}\n\n\treturn s.listConsumerGroupOffsetsBulk(ctx, groupIDs)\n}\n\n// listConsumerGroupOffsetsBulk returns a map which has the Consumer group name as key\nfunc (s *Service) listConsumerGroupOffsetsBulk(ctx context.Context, groups []string) (map[string]*kmsg.OffsetFetchResponse, error) {\n\teg, _ := errgroup.WithContext(ctx)\n\n\tmutex := sync.Mutex{}\n\tres := make(map[string]*kmsg.OffsetFetchResponse)\n\n\tf := func(group string) func() error {\n\t\treturn func() error {\n\t\t\toffsets, err := s.listConsumerGroupOffsets(ctx, group)\n\t\t\tif err != nil {\n\t\t\t\ts.logger.Warn(\"failed to fetch consumer group offsets, inner kafka error\",\n\t\t\t\t\tzap.String(\"consumer_group\", group),\n\t\t\t\t\tzap.Error(err))\n\t\t\t\treturn nil\n\t\t\t}\n\n\t\t\tmutex.Lock()\n\t\t\tres[group] = offsets\n\t\t\tmutex.Unlock()\n\t\t\treturn nil\n\t\t}\n\t}\n\n\tfor _, group := range groups {\n\t\teg.Go(f(group))\n\t}\n\n\tif err := eg.Wait(); err != nil {\n\t\treturn nil, err\n\t}\n\n\treturn res, nil\n}\n\n// listConsumerGroupOffsets returns the committed group offsets for a single group\nfunc (s *Service) listConsumerGroupOffsets(ctx context.Context, group string) (*kmsg.OffsetFetchResponse, error) {\n\treq := kmsg.NewOffsetFetchRequest()\n\treq.Group = group\n\treq.Topics = nil\n\tres, err := req.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to request group offsets for group '%v': %w\", group, err)\n\t}\n\n\treturn res, nil\n}\n"
  },
  {
    "path": "minion/describe_consumer_groups.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\ntype DescribeConsumerGroupsResponse struct {\n\tBrokerMetadata kgo.BrokerMetadata\n\tGroups         *kmsg.DescribeGroupsResponse\n}\n\nfunc (s *Service) listConsumerGroupsCached(ctx context.Context) (*kmsg.ListGroupsResponse, error) {\n\treqId := ctx.Value(\"requestId\").(string)\n\tkey := \"list-consumer-groups-\" + reqId\n\n\tif cachedRes, exists := s.getCachedItem(key); exists {\n\t\treturn cachedRes.(*kmsg.ListGroupsResponse), nil\n\t}\n\tres, err, _ := s.requestGroup.Do(key, func() (interface{}, error) {\n\t\tres, err := s.listConsumerGroups(ctx)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\t\ts.setCachedItem(key, res, 120*time.Second)\n\n\t\treturn res, nil\n\t})\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\treturn res.(*kmsg.ListGroupsResponse), nil\n}\n\nfunc (s *Service) listConsumerGroups(ctx context.Context) (*kmsg.ListGroupsResponse, error) {\n\tlistReq := kmsg.NewListGroupsRequest()\n\tres, err := listReq.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to list consumer groups: %w\", err)\n\t}\n\terr = kerr.ErrorForCode(res.ErrorCode)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to list consumer groups. inner kafka error: %w\", err)\n\t}\n\n\treturn res, nil\n}\n\nfunc (s *Service) DescribeConsumerGroups(ctx context.Context) ([]DescribeConsumerGroupsResponse, error) {\n\tlistRes, err := s.listConsumerGroupsCached(ctx)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\tgroupIDs := make([]string, len(listRes.Groups))\n\tfor i, group := range listRes.Groups {\n\t\tgroupIDs[i] = group.Group\n\t}\n\n\tdescribeReq := kmsg.NewDescribeGroupsRequest()\n\tdescribeReq.Groups = groupIDs\n\tdescribeReq.IncludeAuthorizedOperations = false\n\tshardedResp := s.client.RequestSharded(ctx, &describeReq)\n\n\tdescribedGroups := make([]DescribeConsumerGroupsResponse, 0)\n\tfor _, kresp := range shardedResp {\n\t\tif kresp.Err != nil {\n\t\t\ts.logger.Warn(\"broker failed to respond to the described groups request\",\n\t\t\t\tzap.Int32(\"broker_id\", kresp.Meta.NodeID),\n\t\t\t\tzap.Error(kresp.Err))\n\t\t\tcontinue\n\t\t}\n\t\tres := kresp.Resp.(*kmsg.DescribeGroupsResponse)\n\n\t\tdescribedGroups = append(describedGroups, DescribeConsumerGroupsResponse{\n\t\t\tBrokerMetadata: kresp.Meta,\n\t\t\tGroups:         res,\n\t\t})\n\t}\n\n\treturn describedGroups, nil\n}\n"
  },
  {
    "path": "minion/describe_topic_config.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/pkg/errors\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n)\n\nfunc (s *Service) GetTopicConfigs(ctx context.Context) (*kmsg.DescribeConfigsResponse, error) {\n\tmetadata, err := s.GetMetadataCached(ctx)\n\tif err != nil {\n\t\treturn nil, errors.Wrap(err, \"failed to get metadata\")\n\t}\n\n\treq := kmsg.NewDescribeConfigsRequest()\n\n\tfor _, topic := range metadata.Topics {\n\t\tresourceReq := kmsg.NewDescribeConfigsRequestResource()\n\t\tresourceReq.ResourceType = kmsg.ConfigResourceTypeTopic\n\t\tresourceReq.ResourceName = *topic.Topic\n\t\treq.Resources = append(req.Resources, resourceReq)\n\t}\n\n\tres, err := req.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to request metadata: %w\", err)\n\t}\n\n\treturn res, nil\n}\n"
  },
  {
    "path": "minion/list_offsets.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kadm\"\n\t\"go.uber.org/zap\"\n)\n\nfunc (s *Service) ListEndOffsetsCached(ctx context.Context) (kadm.ListedOffsets, error) {\n\treturn s.listOffsetsCached(ctx, \"end\")\n}\n\nfunc (s *Service) ListStartOffsetsCached(ctx context.Context) (kadm.ListedOffsets, error) {\n\treturn s.listOffsetsCached(ctx, \"start\")\n}\n\nfunc (s *Service) listOffsetsCached(ctx context.Context, offsetType string) (kadm.ListedOffsets, error) {\n\treqId := ctx.Value(\"requestId\").(string)\n\tkey := fmt.Sprintf(\"partition-%s-offsets-%s\", offsetType, reqId)\n\n\tif cachedRes, exists := s.getCachedItem(key); exists {\n\t\treturn cachedRes.(kadm.ListedOffsets), nil\n\t}\n\n\tvar listFunc func(context.Context) (kadm.ListedOffsets, error)\n\tswitch offsetType {\n\tcase \"end\":\n\t\tlistFunc = s.ListEndOffsets\n\tcase \"start\":\n\t\tlistFunc = s.ListStartOffsets\n\tdefault:\n\t\treturn nil, fmt.Errorf(\"invalid offset type: %s\", offsetType)\n\t}\n\n\tres, err, _ := s.requestGroup.Do(key, func() (interface{}, error) {\n\t\toffsets, err := listFunc(ctx)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\ts.setCachedItem(key, offsets, 120*time.Second)\n\n\t\treturn offsets, nil\n\t})\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\treturn res.(kadm.ListedOffsets), nil\n}\n\n// ListEndOffsets fetches the high water mark for all topic partitions.\nfunc (s *Service) ListEndOffsets(ctx context.Context) (kadm.ListedOffsets, error) {\n\treturn s.listOffsetsInternal(ctx, s.admClient.ListEndOffsets, \"end\")\n}\n\n// ListStartOffsets fetches the low water mark for all topic partitions.\nfunc (s *Service) ListStartOffsets(ctx context.Context) (kadm.ListedOffsets, error) {\n\treturn s.listOffsetsInternal(ctx, s.admClient.ListStartOffsets, \"start\")\n}\n\ntype listOffsetsFunc func(context.Context, ...string) (kadm.ListedOffsets, error)\n\nfunc (s *Service) listOffsetsInternal(ctx context.Context, listFunc listOffsetsFunc, offsetType string) (kadm.ListedOffsets, error) {\n\tlistedOffsets, err := listFunc(ctx)\n\tif err != nil {\n\t\tvar se *kadm.ShardErrors\n\t\tif !errors.As(err, &se) {\n\t\t\treturn nil, fmt.Errorf(\"failed to list %s offsets: %w\", offsetType, err)\n\t\t}\n\n\t\tif se.AllFailed {\n\t\t\treturn nil, fmt.Errorf(\"failed to list %s offsets, all shard responses failed: %w\", offsetType, err)\n\t\t}\n\t\ts.logger.Info(fmt.Sprintf(\"failed to list %s offset from some shards\", offsetType), zap.Int(\"failed_shards\", len(se.Errs)))\n\t\tfor _, shardErr := range se.Errs {\n\t\t\ts.logger.Warn(fmt.Sprintf(\"shard error for listing %s offsets\", offsetType),\n\t\t\t\tzap.Int32(\"broker_id\", shardErr.Broker.NodeID),\n\t\t\t\tzap.Error(shardErr.Err))\n\t\t}\n\t}\n\n\t// Log inner errors before returning them. We do that inside of this function to avoid duplicate logging as the response\n\t// are cached for each scrape anyways.\n\t//\n\t// Create two metrics to aggregate error logs in few messages. Logging one message per occured partition error\n\t// is too much. 
Typical errors are LEADER_NOT_AVAILABLE etc.\n\terrorCountByErrCode := make(map[error]int)\n\terrorCountByTopic := make(map[string]int)\n\n\t// Iterate on all partitions\n\tlistedOffsets.Each(func(offset kadm.ListedOffset) {\n\t\tif offset.Err != nil {\n\t\t\terrorCountByTopic[offset.Topic]++\n\t\t\terrorCountByErrCode[offset.Err]++\n\t\t}\n\t})\n\n\t// Print log line for each error type\n\tfor err, count := range errorCountByErrCode {\n\t\ts.logger.Warn(fmt.Sprintf(\"failed to list some partitions %s watermarks\", offsetType),\n\t\t\tzap.Error(err),\n\t\t\tzap.Int(\"error_count\", count))\n\t}\n\tif len(errorCountByTopic) > 0 {\n\t\ts.logger.Warn(fmt.Sprintf(\"some topics had one or more partitions whose %s watermarks could not be fetched from Kafka\", offsetType),\n\t\t\tzap.Int(\"topics_with_errors\", len(errorCountByTopic)))\n\t}\n\n\treturn listedOffsets, nil\n}\n"
  },
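  {
    "path": "_examples/list_watermarks/main.go",
    "content": "// Command list_watermarks is an illustrative sketch, not part of the\n// upstream tree: it shows the kadm calls that list_offsets.go wraps.\n// ListEndOffsets with no topic arguments fetches high watermarks for all\n// topic partitions, and Each iterates every partition including\n// per-partition errors, which is the shape listOffsetsInternal inspects.\n// The broker address is a placeholder.\npackage main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"log\"\n\n\t\"github.com/twmb/franz-go/pkg/kadm\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n)\n\nfunc main() {\n\tclient, err := kgo.NewClient(kgo.SeedBrokers(\"localhost:9092\"))\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\tdefer client.Close()\n\tadm := kadm.NewClient(client)\n\n\tendOffsets, err := adm.ListEndOffsets(context.Background())\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\tendOffsets.Each(func(o kadm.ListedOffset) {\n\t\tif o.Err != nil {\n\t\t\t// Partition-level errors (e.g. LEADER_NOT_AVAILABLE) arrive here.\n\t\t\tfmt.Println(o.Topic, o.Partition, \"error:\", o.Err)\n\t\t\treturn\n\t\t}\n\t\tfmt.Println(o.Topic, o.Partition, \"high watermark:\", o.Offset)\n\t})\n}\n"
  },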
  {
    "path": "minion/log_dirs.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n)\n\ntype LogDirResponseShard struct {\n\tErr     error\n\tBroker  kgo.BrokerMetadata\n\tLogDirs *kmsg.DescribeLogDirsResponse\n}\n\nfunc (s *Service) DescribeLogDirs(ctx context.Context) []LogDirResponseShard {\n\treq := kmsg.NewDescribeLogDirsRequest()\n\treq.Topics = nil // Describe all topics\n\tresponses := s.client.RequestSharded(ctx, &req)\n\n\tres := make([]LogDirResponseShard, len(responses))\n\tfor i, responseShard := range responses {\n\t\tlogDirs, ok := responseShard.Resp.(*kmsg.DescribeLogDirsResponse)\n\t\tif !ok {\n\t\t\tlogDirs = &kmsg.DescribeLogDirsResponse{}\n\t\t}\n\n\t\tres[i] = LogDirResponseShard{\n\t\t\tErr:     responseShard.Err,\n\t\t\tBroker:  responseShard.Meta,\n\t\t\tLogDirs: logDirs,\n\t\t}\n\t}\n\n\treturn res\n}\n"
  },
  {
    "path": "minion/metadata.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n)\n\nfunc (s *Service) GetMetadataCached(ctx context.Context) (*kmsg.MetadataResponse, error) {\n\treqId := ctx.Value(\"requestId\").(string)\n\tkey := \"metadata-\" + reqId\n\n\tif cachedRes, exists := s.getCachedItem(key); exists {\n\t\treturn cachedRes.(*kmsg.MetadataResponse), nil\n\t}\n\n\tres, err, _ := s.requestGroup.Do(key, func() (interface{}, error) {\n\t\tmetadata, err := s.GetMetadata(ctx)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\ts.setCachedItem(key, metadata, 120*time.Second)\n\n\t\treturn metadata, nil\n\t})\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\treturn res.(*kmsg.MetadataResponse), nil\n}\n\nfunc (s *Service) GetMetadata(ctx context.Context) (*kmsg.MetadataResponse, error) {\n\treq := kmsg.NewMetadataRequest()\n\treq.Topics = nil\n\n\tres, err := req.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to request metadata: %w\", err)\n\t}\n\n\treturn res, nil\n}\n"
  },
  {
    "path": "minion/offset_consumer.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kbin\"\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\n// startConsumingOffsets consumes the __consumer_offsets topic and forwards the kafka messages to their respective\n// methods where they'll be decoded and further processed.\nfunc (s *Service) startConsumingOffsets(ctx context.Context) {\n\tclient := s.client\n\n\ts.logger.Info(\"starting to consume messages from offsets topic\")\n\tgo s.checkIfConsumerLagIsCaughtUp(ctx)\n\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\treturn\n\t\tdefault:\n\t\t\tfetches := client.PollFetches(ctx)\n\t\t\terrors := fetches.Errors()\n\t\t\tfor _, err := range errors {\n\t\t\t\t// Log all errors and continue afterwards as we might get errors and still have some fetch results\n\t\t\t\ts.logger.Error(\"failed to fetch records from kafka\",\n\t\t\t\t\tzap.String(\"topic\", err.Topic),\n\t\t\t\t\tzap.Int32(\"partition\", err.Partition),\n\t\t\t\t\tzap.Error(err.Err))\n\t\t\t}\n\n\t\t\titer := fetches.RecordIter()\n\t\t\tfor !iter.Done() {\n\t\t\t\trecord := iter.Next()\n\t\t\t\ts.storage.markRecordConsumed(record)\n\n\t\t\t\terr := s.decodeOffsetRecord(record)\n\t\t\t\tif err != nil {\n\t\t\t\t\ts.logger.Warn(\"failed to decode offset record\", zap.Error(err))\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n\n// checkIfConsumerLagIsCaughtUp fetches the newest partition offsets for all partitions in the __consumer_offsets\n// topic and compares these against the last consumed messages from our offset consumer. If the consumed offsets are\n// higher than the partition offsets this means we caught up the initial lag and can mark our storage as ready. A ready\n// store will start to expose consumer group offsets.\nfunc (s *Service) checkIfConsumerLagIsCaughtUp(ctx context.Context) {\n\tfor {\n\t\ttime.Sleep(12 * time.Second)\n\t\ts.logger.Debug(\"checking if lag in consumer offsets metadataReqTopic is caught up\")\n\n\t\t// 1. Get metadataReqTopic high watermarks for __consumer_offsets metadataReqTopic\n\t\tmetadataReq := kmsg.NewMetadataRequest()\n\t\tmetadataReqTopic := kmsg.NewMetadataRequestTopic()\n\t\ttopicName := \"__consumer_offsets\"\n\t\tmetadataReqTopic.Topic = &topicName\n\t\tmetadataReq.Topics = []kmsg.MetadataRequestTopic{metadataReqTopic}\n\n\t\tres, err := metadataReq.RequestWith(ctx, s.client)\n\t\tif err != nil {\n\t\t\ts.logger.Warn(\"failed to check if consumer lag on offsets metadataReqTopic is caught up because metadata request failed\",\n\t\t\t\tzap.Error(err))\n\t\t\tcontinue\n\t\t}\n\n\t\t// 2. 
Request high watermarks for consumer offset partitions\n\t\ttopicReqs := make([]kmsg.ListOffsetsRequestTopic, len(res.Topics))\n\t\tfor i, topic := range res.Topics {\n\t\t\treq := kmsg.NewListOffsetsRequestTopic()\n\t\t\treq.Topic = *topic.Topic\n\n\t\t\tpartitionReqs := make([]kmsg.ListOffsetsRequestTopicPartition, len(topic.Partitions))\n\t\t\tfor j, partition := range topic.Partitions {\n\t\t\t\tpartitionReqs[j] = kmsg.NewListOffsetsRequestTopicPartition()\n\t\t\t\tpartitionReqs[j].Partition = partition.Partition\n\t\t\t\tpartitionReqs[j].Timestamp = -1 // Newest\n\t\t\t}\n\t\t\treq.Partitions = partitionReqs\n\n\t\t\ttopicReqs[i] = req\n\t\t}\n\t\toffsetReq := kmsg.NewListOffsetsRequest()\n\t\toffsetReq.Topics = topicReqs\n\t\thighMarksRes, err := offsetReq.RequestWith(ctx, s.client)\n\t\tif err != nil {\n\t\t\ts.logger.Warn(\"failed to check if consumer lag on offsets metadataReqTopic is caught up because high watermark request failed\",\n\t\t\t\tzap.Error(err))\n\t\t\tcontinue\n\t\t}\n\t\tif len(highMarksRes.Topics) != 1 {\n\t\t\ts.logger.Error(\"expected exactly one metadataReqTopic response for high water mark request\")\n\t\t\tcontinue\n\t\t}\n\n\t\t// 3. Check if high watermarks have been consumed. To avoid a race condition here we will wait some time before\n\t\t// comparing, so that the consumer has enough time to catch up to the new high watermarks we just fetched.\n\t\ttime.Sleep(3 * time.Second)\n\t\tconsumedOffsets := s.storage.getConsumedOffsets()\n\t\ttopicRes := highMarksRes.Topics[0]\n\t\tisReady := true\n\n\t\ttype laggingParition struct {\n\t\t\tName string\n\t\t\tId   int32\n\t\t\tLag  int64\n\t\t}\n\t\tvar partitionsLagging []laggingParition\n\t\ttotalLag := int64(0)\n\t\tfor _, partition := range topicRes.Partitions {\n\t\t\terr := kerr.ErrorForCode(partition.ErrorCode)\n\t\t\tif err != nil {\n\t\t\t\ts.logger.Warn(\"failed to check if consumer lag on offsets metadataReqTopic is caught up because high \"+\n\t\t\t\t\t\"watermark request failed, with an inner error\",\n\t\t\t\t\tzap.Error(err))\n\t\t\t}\n\n\t\t\thighWaterMark := partition.Offset - 1\n\t\t\tconsumedOffset := consumedOffsets[partition.Partition]\n\t\t\tpartitionLag := highWaterMark - consumedOffset\n\t\t\tif partitionLag < 0 {\n\t\t\t\tpartitionLag = 0\n\t\t\t}\n\n\t\t\tif partitionLag > 0 {\n\t\t\t\tpartitionsLagging = append(partitionsLagging, laggingParition{\n\t\t\t\t\tName: topicRes.Topic,\n\t\t\t\t\tId:   partition.Partition,\n\t\t\t\t\tLag:  partitionLag,\n\t\t\t\t})\n\t\t\t\ttotalLag += partitionLag\n\t\t\t\ts.logger.Debug(\"consumer_offsets metadataReqTopic lag has not been caught up yet\",\n\t\t\t\t\tzap.Int32(\"partition_id\", partition.Partition),\n\t\t\t\t\tzap.Int64(\"high_water_mark\", highWaterMark),\n\t\t\t\t\tzap.Int64(\"consumed_offset\", consumedOffset),\n\t\t\t\t\tzap.Int64(\"partition_lag\", partitionLag))\n\t\t\t\tisReady = false\n\t\t\t\tcontinue\n\t\t\t}\n\t\t}\n\t\tif isReady {\n\t\t\ts.logger.Info(\"successfully consumed all consumer offsets. 
consumer group lags will be exported from now on\")\n\t\t\ts.storage.setReadyState(true)\n\t\t\treturn\n\t\t} else {\n\t\t\ts.logger.Info(\"catching up the message lag on consumer offsets\",\n\t\t\t\tzap.Int(\"lagging_partitions_count\", len(partitionsLagging)),\n\t\t\t\tzap.Any(\"lagging_partitions\", partitionsLagging),\n\t\t\t\tzap.Int64(\"total_lag\", totalLag))\n\t\t}\n\t}\n}\n\n// decodeOffsetRecord decodes all messages in the consumer offsets topic by routing records to the correct decoding\n// method.\nfunc (s *Service) decodeOffsetRecord(record *kgo.Record) error {\n\tif len(record.Key) < 2 {\n\t\treturn fmt.Errorf(\"offset commit key is supposed to be at least 2 bytes long\")\n\t}\n\tmessageVer := (&kbin.Reader{Src: record.Key}).Int16()\n\n\tswitch messageVer {\n\tcase 0, 1:\n\t\terr := s.decodeOffsetCommit(record)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\tcase 2:\n\t\terr := s.decodeOffsetMetadata(record)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// decodeOffsetMetadata decodes to metadata which includes the following information:\n// - group\n// - protocolType (connect/consumer/...)\n// - generation\n// - protocol\n// - currentStateTimestamp\n// - groupMembers (member metadata such aus: memberId, groupInstanceId, clientId, clientHost, rebalanceTimeout, ...)\nfunc (s *Service) decodeOffsetMetadata(record *kgo.Record) error {\n\tchildLogger := s.logger.With(\n\t\tzap.String(\"topic\", record.Topic),\n\t\tzap.Int32(\"partition_id\", record.Partition),\n\t\tzap.Int64(\"offset\", record.Offset))\n\n\tmetadataKey := kmsg.NewGroupMetadataKey()\n\terr := metadataKey.ReadFrom(record.Key)\n\tif err != nil {\n\t\tchildLogger.Warn(\"failed to decode offset metadata key\", zap.Error(err))\n\t\treturn fmt.Errorf(\"failed to decode offset metadata key: %w\", err)\n\t}\n\n\tif record.Value == nil {\n\t\treturn nil\n\t}\n\tmetadataValue := kmsg.NewGroupMetadataValue()\n\terr = metadataValue.ReadFrom(record.Value)\n\tif err != nil {\n\t\tchildLogger.Warn(\"failed to decode offset metadata value\", zap.Error(err))\n\t\treturn fmt.Errorf(\"failed to decode offset metadata value: %w\", err)\n\t}\n\n\treturn nil\n}\n\n// decodeOffsetCommit decodes to group offsets which include the following information:\n// - group, topic, partition\n// - offset\n// - leaderEpoch\n// - metadata (user specified string for each offset commit)\n// - commitTimestamp\n// - expireTimestamp (only version 1 offset commits / deprecated)\nfunc (s *Service) decodeOffsetCommit(record *kgo.Record) error {\n\tchildLogger := s.logger.With(\n\t\tzap.String(\"topic\", record.Topic),\n\t\tzap.Int32(\"partition_id\", record.Partition),\n\t\tzap.Int64(\"offset\", record.Offset))\n\toffsetCommitKey := kmsg.NewOffsetCommitKey()\n\terr := offsetCommitKey.ReadFrom(record.Key)\n\tif err != nil {\n\t\tchildLogger.Warn(\"failed to decode offset commit key\", zap.Error(err))\n\t\treturn fmt.Errorf(\"failed to decode offset commit key: %w\", err)\n\t}\n\n\tif record.Value == nil {\n\t\t// Tombstone - The group offset is expired or no longer valid (e.g. 
because the topic has been deleted)\n\t\ts.storage.deleteOffsetCommit(offsetCommitKey)\n\t\treturn nil\n\t}\n\n\toffsetCommitValue := kmsg.NewOffsetCommitValue()\n\terr = offsetCommitValue.ReadFrom(record.Value)\n\tif err != nil {\n\t\tchildLogger.Warn(\"failed to decode offset commit value\", zap.Error(err))\n\t\treturn fmt.Errorf(\"failed to decode offset commit value: %w\", err)\n\t}\n\ts.storage.addOffsetCommit(offsetCommitKey, offsetCommitValue)\n\n\treturn nil\n}\n\nfunc (s *Service) GetNumberOfOffsetRecordsConsumed() float64 {\n\treturn s.storage.getNumberOfConsumedRecords()\n}\n"
  },
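  {
    "path": "_examples/decode_offset_key/main.go",
    "content": "// Command decode_offset_key is an illustrative sketch, not part of the\n// upstream tree: it mirrors how decodeOffsetRecord routes records from\n// __consumer_offsets. The first two key bytes carry the message version;\n// versions 0 and 1 are offset commits, version 2 is group metadata. It\n// assumes kmsg's OffsetCommitKey round-trips via AppendTo/ReadFrom, so we\n// can build a synthetic key instead of consuming a real record.\npackage main\n\nimport (\n\t\"fmt\"\n\t\"log\"\n\n\t\"github.com/twmb/franz-go/pkg/kbin\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n)\n\nfunc main() {\n\t// Build a synthetic version-1 offset commit key.\n\tkey := kmsg.NewOffsetCommitKey()\n\tkey.Version = 1\n\tkey.Group = \"example-group\"\n\tkey.Topic = \"example-topic\"\n\tkey.Partition = 3\n\traw := key.AppendTo(nil)\n\n\t// Route on the leading int16, like decodeOffsetRecord does.\n\tmessageVer := (&kbin.Reader{Src: raw}).Int16()\n\tswitch messageVer {\n\tcase 0, 1:\n\t\tdecoded := kmsg.NewOffsetCommitKey()\n\t\tif err := decoded.ReadFrom(raw); err != nil {\n\t\t\tlog.Fatal(err)\n\t\t}\n\t\tfmt.Println(\"offset commit for\", decoded.Group, decoded.Topic, decoded.Partition)\n\tcase 2:\n\t\tfmt.Println(\"group metadata record\")\n\t}\n}\n"
  },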
  {
    "path": "minion/service.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"net/http\"\n\t\"regexp\"\n\t\"strings\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/twmb/franz-go/pkg/kadm\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"github.com/twmb/franz-go/pkg/kversion\"\n\t\"go.uber.org/zap\"\n\t\"golang.org/x/sync/singleflight\"\n\n\t\"github.com/cloudhut/kminion/v2/kafka\"\n)\n\ntype Service struct {\n\tCfg    Config\n\tlogger *zap.Logger\n\n\t// requestGroup is used to deduplicate multiple concurrent requests to kafka\n\trequestGroup *singleflight.Group\n\tcache        map[string]interface{}\n\tcacheLock    sync.RWMutex\n\n\tAllowedGroupIDsExpr []*regexp.Regexp\n\tIgnoredGroupIDsExpr []*regexp.Regexp\n\tAllowedTopicsExpr   []*regexp.Regexp\n\tIgnoredTopicsExpr   []*regexp.Regexp\n\n\tclient    *kgo.Client\n\tadmClient *kadm.Client\n\tstorage   *Storage\n}\n\nfunc NewService(cfg Config, logger *zap.Logger, kafkaSvc *kafka.Service, metricsNamespace string, ctx context.Context) (*Service, error) {\n\tstorage, err := newStorage(logger)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to create storage: %w\", err)\n\t}\n\n\t// Kafka client\n\tminionHooks := newMinionClientHooks(logger.Named(\"kafka_hooks\"), metricsNamespace)\n\tkgoOpts := []kgo.Opt{\n\t\tkgo.WithHooks(minionHooks),\n\t}\n\tif cfg.ConsumerGroups.Enabled && cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeOffsetsTopic {\n\t\tkgoOpts = append(kgoOpts,\n\t\t\tkgo.ConsumeResetOffset(kgo.NewOffset().AtStart()),\n\t\t\tkgo.ConsumeTopics(\"__consumer_offsets\"))\n\t}\n\n\tlogger.Info(\"connecting to Kafka seed brokers, trying to fetch cluster metadata\",\n\t\tzap.String(\"seed_brokers\", strings.Join(kafkaSvc.Brokers(), \",\")))\n\n\tclient, err := kafkaSvc.CreateAndTestClient(ctx, logger, kgoOpts)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to create kafka client: %w\", err)\n\t}\n\tlogger.Info(\"successfully connected to kafka cluster\")\n\n\t// Compile regexes. 
We can ignore the errors because valid compilation has been validated already\n\tallowedGroupIDsExpr, _ := compileRegexes(cfg.ConsumerGroups.AllowedGroupIDs)\n\tignoredGroupIDsExpr, _ := compileRegexes(cfg.ConsumerGroups.IgnoredGroupIDs)\n\tallowedTopicsExpr, _ := compileRegexes(cfg.Topics.AllowedTopics)\n\tignoredTopicsExpr, _ := compileRegexes(cfg.Topics.IgnoredTopics)\n\n\tservice := &Service{\n\t\tCfg:    cfg,\n\t\tlogger: logger.Named(\"minion_service\"),\n\n\t\trequestGroup: &singleflight.Group{},\n\t\tcache:        make(map[string]interface{}),\n\t\tcacheLock:    sync.RWMutex{},\n\n\t\tAllowedGroupIDsExpr: allowedGroupIDsExpr,\n\t\tIgnoredGroupIDsExpr: ignoredGroupIDsExpr,\n\t\tAllowedTopicsExpr:   allowedTopicsExpr,\n\t\tIgnoredTopicsExpr:   ignoredTopicsExpr,\n\n\t\tclient:    client,\n\t\tadmClient: kadm.NewClient(client),\n\n\t\tstorage: storage,\n\t}\n\n\treturn service, nil\n}\n\nfunc (s *Service) Start(ctx context.Context) error {\n\terr := s.ensureCompatibility(ctx)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to check feature compatibility against Kafka: %w\", err)\n\t}\n\n\tif s.Cfg.ConsumerGroups.Enabled && s.Cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeOffsetsTopic {\n\t\tgo s.startConsumingOffsets(ctx)\n\t}\n\n\treturn nil\n}\n\nfunc (s *Service) isReady() bool {\n\tif s.Cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeAdminAPI {\n\t\treturn true\n\t}\n\n\treturn s.storage.isReady()\n}\n\nfunc (s *Service) HandleIsReady() http.HandlerFunc {\n\ttype response struct {\n\t\tStatusCode int `json:\"statusCode\"`\n\t}\n\treturn func(w http.ResponseWriter, r *http.Request) {\n\t\tstatus := http.StatusOK\n\t\tif !s.isReady() {\n\t\t\tstatus = http.StatusServiceUnavailable\n\t\t}\n\t\tres := response{StatusCode: status}\n\t\tresJson, _ := json.Marshal(res)\n\t\tw.WriteHeader(status)\n\t\tw.Write(resJson)\n\t}\n}\n\n// ensureCompatibility checks whether the options as configured are available in the connected cluster. For example\n// we will check if the target Kafka's API version support the LogDirs request. If that's not the case we will\n// disable the option and print a warning message.\nfunc (s *Service) ensureCompatibility(ctx context.Context) error {\n\tctx, cancel := context.WithTimeout(ctx, 15*time.Second)\n\tdefer cancel()\n\tversionsRes, err := s.GetAPIVersions(ctx)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"kafka api versions couldn't be fetched: %w\", err)\n\t}\n\tversions := kversion.FromApiVersionsResponse(versionsRes)\n\n\t// Check Describe Log Dirs\n\tif s.Cfg.LogDirs.Enabled {\n\t\tk := kmsg.NewDescribeLogDirsRequest()\n\t\tisSupported := versions.HasKey(k.Key())\n\t\tif !isSupported {\n\t\t\ts.logger.Warn(\"describing log dirs is enabled, but it is not supported because your Kafka cluster \" +\n\t\t\t\t\"version is too old. feature will be disabled\")\n\t\t\ts.Cfg.LogDirs.Enabled = false\n\t\t}\n\t}\n\n\treturn nil\n}\n\nfunc (s *Service) getCachedItem(key string) (interface{}, bool) {\n\ts.cacheLock.RLock()\n\tdefer s.cacheLock.RUnlock()\n\n\tval, exists := s.cache[key]\n\treturn val, exists\n}\n\nfunc (s *Service) setCachedItem(key string, val interface{}, timeout time.Duration) {\n\ts.cacheLock.Lock()\n\tdefer s.cacheLock.Unlock()\n\n\tgo func() {\n\t\ttime.Sleep(timeout)\n\t\ts.deleteCachedItem(key)\n\t}()\n\n\ts.cache[key] = val\n}\n\nfunc (s *Service) deleteCachedItem(key string) {\n\ts.cacheLock.Lock()\n\tdefer s.cacheLock.Unlock()\n\n\tdelete(s.cache, key)\n}\n"
  },
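  {
    "path": "_examples/cached_singleflight/main.go",
    "content": "// Command cached_singleflight is a self-contained, illustrative sketch (not\n// part of the upstream tree) of the request deduplication pattern the minion\n// Service uses: concurrent callers for the same key are collapsed into one\n// upstream call via singleflight, and the result is kept in a map with a\n// time-based eviction goroutine, mirroring getCachedItem/setCachedItem.\npackage main\n\nimport (\n\t\"fmt\"\n\t\"sync\"\n\t\"time\"\n\n\t\"golang.org/x/sync/singleflight\"\n)\n\ntype cache struct {\n\tgroup singleflight.Group\n\tmu    sync.RWMutex\n\titems map[string]interface{}\n}\n\nfunc (c *cache) get(key string, ttl time.Duration, load func() (interface{}, error)) (interface{}, error) {\n\tc.mu.RLock()\n\tval, ok := c.items[key]\n\tc.mu.RUnlock()\n\tif ok {\n\t\treturn val, nil\n\t}\n\n\t// All concurrent cache misses for the same key share this single load.\n\tv, err, _ := c.group.Do(key, func() (interface{}, error) {\n\t\tval, err := load()\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\t\tc.mu.Lock()\n\t\tc.items[key] = val\n\t\tc.mu.Unlock()\n\t\t// Evict after the TTL, like Service.setCachedItem does.\n\t\tgo func() {\n\t\t\ttime.Sleep(ttl)\n\t\t\tc.mu.Lock()\n\t\t\tdelete(c.items, key)\n\t\t\tc.mu.Unlock()\n\t\t}()\n\t\treturn val, nil\n\t})\n\treturn v, err\n}\n\nfunc main() {\n\tc := &cache{items: make(map[string]interface{})}\n\tcalls := 0\n\tvar wg sync.WaitGroup\n\tfor i := 0; i < 5; i++ {\n\t\twg.Add(1)\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tv, _ := c.get(\"metadata\", time.Second, func() (interface{}, error) {\n\t\t\t\tcalls++ // executed once; singleflight collapses the concurrent loads\n\t\t\t\ttime.Sleep(50 * time.Millisecond)\n\t\t\t\treturn \"cluster-metadata\", nil\n\t\t\t})\n\t\t\tfmt.Println(v)\n\t\t}()\n\t}\n\twg.Wait()\n\tfmt.Println(\"upstream calls:\", calls) // 1, not 5\n}\n"
  },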
  {
    "path": "minion/storage.go",
    "content": "package minion\n\nimport (\n\t\"fmt\"\n\t\"strconv\"\n\t\"time\"\n\n\tcmap \"github.com/orcaman/concurrent-map\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/atomic\"\n\t\"go.uber.org/zap\"\n)\n\n// Storage stores the current state of all consumer group information that has been consumed using the offset consumer.\ntype Storage struct {\n\tlogger *zap.Logger\n\n\t// offsetCommits is a map of all consumer offsets.\n\t// A unique key in the format \"group:topic:partition\" is used as map key.\n\t// Value is of type OffsetCommit\n\toffsetCommits cmap.ConcurrentMap\n\n\t// progressTracker is a map that tracks what offsets in each partition have already been consumed\n\tprogressTracker cmap.ConcurrentMap\n\n\tisReadyBool *atomic.Bool\n\n\t// Number of consumed records (used for a Prometheus metric)\n\tconsumedRecords *atomic.Float64\n}\n\n// OffsetCommit is used as value for the OffsetCommit map\ntype OffsetCommit struct {\n\tKey   kmsg.OffsetCommitKey\n\tValue kmsg.OffsetCommitValue\n\n\t// CommitCount is the number of offset commits for this group-topic-partition combination\n\tCommitCount int\n\n\t// ExpireTimestamp is a timestamp that indicates when this offset commit will expire on the Kafka cluster\n\tExpireTimestamp time.Time\n}\n\nfunc newStorage(logger *zap.Logger) (*Storage, error) {\n\treturn &Storage{\n\t\tlogger:          logger.Named(\"storage\"),\n\t\toffsetCommits:   cmap.New(),\n\t\tprogressTracker: cmap.New(),\n\t\tisReadyBool:     atomic.NewBool(false),\n\t\tconsumedRecords: atomic.NewFloat64(0),\n\t}, nil\n}\n\nfunc (s *Storage) isReady() bool {\n\treturn s.isReadyBool.Load()\n}\n\nfunc (s *Storage) setReadyState(isReady bool) {\n\ts.isReadyBool.Store(isReady)\n}\n\n// markRecordConsumed stores the latest consumed offset for each partition. This is necessary in order to figure out\n// whether we have caught up the message lag when starting KMinion as we start consuming from the very oldest offset\n// commit.\nfunc (s *Storage) markRecordConsumed(rec *kgo.Record) {\n\tkey := fmt.Sprintf(\"%v\", rec.Partition)\n\ts.progressTracker.Set(key, rec.Offset)\n\ts.consumedRecords.Add(1)\n}\n\nfunc (s *Storage) addOffsetCommit(key kmsg.OffsetCommitKey, value kmsg.OffsetCommitValue) {\n\t// For performance reasons we'll store offset commits using a \"unique key\". Writes happen way more frequently than\n\t// reads (Prometheus scraping the endpoint). 
Hence we can group everything by group or topic on the read path as\n\t// needed instead of writing it into nested maps like a map[GroupID]map[Topic]map[Partition]\n\tuniqueKey := encodeOffsetCommitKey(key)\n\n\tcommitCount := 0\n\tcommitInterface, exists := s.offsetCommits.Get(uniqueKey)\n\tif exists {\n\t\toffsetCommit := commitInterface.(OffsetCommit)\n\t\tcommitCount = offsetCommit.CommitCount\n\t}\n\n\ttimeDay := 24 * time.Hour\n\tcommit := OffsetCommit{\n\t\tKey:             key,\n\t\tValue:           value,\n\t\tCommitCount:     commitCount + 1,\n\t\tExpireTimestamp: time.Unix(0, value.CommitTimestamp*int64(time.Millisecond)).Add(7 * timeDay),\n\t}\n\ts.offsetCommits.Set(uniqueKey, commit)\n}\n\nfunc (s *Storage) getConsumedOffsets() map[int32]int64 {\n\toffsetsByPartition := make(map[int32]int64)\n\toffsets := s.progressTracker.Items()\n\tfor partitionID, offsetStr := range offsets {\n\t\tval := offsetStr.(int64)\n\t\tpartitionID, _ := strconv.ParseInt(partitionID, 10, 32)\n\t\toffsetsByPartition[int32(partitionID)] = val\n\t}\n\n\treturn offsetsByPartition\n}\n\nfunc (s *Storage) getNumberOfConsumedRecords() float64 {\n\treturn s.consumedRecords.Load()\n}\n\nfunc (s *Storage) getGroupOffsets() map[string]map[string]map[int32]OffsetCommit {\n\t// Offsets by group, topic, partition\n\toffsetsByGroup := make(map[string]map[string]map[int32]OffsetCommit)\n\n\tif !s.isReady() {\n\t\ts.logger.Info(\"Tried to fetch consumer group offsets, but haven't consumed the whole topic yet\")\n\t\treturn offsetsByGroup\n\t}\n\n\toffsets := s.offsetCommits.Items()\n\tfor _, offset := range offsets {\n\t\tval := offset.(OffsetCommit)\n\n\t\t// Initialize inner maps as necessary\n\t\tif _, exists := offsetsByGroup[val.Key.Group]; !exists {\n\t\t\toffsetsByGroup[val.Key.Group] = make(map[string]map[int32]OffsetCommit)\n\t\t}\n\t\tif _, exists := offsetsByGroup[val.Key.Group][val.Key.Topic]; !exists {\n\t\t\toffsetsByGroup[val.Key.Group][val.Key.Topic] = make(map[int32]OffsetCommit)\n\t\t}\n\n\t\toffsetsByGroup[val.Key.Group][val.Key.Topic][val.Key.Partition] = val\n\t}\n\n\treturn offsetsByGroup\n}\n\nfunc (s *Storage) deleteOffsetCommit(key kmsg.OffsetCommitKey) {\n\tuniqueKey := encodeOffsetCommitKey(key)\n\ts.offsetCommits.Remove(uniqueKey)\n}\n\nfunc encodeOffsetCommitKey(key kmsg.OffsetCommitKey) string {\n\treturn fmt.Sprintf(\"%v:%v:%v\", key.Group, key.Topic, key.Partition)\n}\n"
  },
  {
    "path": "minion/utils.go",
    "content": "package minion\n\nimport (\n\t\"fmt\"\n\t\"regexp\"\n\t\"strings\"\n)\n\nfunc (s *Service) IsGroupAllowed(groupName string) bool {\n\tisAllowed := false\n\tfor _, regex := range s.AllowedGroupIDsExpr {\n\t\tif regex.MatchString(groupName) {\n\t\t\tisAllowed = true\n\t\t\tbreak\n\t\t}\n\t}\n\n\tfor _, regex := range s.IgnoredGroupIDsExpr {\n\t\tif regex.MatchString(groupName) {\n\t\t\tisAllowed = false\n\t\t\tbreak\n\t\t}\n\t}\n\treturn isAllowed\n}\n\nfunc (s *Service) IsTopicAllowed(topicName string) bool {\n\tisAllowed := false\n\tfor _, regex := range s.AllowedTopicsExpr {\n\t\tif regex.MatchString(topicName) {\n\t\t\tisAllowed = true\n\t\t\tbreak\n\t\t}\n\t}\n\n\tfor _, regex := range s.IgnoredTopicsExpr {\n\t\tif regex.MatchString(topicName) {\n\t\t\tisAllowed = false\n\t\t\tbreak\n\t\t}\n\t}\n\treturn isAllowed\n}\n\nfunc compileRegex(expr string) (*regexp.Regexp, error) {\n\tif strings.HasPrefix(expr, \"/\") && strings.HasSuffix(expr, \"/\") {\n\t\tsubstr := expr[1 : len(expr)-1]\n\t\tregex, err := regexp.Compile(substr)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\treturn regex, nil\n\t}\n\n\t// If this is no regex input (which is marked by the slashes around it) then we escape it so that it's a literal\n\tregex, err := regexp.Compile(\"^\" + expr + \"$\")\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\treturn regex, nil\n}\n\nfunc compileRegexes(expr []string) ([]*regexp.Regexp, error) {\n\tcompiledExpressions := make([]*regexp.Regexp, len(expr))\n\tfor i, exprStr := range expr {\n\t\texpr, err := compileRegex(exprStr)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"failed to compile expression string '%v': %w\", exprStr, err)\n\t\t}\n\t\tcompiledExpressions[i] = expr\n\t}\n\n\treturn compiledExpressions, nil\n}\n"
  },
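  {
    "path": "_examples/regex_filters/main.go",
    "content": "// Command regex_filters is an illustrative sketch, not part of the upstream\n// tree: it demonstrates the filter convention behind compileRegex and\n// IsTopicAllowed/IsGroupAllowed. Strings wrapped in slashes are compiled as\n// regular expressions; anything else is escaped and anchored so it only\n// matches the literal name. Ignored expressions take precedence over\n// allowed ones.\npackage main\n\nimport (\n\t\"fmt\"\n\t\"regexp\"\n\t\"strings\"\n)\n\nfunc compile(expr string) *regexp.Regexp {\n\tif strings.HasPrefix(expr, \"/\") && strings.HasSuffix(expr, \"/\") {\n\t\treturn regexp.MustCompile(expr[1 : len(expr)-1])\n\t}\n\treturn regexp.MustCompile(\"^\" + regexp.QuoteMeta(expr) + \"$\")\n}\n\nfunc allowed(name string, allow, ignore []*regexp.Regexp) bool {\n\tok := false\n\tfor _, re := range allow {\n\t\tif re.MatchString(name) {\n\t\t\tok = true\n\t\t\tbreak\n\t\t}\n\t}\n\tfor _, re := range ignore {\n\t\tif re.MatchString(name) {\n\t\t\treturn false\n\t\t}\n\t}\n\treturn ok\n}\n\nfunc main() {\n\tallow := []*regexp.Regexp{compile(\"/^orders-.*/\"), compile(\"payments\")}\n\tignore := []*regexp.Regexp{compile(\"/^orders-internal-.*/\")}\n\n\tfmt.Println(allowed(\"orders-eu\", allow, ignore))         // true: regex match\n\tfmt.Println(allowed(\"payments\", allow, ignore))          // true: literal match\n\tfmt.Println(allowed(\"payments-v2\", allow, ignore))       // false: literals are anchored\n\tfmt.Println(allowed(\"orders-internal-1\", allow, ignore)) // false: ignore wins\n}\n"
  },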
  {
    "path": "minion/versions.go",
    "content": "package minion\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"github.com/twmb/franz-go/pkg/kversion\"\n)\n\nfunc (s *Service) GetClusterVersion(ctx context.Context) (string, error) {\n\tres, err := s.GetAPIVersions(ctx)\n\tif err != nil {\n\t\treturn \"\", err\n\t}\n\n\tversions := kversion.FromApiVersionsResponse(res)\n\treturn versions.VersionGuess(), nil\n}\n\nfunc (s *Service) GetAPIVersions(ctx context.Context) (*kmsg.ApiVersionsResponse, error) {\n\tversionsReq := kmsg.NewApiVersionsRequest()\n\tversionsReq.ClientSoftwareName = \"kminion\"\n\tversionsReq.ClientSoftwareVersion = \"v2\"\n\tres, err := versionsReq.RequestWith(ctx, s.client)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to request api versions: %w\", err)\n\t}\n\n\terr = kerr.ErrorForCode(res.ErrorCode)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to request api versions. Inner kafka error: %w\", err)\n\t}\n\n\treturn res, nil\n}\n"
  },
  {
    "path": "prometheus/collect_broker_info.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"go.uber.org/zap\"\n\t\"strconv\"\n)\n\nfunc (e *Exporter) collectBrokerInfo(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tmetadata, err := e.minionSvc.GetMetadataCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to get kafka metadata\", zap.Error(err))\n\t\treturn false\n\t}\n\n\tfor _, broker := range metadata.Brokers {\n\t\track := \"\"\n\t\tif broker.Rack != nil {\n\t\t\track = *broker.Rack\n\t\t}\n\n\t\tisController := metadata.ControllerID == broker.NodeID\n\t\tch <- prometheus.MustNewConstMetric(\n\t\t\te.brokerInfo,\n\t\t\tprometheus.GaugeValue,\n\t\t\t1,\n\t\t\tstrconv.Itoa(int(broker.NodeID)),\n\t\t\tbroker.Host,\n\t\t\tstrconv.Itoa(int(broker.Port)),\n\t\t\track,\n\t\t\tstrconv.FormatBool(isController),\n\t\t)\n\t}\n\n\treturn true\n}\n"
  },
  {
    "path": "prometheus/collect_cluster_info.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"go.uber.org/zap\"\n\t\"strconv\"\n)\n\nfunc (e *Exporter) collectClusterInfo(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tversion, err := e.minionSvc.GetClusterVersion(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to get kafka cluster version\", zap.Error(err))\n\t\treturn false\n\t}\n\n\tmetadata, err := e.minionSvc.GetMetadataCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to get kafka metadata\", zap.Error(err))\n\t\treturn false\n\t}\n\tbrokerCount := len(metadata.Brokers)\n\tclusterID := \"\"\n\tif metadata.ClusterID != nil {\n\t\tclusterID = *metadata.ClusterID\n\t}\n\n\tch <- prometheus.MustNewConstMetric(\n\t\te.clusterInfo,\n\t\tprometheus.GaugeValue,\n\t\t1,\n\t\tversion,\n\t\tstrconv.Itoa(brokerCount),\n\t\tstrconv.Itoa(int(metadata.ControllerID)),\n\t\tclusterID,\n\t)\n\treturn true\n}\n"
  },
  {
    "path": "prometheus/collect_consumer_group_lags.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"math\"\n\t\"strconv\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/twmb/franz-go/pkg/kadm\"\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"go.uber.org/zap\"\n\n\t\"github.com/cloudhut/kminion/v2/minion\"\n)\n\ntype waterMark struct {\n\tTopicName     string\n\tPartitionID   int32\n\tLowWaterMark  int64\n\tHighWaterMark int64\n}\n\nfunc (e *Exporter) collectConsumerGroupLags(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tif !e.minionSvc.Cfg.ConsumerGroups.Enabled {\n\t\treturn true\n\t}\n\n\t// Low Watermarks (at the moment they are not needed at all, they could be used to calculate the lag on partitions\n\t// that don't have any active offsets)\n\tlowWaterMarks, err := e.minionSvc.ListStartOffsetsCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to fetch low water marks\", zap.Error(err))\n\t\treturn false\n\t}\n\t// High Watermarks\n\thighWaterMarks, err := e.minionSvc.ListEndOffsetsCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to fetch low water marks\", zap.Error(err))\n\t\treturn false\n\t}\n\twaterMarksByTopic := e.waterMarksByTopic(lowWaterMarks, highWaterMarks)\n\n\t// We have two different options to get consumer group offsets - either via the AdminAPI or by consuming the\n\t// __consumer_offsets topic.\n\tif e.minionSvc.Cfg.ConsumerGroups.ScrapeMode == minion.ConsumerGroupScrapeModeAdminAPI {\n\t\treturn e.collectConsumerGroupLagsAdminAPI(ctx, ch, waterMarksByTopic)\n\t} else {\n\t\treturn e.collectConsumerGroupLagsOffsetTopic(ctx, ch, waterMarksByTopic)\n\t}\n}\n\nfunc (e *Exporter) collectConsumerGroupLagsOffsetTopic(_ context.Context, ch chan<- prometheus.Metric, marks map[string]map[int32]waterMark) bool {\n\toffsets := e.minionSvc.ListAllConsumerGroupOffsetsInternal()\n\tfor groupName, group := range offsets {\n\t\tif !e.minionSvc.IsGroupAllowed(groupName) {\n\t\t\tcontinue\n\t\t}\n\t\toffsetCommits := 0\n\n\t\tfor topicName, topic := range group {\n\t\t\ttopicLag := float64(0)\n\t\t\ttopicOffsetSum := float64(0)\n\t\t\tfor partitionID, partition := range topic {\n\t\t\t\tchildLogger := e.logger.With(\n\t\t\t\t\tzap.String(\"consumer_group\", groupName),\n\t\t\t\t\tzap.String(\"topic_name\", topicName),\n\t\t\t\t\tzap.Int32(\"partition_id\", partitionID),\n\t\t\t\t\tzap.Int64(\"group_offset\", partition.Value.Offset))\n\n\t\t\t\ttopicMark, exists := marks[topicName]\n\t\t\t\tif !exists {\n\t\t\t\t\tchildLogger.Warn(\"consumer group has committed offsets on a topic we don't have watermarks for\")\n\t\t\t\t\tbreak // We can stop trying to find any other offsets for that topic so let's quit this loop\n\t\t\t\t}\n\t\t\t\tpartitionMark, exists := topicMark[partitionID]\n\t\t\t\tif !exists {\n\t\t\t\t\tchildLogger.Warn(\"consumer group has committed offsets on a partition we don't have watermarks for\")\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tlag := float64(partitionMark.HighWaterMark - partition.Value.Offset)\n\t\t\t\t// Lag might be negative because we fetch group offsets after we get partition offsets. It's kinda a\n\t\t\t\t// race condition. 
Negative lags obviously do not make sense so use at least 0 as lag.\n\t\t\t\tlag = math.Max(0, lag)\n\t\t\t\ttopicLag += lag\n\t\t\t\ttopicOffsetSum += float64(partition.Value.Offset)\n\n\t\t\t\t// Offset commit count for this consumer group\n\t\t\t\toffsetCommits += partition.CommitCount\n\n\t\t\t\tif e.minionSvc.Cfg.ConsumerGroups.Granularity == minion.ConsumerGroupGranularityTopic {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\t\te.consumerGroupTopicPartitionLag,\n\t\t\t\t\tprometheus.GaugeValue,\n\t\t\t\t\tlag,\n\t\t\t\t\tgroupName,\n\t\t\t\t\ttopicName,\n\t\t\t\t\tstrconv.Itoa(int(partitionID)),\n\t\t\t\t)\n\t\t\t}\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.consumerGroupTopicLag,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\ttopicLag,\n\t\t\t\tgroupName,\n\t\t\t\ttopicName,\n\t\t\t)\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.consumerGroupTopicOffsetSum,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\ttopicOffsetSum,\n\t\t\t\tgroupName,\n\t\t\t\ttopicName,\n\t\t\t)\n\t\t}\n\n\t\tch <- prometheus.MustNewConstMetric(\n\t\t\te.offsetCommits,\n\t\t\tprometheus.CounterValue,\n\t\t\tfloat64(offsetCommits),\n\t\t\tgroupName,\n\t\t)\n\t}\n\treturn true\n}\n\nfunc (e *Exporter) collectConsumerGroupLagsAdminAPI(ctx context.Context, ch chan<- prometheus.Metric, marks map[string]map[int32]waterMark) bool {\n\tisOk := true\n\n\tgroupOffsets, err := e.minionSvc.ListAllConsumerGroupOffsetsAdminAPI(ctx)\n\tfor groupName, offsetRes := range groupOffsets {\n\t\tif !e.minionSvc.IsGroupAllowed(groupName) {\n\t\t\tcontinue\n\t\t}\n\n\t\terr = kerr.ErrorForCode(offsetRes.ErrorCode)\n\t\tif err != nil {\n\t\t\te.logger.Warn(\"failed to get offsets from consumer group, inner kafka error\",\n\t\t\t\tzap.String(\"consumer_group\", groupName),\n\t\t\t\tzap.Error(err))\n\t\t\tisOk = false\n\t\t\tcontinue\n\t\t}\n\t\tfor _, topic := range offsetRes.Topics {\n\t\t\ttopicLag := float64(0)\n\t\t\ttopicOffsetSum := float64(0)\n\t\t\tfor _, partition := range topic.Partitions {\n\t\t\t\terr := kerr.ErrorForCode(partition.ErrorCode)\n\t\t\t\tif err != nil {\n\t\t\t\t\te.logger.Warn(\"failed to get consumer group offsets for a partition, inner kafka error\",\n\t\t\t\t\t\tzap.String(\"consumer_group\", groupName),\n\t\t\t\t\t\tzap.Error(err))\n\t\t\t\t\tisOk = false\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tchildLogger := e.logger.With(\n\t\t\t\t\tzap.String(\"consumer_group\", groupName),\n\t\t\t\t\tzap.String(\"topic_name\", topic.Topic),\n\t\t\t\t\tzap.Int32(\"partition_id\", partition.Partition),\n\t\t\t\t\tzap.Int64(\"group_offset\", partition.Offset))\n\t\t\t\ttopicMark, exists := marks[topic.Topic]\n\t\t\t\tif !exists {\n\t\t\t\t\tchildLogger.Warn(\"consumer group has committed offsets on a topic we don't have watermarks for\")\n\t\t\t\t\tisOk = false\n\t\t\t\t\tbreak // We can stop trying to find any other offsets for that topic so let's quit this loop\n\t\t\t\t}\n\t\t\t\tpartitionMark, exists := topicMark[partition.Partition]\n\t\t\t\tif !exists {\n\t\t\t\t\tchildLogger.Warn(\"consumer group has committed offsets on a partition we don't have watermarks for\")\n\t\t\t\t\tisOk = false\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tlag := float64(partitionMark.HighWaterMark - partition.Offset)\n\t\t\t\t// Lag might be negative because we fetch group offsets after we get partition offsets. It's kinda a\n\t\t\t\t// race condition. 
Negative lags obviously do not make sense so use at least 0 as lag.\n\t\t\t\tlag = math.Max(0, lag)\n\t\t\t\ttopicLag += lag\n\t\t\t\ttopicOffsetSum += float64(partition.Offset)\n\n\t\t\t\tif e.minionSvc.Cfg.ConsumerGroups.Granularity == minion.ConsumerGroupGranularityTopic {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\t\te.consumerGroupTopicPartitionLag,\n\t\t\t\t\tprometheus.GaugeValue,\n\t\t\t\t\tlag,\n\t\t\t\t\tgroupName,\n\t\t\t\t\ttopic.Topic,\n\t\t\t\t\tstrconv.Itoa(int(partition.Partition)),\n\t\t\t\t)\n\t\t\t}\n\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.consumerGroupTopicLag,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\ttopicLag,\n\t\t\t\tgroupName,\n\t\t\t\ttopic.Topic,\n\t\t\t)\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.consumerGroupTopicOffsetSum,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\ttopicOffsetSum,\n\t\t\t\tgroupName,\n\t\t\t\ttopic.Topic,\n\t\t\t)\n\t\t}\n\t}\n\treturn isOk\n}\n\nfunc (e *Exporter) waterMarksByTopic(lowMarks kadm.ListedOffsets, highMarks kadm.ListedOffsets) map[string]map[int32]waterMark {\n\ttype partitionID = int32\n\ttype topicName = string\n\twaterMarks := make(map[topicName]map[partitionID]waterMark)\n\n\tfor topic, lowMarksByPartitionID := range lowMarks {\n\t\t_, exists := waterMarks[topic]\n\t\tif !exists {\n\t\t\twaterMarks[topic] = make(map[partitionID]waterMark)\n\t\t}\n\n\t\tfor _, lowOffset := range lowMarksByPartitionID {\n\t\t\tif lowOffset.Err != nil {\n\t\t\t\te.logger.Debug(\"failed to get partition low water mark, inner kafka error\",\n\t\t\t\t\tzap.String(\"topic_name\", lowOffset.Topic),\n\t\t\t\t\tzap.Int32(\"partition_id\", lowOffset.Partition),\n\t\t\t\t\tzap.Error(lowOffset.Err))\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\thigOffset, exists := highMarks.Lookup(lowOffset.Topic, lowOffset.Partition)\n\t\t\tif !exists {\n\t\t\t\te.logger.Error(\"got low water marks for a topic's partition but no high watermarks\",\n\t\t\t\t\tzap.String(\"topic_name\", lowOffset.Topic),\n\t\t\t\t\tzap.Int32(\"partition_id\", lowOffset.Partition),\n\t\t\t\t\tzap.Int64(\"offset\", lowOffset.Offset))\n\t\t\t\tdelete(waterMarks, lowOffset.Topic)\n\t\t\t\tbreak // Topic watermarks are invalid -> delete & skip this topic\n\t\t\t}\n\t\t\tif higOffset.Err != nil {\n\t\t\t\te.logger.Debug(\"failed to get partition low water mark, inner kafka error\",\n\t\t\t\t\tzap.String(\"topic_name\", lowOffset.Topic),\n\t\t\t\t\tzap.Int32(\"partition_id\", lowOffset.Partition),\n\t\t\t\t\tzap.Error(lowOffset.Err))\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\twaterMarks[lowOffset.Topic][lowOffset.Partition] = waterMark{\n\t\t\t\tTopicName:     lowOffset.Topic,\n\t\t\t\tPartitionID:   lowOffset.Partition,\n\t\t\t\tLowWaterMark:  lowOffset.Offset,\n\t\t\t\tHighWaterMark: higOffset.Offset,\n\t\t\t}\n\t\t}\n\t}\n\n\treturn waterMarks\n}\n"
  },
  {
    "path": "prometheus/collect_consumer_groups.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"strconv\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n\t\"go.uber.org/zap\"\n)\n\nfunc (e *Exporter) collectConsumerGroups(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tif !e.minionSvc.Cfg.ConsumerGroups.Enabled {\n\t\treturn true\n\t}\n\tgroups, err := e.minionSvc.DescribeConsumerGroups(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to collect consumer groups, because Kafka request failed\", zap.Error(err))\n\t\treturn false\n\t}\n\n\t// The list of groups may be incomplete due to group coordinators that might fail to respond. We do log an error\n\t// message in that case (in the kafka request method) and groups will not be included in this list.\n\tfor _, grp := range groups {\n\t\tcoordinator := grp.BrokerMetadata.NodeID\n\t\tfor _, group := range grp.Groups.Groups {\n\t\t\terr := kerr.ErrorForCode(group.ErrorCode)\n\t\t\tif err != nil {\n\t\t\t\te.logger.Warn(\"failed to describe consumer group, internal kafka error\",\n\t\t\t\t\tzap.Error(err),\n\t\t\t\t\tzap.String(\"group_id\", group.Group),\n\t\t\t\t)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tif !e.minionSvc.IsGroupAllowed(group.Group) {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tstate := 0\n\t\t\tif group.State == \"Stable\" {\n\t\t\t\tstate = 1\n\t\t\t}\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.consumerGroupInfo,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\tfloat64(state),\n\t\t\t\tgroup.Group,\n\t\t\t\tgroup.Protocol,\n\t\t\t\tgroup.ProtocolType,\n\t\t\t\tgroup.State,\n\t\t\t\tstrconv.FormatInt(int64(coordinator), 10),\n\t\t\t)\n\n\t\t\t// total number of members in consumer groups\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.consumerGroupMembers,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\tfloat64(len(group.Members)),\n\t\t\t\tgroup.Group,\n\t\t\t)\n\n\t\t\t// iterate all members and build two maps:\n\t\t\t// - {topic -> number-of-consumers}\n\t\t\t// - {topic -> number-of-partitions-assigned}\n\t\t\ttopicConsumers := make(map[string]int)\n\t\t\ttopicPartitionsAssigned := make(map[string]int)\n\t\t\tmembersWithEmptyAssignment := 0\n\t\t\tfailedAssignmentsDecode := 0\n\t\t\tfor _, member := range group.Members {\n\t\t\t\tif len(member.MemberAssignment) == 0 {\n\t\t\t\t\tmembersWithEmptyAssignment++\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tkassignment, err := decodeMemberAssignments(group.ProtocolType, member)\n\t\t\t\tif err != nil {\n\t\t\t\t\te.logger.Debug(\"failed to decode consumer group member assignment, internal kafka error\",\n\t\t\t\t\t\tzap.Error(err),\n\t\t\t\t\t\tzap.String(\"group_id\", group.Group),\n\t\t\t\t\t\tzap.String(\"client_id\", member.ClientID),\n\t\t\t\t\t\tzap.String(\"member_id\", member.MemberID),\n\t\t\t\t\t\tzap.String(\"client_host\", member.ClientHost),\n\t\t\t\t\t)\n\t\t\t\t\tfailedAssignmentsDecode++\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tif kassignment == nil {\n\t\t\t\t\t// This is expected in the case of protocolTypes that don't provide valuable information\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tif len(kassignment.Topics) == 0 {\n\t\t\t\t\tmembersWithEmptyAssignment++\n\t\t\t\t}\n\t\t\t\tfor _, topic := range kassignment.Topics {\n\t\t\t\t\ttopicConsumers[topic.Topic]++\n\t\t\t\t\ttopicPartitionsAssigned[topic.Topic] += len(topic.Partitions)\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif failedAssignmentsDecode > 0 {\n\t\t\t\te.logger.Error(\"failed to decode consumer group member assignment, internal kafka 
error\",\n\t\t\t\t\tzap.Error(err),\n\t\t\t\t\tzap.String(\"group_id\", group.Group),\n\t\t\t\t\tzap.Int(\"assignment_decode_failures\", failedAssignmentsDecode),\n\t\t\t\t)\n\t\t\t}\n\n\t\t\t// number of members with no assignment in a stable consumer group\n\t\t\tif membersWithEmptyAssignment > 0 {\n\t\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\t\te.consumerGroupMembersEmpty,\n\t\t\t\t\tprometheus.GaugeValue,\n\t\t\t\t\tfloat64(membersWithEmptyAssignment),\n\t\t\t\t\tgroup.Group,\n\t\t\t\t)\n\t\t\t}\n\t\t\t// number of members in consumer groups for each topic\n\t\t\tfor topicName, consumers := range topicConsumers {\n\t\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\t\te.consumerGroupTopicMembers,\n\t\t\t\t\tprometheus.GaugeValue,\n\t\t\t\t\tfloat64(consumers),\n\t\t\t\t\tgroup.Group,\n\t\t\t\t\ttopicName,\n\t\t\t\t)\n\t\t\t}\n\t\t\t// number of partitions assigned in consumer groups for each topic\n\t\t\tfor topicName, partitions := range topicPartitionsAssigned {\n\t\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\t\te.consumerGroupAssignedTopicPartitions,\n\t\t\t\t\tprometheus.GaugeValue,\n\t\t\t\t\tfloat64(partitions),\n\t\t\t\t\tgroup.Group,\n\t\t\t\t\ttopicName,\n\t\t\t\t)\n\t\t\t}\n\t\t}\n\t}\n\treturn true\n}\n\nfunc decodeMemberAssignments(protocolType string, member kmsg.DescribeGroupsResponseGroupMember) (*kmsg.ConsumerMemberAssignment, error) {\n\tswitch protocolType {\n\tcase \"consumer\":\n\t\ta := kmsg.NewConsumerMemberAssignment()\n\t\tif err := a.ReadFrom(member.MemberAssignment); err != nil {\n\t\t\treturn nil, fmt.Errorf(\"failed to decode member assignment: %w\", err)\n\t\t}\n\t\treturn &a, nil\n\tcase \"connect\":\n\t\treturn nil, nil\n\tdefault:\n\t\treturn nil, nil\n\t}\n}\n"
  },
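  {
    "path": "prometheus/collect_consumer_groups_test.go",
    "content": "package prometheus\n\n// A minimal test sketch for decodeMemberAssignments. It round-trips an\n// assignment through kmsg's generated encoding, assuming the generated\n// AppendTo/ReadFrom pair and New* constructors; it is illustrative rather\n// than exhaustive, and the topic name below is just an example value.\n\nimport (\n\t\"testing\"\n\n\t\"github.com/twmb/franz-go/pkg/kmsg\"\n)\n\nfunc TestDecodeMemberAssignments(t *testing.T) {\n\t// Build an assignment with a single topic and two partitions.\n\tassignment := kmsg.NewConsumerMemberAssignment()\n\ttopic := kmsg.NewConsumerMemberAssignmentTopic()\n\ttopic.Topic = \"orders\"\n\ttopic.Partitions = []int32{0, 1}\n\tassignment.Topics = append(assignment.Topics, topic)\n\n\tmember := kmsg.NewDescribeGroupsResponseGroupMember()\n\tmember.MemberAssignment = assignment.AppendTo(nil)\n\n\tdecoded, err := decodeMemberAssignments(\"consumer\", member)\n\tif err != nil {\n\t\tt.Fatalf(\"expected no error, got: %v\", err)\n\t}\n\tif decoded == nil || len(decoded.Topics) != 1 || decoded.Topics[0].Topic != \"orders\" {\n\t\tt.Fatalf(\"unexpected decoded assignment: %+v\", decoded)\n\t}\n\n\t// Non-consumer protocol types are expected to return nil without error.\n\tif decoded, err := decodeMemberAssignments(\"connect\", member); err != nil || decoded != nil {\n\t\tt.Fatalf(\"expected nil assignment for connect protocol, got: %+v, %v\", decoded, err)\n\t}\n}\n"
  },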
  {
    "path": "prometheus/collect_exporter_metrics.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"github.com/prometheus/client_golang/prometheus\"\n)\n\nfunc (e *Exporter) collectExporterMetrics(_ context.Context, ch chan<- prometheus.Metric) bool {\n\trecordsConsumed := e.minionSvc.GetNumberOfOffsetRecordsConsumed()\n\tch <- prometheus.MustNewConstMetric(\n\t\te.offsetConsumerRecordsConsumed,\n\t\tprometheus.CounterValue,\n\t\trecordsConsumed,\n\t)\n\treturn true\n}\n"
  },
  {
    "path": "prometheus/collect_log_dirs.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"github.com/twmb/franz-go/pkg/kgo\"\n\t\"go.uber.org/zap\"\n\t\"strconv\"\n)\n\nfunc (e *Exporter) collectLogDirs(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tif !e.minionSvc.Cfg.LogDirs.Enabled {\n\t\treturn true\n\t}\n\tisOk := true\n\n\tsizeByBroker := make(map[kgo.BrokerMetadata]int64)\n\tsizeByTopicName := make(map[string]int64)\n\n\tlogDirsSharded := e.minionSvc.DescribeLogDirs(ctx)\n\tfor _, logDirRes := range logDirsSharded {\n\t\tchildLogger := e.logger.With(zap.String(\"broker_address\", logDirRes.Broker.Host),\n\t\t\tzap.String(\"broker_id\", strconv.Itoa(int(logDirRes.Broker.NodeID))))\n\n\t\tif logDirRes.Err != nil {\n\t\t\tchildLogger.Error(\"failed to describe a broker's log dirs\", zap.Error(logDirRes.Err))\n\t\t\tisOk = false\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, dir := range logDirRes.LogDirs.Dirs {\n\t\t\terr := kerr.ErrorForCode(dir.ErrorCode)\n\t\t\tif err != nil {\n\t\t\t\tchildLogger.Error(\"failed to describe a broker's log dir\",\n\t\t\t\t\tzap.String(\"log_dir\", dir.Dir),\n\t\t\t\t\tzap.Error(err))\n\t\t\t\tisOk = false\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tfor _, topic := range dir.Topics {\n\t\t\t\ttopicSize := int64(0)\n\t\t\t\tfor _, partition := range topic.Partitions {\n\t\t\t\t\ttopicSize += partition.Size\n\t\t\t\t}\n\t\t\t\tsizeByTopicName[topic.Topic] += topicSize\n\t\t\t\tsizeByBroker[logDirRes.Broker] += topicSize\n\t\t\t}\n\t\t}\n\t}\n\n\t// Report the total log dir size per broker\n\tfor broker, size := range sizeByBroker {\n\t\trackID := \"\"\n\t\tif broker.Rack != nil {\n\t\t\trackID = *broker.Rack\n\t\t}\n\t\tch <- prometheus.MustNewConstMetric(\n\t\t\te.brokerLogDirSize,\n\t\t\tprometheus.GaugeValue,\n\t\t\tfloat64(size),\n\t\t\tstrconv.Itoa(int(broker.NodeID)),\n\t\t\tbroker.Host,\n\t\t\tstrconv.Itoa(int(broker.Port)),\n\t\t\trackID,\n\t\t)\n\t}\n\n\t// If one of the log dir responses returned an error we can not reliably report the topic log dirs, as there might\n\t// be additional data on the brokers that failed to respond.\n\tif !isOk {\n\t\treturn false\n\t}\n\n\t// Report the total log dir size per topic\n\tfor topicName, size := range sizeByTopicName {\n\t\tch <- prometheus.MustNewConstMetric(\n\t\t\te.topicLogDirSize,\n\t\t\tprometheus.GaugeValue,\n\t\t\tfloat64(size),\n\t\t\ttopicName,\n\t\t)\n\t}\n\n\treturn isOk\n}\n"
  },
  {
    "path": "prometheus/collect_topic_info.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"strconv\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/twmb/franz-go/pkg/kerr\"\n\t\"go.uber.org/zap\"\n)\n\nfunc (e *Exporter) collectTopicInfo(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tif !e.minionSvc.Cfg.Topics.Enabled {\n\t\treturn true\n\t}\n\n\tmetadata, err := e.minionSvc.GetMetadataCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to get metadata\", zap.Error(err))\n\t\treturn false\n\t}\n\n\ttopicConfigs, err := e.minionSvc.GetTopicConfigs(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to get topic configs\", zap.Error(err))\n\t\treturn false\n\t}\n\n\tisOk := true\n\t// ConfigsByTopic is indexed by topic name and config resource name (inner key)\n\tconfigsByTopic := make(map[string]map[string]string)\n\tfor _, resource := range topicConfigs.Resources {\n\t\tconfigsByTopic[resource.ResourceName] = make(map[string]string)\n\t\ttypedErr := kerr.TypedErrorForCode(resource.ErrorCode)\n\t\tif typedErr != nil {\n\t\t\tisOk = false\n\t\t\te.logger.Warn(\"failed to get topic config of a specific topic\",\n\t\t\t\tzap.String(\"topic_name\", resource.ResourceName),\n\t\t\t\tzap.Error(typedErr))\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, config := range resource.Configs {\n\t\t\tconfVal := \"nil\"\n\t\t\tif config.Value != nil {\n\t\t\t\tconfVal = *config.Value\n\t\t\t}\n\t\t\tconfigsByTopic[resource.ResourceName][config.Name] = confVal\n\t\t}\n\n\t}\n\n\tfor _, topic := range metadata.Topics {\n\t\ttopicName := *topic.Topic\n\t\tif !e.minionSvc.IsTopicAllowed(topicName) {\n\t\t\tcontinue\n\t\t}\n\t\ttypedErr := kerr.TypedErrorForCode(topic.ErrorCode)\n\t\tif typedErr != nil {\n\t\t\tisOk = false\n\t\t\te.logger.Warn(\"failed to get metadata of a specific topic\",\n\t\t\t\tzap.String(\"topic_name\", topicName),\n\t\t\t\tzap.Error(typedErr))\n\t\t\tcontinue\n\t\t}\n\t\tpartitionCount := len(topic.Partitions)\n\t\treplicationFactor := -1\n\t\tif partitionCount > 0 {\n\t\t\t// It should never be possible to skip this, but just to be safe we'll check this so that we don't cause panics\n\t\t\treplicationFactor = len(topic.Partitions[0].Replicas)\n\t\t}\n\n\t\tvar labelsValues []string\n\t\tlabelsValues = append(labelsValues, topicName)\n\t\tlabelsValues = append(labelsValues, strconv.Itoa(partitionCount))\n\t\tlabelsValues = append(labelsValues, strconv.Itoa(replicationFactor))\n\t\tfor _, key := range e.minionSvc.Cfg.Topics.InfoMetric.ConfigKeys {\n\t\t\tlabelsValues = append(labelsValues, getOrDefault(configsByTopic[topicName], key, \"N/A\"))\n\t\t}\n\t\tch <- prometheus.MustNewConstMetric(\n\t\t\te.topicInfo,\n\t\t\tprometheus.GaugeValue,\n\t\t\tfloat64(1),\n\t\t\tlabelsValues...,\n\t\t)\n\t}\n\treturn isOk\n}\n\nfunc getOrDefault(m map[string]string, key string, defaultValue string) string {\n\tif value, exists := m[key]; exists {\n\t\treturn value\n\t}\n\treturn defaultValue\n}\n"
  },
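  {
    "path": "prometheus/collect_topic_info_test.go",
    "content": "package prometheus\n\nimport \"testing\"\n\n// A small test sketch for the getOrDefault helper used when resolving\n// per-topic config values into topic_info metric labels. The config keys\n// below are just example values.\nfunc TestGetOrDefault(t *testing.T) {\n\tconfigs := map[string]string{\"cleanup.policy\": \"compact\"}\n\n\tif got := getOrDefault(configs, \"cleanup.policy\", \"N/A\"); got != \"compact\" {\n\t\tt.Errorf(\"expected existing key to be returned, got: %q\", got)\n\t}\n\tif got := getOrDefault(configs, \"retention.ms\", \"N/A\"); got != \"N/A\" {\n\t\tt.Errorf(\"expected default value for missing key, got: %q\", got)\n\t}\n\tif got := getOrDefault(nil, \"cleanup.policy\", \"N/A\"); got != \"N/A\" {\n\t\tt.Errorf(\"expected default value for nil map, got: %q\", got)\n\t}\n}\n"
  },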
  {
    "path": "prometheus/collect_topic_partition_offsets.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"strconv\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"go.uber.org/zap\"\n\n\t\"github.com/cloudhut/kminion/v2/minion\"\n)\n\nfunc (e *Exporter) collectTopicPartitionOffsets(ctx context.Context, ch chan<- prometheus.Metric) bool {\n\tif !e.minionSvc.Cfg.Topics.Enabled {\n\t\treturn true\n\t}\n\n\tisOk := true\n\n\t// Low Watermarks\n\tlowWaterMarks, err := e.minionSvc.ListStartOffsetsCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to fetch low water marks\", zap.Error(err))\n\t\treturn false\n\t}\n\t// High Watermarks\n\thighWaterMarks, err := e.minionSvc.ListEndOffsetsCached(ctx)\n\tif err != nil {\n\t\te.logger.Error(\"failed to fetch low water marks\", zap.Error(err))\n\t\treturn false\n\t}\n\n\t// Process Low Watermarks\n\n\tfor topicName, partitions := range lowWaterMarks {\n\t\tif !e.minionSvc.IsTopicAllowed(topicName) {\n\t\t\tcontinue\n\t\t}\n\n\t\twaterMarkSum := int64(0)\n\t\thasErrors := false\n\t\tfor _, offset := range partitions {\n\t\t\tif offset.Err != nil {\n\t\t\t\thasErrors = true\n\t\t\t\tisOk = false\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\twaterMarkSum += offset.Offset\n\t\t\t// Let's end here if partition metrics shall not be exposed\n\t\t\tif e.minionSvc.Cfg.Topics.Granularity == minion.TopicGranularityTopic {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.partitionLowWaterMark,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\tfloat64(offset.Offset),\n\t\t\t\ttopicName,\n\t\t\t\tstrconv.Itoa(int(offset.Partition)),\n\t\t\t)\n\t\t}\n\t\t// We only want to report the sum of all partition marks if we receive watermarks from all partition\n\t\tif !hasErrors {\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.topicLowWaterMarkSum,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\tfloat64(waterMarkSum),\n\t\t\t\ttopicName,\n\t\t\t)\n\t\t}\n\t}\n\n\tfor topicName, partitions := range highWaterMarks {\n\t\tif !e.minionSvc.IsTopicAllowed(topicName) {\n\t\t\tcontinue\n\t\t}\n\t\twaterMarkSum := int64(0)\n\t\thasErrors := false\n\t\tfor _, offset := range partitions {\n\t\t\tif offset.Err != nil {\n\t\t\t\thasErrors = true\n\t\t\t\tisOk = false\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\twaterMarkSum += offset.Offset\n\t\t\t// Let's end here if partition metrics shall not be exposed\n\t\t\tif e.minionSvc.Cfg.Topics.Granularity == minion.TopicGranularityTopic {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.partitionHighWaterMark,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\tfloat64(offset.Offset),\n\t\t\t\ttopicName,\n\t\t\t\tstrconv.Itoa(int(offset.Partition)),\n\t\t\t)\n\t\t}\n\t\t// We only want to report the sum of all partition marks if we receive watermarks from all partitions\n\t\tif !hasErrors {\n\t\t\tch <- prometheus.MustNewConstMetric(\n\t\t\t\te.topicHighWaterMarkSum,\n\t\t\t\tprometheus.GaugeValue,\n\t\t\t\tfloat64(waterMarkSum),\n\t\t\t\ttopicName,\n\t\t\t)\n\t\t}\n\t}\n\n\treturn isOk\n}\n"
  },
  {
    "path": "prometheus/config.go",
    "content": "package prometheus\n\ntype Config struct {\n\tHost      string `koanf:\"host\"`\n\tPort      int    `koanf:\"port\"`\n\tNamespace string `koanf:\"namespace\"`\n}\n\nfunc (c *Config) SetDefaults() {\n\tc.Port = 8080\n\tc.Namespace = \"kminion\"\n}\n"
  },
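  {
    "path": "prometheus/doc.go",
    "content": "// Package prometheus implements the KMinion exporter: a prometheus.Collector\n// that translates Kafka cluster state fetched by the minion service into\n// Prometheus metrics.\n//\n// Each scrape runs the collect* functions (cluster info, broker info, log\n// dirs, consumer groups, topic partition offsets, consumer group lags and\n// topic info). Their boolean results are AND-ed together and reported via\n// the <namespace>_exporter_up gauge, so a single failed sub-collector marks\n// the whole scrape as unhealthy.\n//\n// Illustrative PromQL, assuming the default \"kminion\" namespace from\n// Config.SetDefaults:\n//\n//\t# Consumer group lag summed per group\n//\tsum by (group_id) (kminion_kafka_consumer_group_topic_lag)\n//\n//\t# Alert when a scrape failed\n//\tkminion_exporter_up == 0\npackage prometheus\n"
  },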
  {
    "path": "prometheus/exporter.go",
    "content": "package prometheus\n\nimport (\n\t\"context\"\n\t\"os\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/cloudhut/kminion/v2/minion\"\n\tuuid2 \"github.com/google/uuid\"\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"go.uber.org/zap\"\n)\n\n// Exporter is the Prometheus exporter that implements the prometheus.Collector interface\ntype Exporter struct {\n\tcfg       Config\n\tlogger    *zap.Logger\n\tminionSvc *minion.Service\n\n\t// Exporter metrics\n\texporterUp                    *prometheus.Desc\n\toffsetConsumerRecordsConsumed *prometheus.Desc\n\n\t// Kafka metrics\n\t// General\n\tclusterInfo *prometheus.Desc\n\tbrokerInfo  *prometheus.Desc\n\n\t// Log Dir Sizes\n\tbrokerLogDirSize *prometheus.Desc\n\ttopicLogDirSize  *prometheus.Desc\n\n\t// Topic / Partition\n\ttopicInfo              *prometheus.Desc\n\ttopicHighWaterMarkSum  *prometheus.Desc\n\tpartitionHighWaterMark *prometheus.Desc\n\ttopicLowWaterMarkSum   *prometheus.Desc\n\tpartitionLowWaterMark  *prometheus.Desc\n\n\t// Consumer Groups\n\tconsumerGroupInfo                    *prometheus.Desc\n\tconsumerGroupMembers                 *prometheus.Desc\n\tconsumerGroupMembersEmpty            *prometheus.Desc\n\tconsumerGroupTopicMembers            *prometheus.Desc\n\tconsumerGroupAssignedTopicPartitions *prometheus.Desc\n\tconsumerGroupTopicOffsetSum          *prometheus.Desc\n\tconsumerGroupTopicPartitionLag       *prometheus.Desc\n\tconsumerGroupTopicLag                *prometheus.Desc\n\toffsetCommits                        *prometheus.Desc\n}\n\nfunc NewExporter(cfg Config, logger *zap.Logger, minionSvc *minion.Service) (*Exporter, error) {\n\treturn &Exporter{cfg: cfg, logger: logger.Named(\"prometheus\"), minionSvc: minionSvc}, nil\n}\n\nfunc (e *Exporter) InitializeMetrics() {\n\t// Exporter / internal metrics\n\t// Exporter up\n\te.exporterUp = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"exporter\", \"up\"),\n\t\t\"Build info about this Prometheus Exporter. Gauge value is 0 if one or more scrapes have failed.\",\n\t\tnil,\n\t\tmap[string]string{\"version\": os.Getenv(\"VERSION\")},\n\t)\n\t// OffsetConsumer records consumed\n\te.offsetConsumerRecordsConsumed = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"exporter\", \"offset_consumer_records_consumed_total\"),\n\t\t\"The number of offset records that have been consumed by the internal offset consumer\",\n\t\t[]string{},\n\t\tnil,\n\t)\n\n\t// Kafka metrics\n\t// Cluster info\n\te.clusterInfo = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"cluster_info\"),\n\t\t\"Kafka cluster information\",\n\t\t[]string{\"cluster_version\", \"broker_count\", \"controller_id\", \"cluster_id\"},\n\t\tnil,\n\t)\n\t// Broker Info\n\te.brokerInfo = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"broker_info\"),\n\t\t\"Kafka broker information\",\n\t\t[]string{\"broker_id\", \"address\", \"port\", \"rack_id\", \"is_controller\"},\n\t\tnil,\n\t)\n\n\t// LogDir sizes\n\te.brokerLogDirSize = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"broker_log_dir_size_total_bytes\"),\n\t\t\"The summed size in bytes of all log dirs for a given broker\",\n\t\t[]string{\"broker_id\", \"address\", \"port\", \"rack_id\"},\n\t\tnil,\n\t)\n\te.topicLogDirSize = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"topic_log_dir_size_total_bytes\"),\n\t\t\"The summed size in bytes of partitions for a given topic. 
This includes the used space for replica partitions.\",\n\t\t[]string{\"topic_name\"},\n\t\tnil,\n\t)\n\n\t// Topic / Partition metrics\n\t// Topic info\n\tvar labels = []string{\"topic_name\", \"partition_count\", \"replication_factor\"}\n\tfor _, key := range e.minionSvc.Cfg.Topics.InfoMetric.ConfigKeys {\n\t\t// prometheus does not allow . in label keys\n\t\tlabels = append(labels, strings.ReplaceAll(key, \".\", \"_\"))\n\t}\n\te.topicInfo = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"topic_info\"),\n\t\t\"Info labels for a given topic\",\n\t\tlabels,\n\t\tnil,\n\t)\n\t// Partition Low Water Mark\n\te.partitionLowWaterMark = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"topic_partition_low_water_mark\"),\n\t\t\"Partition Low Water Mark\",\n\t\t[]string{\"topic_name\", \"partition_id\"},\n\t\tnil,\n\t)\n\t// Topic Low Water Mark Sum\n\te.topicLowWaterMarkSum = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"topic_low_water_mark_sum\"),\n\t\t\"Sum of all the topic's partition low water marks\",\n\t\t[]string{\"topic_name\"},\n\t\tnil,\n\t)\n\t// Partition High Water Mark\n\te.partitionHighWaterMark = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"topic_partition_high_water_mark\"),\n\t\t\"Partition High Water Mark\",\n\t\t[]string{\"topic_name\", \"partition_id\"},\n\t\tnil,\n\t)\n\t// Topic Low Water Mark Sum\n\te.topicHighWaterMarkSum = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"topic_high_water_mark_sum\"),\n\t\t\"Sum of all the topic's partition high water marks\",\n\t\t[]string{\"topic_name\"},\n\t\tnil,\n\t)\n\n\t// Consumer Group Metrics\n\t// Group Info\n\te.consumerGroupInfo = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_info\"),\n\t\t\"Consumer Group info metrics. It will report 1 if the group is in the stable state, otherwise 0.\",\n\t\t[]string{\"group_id\", \"protocol\", \"protocol_type\", \"state\", \"coordinator_id\"},\n\t\tnil,\n\t)\n\t// Group Members\n\te.consumerGroupMembers = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_members\"),\n\t\t\"Consumer Group member count metrics. 
It will report the number of members in the consumer group\",\n\t\t[]string{\"group_id\"},\n\t\tnil,\n\t)\n\t// Group Empty Memmbers\n\te.consumerGroupMembersEmpty = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_empty_members\"),\n\t\t\"It will report the number of members in the consumer group with no partition assigned\",\n\t\t[]string{\"group_id\"},\n\t\tnil,\n\t)\n\t// Group Topic Members\n\te.consumerGroupTopicMembers = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_topic_members\"),\n\t\t\"It will report the number of members in the consumer group assigned on a given topic\",\n\t\t[]string{\"group_id\", \"topic_name\"},\n\t\tnil,\n\t)\n\t// Group Topic Assigned Partitions\n\te.consumerGroupAssignedTopicPartitions = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_topic_assigned_partitions\"),\n\t\t\"It will report the number of partitions assigned in the consumer group for a given topic\",\n\t\t[]string{\"group_id\", \"topic_name\"},\n\t\tnil,\n\t)\n\t// Topic / Partition Offset Sum (useful for calculating the consumed messages / sec on a topic)\n\te.consumerGroupTopicOffsetSum = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_topic_offset_sum\"),\n\t\t\"The sum of all committed group offsets across all partitions in a topic\",\n\t\t[]string{\"group_id\", \"topic_name\"},\n\t\tnil,\n\t)\n\t// Partition Lag\n\te.consumerGroupTopicPartitionLag = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_topic_partition_lag\"),\n\t\t\"The number of messages a consumer group is lagging behind the latest offset of a partition\",\n\t\t[]string{\"group_id\", \"topic_name\", \"partition_id\"},\n\t\tnil,\n\t)\n\t// Topic Lag (sum of all partition lags)\n\te.consumerGroupTopicLag = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_topic_lag\"),\n\t\t\"The number of messages a consumer group is lagging behind across all partitions in a topic\",\n\t\t[]string{\"group_id\", \"topic_name\"},\n\t\tnil,\n\t)\n\t// Offset commits by group id\n\te.offsetCommits = prometheus.NewDesc(\n\t\tprometheus.BuildFQName(e.cfg.Namespace, \"kafka\", \"consumer_group_offset_commits_total\"),\n\t\t\"The number of offsets committed by a group\",\n\t\t[]string{\"group_id\"},\n\t\tnil,\n\t)\n\n}\n\n// Describe implements the prometheus.Collector interface. It sends the\n// super-set of all possible descriptors of metrics collected by this\n// Collector to the provided channel and returns once the last descriptor\n// has been sent. 
The sent descriptors fulfill the consistency and uniqueness\n// requirements described in the Desc documentation.\nfunc (e *Exporter) Describe(ch chan<- *prometheus.Desc) {\n\tch <- e.exporterUp\n\tch <- e.clusterInfo\n}\n\nfunc (e *Exporter) Collect(ch chan<- prometheus.Metric) {\n\tctx, cancel := context.WithTimeout(context.Background(), time.Second*60)\n\tdefer cancel()\n\n\t// Attach a unique id which will be used for caching (and and it's invalidation) of the kafka requests\n\tuuid := uuid2.New()\n\tctx = context.WithValue(ctx, \"requestId\", uuid.String())\n\n\tok := e.collectClusterInfo(ctx, ch)\n\tok = e.collectExporterMetrics(ctx, ch) && ok\n\tok = e.collectBrokerInfo(ctx, ch) && ok\n\tok = e.collectLogDirs(ctx, ch) && ok\n\tok = e.collectConsumerGroups(ctx, ch) && ok\n\tok = e.collectTopicPartitionOffsets(ctx, ch) && ok\n\tok = e.collectConsumerGroupLags(ctx, ch) && ok\n\tok = e.collectTopicInfo(ctx, ch) && ok\n\n\tif ok {\n\t\tch <- prometheus.MustNewConstMetric(e.exporterUp, prometheus.GaugeValue, 1.0)\n\t} else {\n\t\tch <- prometheus.MustNewConstMetric(e.exporterUp, prometheus.GaugeValue, 0.0)\n\t}\n}\n"
  }
]