Repository: cloudworkz/kafka-minion Branch: master Commit: 06075b9f33bd Files: 107 Total size: 334.9 KB Directory structure: gitextract_ind4nfvb/ ├── .github/ │ ├── ai-opt-out │ └── workflows/ │ ├── docker-image.yml │ └── goreleaser.yml ├── .gitignore ├── .goreleaser.yml ├── Dockerfile ├── LICENSE ├── README.md ├── charts/ │ ├── README.md │ ├── archives/ │ │ ├── .gitkeep │ │ ├── index.yaml │ │ ├── kminion-0.1.0.tgz │ │ ├── kminion-0.1.1.tgz │ │ ├── kminion-0.1.2.tgz │ │ ├── kminion-0.1.3.tgz │ │ ├── kminion-0.11.1.tgz │ │ ├── kminion-0.11.2.tgz │ │ ├── kminion-0.11.3.tgz │ │ ├── kminion-0.12.0.tgz │ │ ├── kminion-0.2.0.tgz │ │ ├── kminion-0.2.1.tgz │ │ ├── kminion-0.2.2.tgz │ │ ├── kminion-0.3.0.tgz │ │ ├── kminion-0.3.1.tgz │ │ ├── kminion-0.4.0.tgz │ │ ├── kminion-0.5.0.tgz │ │ ├── kminion-0.6.0.tgz │ │ ├── kminion-0.7.0.tgz │ │ ├── kminion-0.8.0.tgz │ │ ├── kminion-0.8.1.tgz │ │ ├── kminion-0.8.2.tgz │ │ ├── kminion-0.8.3.tgz │ │ └── kminion-0.9.0.tgz │ └── kminion/ │ ├── .helmignore │ ├── Chart.yaml │ ├── templates/ │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── daemonset.yaml │ │ ├── deployment.yaml │ │ ├── hpa.yaml │ │ ├── ingress.yaml │ │ ├── poddisruptionbudget.yaml │ │ ├── service.yaml │ │ ├── serviceaccount.yaml │ │ └── servicemonitor.yaml │ └── values.yaml ├── config.go ├── docker-compose.yml ├── docs/ │ ├── end-to-end.md │ ├── metrics.md │ └── reference-config.yaml ├── e2e/ │ ├── client_hooks.go │ ├── config.go │ ├── config_consumer.go │ ├── config_producer.go │ ├── config_topic.go │ ├── config_topic_test.go │ ├── consumer.go │ ├── endtoend_message.go │ ├── group_tracker.go │ ├── message_tracker.go │ ├── partition_planner.go │ ├── partition_planner_test.go │ ├── producer.go │ ├── service.go │ ├── topic.go │ └── utils.go ├── go.mod ├── go.sum ├── kafka/ │ ├── client_config_helper.go │ ├── client_logger.go │ ├── config.go │ ├── config_sasl.go │ ├── config_sasl_gssapi.go │ ├── config_sasl_oauthbearer.go │ ├── config_tls.go │ └── service.go ├── logging/ │ ├── config.go │ └── logger.go ├── main.go ├── minion/ │ ├── client_hooks.go │ ├── config.go │ ├── config_consumer_group.go │ ├── config_log_dirs.go │ ├── config_topic_config.go │ ├── consumer_group_offsets.go │ ├── describe_consumer_groups.go │ ├── describe_topic_config.go │ ├── list_offsets.go │ ├── log_dirs.go │ ├── metadata.go │ ├── offset_consumer.go │ ├── service.go │ ├── storage.go │ ├── utils.go │ └── versions.go └── prometheus/ ├── collect_broker_info.go ├── collect_cluster_info.go ├── collect_consumer_group_lags.go ├── collect_consumer_groups.go ├── collect_exporter_metrics.go ├── collect_log_dirs.go ├── collect_topic_info.go ├── collect_topic_partition_offsets.go ├── config.go └── exporter.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ai-opt-out ================================================ opt-out: true ================================================ FILE: .github/workflows/docker-image.yml ================================================ --- name: docker-image on: push: tags: ['*'] branches: ['master'] paths-ignore: ['charts/**'] permissions: id-token: write contents: read jobs: build: runs-on: ubuntu-latest steps: - uses: aws-actions/configure-aws-credentials@v4 with: aws-region: ${{ vars.RP_AWS_CRED_REGION }} role-to-assume: arn:aws:iam::${{ secrets.RP_AWS_CRED_ACCOUNT_ID }}:role/${{ vars.RP_AWS_CRED_BASE_ROLE_NAME }}${{ github.event.repository.name }} - uses: 
aws-actions/aws-secretsmanager-get-secrets@v2 with: secret-ids: | ,sdlc/prod/github/dockerhub parse-json-secrets: true - uses: actions/checkout@v4 - uses: docker/setup-qemu-action@v3 - uses: docker/setup-buildx-action@v3 with: driver-opts: | image=moby/buildkit:v0.21.1 network=host - name: Set build date run: | echo "BUILT_AT=$(date --rfc-3339=date)" >> ${GITHUB_ENV} - uses: docker/metadata-action@v5 id: docker_meta with: # list of Docker images to use as base name for tags images: | redpandadata/kminion name=public.ecr.aws/l9j0i2e0/kminion,enable=${{ startsWith(github.ref, 'refs/tags/v') }} # generate Docker tags based on the following events/attributes # Semver type is only active on 'push tag' events, # hence no enable condition required tags: | type=sha,prefix={{branch}}-,format=short,enable={{is_default_branch}} type=semver,pattern={{raw}} - uses: docker/login-action@v3 with: username: ${{ env.DOCKERHUB_USER }} password: ${{ env.DOCKERHUB_TOKEN }} - uses: aws-actions/configure-aws-credentials@v4 if: ${{ startsWith(github.ref, 'refs/tags/v') }} with: aws-region: us-east-1 role-to-assume: arn:aws:iam::${{ secrets.RP_AWS_CRED_ACCOUNT_ID }}:role/${{ vars.RP_AWS_CRED_BASE_ROLE_NAME }}${{ github.event.repository.name }} - uses: aws-actions/amazon-ecr-login@v2 if: ${{ startsWith(github.ref, 'refs/tags/v') }} with: registry-type: public - uses: docker/build-push-action@v6 with: provenance: false push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.docker_meta.outputs.tags }} build-args: | VERSION=${{ fromJSON(steps.docker_meta.outputs.json).labels['org.opencontainers.image.version'] }} BUILT_AT=${{ env.BUILT_AT }} COMMIT=${{ github.sha }} cache-from: type=gha cache-to: type=gha,mode=max ================================================ FILE: .github/workflows/goreleaser.yml ================================================ --- name: goreleaser on: push: tags: ['*'] jobs: goreleaser: runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-go@v6 with: go-version-file: 'go.mod' - uses: goreleaser/goreleaser-action@v7 if: startsWith(github.ref, 'refs/tags/') with: version: latest args: release --clean workdir: . 
env: CGO_ENABLED: 0 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GORELEASER_CURRENT_TAG: ${{ github.ref_name }} ================================================ FILE: .gitignore ================================================ # Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib # Test binary, build with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out zk-single-kafka-single zk-multiple-kafka-multiple .vscode .idea config /kminion ================================================ FILE: .goreleaser.yml ================================================ --- version: 2 release: name_template: '{{.Version}} / {{time "2006-01-02"}}' prerelease: auto mode: append footer: | ## Docker Image Use the following command to pull this release's Docker image: ```sh docker pull redpandadata/kminion:{{ .Tag }} ``` changelog: disable: false use: github filters: # Commit messages matching the regexp listed here will be removed from the changelog exclude: - '^docs:' - '^test:' - '^npm:' - '^go.mod:' - '^.github:' - 'Merge branch' builds: - id: kminion binary: kminion goos: - darwin - linux - windows goarch: - amd64 - arm64 ldflags: - -s -w -X main.version={{.Version}} -X main.builtAt={{.Date}} -X main.commit={{.Commit}} checksum: name_template: 'checksums.txt' ================================================ FILE: Dockerfile ================================================ ############################################################ # Build image ############################################################ FROM golang:1.26-alpine AS builder ARG VERSION ARG BUILT_AT ARG COMMIT RUN apk update && apk upgrade --no-cache && apk add --no-cache git ca-certificates && update-ca-certificates WORKDIR /app COPY go.mod . COPY go.sum . RUN go mod download COPY . . RUN CGO_ENABLED=0 go build \ -ldflags="-w -s \ -X main.version=$VERSION \ -X main.commit=$COMMIT \ -X main.builtAt=$BUILT_AT" \ -o ./bin/kminion ############################################################ # Runtime Image ############################################################ FROM alpine:3 COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ COPY --from=builder /app/bin/kminion /app/kminion RUN addgroup -S redpanda \ && adduser -S redpanda -G redpanda \ && chmod o+rx /app/kminion \ && apk upgrade --no-cache USER redpanda ENTRYPOINT ["/app/kminion"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2021 CloudHut Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Redpanda Minion (KMinion) - Prometheus Exporter via Kafka API [Redpanda](https://www.redpanda.com) Minion is a feature-rich and flexible Prometheus exporter to monitor your Redpanda or Kafka cluster. All valuable information that is accessible via the Kafka protocol should be accessible through KMinion as well. ## 🚀 Features - **Kafka versions:** Supports all Kafka versions v0.11+ - **Supported SASL mechanisms:** plain, scram-sha-256/512, gssapi/kerberos - **TLS support:** TLS is supported, regardless of whether you need mTLS, a custom CA, encrypted keys or just the trusted root certs - **Consumer Group Lags:** Number of messages a consumer group is lagging behind the latest offset - **Log dir sizes:** Metric for log dir sizes, either grouped by broker or by topic - **Broker info:** Metric for each broker with its address, broker ID, controller status and rack ID - **Configurable granularity:** Export metrics (e.g. consumer group lags) either per partition or per topic. Helps to reduce the number of exported metric series. - **End to End Monitoring:** Sends messages to its own topic and consumes them, measuring a message's real-world "roundtrip" latency. Also provides ack-latency and offset-commit-latency. [More Info](/docs/end-to-end.md) - **Configurable targets:** You can configure which topics or groups you'd like to export using regular expressions - **Multiple config parsers:** KMinion can be configured using YAML, environment variables, or a mix of both You can find a list of all exported metrics here: [/docs/metrics.md](/docs/metrics.md) ## Getting started ### 🐳 Docker image Images are built on every push to master and for every new release. You can find an overview of all available tags in our [DockerHub repository](https://hub.docker.com/r/redpandadata/kminion/tags). ```shell docker pull redpandadata/kminion:latest ``` ### ☸ Helm chart The Helm chart is maintained as part of Redpanda's [helm-charts](https://github.com/redpanda-data/helm-charts/tree/main/charts/kminion) repository. ### 🔧 Configuration All options in KMinion can be configured via YAML or environment variables; you can also mix both, in which case environment variables take precedence. You can find the reference config with additional documentation in [/docs/reference-config.yaml](/docs/reference-config.yaml). If you want to use a YAML config file, specify its path by setting the env variable `CONFIG_FILEPATH`. ### 📊 Grafana Dashboards Three separate Grafana dashboards are available that can serve as inspiration for creating your own. Note that they might not work out of the box due to different labeling in your Prometheus config. Cluster Dashboard: https://grafana.com/grafana/dashboards/14012 Consumer Group Dashboard: https://grafana.com/grafana/dashboards/14014 Topic Dashboard: https://grafana.com/grafana/dashboards/14013
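To make the precedence described in the Configuration section concrete, here is a minimal sketch. The YAML keys and the `KAFKA_SASL_PASSWORD` variable name follow the [reference config](/docs/reference-config.yaml); the broker address and credentials are placeholders:

```yaml
# config.yaml - referenced via the CONFIG_FILEPATH env variable
kafka:
  brokers:
    - kafka-0.mycompany.example:9092
  sasl:
    enabled: true
    mechanism: SCRAM-SHA-256
    username: kminion
exporter:
  port: 8080
```

```shell
# The secret is injected via the environment; if kafka.sasl.password were
# also set in the YAML file, the environment variable would take precedence.
export CONFIG_FILEPATH=/etc/kminion/config.yaml
export KAFKA_SASL_PASSWORD='<your-password>'
./kminion
```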

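The same settings can also be supplied purely via environment variables, with no YAML file at all. A minimal sketch, again with a placeholder broker address:

```shell
docker run -p 8080:8080 \
  -e KAFKA_BROKERS=kafka-0.mycompany.example:9092 \
  redpandadata/kminion:latest
```

KMinion then exposes its metrics at http://localhost:8080/metrics.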
### ⚡ Testing locally This repo contains a docker-compose file that you can run on your machine. It spins up a Kafka & ZooKeeper cluster and starts KMinion on port 8080, which is exposed to your host machine: ```shell # 1. Clone this repo # 2. Browse to the repo's root directory and run: docker-compose up ``` ## Chat with us We use Slack to communicate. If you are looking for more interactive discussions or support, you are invited to join our Slack server: https://redpanda.com/slack ## License KMinion is distributed under the [MIT License](https://github.com/cloudhut/kminion/blob/master/LICENSE). ================================================ FILE: charts/README.md ================================================ # Helm Chart ⚠️ This chart has been moved to https://github.com/redpanda-data/helm-charts/tree/main/charts/kminion . Please install the chart from there instead. The existing archives are still hosted here so that existing deployments don't break. --- This chart is intentionally very light on input validation. The goal was to offer a flexible Helm chart that lets users deploy KMinion the way they want to. This flexibility comes at the cost of less input validation, so a misconfiguration may only surface as a runtime error. All available inputs are documented in the [values.yaml](./kminion/values.yaml) file. ## Installing the Helm chart ```shell helm repo add kminion https://raw.githubusercontent.com/cloudhut/kminion/master/charts/archives helm repo update helm install -f values.yaml kminion kminion/kminion ``` ================================================ FILE: charts/archives/.gitkeep ================================================ ================================================ FILE: charts/archives/index.yaml ================================================ apiVersion: v1 entries: kminion: - apiVersion: v2 appVersion: v2.2.5 created: "2023-07-03T16:38:22.568312+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 32e2ee36d0b0a045061d4e1490780fef905b4c85d7a23659819c5cb128aaa119 name: kminion type: application urls: - kminion-0.12.0.tgz version: 0.12.0 - apiVersion: v2 appVersion: v2.2.5 created: "2023-07-03T16:38:22.567922+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 8a7be130d57f6f8ead720277b69319ff4dcd364859e80f4750416abe5ed460c3 name: kminion type: application urls: - kminion-0.11.3.tgz version: 0.11.3 - apiVersion: v2 appVersion: v2.2.3 created: "2023-07-03T16:38:22.5675+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 42991a871f58b6d31a9e5b38539eb3d1e9cd35c0097a0fcf63f21f818fa7a999 name: kminion type: application urls: - kminion-0.11.2.tgz version: 0.11.2 - apiVersion: v2 appVersion: v2.2.3 created: "2023-07-03T16:38:22.566877+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 65d7231f1e8ee586bec42bc383b66726d596fe03e0f3183e14b688174a3a8112 name: kminion type: application urls: - kminion-0.11.1.tgz version: 0.11.1 - apiVersion: v2 appVersion: v2.2.0 created: "2023-07-03T16:38:22.575384+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 358bdd509f573049d4bfe77d2edb94c7ad3938f609aea11a8e2c2dc65cca2a9a name: kminion type: application urls: - kminion-0.9.0.tgz version: 0.9.0 - apiVersion: v2 appVersion: v2.2.0 created: "2023-07-03T16:38:22.574906+01:00" description: A Helm chart to deploy KMinion - a
Prometheus Exporter for Apache Kafka digest: be8f0047b345d3954fc7c7e7f8953a848c909ef253107d6e77ed747843ddd167 name: kminion type: application urls: - kminion-0.8.3.tgz version: 0.8.3 - apiVersion: v2 appVersion: v2.1.0 created: "2023-07-03T16:38:22.573746+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 888bc665cddc6b6b99af1ce6dd1dea0b107a2e928dff6bfe1c077bc741e20ef7 name: kminion type: application urls: - kminion-0.8.2.tgz version: 0.8.2 - apiVersion: v2 appVersion: v2.1.0 created: "2023-07-03T16:38:22.573271+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: e59c5d5574f162708bf1434c266acbfd9040a89aa7a4abd4a0db70885248e38d name: kminion type: application urls: - kminion-0.8.1.tgz version: 0.8.1 - apiVersion: v2 appVersion: v2.1.0 created: "2023-07-03T16:38:22.572697+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: f54d8236f8cf03c863b53e077e1647164ffe2a7c34e1cf77101fa3312c589706 name: kminion type: application urls: - kminion-0.8.0.tgz version: 0.8.0 - apiVersion: v2 appVersion: v2.1.0 created: "2023-07-03T16:38:22.572269+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 4cc64cd9f78bd55673b00612579157e493020fb76440abbef10fe5152aef9acc name: kminion type: application urls: - kminion-0.7.0.tgz version: 0.7.0 - apiVersion: v2 appVersion: v2.1.0 created: "2023-07-03T16:38:22.571852+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 0955e04fe9ef4b516fb0d9ed439ae79778ccdffcf817f09099790cb7e183e4d4 name: kminion type: application urls: - kminion-0.6.0.tgz version: 0.6.0 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.571391+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: d3eb64d05535e136802538662eef7e9fdfdb3f0b93b6a42dfdcc93ee7deeadbd name: kminion type: application urls: - kminion-0.5.0.tgz version: 0.5.0 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.570618+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 6b4209352d1dffd7873791ee1573dc325eb08d67656b01b430729f45dea4c09a name: kminion type: application urls: - kminion-0.4.0.tgz version: 0.4.0 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.570281+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: c51e3b45791e9fd51f33036916b0d36f7ac695e2fa916a9e99882ea83914ed97 name: kminion type: application urls: - kminion-0.3.1.tgz version: 0.3.1 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.569892+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: a2be2dd8a02dc5222ec7386195a0e25b2682a39bbdcf52b60793c171acac7653 name: kminion type: application urls: - kminion-0.3.0.tgz version: 0.3.0 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.569445+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 321b6d5ff95ce310d2a3257b3d55f9ced51de99af6519d6d91723d7bdb6456fa name: kminion type: application urls: - kminion-0.2.2.tgz version: 0.2.2 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.569089+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: ed57df27158521a1eb33d215731fcc3248c71b3f36a4a029eb2d3a7b617ca519 name: kminion type: 
application urls: - kminion-0.2.1.tgz version: 0.2.1 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.568694+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 025661ee7cc574ad8dde7a68093a3b614fc92e26dd5dd398fc89d0b5308010e1 name: kminion type: application urls: - kminion-0.2.0.tgz version: 0.2.0 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.566269+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: e277e976d864b4bd2e505038dd865a9300486ae8c4323d3f0be40b84df75732b name: kminion type: application urls: - kminion-0.1.3.tgz version: 0.1.3 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.565773+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 562937d3613624c55984e51adbc6765e7898d1cf8cc2d7d241b6d671bbc12303 name: kminion type: application urls: - kminion-0.1.2.tgz version: 0.1.2 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.562776+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 25e83d7c7cc92a63268d76b13ecc13077758b48be093490f281498a4f55ad3ca name: kminion type: application urls: - kminion-0.1.1.tgz version: 0.1.1 - apiVersion: v2 appVersion: v2.0.0 created: "2023-07-03T16:38:22.562046+01:00" description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka digest: 7c10e9d9957e9752bc6f4b4a1fffb742d88cd57be06bf4f26ff7b5031645ccbd name: kminion type: application urls: - kminion-0.1.0.tgz version: 0.1.0 generated: "2023-07-03T16:38:22.560328+01:00" ================================================ FILE: charts/kminion/.helmignore ================================================ # Patterns to ignore when building packages. # This supports shell glob matching, relative path matching, and # negation (prefixed with !). Only one pattern per line. .DS_Store # Common VCS dirs .git/ .gitignore .bzr/ .bzrignore .hg/ .hgignore .svn/ # Common backup files *.swp *.bak *.tmp *.orig *~ # Various IDEs .project .idea/ *.tmproj .vscode/ ================================================ FILE: charts/kminion/Chart.yaml ================================================ apiVersion: v2 name: kminion description: A Helm chart to deploy KMinion - a Prometheus Exporter for Apache Kafka # A chart can be either an 'application' or a 'library' chart. # # Application charts are a collection of templates that can be packaged into versioned archives # to be deployed. # # Library charts provide useful utilities or functions for the chart developer. They're included as # a dependency of application charts to inject those utilities and functions into the rendering # pipeline. Library charts do not define any templates and therefore cannot be deployed. type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) version: 0.12.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
appVersion: "v2.2.5" ================================================ FILE: charts/kminion/templates/NOTES.txt ================================================ 1. Get the application URL by running these commands: {{- if .Values.ingress.enabled }} {{- range .Values.ingress.hosts }} http://{{ . }} {{- end }} {{- else if contains "NodePort" .Values.service.type }} export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "kminion.fullname" . }}) export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") echo http://$NODE_IP:$NODE_PORT {{- else if contains "LoadBalancer" .Values.service.type }} NOTE: It may take a few minutes for the LoadBalancer IP to be available. You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kminion.fullname" . }}' export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kminion.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") echo http://$SERVICE_IP:{{ .Values.service.port }} {{- else if contains "ClusterIP" .Values.service.type }} export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kminion.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") echo "Visit http://127.0.0.1:8080 to use your application" kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT {{- end }} ================================================ FILE: charts/kminion/templates/_helpers.tpl ================================================ {{/* Expand the name of the chart. */}} {{- define "kminion.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). If release name contains chart name it will be used as a full name. */}} {{- define "kminion.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} {{- $name := default .Chart.Name .Values.nameOverride }} {{- if contains $name .Release.Name }} {{- .Release.Name | trunc 63 | trimSuffix "-" }} {{- else }} {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} {{- end }} {{- end }} {{- end }} {{/* Create chart name and version as used by the chart label. */}} {{- define "kminion.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} {{- define "kminion.labels" -}} helm.sh/chart: {{ include "kminion.chart" . }} {{ include "kminion.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} {{- if .Values.customLabels}} {{ toYaml .Values.customLabels }} {{- end}} {{- end }} {{/* Selector labels */}} {{- define "kminion.selectorLabels" -}} app.kubernetes.io/name: {{ include "kminion.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} {{- define "kminion.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} {{- default (include "kminion.fullname" .) .Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} {{/* Return the appropriate apiVersion for ingress. */}} {{- define "kminion.ingress.apiVersion" -}} {{- if and ($.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) }} {{- print "networking.k8s.io/v1" }} {{- else if $.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" }} {{- print "networking.k8s.io/v1beta1" }} {{- else }} {{- print "extensions/v1beta1" }} {{- end }} {{- end }} {{/* Return if ingress is stable. */}} {{- define "kminion.ingress.isStable" -}} {{- eq (include "kminion.ingress.apiVersion" .) "networking.k8s.io/v1" }} {{- end }} {{/* Return if ingress supports ingressClassName. */}} {{- define "kminion.ingress.supportsIngressClassName" -}} {{- or (eq (include "kminion.ingress.isStable" .) "true") (and (eq (include "kminion.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) }} {{- end }} {{/* Return if ingress supports pathType. */}} {{- define "kminion.ingress.supportsPathType" -}} {{- or (eq (include "kminion.ingress.isStable" .) "true") (and (eq (include "kminion.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) }} {{- end }} {{/* Return the appropriate apiVersion for podDisruptionBudget. */}} {{- define "kminion.podDisruptionBudget.apiVersion" -}} {{- if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" }} {{- print "policy/v1" }} {{- else }} {{- print "policy/v1beta1" }} {{- end }} {{- end }} ================================================ FILE: charts/kminion/templates/configmap.yaml ================================================ apiVersion: v1 kind: ConfigMap metadata: name: {{include "kminion.fullname" .}} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4}} data: config.yaml: | {{- toYaml .Values.kminion.config | nindent 4}} ================================================ FILE: charts/kminion/templates/daemonset.yaml ================================================ {{- if .Values.daemonset.enabled }} apiVersion: apps/v1 kind: DaemonSet metadata: name: {{include "kminion.fullname" .}} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4}} spec: updateStrategy: type: OnDelete selector: matchLabels: {{- include "kminion.selectorLabels" . | nindent 6}} template: metadata: {{- with .Values.podAnnotations}} annotations: {{- toYaml . | nindent 8}} {{- end}} labels: {{- include "kminion.selectorLabels" . | nindent 8}} {{- if .Values.customLabels}} {{toYaml .Values.customLabels | nindent 8}} {{- end}} spec: {{- with .Values.imagePullSecrets}} imagePullSecrets: {{- toYaml . 
| nindent 8}} {{- end}} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8}} serviceAccountName: {{ .Values.serviceAccount.name }} volumes: - name: config configMap: name: {{include "kminion.fullname" .}} {{- range .Values.deployment.volumes.secrets}} - name: {{.secretName}} secret: secretName: {{.secretName}} {{- end}} containers: - name: {{.Chart.Name}} securityContext: {{- toYaml .Values.securityContext | nindent 12}} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{.Values.image.pullPolicy}} ports: - name: metrics containerPort: {{.Values.service.port}} protocol: TCP env: - name: POD_NAME valueFrom: fieldRef: fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - name: CONFIG_FILEPATH value: /etc/kminion/config.yaml {{- range .Values.deployment.env.values}} - name: {{.name}} value: {{.value | quote}} {{- end}} {{- range .Values.deployment.env.secretKeyRefs}} - name: {{.name}} valueFrom: secretKeyRef: name: {{.secretName}} key: {{.secretKey}} {{- end}} {{- range .Values.deployment.env.configMapKeyRefs}} - name: {{.name}} valueFrom: configMapKeyRef: name: {{.configMapName}} key: {{.configMapKey}} {{- end}} volumeMounts: - name: config mountPath: /etc/kminion {{- range .Values.deployment.volumes.secrets}} - name: {{.secretName}} mountPath: {{.mountPath}} {{- end}} resources: {{- toYaml .Values.resources | nindent 12}} livenessProbe: failureThreshold: 3 httpGet: path: /ready port: metrics scheme: HTTP initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 readinessProbe: failureThreshold: 3 httpGet: path: /ready port: metrics scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 {{- with .Values.affinity}} affinity: {{- toYaml . | nindent 8}} {{- end}} {{- with .Values.tolerations}} tolerations: {{- toYaml . | nindent 8}} {{- end}} {{- end }} ================================================ FILE: charts/kminion/templates/deployment.yaml ================================================ apiVersion: apps/v1 kind: Deployment metadata: name: {{include "kminion.fullname" .}} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4}} {{- with .Values.deployment.labels}} {{- toYaml . | nindent 4}} {{- end}} {{- with .Values.deployment.annotations}} annotations: {{- toYaml . | nindent 4}} {{- end}} spec: {{- if not .Values.autoscaling.enabled}} replicas: {{.Values.replicaCount}} {{- end}} selector: matchLabels: {{- include "kminion.selectorLabels" . | nindent 6}} template: metadata: {{- with .Values.podAnnotations}} annotations: {{- toYaml . | nindent 8}} {{- end}} labels: {{- include "kminion.selectorLabels" . | nindent 8}} {{- if .Values.customLabels}} {{toYaml .Values.customLabels | nindent 8}} {{- end}} spec: {{- with .Values.imagePullSecrets}} imagePullSecrets: {{- toYaml . | nindent 8}} {{- end}} serviceAccountName: {{include "kminion.serviceAccountName" .}} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8}} volumes: - name: config configMap: name: {{include "kminion.fullname" .}} {{- range .Values.deployment.volumes.secrets}} - name: {{.secretName}} secret: secretName: {{.secretName}} {{- end}} {{- with .Values.deployment.volumes.extra }} {{- toYaml . | nindent 8 }} {{- end }} initContainers: {{- with .Values.deployment.initContainers }} {{- toYaml . 
| nindent 8 }} {{- end }} containers: - name: {{.Chart.Name}} securityContext: {{- toYaml .Values.securityContext | nindent 12}} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{.Values.image.pullPolicy}} ports: - name: metrics containerPort: {{.Values.service.port}} protocol: TCP env: - name: CONFIG_FILEPATH value: /etc/kminion/config.yaml {{- range .Values.deployment.env.values}} - name: {{.name}} value: {{.value | quote}} {{- end}} {{- range .Values.deployment.env.secretKeyRefs}} - name: {{.name}} valueFrom: secretKeyRef: name: {{.secretName}} key: {{.secretKey}} {{- end}} {{- range .Values.deployment.env.configMapKeyRefs}} - name: {{.name}} valueFrom: configMapKeyRef: name: {{.configMapName}} key: {{.configMapKey}} {{- end}} volumeMounts: - name: config mountPath: /etc/kminion {{- range .Values.deployment.volumes.secrets}} - name: {{.secretName}} mountPath: {{.mountPath}} {{- end}} resources: {{- toYaml .Values.resources | nindent 12}} {{- if .Values.deployment.readinessProbe.enabled }} readinessProbe: httpGet: path: /ready port: {{.Values.service.port}} initialDelaySeconds: 10 {{- end }} {{- with .Values.deployment.extraContainers }} {{- toYaml . | nindent 8 }} {{- end }} {{- with .Values.nodeSelector}} nodeSelector: {{- toYaml . | nindent 8}} {{- end}} {{- with .Values.affinity}} affinity: {{- toYaml . | nindent 8}} {{- end}} {{- with .Values.tolerations}} tolerations: {{- toYaml . | nindent 8}} {{- end}} ================================================ FILE: charts/kminion/templates/hpa.yaml ================================================ {{- if .Values.autoscaling.enabled }} apiVersion: {{ ternary "autoscaling/v2" "autoscaling/v2beta1" (.Capabilities.APIVersions.Has "autoscaling/v2") }} kind: HorizontalPodAutoscaler metadata: name: {{ include "kminion.fullname" . }} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4 }} spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment name: {{ include "kminion.fullname" . }} minReplicas: {{ .Values.autoscaling.minReplicas }} maxReplicas: {{ .Values.autoscaling.maxReplicas }} metrics: {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} - type: Resource resource: name: cpu {{- if .Capabilities.APIVersions.Has "autoscaling/v2" }} target: type: Utilization averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} {{ else }} targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} {{- end }} {{- end }} {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} - type: Resource resource: name: memory {{- if .Capabilities.APIVersions.Has "autoscaling/v2" }} target: type: Utilization averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} {{ else }} targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} {{- end }} {{- end }} {{- end }} ================================================ FILE: charts/kminion/templates/ingress.yaml ================================================ {{- if .Values.ingress.enabled -}} {{- $fullName := include "kminion.fullname" . -}} {{- $svcPort := .Values.service.port -}} {{- $ingressApiIsStable := eq (include "kminion.ingress.isStable" .) "true" -}} {{- $ingressSupportsIngressClassName := eq (include "kminion.ingress.supportsIngressClassName" .) "true" -}} {{- $ingressSupportsPathType := eq (include "kminion.ingress.supportsPathType" .) "true" -}} {{- $fullName := include "kminion.fullname" .
-}} {{- $servicePort := .Values.service.port -}} {{- $ingressPath := .Values.ingress.path -}} {{- $ingressPathType := .Values.ingress.pathType -}} {{- $extraPaths := .Values.ingress.extraPaths -}} apiVersion: {{ include "kminion.ingress.apiVersion" . }} kind: Ingress metadata: name: {{ $fullName }} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4 }} {{- with .Values.ingress.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: {{- if and $ingressSupportsIngressClassName .Values.ingress.ingressClassName }} ingressClassName: {{ .Values.ingress.ingressClassName }} {{- end -}} {{- with .Values.ingress.tls }} tls: {{- tpl (toYaml .) $ | nindent 4 }} {{- end }} rules: {{- if .Values.ingress.hosts }} {{- range .Values.ingress.hosts }} - host: {{ tpl . $ }} http: paths: {{- with $extraPaths }} {{- toYaml . | nindent 10 }} {{- end }} - path: {{ $ingressPath }} {{- if $ingressSupportsPathType }} pathType: {{ $ingressPathType }} {{- end }} backend: {{- if $ingressApiIsStable }} service: name: {{ $fullName }} port: number: {{ $servicePort }} {{- else }} serviceName: {{ $fullName }} servicePort: {{ $servicePort }} {{- end }} {{- end }} {{- else }} - http: paths: - backend: {{- if $ingressApiIsStable }} service: name: {{ $fullName }} port: number: {{ $servicePort }} {{- else }} serviceName: {{ $fullName }} servicePort: {{ $servicePort }} {{- end }} {{- with $ingressPath }} path: {{ . }} {{- end }} {{- if $ingressSupportsPathType }} pathType: {{ $ingressPathType }} {{- end }} {{- end -}} {{- end }} ================================================ FILE: charts/kminion/templates/poddisruptionbudget.yaml ================================================ {{- if .Values.podDisruptionBudget }} apiVersion: {{ include "kminion.podDisruptionBudget.apiVersion" . }} kind: PodDisruptionBudget metadata: name: {{ template "kminion.fullname" . }} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4}} spec: {{- if .Values.podDisruptionBudget.minAvailable }} minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} {{- end }} {{- if .Values.podDisruptionBudget.maxUnavailable }} maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} {{- end }} selector: matchLabels: {{- include "kminion.selectorLabels" . | nindent 6}} {{- end }} ================================================ FILE: charts/kminion/templates/service.yaml ================================================ apiVersion: v1 kind: Service metadata: name: {{ include "kminion.fullname" . }} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4 }} {{- if .Values.service.annotations }} annotations: {{- toYaml .Values.service.annotations | nindent 4 }} {{- end }} spec: type: {{ .Values.service.type }} ports: - port: {{ .Values.service.port }} targetPort: metrics protocol: TCP name: metrics {{- if .Values.service.extraPorts }} {{- toYaml .Values.service.extraPorts | nindent 4 }} {{- end }} selector: {{- include "kminion.selectorLabels" . | nindent 4 }} ================================================ FILE: charts/kminion/templates/serviceaccount.yaml ================================================ {{- if .Values.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "kminion.serviceAccountName" . }} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4 }} {{- with .Values.serviceAccount.annotations }} annotations: {{- toYaml . 
| nindent 4 }} {{- end }} {{- end }} ================================================ FILE: charts/kminion/templates/servicemonitor.yaml ================================================ {{- if .Values.serviceMonitor.create }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: {{include "kminion.fullname" .}} namespace: {{ .Release.Namespace | quote }} labels: {{- include "kminion.labels" . | nindent 4}} {{- if .Values.serviceMonitor.additionalLabels}} {{toYaml .Values.serviceMonitor.additionalLabels | nindent 4}} {{- end}} spec: selector: matchLabels: {{- include "kminion.labels" . | nindent 6}} endpoints: - port: metrics path: /metrics honorLabels: {{ .Values.serviceMonitor.honorLabels }} scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} interval: {{ .Values.serviceMonitor.interval }} {{- if .Values.serviceMonitor.relabelings }} relabelings: {{ toYaml .Values.serviceMonitor.relabelings | nindent 6 }} {{- end }} {{- if .Values.serviceMonitor.targetLabels}} targetLabels: {{- toYaml .Values.serviceMonitor.targetLabels | nindent 4}} {{- end}} {{- if .Values.customLabels }} podTargetLabels: {{- (keys .Values.customLabels | sortAlpha) | toYaml | nindent 4 }} {{- end}} {{- end }} ================================================ FILE: charts/kminion/values.yaml ================================================ # Default values for kminion. # This is a YAML-formatted file. # Declare variables to be passed into your templates. replicaCount: 1 image: repository: redpandadata/kminion pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. tag: "" imagePullSecrets: [] nameOverride: "" fullnameOverride: "" serviceAccount: # Specifies whether a service account should be created create: true # Annotations to add to the service account annotations: {} # The name of the service account to use. # If not set and create is true, a name is generated using the fullname template name: "" podAnnotations: {} # prometheus.io/scrape: "true" # prometheus.io/port: "8080" # prometheus.io/path: "/metrics" podSecurityContext: runAsUser: 99 fsGroup: 99 ## See `kubectl explain poddisruptionbudget.spec` for more ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ podDisruptionBudget: maxUnavailable: 1 # minAvailable: 1 securityContext: allowPrivilegeEscalation: false # capabilities: # drop: # - ALL # readOnlyRootFilesystem: true # runAsNonRoot: true # runAsUser: 1000 service: type: ClusterIP port: 8080 # This port is also used as exposed container port annotations: {} # # Annotations to add to the service extraPorts: [] # When extraContainers expose additional metrics, make them # discoverable for ServiceMonitors # - port: 8443 # targetPort: 8443 # protocol: TCP # name: expose-x509-for-ttl-checks ingress: enabled: false # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress # ingressClassName: nginx # Values can be templated annotations: {} # kubernetes.io/ingress.class: nginx # kubernetes.io/tls-acme: "true" labels: {} path: / # pathType is only for k8s >= 1.18 pathType: Prefix hosts: - chart-example.local ## Extra paths to prepend to every host configuration. This is useful when working with annotation-based services.
extraPaths: [] # - path: /* # backend: # serviceName: ssl-redirect # servicePort: use-annotation ## Or for k8s > 1.19 # - path: /* # pathType: Prefix # backend: # service: # name: ssl-redirect # port: # name: use-annotation tls: [] # - secretName: chart-example-tls # hosts: # - chart-example.local # ingressClassName: resources: {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases the chances that charts run in environments with limited # resources, such as Minikube. If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. # limits: # cpu: 100m # memory: 128Mi # requests: # cpu: 100m # memory: 128Mi autoscaling: enabled: false minReplicas: 1 maxReplicas: 100 targetCPUUtilizationPercentage: 80 # targetMemoryUtilizationPercentage: 80 nodeSelector: {} tolerations: [] affinity: {} customLabels: {} serviceMonitor: create: false additionalLabels: {} honorLabels: false scrapeTimeout: 10s interval: 15s relabelings: [] # - sourceLabels: [__meta_kubernetes_pod_label_my_label] # separator: ; # regex: (.*) # targetLabel: my_label # replacement: $1 # action: replace # For DaemonSet mode you may set daemonset to "true" and replicaCount to 0. daemonset: enabled: false deployment: readinessProbe: enabled: true labels: {} # Annotations to add to the Deployment resource annotations: {} volumes: # Mount files from Kubernetes secrets into the container secrets: [] # - secretName: vault-tls # mountPath: /vault/tls extra: [] # - name: example # emptyDir: {} # If you want to provide specific config settings like sensitive Kafka credentials via environment variables, you can # do so by making them available here. See the kminion reference config to figure out the expected variable names. env: # Configure environment variables which you want to make available values: [] # - name: KAFKA_SASL_MECHANISM # value: PLAIN # Configure environment variables which you want to make available from a config map configMapKeyRefs: [] # - name: KAFKA_SASL_USERNAME # configMapName: kafka-user-team-xy # key: username # Configure environment variables which you want to make available from a secret secretKeyRefs: [] # - name: KAFKA_SASL_PASSWORD # secretName: kafka-credentials-team-xy # secretKey: password # Add additional containers, e.g. for oauth2-proxy extraContainers: {} # Add additional init containers, e.g. for oauth2-proxy initContainers: {} kminion: # KMinion can be configured using environment variables and/or a YAML config. The YAML contents under config will # end up in a YAML file which will be mounted into the kminion container. # See reference config: https://github.com/cloudhut/kminion/blob/master/docs/reference-config.yaml config: {} # kafka: # brokers: [ ] # clientId: "kminion" # rackId: "" # tls: # enabled: false # caFilepath: "" # certFilepath: "" # keyFilepath: "" # passphrase: "" # insecureSkipTlsVerify: false # sasl: # # Whether or not SASL authentication will be used # enabled: false # # Username to use for PLAIN or SCRAM mechanism # username: "" # # Password to use for PLAIN or SCRAM mechanism # password: "" # # Mechanism to use for SASL Authentication.
Valid values are PLAIN, SCRAM-SHA-256, SCRAM-SHA-512, GSSAPI # mechanism: "PLAIN" # # GSSAPI / Kerberos config properties # gssapi: # authType: "" # keyTabPath: "" # kerberosConfigPath: "" # serviceName: "" # username: "" # password: "" # realm: "" # # Whether to retry the initial test connection to Kafka. False will exit with code 1 on error, # # while true will retry until success. # retryInitConnection: false # # minion: # consumerGroups: # # Enabled specifies whether consumer groups shall be scraped and exported or not. # enabled: true # # Mode specifies whether we export consumer group offsets using the Admin API or by consuming the internal # # __consumer_offsets topic. Both modes have their advantages and disadvantages. # scrapeMode: adminApi # Valid values: adminApi, offsetsTopic # # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and # # you aren't interested in per partition lags you could choose "topic" where all partition lags will be summed # # and only topic lags will be exported. # granularity: partition # # AllowedGroups are regex strings of group ids that shall be exported # # You can specify allowed groups by providing literals like "my-consumergroup-name" or by providing regex expressions # # like "/internal-.*/". # allowedGroups: [ ] # # IgnoredGroups are regex strings of group ids that shall be ignored/skipped when exporting metrics. Ignored groups # # take precedence over allowed groups. # ignoredGroups: [ ] # topics: # # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and # # you aren't interested in per partition metrics you could choose "topic". # granularity: partition # # AllowedTopics are regex strings of topic names whose metrics shall be exported. # # You can specify allowed topics by providing literals like "my-topic-name" or by providing regex expressions # # like "/internal-.*/". # allowedTopics: [ ] # # # IgnoredTopics are regex strings of topic names that shall be ignored/skipped when exporting metrics. Ignored topics # # take precedence over allowed topics. # ignoredTopics: [ ] # # infoMetric is a configuration object for the kminion_kafka_topic_info metric # infoMetric: # # ConfigKeys is a set of strings of topic configs that you want to have exported as part of the metric # configKeys: ["cleanup.policy"] # logDirs: # # Enabled specifies whether log dirs shall be scraped and exported or not. This should be disabled for clusters prior # # to version 1.0.0 as describing log dirs was not supported back then. # enabled: true # # exporter: # # Namespace is the prefix for all exported Prometheus metrics # namespace: "kminion" # # Host that shall be used to bind the HTTP server on # host: "" # # Port that shall be used to bind the HTTP server on # port: 8080 # # logger: # # Level is a logging priority. Higher levels are more important.
Valid values are: debug, info, warn, error, fatal, panic # level: info ================================================ FILE: config.go ================================================ package main import ( "fmt" "os" "strings" "github.com/cloudhut/kminion/v2/kafka" "github.com/cloudhut/kminion/v2/logging" "github.com/cloudhut/kminion/v2/minion" "github.com/cloudhut/kminion/v2/prometheus" "github.com/knadh/koanf" "github.com/knadh/koanf/parsers/yaml" "github.com/knadh/koanf/providers/env" "github.com/knadh/koanf/providers/file" "github.com/mitchellh/mapstructure" "go.uber.org/zap" ) type Config struct { Kafka kafka.Config `koanf:"kafka"` Minion minion.Config `koanf:"minion"` Exporter prometheus.Config `koanf:"exporter"` Logger logging.Config `koanf:"logger"` } func (c *Config) SetDefaults() { c.Kafka.SetDefaults() c.Minion.SetDefaults() c.Exporter.SetDefaults() c.Logger.SetDefaults() } func (c *Config) Validate() error { err := c.Kafka.Validate() if err != nil { return fmt.Errorf("failed to validate kafka config: %w", err) } err = c.Minion.Validate() if err != nil { return fmt.Errorf("failed to validate minion config: %w", err) } err = c.Logger.Validate() if err != nil { return fmt.Errorf("failed to validate logger config: %w", err) } return nil } func newConfig(logger *zap.Logger) (Config, error) { k := koanf.New(".") var cfg Config cfg.SetDefaults() // 1. Check if a config filepath is set via the CONFIG_FILEPATH env variable. If there is one we'll try to load the file using a YAML parser envKey := "CONFIG_FILEPATH" configFilepath := os.Getenv(envKey) if configFilepath == "" { logger.Info("the env variable '" + envKey + "' is not set, therefore no YAML config will be loaded") } else { err := k.Load(file.Provider(configFilepath), yaml.Parser()) if err != nil { return Config{}, fmt.Errorf("failed to parse YAML config: %w", err) } } // We could unmarshal the loaded koanf input after loading both providers, however we want to unmarshal the YAML // config with `ErrorUnused` set to true, but unmarshal environment variables with `ErrorUnused` set to false (default). // Rationale: Orchestrators like Kubernetes inject unrelated environment variables, which we still want to allow. err := k.UnmarshalWithConf("", &cfg, koanf.UnmarshalConf{ Tag: "", FlatPaths: false, DecoderConfig: &mapstructure.DecoderConfig{ DecodeHook: mapstructure.ComposeDecodeHookFunc( mapstructure.StringToTimeDurationHookFunc()), Metadata: nil, Result: &cfg, WeaklyTypedInput: true, ErrorUnused: true, }, }) if err != nil { return Config{}, err } err = k.Load(env.ProviderWithValue("", ".", func(s string, v string) (string, interface{}) { // Convert the env variable name into a koanf key: lowercase it and replace underscores with dots. key := strings.Replace(strings.ToLower(s), "_", ".", -1) // If there is a comma in the value, split the value into a slice by the comma.
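// For example (hypothetical values): KAFKA_SASL_USERNAME=alice becomes the koanf key "kafka.sasl.username" with the value "alice", and KAFKA_BROKERS=broker-0:9092,broker-1:9092 becomes "kafka.brokers" with the slice value []string{"broker-0:9092", "broker-1:9092"}.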
if strings.Contains(v, ",") { return key, strings.Split(v, ",") } // Otherwise return the new key with the unaltered value return key, v }), nil) if err != nil { return Config{}, err } err = k.Unmarshal("", &cfg) if err != nil { return Config{}, err } err = cfg.Validate() if err != nil { return Config{}, fmt.Errorf("failed to validate config: %w", err) } return cfg, nil } ================================================ FILE: docker-compose.yml ================================================ --- version: '2.1' services: zookeeper: image: confluentinc/cp-zookeeper:latest ports: - 2181:2181 environment: ZOOKEEPER_CLIENT_PORT: 2181 ZOOKEEPER_TICK_TIME: 2000 container_name: zookeeper hostname: zookeeper kafka: image: confluentinc/cp-kafka:latest hostname: kafka container_name: kafka depends_on: - zookeeper ports: - 9092:9092 environment: KAFKA_BROKER_ID: 1 KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 kafka-minion: build: context: . dockerfile: ./Dockerfile hostname: kafka-minion container_name: kafka-minion depends_on: - zookeeper - kafka ports: - 8080:8080 environment: KAFKA_BROKERS: kafka:29092 restart: unless-stopped ================================================ FILE: docs/end-to-end.md ================================================ # End-To-End Monitoring This page describes the end-to-end monitoring feature in KMinion, how it works, and what metrics it provides. ## Motivation > What is the issue? Why did we build this feature? We can monitor metrics like CPU usage, free disk space, or even consumer group lag. However, these metrics don't give us a good idea of the performance characteristics an actual, real-world client experiences when connected to the cluster. With the "classic" metrics, lots of questions go unanswered: - Can a client produce messages to the cluster? - Can clients produce & consume messages as well as commit group offsets with an acceptable latency? - Is the cluster in a healthy state from a client's perspective? ## Approach & Implementation > How do we solve those issues? How does the feature work? The most reliable way to get real-world performance and availability metrics is to actually run a producer/consumer ourselves. This is exactly what the end-to-end monitoring feature does! ## High Level Overview To determine whether the cluster is fully operational and its performance is within acceptable limits, KMinion continuously produces and consumes messages to/from the cluster. That way we can measure things like ack-latency, commit-latency, and roundtrip-time. KMinion creates and manages its own topic for the end-to-end test messages. The name of the topic can be configured. **The first step** is to create a message and send it to the cluster. - Every produced message is added to an internal tracker, so we can recognize messages being "lost". A message is considered lost if it doesn't arrive back at the consumer within the configured time span. **The second step** is to continuously consume the topic.
- As each message arrives, we calculate its roundtrip time (the time from when the message was created until KMinion received it again) - Consumer group offsets are committed periodically, recording the time each commit takes. ### Topic Management The topic KMinion uses is created and managed completely automatically (though the topic name can be configured). KMinion continuously checks the topic and fixes issues/imbalances automatically: - Adds partitions to the topic so it has at least as many partitions as there are brokers. - Reassigns partitions to ensure every broker leads at least one partition, and that all partitions' replicas are distributed evenly across the brokers. KMinion tries to assign each partition ID to the broker with the matching broker ID. ### Consumer Group Management On startup, each KMinion instance generates a unique identifier (UUID) that is used to create its own consumer group, incorporating the shared prefix from the config. That is necessary because: - Offsets must not be shared among multiple instances. - Each instance must always consume **all** partitions of the topic. The instance's UUID is also embedded in every message, so each instance can easily filter out messages it didn't produce. That's why it is perfectly fine to run multiple KMinion instances against the same cluster, using the same topic. KMinion also monitors and deletes consumer groups that use its configured prefix. That way, when an instance exits/restarts, previous consumer groups will be cleaned up quickly (the check happens every 20s). ## Available Metrics The end-to-end monitoring feature exports the following metrics. ### Counters | Name | Description | | --- | --- | | `kminion_end_to_end_messages_produced_total` | Messages KMinion *tried* to send | | `kminion_end_to_end_messages_received_total` | Number of messages received (only counts those that match, i.e. that this instance actually produced itself) | | `kminion_end_to_end_offset_commits_total` | Number of successful offset commits by KMinion's end-to-end test | | `kminion_end_to_end_messages_lost_total` | Number of messages that have been produced successfully but not received within the configured SLA duration | | `kminion_end_to_end_messages_produced_failed_total` | Number of messages that failed to produce to Kafka because of a timeout or failure | ### Histograms | Name | Description | | --- | --- | | `kminion_end_to_end_produce_latency_seconds` | Duration until the cluster acknowledged a message. | | `kminion_end_to_end_offset_commit_latency_seconds` | Time Kafka took to respond to KMinion's offset commit | | `kminion_end_to_end_roundtrip_latency_seconds` | Duration from creation of a message, until it was received/consumed again. | ### Gauges | Name | Description | | --- | --- | | `kminion_end_to_end_messages_produced_in_flight` | Number of messages that KMinion's end-to-end test has produced but not yet received back | ## Config Properties All config properties related to this feature are located in `minion.endToEnd`. ```yaml endToEnd: enabled: true probeInterval: 800ms # how often to send end-to-end test messages topicManagement: # You can disable topic management without disabling the testing feature. # This only makes sense if you have multiple kminion instances and for some reason want only one of them to create/configure the topic. # It is strongly recommended to leave this enabled.
enabled: true # Name of the topic kminion uses to send its test messages # You do *not* need to change this if you are running multiple kminion instances on the same cluster. # Different instances are perfectly fine with sharing the same topic! name: kminion-end-to-end # How often kminion checks its topic to validate configuration, partition count, and partition assignments reconciliationInterval: 10m # Useful for monitoring the performance of acks (if >1 this is best combined with 'producer.requiredAcks' set to 'all') replicationFactor: 1 # Rarely makes sense to change this, but maybe if you want some sort of cheap load test? partitionsPerBroker: 1 producer: # This defines the maximum time to wait for an ack response after producing a message, # and the upper bound for histogram buckets in "produce_latency_seconds" ackSla: 5s # Can be set to "all" (default) so kafka only reports an end-to-end test message as acknowledged if # the message was written to all in-sync replicas of the partition. # Or can be set to "leader" to only require the partition leader to have written the message to its log. requiredAcks: all consumer: # Prefix kminion uses when creating its consumer groups. Current kminion instance id will be appended automatically groupIdPrefix: kminion-end-to-end # Whether KMinion should try to delete empty consumer groups with the same prefix. This can be used if you want # KMinion to clean up its old consumer groups. It should only be used if you use a unique prefix for KMinion. deleteStaleConsumerGroups: false # Defines the time limit beyond which a message is considered "lost" (failed the roundtrip), # also used as the upper bound for histogram buckets in "roundtrip_latency" roundtripSla: 20s # Maximum time an offset commit is allowed to take before considering it failed, # also used as the upper bound for histogram buckets in "commit_latency_seconds" commitSla: 10s ``` ================================================ FILE: docs/metrics.md ================================================ # Exported Metrics This document lists all exported metrics with example values. ## Exporter Metrics ``` # HELP kminion_exporter_up Build info about this Prometheus Exporter. Gauge value is 0 if one or more scrapes have failed.
# TYPE kminion_exporter_up gauge kminion_exporter_up{version="sha-0ab0dcdf862f7a34b06998cd2d980148e048151a"} 1 # HELP kminion_exporter_offset_consumer_records_consumed_total The number of offset records that have been consumed by the internal offset consumer # TYPE kminion_exporter_offset_consumer_records_consumed_total counter kminion_exporter_offset_consumer_records_consumed_total 5.058244883e+09 ``` ## Kafka Metrics ### General / Cluster Metrics ``` # HELP kminion_kafka_broker_info Kafka broker information # TYPE kminion_kafka_broker_info gauge kminion_kafka_broker_info{address="broker-9.analytics-prod.kafka.cloudhut.dev",broker_id="9",is_controller="false",port="9092",rack_id="europe-west1-b"} 1 # HELP kminion_kafka_cluster_info Kafka cluster information # TYPE kminion_kafka_cluster_info gauge kminion_kafka_cluster_info{broker_count="12",cluster_id="UYZJg8bhT_6SxhsdaQZEQ",cluster_version="v2.6",controller_id="6"} 1 ``` ### Log Dir Metrics ``` # HELP kminion_kafka_broker_log_dir_size_total_bytes The summed size in bytes of all log dirs for a given broker # TYPE kminion_kafka_broker_log_dir_size_total_bytes gauge kminion_kafka_broker_log_dir_size_total_bytes{address="broker-9.analytics-prod.kafka.cloudhut.dev",broker_id="9",port="9092",rack_id="europe-west1-b"} 8.32654935115e+11 # HELP kminion_kafka_topic_log_dir_size_total_bytes The summed size in bytes of partitions for a given topic. This includes the used space for replica partitions. # TYPE kminion_kafka_topic_log_dir_size_total_bytes gauge kminion_kafka_topic_log_dir_size_total_bytes{topic_name="__consumer_offsets"} 9.026554258e+09 ``` ### Topic & Partition Metrics ``` # HELP kminion_kafka_topic_info Info labels for a given topic # TYPE kminion_kafka_topic_info gauge kminion_kafka_topic_info{cleanup_policy="compact",partition_count="1",replication_factor="1",topic_name="_confluent-ksql-default__command_topic"} 1 # HELP kminion_kafka_topic_partition_low_water_mark Partition Low Water Mark # TYPE kminion_kafka_topic_partition_low_water_mark gauge kminion_kafka_topic_partition_low_water_mark{partition_id="0",topic_name="__consumer_offsets"} 0 # HELP kminion_kafka_topic_low_water_mark_sum Sum of all the topic's partition low water marks # TYPE kminion_kafka_topic_low_water_mark_sum gauge kminion_kafka_topic_low_water_mark_sum{topic_name="__consumer_offsets"} 0 # HELP kminion_kafka_topic_partition_high_water_mark Partition High Water Mark # TYPE kminion_kafka_topic_partition_high_water_mark gauge kminion_kafka_topic_partition_high_water_mark{partition_id="0",topic_name="__consumer_offsets"} 2.04952001e+08 # HELP kminion_kafka_topic_high_water_mark_sum Sum of all the topic's partition high water marks # TYPE kminion_kafka_topic_high_water_mark_sum gauge kminion_kafka_topic_high_water_mark_sum{topic_name="__consumer_offsets"} 1.512023846873e+12 ``` ### Consumer Group Metrics ``` # HELP kminion_kafka_consumer_group_info Consumer Group info metrics. It will report 1 if the group is in the stable state, otherwise 0. # TYPE kminion_kafka_consumer_group_info gauge kminion_kafka_consumer_group_info{coordinator_id="0",group_id="bigquery-sink",protocol="range",protocol_type="consumer",state="Stable"} 1 # HELP kminion_kafka_consumer_group_members Consumer Group member count metrics. It will report the number of members in the consumer group # TYPE kminion_kafka_consumer_group_members gauge kminion_kafka_consumer_group_members{group_id="bigquery-sink"} 2 # HELP kminion_kafka_consumer_group_empty_members Consumer Group Empty Members. 
It will report the number of members in the consumer group with no partition assigned # TYPE kminion_kafka_consumer_group_empty_members gauge kminion_kafka_consumer_group_empty_members{group_id="bigquery-sink"} 1 # HELP kminion_kafka_consumer_group_topic_members Consumer Group topic member count metrics. It will report the number of members in the consumer group assigned on a given topic # TYPE kminion_kafka_consumer_group_topic_members gauge kminion_kafka_consumer_group_topic_members{group_id="bigquery-sink",topic_name="shop-activity"} 4 # HELP kminion_kafka_consumer_group_topic_assigned_partitions Consumer Group topic partitions count metrics. It will report the number of partitions assigned in the consumer group for a given topic # TYPE kminion_kafka_consumer_group_topic_assigned_partitions gauge kminion_kafka_consumer_group_topic_assigned_partitions{group_id="bigquery-sink",topic_name="shop-activity"} 32 # HELP kminion_kafka_consumer_group_topic_offset_sum The sum of all committed group offsets across all partitions in a topic # TYPE kminion_kafka_consumer_group_topic_offset_sum gauge kminion_kafka_consumer_group_topic_offset_sum{group_id="bigquery-sink",topic_name="shop-activity"} 4.259513e+06 # HELP kminion_kafka_consumer_group_topic_partition_lag The number of messages a consumer group is lagging behind the latest offset of a partition # TYPE kminion_kafka_consumer_group_topic_partition_lag gauge kminion_kafka_consumer_group_topic_partition_lag{group_id="bigquery-sink",partition_id="10",topic_name="shop-activity"} 147481 # HELP kminion_kafka_consumer_group_topic_lag The number of messages a consumer group is lagging behind across all partitions in a topic # TYPE kminion_kafka_consumer_group_topic_lag gauge kminion_kafka_consumer_group_topic_lag{group_id="bigquery-sink",topic_name="shop-activity"} 147481 # HELP kminion_kafka_consumer_group_offset_commits_total The number of offsets committed by a group # TYPE kminion_kafka_consumer_group_offset_commits_total counter kminion_kafka_consumer_group_offset_commits_total{group_id="bigquery-sink"} 1098 ``` ### End-to-End Metrics ``` # HELP kminion_end_to_end_messages_produced_total Number of messages that kminion's end-to-end test has tried to send to kafka # TYPE kminion_end_to_end_messages_produced_total counter kminion_end_to_end_messages_produced_total 384 # HELP kminion_end_to_end_offset_commits_total Counts how many times kminions end-to-end test has committed messages # TYPE kminion_end_to_end_offset_commits_total counter kminion_end_to_end_offset_commits_total 18 # HELP kminion_end_to_end_messages_received_total Number of *matching* messages kminion received. Every roundtrip message has a minionID (randomly generated on startup) and a timestamp. 
Kminion only considers a message a match if it arrives within the configured roundtrip SLA (and it matches the minionID) # TYPE kminion_end_to_end_messages_received_total counter kminion_end_to_end_messages_received_total 383 # HELP kminion_end_to_end_produce_latency_seconds Time until we received an ack for a produced message # TYPE kminion_end_to_end_produce_latency_seconds histogram kminion_end_to_end_produce_latency_seconds_bucket{partitionId="0",le="0.005"} 0 # HELP kminion_end_to_end_offset_commit_latency_seconds Time kafka took to respond to kminion's offset commit # TYPE kminion_end_to_end_offset_commit_latency_seconds histogram kminion_end_to_end_offset_commit_latency_seconds_bucket{groupCoordinatorBrokerId="0",le="0.005"} 0 # HELP kminion_end_to_end_roundtrip_latency_seconds Time it took between sending (producing) and receiving (consuming) a message # TYPE kminion_end_to_end_roundtrip_latency_seconds histogram kminion_end_to_end_roundtrip_latency_seconds_bucket{partitionId="0",le="0.005"} 0 # HELP kminion_end_to_end_messages_lost_total Number of messages that have been produced successfully but not received within the configured SLA duration # TYPE kminion_end_to_end_messages_lost_total counter kminion_end_to_end_messages_lost_total{partition_id="0"} 0 # HELP kminion_end_to_end_messages_produced_failed_total Number of messages failed to produce to Kafka because of a timeout or failure # TYPE kminion_end_to_end_messages_produced_failed_total counter kminion_end_to_end_messages_produced_failed_total{partition_id="0"} 0 # HELP kminion_end_to_end_messages_produced_in_flight Number of messages that kminion's end-to-end test produced but has not received an answer for yet # TYPE kminion_end_to_end_messages_produced_in_flight gauge kminion_end_to_end_messages_produced_in_flight{partition_id="0"} 0 ``` ================================================ FILE: docs/reference-config.yaml ================================================ ##################################################################################### # This file documents all the available config options and their default values. # # All config options can be configured via environment variables as well. # If you specify both the env variable and yaml option for the same configuration, # the environment variable will take precedence. If you want to use a YAML config # file, specify the path to the config file by setting the env variable # CONFIG_FILEPATH. # # The env variable name is auto generated by upper casing everything and adding # an underscore for each indentation/level.
Some examples: # kafka.rackId => KAFKA_RACKID # kafka.tls.caFilepath => KAFKA_TLS_CAFILEPATH # minion.consumerGroups.allowedGroups => MINION_CONSUMERGROUPS_ALLOWEDGROUPS # # Env variables that expect array values can be provided by separating them using # a comma: KAFKA_BROKERS = "broker1:9092,broker2:9092,broker3:9092" ##################################################################################### logger: # Valid values are: debug, info, warn, error, fatal, panic level: info kafka: brokers: [ ] clientId: "kminion" rackId: "" tls: enabled: false caFilepath: "" certFilepath: "" keyFilepath: "" # base64 encoded tls CA, cannot be set if 'caFilepath' is set ca: "" # base64 encoded tls cert, cannot be set if 'certFilepath' is set cert: "" # base64 encoded tls key, cannot be set if 'keyFilepath' is set key: "" passphrase: "" insecureSkipTlsVerify: false sasl: # Whether SASL will be used for authentication enabled: false # Username to use for PLAIN or SCRAM mechanism username: "" # Password to use for PLAIN or SCRAM mechanism password: "" # Mechanism to use for SASL Authentication. Valid values are PLAIN, SCRAM-SHA-256, SCRAM-SHA-512, GSSAPI, OAUTHBEARER mechanism: "PLAIN" # GSSAPI / Kerberos config properties gssapi: # Required. One of USER_AUTH or KEYTAB_AUTH authType: "" keyTabPath: "" kerberosConfigPath: "" serviceName: "" username: "" password: "" realm: "" enableFast: true # OAUTHBEARER config properties oauth: tokenEndpoint: "" clientId: "" clientSecret: "" scope: "" minion: consumerGroups: # Enabled specifies whether consumer groups shall be scraped and exported or not. enabled: true # Mode specifies whether we export consumer group offsets using the Admin API or by consuming the internal # __consumer_offsets topic. Both modes have their advantages and disadvantages. # * adminApi: # - Useful for managed kafka clusters that do not provide access to the offsets topic. # * offsetsTopic: # - Enables kminion_kafka_consumer_group_offset_commits_total metrics. # - Processing the offsetsTopic requires slightly more memory and cpu than using the adminApi. The amount depends on the # size and throughput of the offsets topic. scrapeMode: adminApi # Valid values: adminApi, offsetsTopic # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and # you aren't interested in per partition lags you could choose "topic" where all partition lags will be summed # and only topic lags will be exported. granularity: partition # AllowedGroups are regex strings of group ids that shall be exported # You can specify allowed groups by providing literals like "my-consumergroup-name" or by providing regex expressions # like "/internal-.*/". allowedGroups: [ ".*" ] # IgnoredGroups are regex strings of group ids that shall be ignored/skipped when exporting metrics. Ignored groups # take precedence over allowed groups. ignoredGroups: [ ] topics: # Enabled can be set to false in order to disable collecting any topic metrics. enabled: true # Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and # you aren't interested in per partition metrics you could choose "topic". granularity: partition # AllowedTopics are regex strings of topic names whose topic metrics shall be exported. # You can specify allowed topics by providing literals like "my-topic-name" or by providing regex expressions # like "/internal-.*/".
allowedTopics: [ ".*" ] # IgnoredTopics are regex strings of topic names that shall be ignored/skipped when exporting metrics. Ignored topics # take precedence over allowed topics. ignoredTopics: [ ] # infoMetric is a configuration object for the kminion_kafka_topic_info metric infoMetric: # ConfigKeys are a set of strings of topic configs that you want to have exported as part of the metric configKeys: [ "cleanup.policy" ] logDirs: # Enabled specifies whether log dirs shall be scraped and exported or not. This should be disabled for clusters prior # to version 1.0.0 as describing log dirs was not supported back then. enabled: true # EndToEnd Metrics # When enabled, kminion creates a topic which it produces to and consumes from, to measure various advanced metrics. See docs for more info endToEnd: enabled: false # How often to send end-to-end test messages probeInterval: 100ms topicManagement: # Controls whether kminion should create, validate, and alter the end-to-end topic. # # When enabled (true): # - kminion creates the topic if it doesn't exist # - Validates partition count and replica assignments on startup and periodically (based on reconciliationInterval) # - Alters the topic (reassigns partitions, adds partitions) to ensure one partition leader per broker # - Fails on startup if topic alteration operations fail # # When disabled (false): # - kminion will NOT create the topic if it doesn't exist (startup will fail) # - Will NOT validate or alter an existing topic (accepts current partition layout as-is) # - Logs warnings if the topic configuration is suboptimal # - Continues end-to-end testing with whatever partition layout exists # # Use disabled mode on managed Kafka platforms that restrict partition reassignment operations. # In this mode, you must manually create the topic with appropriate partition distribution # before starting kminion. enabled: true # Name of the topic kminion uses to send its test messages # You do *not* need to change this if you are running multiple kminion instances on the same cluster. # Different instances are perfectly fine with sharing the same topic! name: kminion-end-to-end # How often kminion checks its topic to validate configuration, partition count, and partition assignments reconciliationInterval: 10m # Depending on the desired monitoring (e.g. alerting on a single broker failure vs. a cluster that is no longer writable) # you will most commonly choose a replication factor of 1 or 3. replicationFactor: 1 # Rarely makes sense to change this, but maybe if you want some sort of cheap load test? # By default (1) every broker gets one partition partitionsPerBroker: 1 # Whether kminion should issue AlterPartitionAssignments requests to rebalance partition # leaders across brokers. Disable this when running against Kafka-compatible clusters that # reject AlterPartitionAssignments (e.g. Redpanda with autobalancer enabled, Confluent Cloud). # Topic creation and partition count management remain active regardless of this setting. rebalancePartitions: true producer: # This defines: # - Maximum time to wait for an ack response after producing a message # - Upper bound for histogram buckets in "produce_latency_seconds" ackSla: 5s # Can be set to "all" (default) so kafka only reports an end-to-end test message as acknowledged if # the message was written to all in-sync replicas of the partition. # Or can be set to "leader" to only require the partition leader to have written the message to its log. requiredAcks: all consumer: # Prefix kminion uses when creating its consumer groups.
Current kminion instance id will be appended automatically groupIdPrefix: kminion-end-to-end # Whether KMinion should try to delete empty consumer groups with the same prefix. This can be used if you want # KMinion to clean up its old consumer groups. It should only be used if you use a unique prefix for KMinion. deleteStaleConsumerGroups: false # This defines: # - Upper bound for histogram buckets in "roundtrip_latency" # - Time limit beyond which a message is considered "lost" (failed the roundtrip) roundtripSla: 20s # - Upper bound for histogram buckets in "commit_latency_seconds" # - Maximum time an offset commit is allowed to take before considering it failed commitSla: 10s exporter: # Namespace is the prefix for all exported Prometheus metrics namespace: "kminion" # Host that shall be used to bind the HTTP server on host: "" # Port that shall be used to bind the HTTP server on port: 8080 ================================================ FILE: e2e/client_hooks.go ================================================ package e2e import ( "net" "sync/atomic" "time" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) // in e2e we only use client hooks for logging connect/disconnect messages type clientHooks struct { logger *zap.Logger lastCoordinatorUpdate time.Time currentCoordinator *atomic.Value // kgo.BrokerMetadata } func newEndToEndClientHooks(logger *zap.Logger) *clientHooks { return &clientHooks{ logger: logger.Named("e2e_hooks"), currentCoordinator: &atomic.Value{}, } } func (c *clientHooks) OnBrokerConnect(meta kgo.BrokerMetadata, dialDur time.Duration, _ net.Conn, err error) { if err != nil { c.logger.Error("kafka connection failed", zap.String("broker_host", meta.Host), zap.Int32("broker_id", meta.NodeID), zap.Error(err)) return } c.logger.Debug("kafka connection succeeded", zap.String("host", meta.Host), zap.Int32("broker_id", meta.NodeID), zap.Int64("dial_duration_ms", dialDur.Milliseconds())) } func (c *clientHooks) OnDisconnect(meta kgo.BrokerMetadata, _ net.Conn) { c.logger.Warn("kafka broker disconnected", zap.Int32("broker_id", meta.NodeID), zap.String("host", meta.Host)) } // OnBrokerWrite is passed the broker metadata, the key for the request that // was written, the number of bytes written, how long the request // waited before being written, how long it took to write the request, // and any error. // // The bytes written does not count any tls overhead. // OnWrite is called after a write to a broker. // // OnWrite(meta BrokerMetadata, key int16, bytesWritten int, writeWait, timeToWrite time.Duration, err error) func (c *clientHooks) OnBrokerWrite(meta kgo.BrokerMetadata, key int16, bytesWritten int, writeWait, timeToWrite time.Duration, err error) { keyName := kmsg.NameForKey(key) if keyName != "OffsetCommit" { return } // c.logger.Info("hooks onWrite", // zap.Duration("timeToWrite", timeToWrite), // zap.NamedError("err", err)) } // OnBrokerRead is passed the broker metadata, the key for the response that // was read, the number of bytes read, how long the Client waited // before reading the response, how long it took to read the response, // and any error. // // The bytes read does not count any tls overhead. // OnRead is called after a read from a broker.
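// We use this hook to remember which broker most recently answered a consumer-group request
// (OffsetCommit, JoinGroup, Heartbeat, SyncGroup): that broker is our current group coordinator,
// and its ID is used as the label on the offset-commit metrics (see commitOffsets in consumer.go).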
// OnRead(meta BrokerMetadata, key int16, bytesRead int, readWait, timeToRead time.Duration, err error) func (c *clientHooks) OnBrokerRead(meta kgo.BrokerMetadata, key int16, bytesRead int, readWait, timeToRead time.Duration, err error) { consumerGroupMsgKeys := []int16{ (&kmsg.OffsetCommitResponse{}).Key(), (&kmsg.JoinGroupResponse{}).Key(), (&kmsg.HeartbeatResponse{}).Key(), (&kmsg.SyncGroupResponse{}).Key(), } isMessageFromGroupCoordinator := isInArray(key, consumerGroupMsgKeys) if !isMessageFromGroupCoordinator { return } if err == nil { c.currentCoordinator.Store(meta) c.lastCoordinatorUpdate = time.Now() } } ================================================ FILE: e2e/config.go ================================================ package e2e import ( "fmt" "time" ) type Config struct { Enabled bool `koanf:"enabled"` TopicManagement EndToEndTopicConfig `koanf:"topicManagement"` ProbeInterval time.Duration `koanf:"probeInterval"` Producer EndToEndProducerConfig `koanf:"producer"` Consumer EndToEndConsumerConfig `koanf:"consumer"` } func (c *Config) SetDefaults() { c.Enabled = false c.ProbeInterval = 100 * time.Millisecond c.TopicManagement.SetDefaults() c.Producer.SetDefaults() c.Consumer.SetDefaults() } func (c *Config) Validate() error { if !c.Enabled { return nil } // If the timeduration is 0s or 0ms or its variation of zero, it will be parsed as 0 if c.ProbeInterval == 0 { return fmt.Errorf("failed to validate probeInterval config, the duration can't be zero") } err := c.TopicManagement.Validate() if err != nil { return fmt.Errorf("failed to validate topicManagement config: %w", err) } _, err = time.ParseDuration(c.ProbeInterval.String()) if err != nil { return fmt.Errorf("failed to parse '%s' to time.Duration: %v", c.ProbeInterval.String(), err) } err = c.Producer.Validate() if err != nil { return fmt.Errorf("failed to validate producer config: %w", err) } err = c.Consumer.Validate() if err != nil { return fmt.Errorf("failed to validate consumer config: %w", err) } return nil } ================================================ FILE: e2e/config_consumer.go ================================================ package e2e import ( "fmt" "time" ) type EndToEndConsumerConfig struct { GroupIdPrefix string `koanf:"groupIdPrefix"` DeleteStaleConsumerGroups bool `koanf:"deleteStaleConsumerGroups"` // RoundtripSLA is the time duration from the moment where we try to produce until the moment where we consumed // the message. Therefore this should always be higher than the produceTimeout / SLA. 
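// For example, with the defaults (producer.ackSla: 5s), a produce may legitimately take just under
// 5s to be acknowledged, so RoundtripSla (default 20s) must leave extra headroom on top of that for
// consuming the message back.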
RoundtripSla time.Duration `koanf:"roundtripSla"` CommitSla time.Duration `koanf:"commitSla"` } func (c *EndToEndConsumerConfig) SetDefaults() { c.GroupIdPrefix = "kminion-end-to-end" c.DeleteStaleConsumerGroups = false c.RoundtripSla = 20 * time.Second c.CommitSla = 5 * time.Second } func (c *EndToEndConsumerConfig) Validate() error { if len(c.GroupIdPrefix) < 3 { return fmt.Errorf("kminion prefix should be at least 3 characters long") } if c.RoundtripSla <= 0 { return fmt.Errorf("consumer.roundtripSla must be greater than zero") } if c.CommitSla <= 0 { return fmt.Errorf("consumer.commitSla must be greater than zero") } return nil } ================================================ FILE: e2e/config_producer.go ================================================ package e2e import ( "fmt" "time" ) type EndToEndProducerConfig struct { AckSla time.Duration `koanf:"ackSla"` RequiredAcks string `koanf:"requiredAcks"` } func (c *EndToEndProducerConfig) SetDefaults() { c.AckSla = 5 * time.Second c.RequiredAcks = "all" } func (c *EndToEndProducerConfig) Validate() error { if c.RequiredAcks != "all" && c.RequiredAcks != "leader" { return fmt.Errorf("producer.requiredAcks must be 'all' or 'leader'") } if c.AckSla <= 0 { return fmt.Errorf("producer.ackSla must be greater than zero") } return nil } ================================================ FILE: e2e/config_topic.go ================================================ package e2e import ( "fmt" "time" ) type EndToEndTopicConfig struct { Enabled bool `koanf:"enabled"` Name string `koanf:"name"` ReplicationFactor int `koanf:"replicationFactor"` PartitionsPerBroker int `koanf:"partitionsPerBroker"` ReconciliationInterval time.Duration `koanf:"reconciliationInterval"` // RebalancePartitions controls whether kminion will issue AlterPartitionAssignments // requests to rebalance partition leaders across brokers. Set to false when running // against Kafka-compatible clusters (e.g. Redpanda with autobalancer enabled, Confluent // Cloud) that reject AlterPartitionAssignments. Topic creation and partition count // management remain active regardless of this setting.
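// In YAML this lives at minion.endToEnd.topicManagement.rebalancePartitions
// (see docs/reference-config.yaml).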
RebalancePartitions bool `koanf:"rebalancePartitions"` } func (c *EndToEndTopicConfig) SetDefaults() { c.Enabled = true c.Name = "kminion-end-to-end" c.ReplicationFactor = 1 c.PartitionsPerBroker = 1 c.ReconciliationInterval = 10 * time.Minute c.RebalancePartitions = true } func (c *EndToEndTopicConfig) Validate() error { if c.ReplicationFactor < 1 { return fmt.Errorf("failed to validate replicationFactor, it should be at least 1, retrieved value %v", c.ReplicationFactor) } if c.PartitionsPerBroker < 1 { return fmt.Errorf("failed to validate partitionsPerBroker, it should be at least 1, retrieved value %v", c.PartitionsPerBroker) } // If the timeduration is 0s or 0ms or its variation of zero, it will be parsed as 0 if c.ReconciliationInterval == 0 { return fmt.Errorf("failed to validate topic.ReconciliationInterval config, the duration can't be zero") } return nil } ================================================ FILE: e2e/config_topic_test.go ================================================ package e2e import ( "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" ) func TestEndToEndTopicConfig_SetDefaults(t *testing.T) { var cfg EndToEndTopicConfig cfg.SetDefaults() assert.True(t, cfg.Enabled) assert.Equal(t, "kminion-end-to-end", cfg.Name) assert.Equal(t, 1, cfg.ReplicationFactor) assert.Equal(t, 1, cfg.PartitionsPerBroker) assert.Equal(t, 10*time.Minute, cfg.ReconciliationInterval) assert.True(t, cfg.RebalancePartitions, "RebalancePartitions should default to true for backward compatibility") } func TestEndToEndTopicConfig_Validate(t *testing.T) { tests := []struct { name string cfg EndToEndTopicConfig wantErr bool }{ { name: "valid config with rebalance enabled", cfg: EndToEndTopicConfig{ Enabled: true, Name: "test-topic", ReplicationFactor: 3, PartitionsPerBroker: 1, ReconciliationInterval: 10 * time.Minute, RebalancePartitions: true, }, wantErr: false, }, { name: "valid config with rebalance disabled", cfg: EndToEndTopicConfig{ Enabled: true, Name: "test-topic", ReplicationFactor: 3, PartitionsPerBroker: 1, ReconciliationInterval: 10 * time.Minute, RebalancePartitions: false, }, wantErr: false, }, { name: "invalid replication factor", cfg: EndToEndTopicConfig{ ReplicationFactor: 0, PartitionsPerBroker: 1, ReconciliationInterval: 10 * time.Minute, RebalancePartitions: false, }, wantErr: true, }, { name: "invalid partitions per broker", cfg: EndToEndTopicConfig{ ReplicationFactor: 1, PartitionsPerBroker: 0, ReconciliationInterval: 10 * time.Minute, RebalancePartitions: false, }, wantErr: true, }, { name: "zero reconciliation interval", cfg: EndToEndTopicConfig{ ReplicationFactor: 1, PartitionsPerBroker: 1, ReconciliationInterval: 0, RebalancePartitions: false, }, wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := tt.cfg.Validate() if tt.wantErr { require.Error(t, err) } else { require.NoError(t, err) } }) } } // TestPartitionPlanner_RebalancePartitionsDisabled verifies that when // RebalancePartitions is false the planner still produces a valid plan // (the caller in topic.go is responsible for not executing reassignments), // and that partition creation assignments are still generated normally. func TestPartitionPlanner_RebalancePartitionsDisabled(t *testing.T) { // Three brokers, topic already exists with suboptimal leader distribution: // all partitions led by broker 0.
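// (buildMeta appears to be a test helper: topic name, a broker ID -> rack map (empty string =
// no rack), and one replica list per partition with the leader at index 0.)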
meta := buildMeta("e2e", map[int32]string{0: "", 1: "", 2: ""}, [][]int32{ {0, 1, 2}, {0, 2, 1}, {0, 1, 2}, }, ) cfg := EndToEndTopicConfig{ Enabled: true, Name: "e2e", ReplicationFactor: 3, PartitionsPerBroker: 1, ReconciliationInterval: 10 * time.Minute, RebalancePartitions: false, } planner := NewPartitionPlanner(cfg, zap.NewNop()) plan, err := planner.Plan(meta) require.NoError(t, err) // The planner should still detect that reassignments are needed — // it's the caller's responsibility to skip executing them. assert.NotEmpty(t, plan.Reassignments, "planner should detect reassignments are needed") for _, ra := range plan.Reassignments { assertNoDuplicates(t, ra.Replicas) assert.Len(t, ra.Replicas, cfg.ReplicationFactor) } // No new partitions should be created (3 brokers × 1 per broker = 3 already exist). assert.Empty(t, plan.CreateAssignments) assert.Equal(t, 3, plan.FinalPartitionCount) } // TestPartitionPlanner_RebalancePartitionsDisabled_Creates verifies that when // RebalancePartitions is false and new partitions need to be created, Phase 3 // uses actual current leaders (not predicted leaders from staged reassignments) // to pick the preferred leader for new partitions. func TestPartitionPlanner_RebalancePartitionsDisabled_Creates(t *testing.T) { // 4 brokers, 3 partitions all led by broker 0. // PartitionsPerBroker=1 means desired = 4, so Phase 3 must create 1. meta := buildMeta("e2e", map[int32]string{0: "", 1: "", 2: "", 3: ""}, [][]int32{ {0, 1, 2}, {0, 2, 3}, {0, 1, 3}, }, ) cfg := EndToEndTopicConfig{ Enabled: true, Name: "e2e", ReplicationFactor: 3, PartitionsPerBroker: 1, ReconciliationInterval: 10 * time.Minute, RebalancePartitions: false, } planner := NewPartitionPlanner(cfg, zap.NewNop()) plan, err := planner.Plan(meta) require.NoError(t, err) // Phase 3 should create exactly 1 partition (4 desired - 3 existing). require.Len(t, plan.CreateAssignments, 1) // The new partition's preferred leader should NOT be broker 0, // because actual state shows broker 0 already leads 3 partitions. // With rebalancePartitions=false, Phase 3 counts from actual leaders, // so it should pick one of the under-represented brokers (1, 2, or 3). 
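// Replicas[0] is the preferred leader by convention (see CreateAssignment in partition_planner.go).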
newLeader := plan.CreateAssignments[0].Replicas[0] assert.NotEqual(t, int32(0), newLeader, "new partition should not be led by broker 0 (already leads 3 partitions in actual state)") } ================================================ FILE: e2e/consumer.go ================================================ package e2e import ( "context" "encoding/json" "strconv" "time" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) func (s *Service) startConsumeMessages(ctx context.Context, initializedCh chan<- bool) { client := s.client s.logger.Info("starting to consume end-to-end topic", zap.String("topic_name", s.config.TopicManagement.Name), zap.String("group_id", s.groupId)) isInitialized := false for { fetches := client.PollFetches(ctx) if !isInitialized { isInitialized = true initializedCh <- true close(initializedCh) } // Log all errors and continue afterwards as we might get errors and still have some fetch results errors := fetches.Errors() for _, err := range errors { s.logger.Error("kafka fetch error", zap.String("topic", err.Topic), zap.Int32("partition", err.Partition), zap.Error(err.Err)) } fetches.EachRecord(s.processMessage) } } func (s *Service) commitOffsets(ctx context.Context) { client := s.client uncommittedOffset := client.UncommittedOffsets() if uncommittedOffset == nil { return } startCommitTimestamp := time.Now() childCtx, cancel := context.WithTimeout(ctx, s.config.Consumer.CommitSla) client.CommitOffsets(childCtx, uncommittedOffset, func(_ *kgo.Client, req *kmsg.OffsetCommitRequest, r *kmsg.OffsetCommitResponse, err error) { cancel() coordinator := s.clientHooks.currentCoordinator.Load().(kgo.BrokerMetadata) coordinatorID := strconv.Itoa(int(coordinator.NodeID)) latency := time.Since(startCommitTimestamp) s.offsetCommitLatency.WithLabelValues(coordinatorID).Observe(latency.Seconds()) s.offsetCommitsTotal.WithLabelValues(coordinatorID).Inc() // We do this to ensure that a series with that coordinator id is initialized s.offsetCommitsTotal.WithLabelValues(coordinatorID).Add(0) // If we have at least one error in our commit response we want to report it as an error with an appropriate // reason as label. if errCode := s.logCommitErrors(r, err); errCode != "" { s.offsetCommitsFailedTotal.WithLabelValues(coordinatorID, errCode).Inc() return } }) } // processMessage: // - deserializes the message // - checks if it is from us, or from another kminion process running somewhere else // - hands it off to the service, which then reports metrics on it func (s *Service) processMessage(record *kgo.Record) { if record.Value == nil { // Init messages have nil values - we want to skip these. They are only used to make sure a consumer is ready. 
return } var msg EndToEndMessage if jerr := json.Unmarshal(record.Value, &msg); jerr != nil { s.logger.Error("failed to unmarshal message value", zap.Error(jerr)) return // maybe older version } if msg.MinionID != s.minionID { return // not from us } // restore partition, which is not serialized msg.partition = int(record.Partition) s.messageTracker.onMessageArrived(&msg) } ================================================ FILE: e2e/endtoend_message.go ================================================ package e2e import "time" const ( _ = iota EndToEndMessageStateCreated EndToEndMessageStateProducedSuccessfully ) type EndToEndMessage struct { MinionID string `json:"minionID"` // unique for each running kminion instance MessageID string `json:"messageID"` // unique for each message Timestamp int64 `json:"createdUtcNs"` // when the message was created, unix nanoseconds // The following properties are only used within the message tracker partition int state int produceLatency float64 } func (m *EndToEndMessage) creationTime() time.Time { return time.Unix(0, m.Timestamp) } ================================================ FILE: e2e/group_tracker.go ================================================ package e2e import ( "context" "strings" "time" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) const ( oldGroupCheckInterval = 5 * time.Second // how often to check for old kminion groups oldGroupMaxAge = 20 * time.Second // maximum age after which an old group should be deleted ) // groupTracker keeps checking for empty consumerGroups matching the kminion prefix. // When a group was seen empty for some time, we delete it. // Why? // Whenever a kminion instance starts up it creates a consumer-group for itself in order to not "collide" with other kminion instances. // When an instance restarts (for whatever reason), it creates a new group again, so we'd end up with a lot of unused groups. 
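// Example: with the default prefix, an instance's group id looks (roughly) like
// "kminion-end-to-end-<instance-uuid>"; after a restart, the old, now-empty group lingers
// until this tracker deletes it (it must have been seen empty for oldGroupMaxAge first).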
type groupTracker struct { cfg Config logger *zap.Logger client *kgo.Client // kafka client groupId string // our own groupId potentiallyEmptyGroups map[string]time.Time // groupName -> utc timestamp when the group was first seen } func newGroupTracker(cfg Config, logger *zap.Logger, client *kgo.Client, groupID string) *groupTracker { return &groupTracker{ cfg: cfg, logger: logger.Named("group_tracker"), client: client, groupId: groupID, potentiallyEmptyGroups: make(map[string]time.Time), } } func (g *groupTracker) start(ctx context.Context) { g.logger.Debug("starting group tracker") deleteOldGroupsTicker := time.NewTicker(oldGroupCheckInterval) for { select { case <-ctx.Done(): g.logger.Debug("stopping group tracker, context was cancelled") return case <-deleteOldGroupsTicker.C: childCtx, cancel := context.WithTimeout(ctx, 10*time.Second) err := g.checkAndDeleteOldConsumerGroups(childCtx) if err != nil { g.logger.Error("failed to check for old consumer groups", zap.Error(err)) } cancel() } } } func (g *groupTracker) checkAndDeleteOldConsumerGroups(ctx context.Context) error { groupsRq := kmsg.NewListGroupsRequest() groupsRq.StatesFilter = []string{"Empty"} g.logger.Debug("checking for stale kminion consumer groups") shardedResponse := g.client.RequestSharded(ctx, &groupsRq) // find groups that start with the kminion prefix matchingGroups := make([]string, 0) for _, shard := range shardedResponse { if shard.Err != nil { g.logger.Error("error in response to ListGroupsRequest", zap.Int32("broker_id", shard.Meta.NodeID), zap.Error(shard.Err)) continue } r, ok := shard.Resp.(*kmsg.ListGroupsResponse) if !ok { g.logger.Error("cannot cast responseShard.Resp to kmsg.ListGroupsResponse") continue } for _, group := range r.Groups { name := group.Group if name == g.groupId { continue // skip our own consumer group } if strings.HasPrefix(name, g.cfg.Consumer.GroupIdPrefix) { matchingGroups = append(matchingGroups, name) } } } // save new (previously unseen) groups to tracker g.logger.Debug("checked for stale consumer groups", zap.Int("found_groups", len(matchingGroups)), zap.Strings("groups", matchingGroups)) for _, name := range matchingGroups { _, exists := g.potentiallyEmptyGroups[name] if !exists { // add it with the current timestamp g.potentiallyEmptyGroups[name] = time.Now() g.logger.Debug("found new empty kminion group, adding it to the tracker", zap.String("group", name)) } } // go through saved groups: // - don't track the ones we don't see anymore (because they got deleted or are not empty anymore) // - mark the ones that are too old (have been observed as empty for too long) groupsToDelete := make([]string, 0) for name, firstSeen := range g.potentiallyEmptyGroups { exists, _ := containsStr(matchingGroups, name) if exists { // still there, check age and maybe delete it age := time.Since(firstSeen) if age > oldGroupMaxAge { // group was unused for too long, delete it groupsToDelete = append(groupsToDelete, name) delete(g.potentiallyEmptyGroups, name) } } else { // does not exist anymore, it must have been deleted, or is in use now (no longer empty) // don't track it anymore delete(g.potentiallyEmptyGroups, name) } } // actually delete the groups we've decided to delete if len(groupsToDelete) == 0 { return nil } deleteRq := kmsg.NewDeleteGroupsRequest() deleteRq.Groups = groupsToDelete deleteResp := g.client.RequestSharded(ctx, &deleteRq) // done, now just errors // if we get a not authorized error we'll disable deleting groups foundNotAuthorizedError := false deletedGroups := make([]string,
0) for _, shard := range deleteResp { if shard.Err != nil { g.logger.Error("sharded consumer group delete request failed", zap.Error(shard.Err)) continue } resp, ok := shard.Resp.(*kmsg.DeleteGroupsResponse) if !ok { g.logger.Error("failed to cast shard response to DeleteGroupsResponse while handling an error for deleting groups", zap.String("shard_host", shard.Meta.Host), zap.Int32("broker_id", shard.Meta.NodeID), zap.Error(shard.Err)) continue } for _, groupResp := range resp.Groups { err := kerr.ErrorForCode(groupResp.ErrorCode) if err != nil { g.logger.Error("failed to delete consumer group", zap.String("shard", shard.Meta.Host), zap.Int32("broker_id", shard.Meta.NodeID), zap.String("group", groupResp.Group), zap.Error(err)) if groupResp.ErrorCode == kerr.GroupAuthorizationFailed.Code { foundNotAuthorizedError = true } } else { deletedGroups = append(deletedGroups, groupResp.Group) } } } g.logger.Info("deleted old consumer groups", zap.Strings("deleted_groups", deletedGroups)) if foundNotAuthorizedError { g.logger.Info("disabling trying to delete old kminion consumer-groups since one of the last delete results had a 'GroupAuthorizationFailed' error") } return nil } ================================================ FILE: e2e/message_tracker.go ================================================ package e2e import ( "fmt" "strconv" "time" "github.com/jellydator/ttlcache/v2" "go.uber.org/zap" ) // messageTracker keeps track of the messages' lifetime // // When we successfully send a message, it will be added to this tracker. // Later, when we receive the message back in the consumer, the message is marked as completed and removed from the tracker. // If the message does not arrive within the configured `consumer.roundtripSla`, it is counted as lost. Messages that // failed to be produced will not be counted as lost messages. // // We use a dedicated counter to track messages that couldn't be produced to Kafka. type messageTracker struct { svc *Service logger *zap.Logger cache *ttlcache.Cache } func newMessageTracker(svc *Service) *messageTracker { defaultExpirationDuration := svc.config.Consumer.RoundtripSla cache := ttlcache.NewCache() cache.SetTTL(defaultExpirationDuration) t := &messageTracker{ svc: svc, logger: svc.logger.Named("message_tracker"), cache: cache, } t.cache.SetExpirationReasonCallback(func(key string, reason ttlcache.EvictionReason, value interface{}) { t.onMessageExpired(key, reason, value.(*EndToEndMessage)) }) return t } func (t *messageTracker) addToTracker(msg *EndToEndMessage) { t.cache.Set(msg.MessageID, msg) } // updateItemIfExists only updates a message if it still exists in the cache. The remaining time to live will not // be refreshed. // If it doesn't exist a ttlcache.ErrNotFound error will be returned. func (t *messageTracker) updateItemIfExists(msg *EndToEndMessage) error { _, ttl, err := t.cache.GetWithTTL(msg.MessageID) if err != nil { if err == ttlcache.ErrNotFound { return err } panic(err) } // Because the returned TTL is set to the original TTL duration (and not the remaining TTL) we have to calculate // the remaining TTL now as we want to update the existing cache item without changing the remaining time to live. expiryTimestamp := msg.creationTime().Add(ttl) remainingTTL := expiryTimestamp.Sub(time.Now()) if remainingTTL < 0 { // This entry should have been deleted already. Race condition.
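// (the cache's expiration callback will still fire for it and count the message as lost)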
return ttlcache.ErrNotFound } err = t.cache.SetWithTTL(msg.MessageID, msg, remainingTTL) if err != nil { panic(err) } return nil } // removeFromTracker removes an entry from the cache. If the key does not exist it will return a ttlcache.ErrNotFound error. func (t *messageTracker) removeFromTracker(messageID string) error { return t.cache.Remove(messageID) } func (t *messageTracker) onMessageArrived(arrivedMessage *EndToEndMessage) { cm, err := t.cache.Get(arrivedMessage.MessageID) if err != nil { if err == ttlcache.ErrNotFound { // message expired and was removed from the cache // it arrived too late, nothing to do here... return } else { panic(fmt.Errorf("failed to get message from cache: %w", err)) } } msg := cm.(*EndToEndMessage) expireTime := msg.creationTime().Add(t.svc.config.Consumer.RoundtripSla) isOnTime := time.Now().Before(expireTime) latency := time.Since(msg.creationTime()) if !isOnTime { // Message arrived late, but was still in cache. We don't increment the lost counter here because eventually // it will be evicted from the cache. This case only pops up if the SLA time is exceeded but the item has // not yet been evicted from the cache. t.logger.Info("message arrived late, will be marked as a lost message", zap.Int64("delay_ms", latency.Milliseconds()), zap.String("id", msg.MessageID)) return } // message arrived early enough pID := strconv.Itoa(msg.partition) t.svc.messagesReceived.WithLabelValues(pID).Inc() t.svc.roundtripLatency.WithLabelValues(pID).Observe(latency.Seconds()) // Remove message from cache, so that we don't track it any longer and won't mark it as lost when the entry expires. t.cache.Remove(msg.MessageID) } func (t *messageTracker) onMessageExpired(_ string, reason ttlcache.EvictionReason, value interface{}) { if reason == ttlcache.Removed { // We are not interested in messages that have been removed by us! return } msg := value.(*EndToEndMessage) created := msg.creationTime() age := time.Since(created) t.svc.lostMessages.WithLabelValues(strconv.Itoa(msg.partition)).Inc() t.logger.Debug("message expired/lost", zap.Int64("age_ms", age.Milliseconds()), zap.Int("partition", msg.partition), zap.String("message_id", msg.MessageID), zap.Bool("successfully_produced", msg.state == EndToEndMessageStateProducedSuccessfully), zap.Float64("produce_latency_seconds", msg.produceLatency), ) } ================================================ FILE: e2e/partition_planner.go ================================================ package e2e import ( "fmt" "sort" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) // PartitionPlanner is a thin orchestrator around the three planning phases. It wires in // configuration, logging, and the replica-selection strategy. The planning phases are: // 1. Fix replication factor and rack diversity on existing partitions. // 2. Ensure every broker is the preferred leader of at least 1 partition. // 3. Ensure total partition count meets the configured lower bound. // // The probe topic is created to continuously test end-to-end availability by // producing and consuming records per partition. The planner enforces: // // - Correct replication factor (RF == configured RF) and no duplicate brokers // within a single partition's replica set. // - Rack awareness: maximize the number of unique racks per partition (bounded // by min(RF, #racks)). This reduces the blast radius of a rack failure. // - Sufficient partition count: >= max(current, #brokers, partitionsPerBroker*#brokers).
// - Leader coverage: every broker must be the preferred leader (replicas[0]) of // at least one partition, so a per-broker failure is observable. // - Minimal movement: prefer to fix RF and rack issues first, then rotate/swap // leaders to fill gaps, and only create partitions when necessary. type PartitionPlanner struct { cfg EndToEndTopicConfig logger *zap.Logger sel ReplicaSelector } // NewPartitionPlanner constructs a Planner with the given config & logger. The replica // selector is chosen in Plan() once we have ClusterState ready. func NewPartitionPlanner(cfg EndToEndTopicConfig, logger *zap.Logger) *PartitionPlanner { return &PartitionPlanner{cfg: cfg, logger: logger} } // Plan produces an in-memory plan (reassignments + creations) for the probe // topic based on current cluster metadata. See the package header for the // invariants we enforce. func (p *PartitionPlanner) Plan(meta *kmsg.MetadataResponse) (*Plan, error) { if meta == nil || len(meta.Topics) == 0 { return nil, fmt.Errorf("metadata response has no topics") } if len(meta.Brokers) == 0 { return nil, fmt.Errorf("metadata response has no brokers") } if p.cfg.ReplicationFactor > len(meta.Brokers) { return nil, fmt.Errorf("replication factor %d exceeds available brokers %d", p.cfg.ReplicationFactor, len(meta.Brokers)) } // Build state required for the planning state := BuildState(meta) desired := ComputeDesired(state, p.cfg) tracker := NewLoadTracker(state) selector := NewRackAwareSelector(state, tracker) p.sel = selector b := NewPlanBuilder(state, desired, tracker, p.cfg.RebalancePartitions) // Phase 1: normalize RF and racks (low movement first) // Grow/trim replicas to configured RF and re-pick to maximize unique racks // per partition. We avoid moving the leader when possible. fixReplicationAndRack(b, selector, p.cfg.ReplicationFactor) // Phase 2: ensure each broker is preferred leader for >= 1 partition // This guarantees probe coverage: if a broker dies, some partition leader is // unavailable and the probe trips. ensureLeaderCoverage(b, selector) // Phase 3: ensure desired partition count // Enforce >= max(current, #brokers, partitionsPerBroker * #brokers). ensurePartitionCount(b, selector) // Build final plan and log the changes plan := b.Build() // Log with appropriate level based on whether changes are needed totalChanges := len(plan.Reassignments) + len(plan.CreateAssignments) if totalChanges == 0 { p.logger.Info("e2e probe topic partition leadership and replica distribution check completed - optimal", zap.String("topic", state.TopicName), zap.Int("brokers", len(state.Brokers)), zap.Int("partitions", len(state.Partitions)), zap.String("status", "optimal"), ) } else { p.logger.Info("plan to change partition leadership and replica placements on e2e topic has been prepared", zap.String("topic", state.TopicName), zap.Int("brokers", len(state.Brokers)), zap.Int("cur_partitions", len(state.Partitions)), zap.Int("final_partitions", plan.FinalPartitionCount), zap.Int("reassignments", len(plan.Reassignments)), zap.Int("creates", len(plan.CreateAssignments)), zap.Int("total_changes", totalChanges), ) } return plan, nil } // ----------------------------------------------------------------------------- // Planning phases // ----------------------------------------------------------------------------- // PlanBuilder holds a predictive view of partition -> replicas after applying // staged operations. 
We never mutate ClusterState.Partitions; instead, we write // new assignments into PlanBuilder.view and record high-level operations to // produce Kafka requests at the end (see Plan.ToRequests). // // This keeps the planning phases simple and side-effect free. type PlanBuilder struct { state ClusterState desired Desired tracker *LoadTracker // rebalancePartitions indicates whether reassignments will actually be executed. // When false, Phase 3 uses actual current leaders instead of predicted leaders // from the view, since reassignments won't be applied. rebalancePartitions bool // view is our predictive map: partitionID -> replicas (preferred leader at idx 0) view map[int32][]int32 reassignments []Reassignment // staged reassignments for existing partitions creations []CreateAssignment // staged creations of new partitions } // Reassignment captures a single partition’s new replica list. // // The order of Replicas matters: index 0 will become the preferred leader after // reassignment completes on the broker side. type Reassignment struct { Partition int32 Replicas []int32 } // CreateAssignment captures the replica list for a *new* partition that will be // appended to the topic during CreatePartitions. // // The order of Replicas matters: index 0 is the preferred leader for the new // partition. type CreateAssignment struct { Replicas []int32 } // Plan is the final, immutable result of planning. It can be turned into Kafka // requests via ToRequests. // // FinalPartitionCount is the topic’s partition count after applying creations. // (Reassignments do not change the count.) type Plan struct { Reassignments []Reassignment CreateAssignments []CreateAssignment FinalPartitionCount int } // NewPlanBuilder initializes a predictive view by cloning the current // partition->replicas map. We avoid accidental mutation by copying slices. func NewPlanBuilder(state ClusterState, desired Desired, tracker *LoadTracker, rebalancePartitions bool) *PlanBuilder { view := make(map[int32][]int32, len(state.Partitions)) for pid, p := range state.Partitions { view[pid] = append([]int32(nil), p.Replicas...) } return &PlanBuilder{state: state, desired: desired, tracker: tracker, rebalancePartitions: rebalancePartitions, view: view} } // Build freezes the current staged operations into a Plan. We compute the final // partition count as current + number of creates. func (b *PlanBuilder) Build() *Plan { return &Plan{ Reassignments: b.reassignments, CreateAssignments: b.creations, FinalPartitionCount: len(b.state.Partitions) + len(b.creations), } } // CommitReassignment records a reassignment and updates the predictive view. func (b *PlanBuilder) CommitReassignment(pid int32, reps []int32) { b.reassignments = append(b.reassignments, Reassignment{Partition: pid, Replicas: reps}) b.view[pid] = reps } // CommitCreate records a new-partition assignment. The final partition count is // computed when building the Plan. func (b *PlanBuilder) CommitCreate(reps []int32) { b.creations = append(b.creations, CreateAssignment{Replicas: reps}) } // fixReplicationAndRack enforces configured RF on each existing partition // (growing or shrinking as needed) and re-picks replicas when rack diversity can // be improved. We try to keep the current leader by always retaining replicas[0] // when shrinking. 
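// Illustrative example (hypothetical racks): with rf=3 and brokers {0: rackA, 1: rackA, 2: rackB,
// 3: rackB}, growing a partition whose replicas are [0, 1] keeps broker 0 as leader and prefers a
// rackB broker for the added replica before doubling up on rackA again.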
func fixReplicationAndRack(b *PlanBuilder, sel ReplicaSelector, rf int) { // Sort partition IDs for deterministic iteration pids := make([]int32, 0, len(b.view)) for pid := range b.view { pids = append(pids, pid) } sort.Slice(pids, func(i, j int) bool { return pids[i] < pids[j] }) for _, pid := range pids { replicas := b.view[pid] desiredRF := rf newReplicas := replicas switch { // Grow: re-pick the full set based on the current actual leader. case len(replicas) < desiredRF: // Use the actual current leader from the partition metadata currentPartition, exists := b.state.Partitions[pid] preferredLeader := replicas[0] // fallback to preferred leader if exists && currentPartition.Leader != -1 { preferredLeader = currentPartition.Leader } newReplicas = sel.ChooseReplicas(preferredLeader, desiredRF) // Shrink: keep leader; then pick remaining replicas preferring // new racks, then lower load, then lower broker ID. case len(replicas) > desiredRF: newReplicas = shrinkPreservingLeader(b, pid, replicas, desiredRF) // Same RF: if rack diversity can be improved, re-pick. default: if violatesRackDiversity(replicas, b.state.NumRacks, b.state.Brokers) { // Use the actual current leader from the partition metadata currentPartition, exists := b.state.Partitions[pid] preferredLeader := replicas[0] // fallback to preferred leader if exists && currentPartition.Leader != -1 { preferredLeader = currentPartition.Leader } newReplicas = sel.ChooseReplicas(preferredLeader, desiredRF) } } if !equalInt32s(newReplicas, replicas) { b.CommitReassignment(pid, newReplicas) } } } // shrinkPreservingLeader returns a replica set of size rf that keeps the // current leader and greedily prefers candidates that add a new rack; // among equals, chooses lower load, then lower broker ID. func shrinkPreservingLeader(b *PlanBuilder, pid int32, replicas []int32, rf int) []int32 { // Use the actual current leader from the partition metadata, not replicas[0] currentPartition, exists := b.state.Partitions[pid] leader := replicas[0] // fallback to preferred leader if no current leader found if exists && currentPartition.Leader != -1 { leader = currentPartition.Leader } keep := []int32{leader} seen := map[string]struct{}{ b.state.Brokers[leader].Rack: {}, } type cand struct { id int32 rack string load int } // Build the candidate pool from non-leader replicas. pool := make([]cand, 0, len(replicas)-1) for _, id := range replicas[1:] { pool = append(pool, cand{ id: id, rack: b.state.Brokers[id].Rack, load: b.tracker.Load(id).Replicas, }) } // Greedy selection with dynamic "seen racks". for len(keep) < rf && len(pool) > 0 { best := 0 for i := 1; i < len(pool); i++ { a, b2 := pool[i], pool[best] _, aSeen := seen[a.rack] _, bSeen := seen[b2.rack] switch { // Prefer a candidate that adds a new rack. case aSeen != bSeen: if !aSeen && bSeen { best = i } // Then prefer lower load. case a.load != b2.load: if a.load < b2.load { best = i } // Then prefer lower broker ID (stable tie-breaker). case a.id < b2.id: best = i } } chosen := pool[best] keep = append(keep, chosen.id) seen[chosen.rack] = struct{}{} // Remove chosen from pool (swap-delete). pool[best] = pool[len(pool)-1] pool = pool[:len(pool)-1] } return keep } // ensureLeaderCoverage guarantees that each broker becomes preferred leader for // at least one partition. We try the cheapest options first: // 1. If the broker already hosts a replica of some partition where it is not // leader, rotate it to index 0 (no RF change, minimal movement). // 2. 
Otherwise, replace a non-unique-rack replica in some donor partition and // rotate the target broker to index 0. // 3. If neither is possible, create a new partition led by the target broker. func ensureLeaderCoverage(b *PlanBuilder, sel ReplicaSelector) { // Guard: if actual leaders already cover all brokers, skip preferred leader rebalancing actualLeaders := make(map[int32][]int32, len(b.state.BrokerIDs)) for _, id := range b.state.BrokerIDs { actualLeaders[id] = nil } for pid, part := range b.state.Partitions { if part.Leader != -1 { actualLeaders[part.Leader] = append(actualLeaders[part.Leader], pid) } } if len(brokersMissingLeadership(b.state.BrokerIDs, actualLeaders)) == 0 { return // Actual coverage is perfect - no need to rebalance preferred leaders } // Build "leadersByBroker": broker -> list of partition IDs it currently leads (preferred). leadersByBroker := indexLeaders(b.state.BrokerIDs, b.view) // Brokers that currently lead zero partitions (preferred). // However, if a broker already has actual leadership (even if not preferred), // we can skip it to minimize unnecessary reassignments. missing := []int32{} for _, broker := range brokersMissingLeadership(b.state.BrokerIDs, leadersByBroker) { // Skip if this broker already has actual leadership if len(actualLeaders[broker]) > 0 { continue } missing = append(missing, broker) } if len(missing) == 0 { return } // Local helpers that both perform the action and update leadersByBroker. rotateIfReplica := func(target int32, donors []int32) bool { for _, donor := range donors { // Collect candidate partitions where target is already a replica pids := append([]int32(nil), leadersByBroker[donor]...) // Sort with preference: partitions where the donor is the ACTUAL leader first. // This ensures we're actually freeing up leadership from the donor, rather than // rotating a partition where the donor is only the preferred leader. // Then by partition ID for determinism. sort.Slice(pids, func(i, j int) bool { pi, pj := pids[i], pids[j] // Prefer partitions where the donor is the actual leader iDonorIsActual := b.state.Partitions[pi].Leader == donor jDonorIsActual := b.state.Partitions[pj].Leader == donor if iDonorIsActual != jDonorIsActual { return iDonorIsActual } // Then by partition ID for stability return pi < pj }) for _, pid := range pids { reps := b.view[pid] if !contains(reps, target) { continue } newReps := putFirst(reps, target) // make target the leader b.CommitReassignment(pid, newReps) // Update bookkeeping: pid moves from donor to target. leadersByBroker[donor] = remove(leadersByBroker[donor], pid) leadersByBroker[target] = append(leadersByBroker[target], pid) return true } } return false } replaceDuplicateAndRotate := func(target int32, donors []int32) bool { for _, donor := range donors { // Sort partition IDs for deterministic iteration pids := append([]int32(nil), leadersByBroker[donor]...) sort.Slice(pids, func(i, j int) bool { return pids[i] < pids[j] }) for _, pid := range pids { reps := b.view[pid] if contains(reps, target) { continue // covered by rotate path above } idx := victimIndex(reps, b.state.Brokers) if idx < 0 { continue // no safe replica to swap (unique racks already) } newReps := append([]int32{}, reps...) 
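// Work on a copy so we never mutate the replica slice stored in the
// predictive view directly: swap the duplicate-rack victim for the target
// broker, then rotate the target to index 0 so it becomes preferred leader.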
newReps[idx] = target newReps = putFirst(newReps, target) b.CommitReassignment(pid, newReps) leadersByBroker[donor] = remove(leadersByBroker[donor], pid) leadersByBroker[target] = append(leadersByBroker[target], pid) return true } } return false } for _, target := range missing { // Donors: brokers leading more partitions than their soft target, // sorted by largest surplus first (tie-breaker: smaller broker ID). donors := donorBrokers(b.state.BrokerIDs, leadersByBroker, b.desired.TargetLeaders) // 1) Cheapest: rotate target to lead where it already is a replica. if rotateIfReplica(target, donors) { continue } // 2) Next-cheapest: replace a duplicate-rack replica, then rotate. if replaceDuplicateAndRotate(target, donors) { continue } // 3) Last resort: create a new partition led by target. reps := sel.ChooseReplicas(target, b.desired.RF) b.CommitCreate(reps) // Track a synthetic partition ID so counts stay consistent within this loop. newPID := int32(len(b.state.Partitions) + len(b.creations) - 1) leadersByBroker[target] = append(leadersByBroker[target], newPID) } } // ensurePartitionCount adds partitions until we reach DesiredPartitions. // // Leader selection strategy per new partition: // 1. Prefer a broker still below its soft target (TargetLeaders). Among those, // pick the one closest to its target (smallest positive gap). // 2. If all brokers are at/above target, pick the broker that currently leads // the fewest partitions (stable tie-breaker via leastLoadedLeader). func ensurePartitionCount(b *PlanBuilder, sel ReplicaSelector) { desiredTotal := b.desired.DesiredPartitions total := len(b.state.Partitions) + len(b.creations) if total >= desiredTotal || len(b.state.BrokerIDs) == 0 { return } // Count current leaders per broker. leaderCount := make(map[int32]int, len(b.state.BrokerIDs)) if b.rebalancePartitions { // Use predictive view (reassignments will be applied) for _, reps := range b.view { if len(reps) > 0 { leaderCount[reps[0]]++ } } } else { // Use actual current leaders (reassignments won't be applied) for _, p := range b.state.Partitions { if p.Leader != -1 { leaderCount[p.Leader]++ } } } // Always include leaders from staged creates (Phase 2 fallback creates are always executed) for _, ca := range b.creations { if len(ca.Replicas) > 0 { leaderCount[ca.Replicas[0]]++ } } // Make sure every broker has an entry in leaderCount. for _, id := range b.state.BrokerIDs { if _, ok := leaderCount[id]; !ok { leaderCount[id] = 0 } } for total < desiredTotal { // Pick a preferred leader for the new partition. preferred := pickLeader(b.state.BrokerIDs, leaderCount, b.desired.TargetLeaders) // Materialize replicas and commit the create. reps := sel.ChooseReplicas(preferred, b.desired.RF) b.CommitCreate(reps) leaderCount[preferred]++ total++ } } // pickLeader returns the broker to prefer as the new partition's leader. // // If any brokers are still below their soft target, it returns the one with the // smallest positive remaining gap (# of missing leaderships for partitions). // Otherwise, it returns the least-loaded leader. func pickLeader(brokerIDs []int32, leaderCount map[int32]int, target map[int32]int) int32 { var ( chosen int32 bestGap int // smallest positive gap seen so far found bool ) for _, id := range brokerIDs { gap := target[id] - leaderCount[id] // how many leaders this broker still “should” get if gap <= 0 { continue } if !found || gap < bestGap { chosen = id bestGap = gap found = true } } if found { return chosen } // Everyone at/above target: fall back to least leaders. 
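// For example (hypothetical counts): with target={1:2, 2:2} and
// leaderCount={1:2, 2:3} no broker has a positive gap, so we fall through
// and broker 1 (the one leading the fewest partitions) is returned.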
return leastLoadedLeader(leaderCount, brokerIDs) } // ----------------------------------------------------------------------------- // State & Desired // ----------------------------------------------------------------------------- // Broker is an immutable snapshot of a broker’s ID and rack label at plan time. // An empty rack value means the broker did not advertise one. // // We never mutate this during planning; it mirrors the Metadata response. type Broker struct { ID int32 Rack string // empty string if unknown } // Partition is an immutable snapshot of a partition's current replica list. // // The order of Replicas matters: index 0 is the *preferred leader* for this // partition, i.e., where the controller will attempt to place leadership after // changes. The Leader field contains the *actual current leader* as reported // by Kafka metadata, which may differ from the preferred leader (replicas[0]). // // We do not mutate these structs; instead we work with a predictive "view" // inside PlanBuilder (below). type Partition struct { ID int32 Leader int32 // actual current leader from metadata Replicas []int32 // order matters; index 0 is preferred leader } // ClusterState is a convenience wrapper around the metadata we actually need in // this planner. It is deliberately small to keep the code readable. // // - TopicName: the probe topic name (for logging and request building) // - Brokers: map of broker id -> Broker // - BrokerIDs: sorted slice of broker IDs for deterministic iteration // - ByRack: rack -> broker IDs, to reason about rack diversity // - NumRacks: number of distinct racks (empty rack counts as one) // - Partitions: map of partition id -> Partition snapshot // // None of the members are mutated after construction. type ClusterState struct { TopicName string Brokers map[int32]Broker BrokerIDs []int32 // sorted ByRack map[string][]int32 NumRacks int Partitions map[int32]Partition } // BuildState converts Metadata response to ClusterState and copies slices so // planning cannot accidentally mutate the input. func BuildState(meta *kmsg.MetadataResponse) ClusterState { brokers := make(map[int32]Broker, len(meta.Brokers)) byRack := make(map[string][]int32) var ids []int32 for _, b := range meta.Brokers { rack := "" if b.Rack != nil { rack = *b.Rack } brokers[b.NodeID] = Broker{ID: b.NodeID, Rack: rack} byRack[rack] = append(byRack[rack], b.NodeID) ids = append(ids, b.NodeID) } sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) parts := make(map[int32]Partition) for _, p := range meta.Topics[0].Partitions { rep := append([]int32(nil), p.Replicas...) parts[p.Partition] = Partition{ID: p.Partition, Leader: p.Leader, Replicas: rep} } return ClusterState{ TopicName: pointerStrToStr(meta.Topics[0].Topic), Brokers: brokers, BrokerIDs: ids, ByRack: byRack, NumRacks: len(byRack), Partitions: parts, } } type Desired struct { RF int DesiredPartitions int TargetLeaders map[int32]int // per broker (even split) } // ComputeDesired derives the minimal partition count and a soft target for // preferred leader distribution. We require at least one partition per broker so // everyone can lead, and we honor PartitionsPerBroker as an additional lower // bound. 
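//
// A worked example with hypothetical numbers: 3 brokers, PartitionsPerBroker=2,
// and a topic that currently has 4 partitions give
// DesiredPartitions = max(4, 2*3) = 6; evenSplit then sets a TargetLeaders
// soft target of 2 per broker.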
func ComputeDesired(state ClusterState, cfg EndToEndTopicConfig) Desired { perBroker := cfg.PartitionsPerBroker if perBroker < 1 { perBroker = 1 } cur := len(state.Partitions) desiredPartitions := max(cur, perBroker*len(state.BrokerIDs)) target := evenSplit(desiredPartitions, state.BrokerIDs) return Desired{RF: cfg.ReplicationFactor, DesiredPartitions: desiredPartitions, TargetLeaders: target} } // ----------------------------------------------------------------------------- // Replica selection (rack-aware) & load tracking // ----------------------------------------------------------------------------- // Load captures, per broker, how many times it appears as a replica and as a // preferred leader across the predictive view of the topic. We use this to // bias selection towards less-loaded brokers to avoid hot spots. // // Note: this is a transient view local to a single planning run. Nothing here // is persisted and no attempt is made to perfectly balance replicas across the // cluster—only to avoid obviously uneven choices. type Load struct { Replicas int Leaders int } // LoadTracker is a tiny helper holding a per-broker Load map with convenience // methods for reading and incrementing counts while the plan is being // constructed. // // Why not compute loads on the fly? We do that initially (from current // assignments) and then update incrementally while choosing replicas for new or // changed partitions to keep subsequent choices informed by earlier ones. // Keeping it explicit in a struct makes the intent obvious and testing easier. type LoadTracker struct { l map[int32]Load } // NewLoadTracker builds initial loads from the current assignments in the // metadata (before any staged changes). The caller updates loads as it makes // predictive choices so the next decision can see the latest picture. func NewLoadTracker(state ClusterState) *LoadTracker { l := make(map[int32]Load, len(state.BrokerIDs)) for _, id := range state.BrokerIDs { l[id] = Load{} } for _, p := range state.Partitions { for _, r := range p.Replicas { ld := l[r] ld.Replicas++ l[r] = ld } // Count the actual current leader separately if p.Leader != -1 { // -1 indicates no leader (error state) ld := l[p.Leader] ld.Leaders++ l[p.Leader] = ld } } return &LoadTracker{l: l} } // AddReplica increments replica and (optionally) leader counts for a broker in // the predictive view. Call this after the planner decides to place a replica // (e.g., in ChooseReplicas). func (t *LoadTracker) AddReplica(id int32, leader bool) { ld := t.l[id] ld.Replicas++ if leader { ld.Leaders++ } t.l[id] = ld } // Load returns the current transient load counters for a broker. func (t *LoadTracker) Load(id int32) Load { return t.l[id] } // ReplicaSelector abstracts the heuristic used to pick a concrete replica set // for a given (preferred) leader and RF. Keeping this as an interface makes it // trivial to swap strategy in tests if you’d like to assert specific behaviors. // The production strategy we use is RackAwareSelector. type ReplicaSelector interface { ChooseReplicas(preferredLeader int32, rf int) []int32 } // RackAwareSelector is a simple greedy strategy that tries to: // 1. Always include the requested preferred leader at index 0. // 2. Maximize rack diversity by preferring brokers on new racks first. // 3. Among candidates on equally novel racks, prefer lower replica load. // 4. Use broker ID as a final tiebreaker for determinism. 
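//
// For instance (hypothetical cluster): brokers 1 and 2 on rack "a", broker 3
// on rack "b", equal loads, preferredLeader=1 and rf=3 yield [1 3 2]. Broker 3
// wins the second slot because rack "b" is new; broker 2 fills the last slot.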
// // After selecting, we update the transient LoadTracker so subsequent decisions // are informed by this choice. // // This is intentionally not perfect or global-optimal—just a pragmatic heuristic // that produces good, stable results for the probe topic. type RackAwareSelector struct { state ClusterState loads *LoadTracker } // NewRackAwareSelector constructs the default selection strategy. func NewRackAwareSelector(state ClusterState, loads *LoadTracker) *RackAwareSelector { return &RackAwareSelector{state: state, loads: loads} } // ChooseReplicas returns an ordered replica list of length rf where index 0 is // the preferred leader. Candidates are scored by new rack first, then lower // load, then lower broker ID. func (s *RackAwareSelector) ChooseReplicas(preferredLeader int32, rf int) []int32 { rf = min(rf, len(s.state.BrokerIDs)) res := make([]int32, 0, rf) res = append(res, preferredLeader) seen := map[int32]struct{}{preferredLeader: {}} usedRack := map[string]struct{}{s.state.Brokers[preferredLeader].Rack: {}} type cand struct { id int32 rack string load int } build := func() []cand { out := make([]cand, 0, len(s.state.BrokerIDs)) for _, id := range s.state.BrokerIDs { if _, ok := seen[id]; ok { continue } out = append(out, cand{id: id, rack: s.state.Brokers[id].Rack, load: s.loads.Load(id).Replicas}) } sort.Slice(out, func(i, j int) bool { _, iu := usedRack[out[i].rack] _, ju := usedRack[out[j].rack] if iu != ju { return !iu && ju } if out[i].load != out[j].load { return out[i].load < out[j].load } return out[i].id < out[j].id }) return out } for len(res) < rf { cands := build() if len(cands) == 0 { break } c := cands[0] res = append(res, c.id) seen[c.id] = struct{}{} usedRack[c.rack] = struct{}{} } // update transient loads for i, id := range res { s.loads.AddReplica(id, i == 0) } return res } // ----------------------------------------------------------------------------- // Plan -> Kafka requests // ----------------------------------------------------------------------------- // ToRequests converts a Plan to Kafka admin requests. Either result may be nil // if the plan contains no operations of that type. // // rebalancePartitions controls whether explicit replica assignments are included // in the CreatePartitions request. Set it to false for Redpanda Cloud, which // disallows explicit partition assignments via the Kafka API and returns // INVALID_REQUEST when they are present. func (p *Plan) ToRequests(topic string, rebalancePartitions bool) (*kmsg.AlterPartitionAssignmentsRequest, *kmsg.CreatePartitionsRequest) { var alter *kmsg.AlterPartitionAssignmentsRequest var create *kmsg.CreatePartitionsRequest if len(p.Reassignments) > 0 { r := kmsg.NewAlterPartitionAssignmentsRequest() t := kmsg.NewAlterPartitionAssignmentsRequestTopic() t.Topic = topic for _, ra := range p.Reassignments { pr := kmsg.NewAlterPartitionAssignmentsRequestTopicPartition() pr.Partition = ra.Partition pr.Replicas = append([]int32(nil), ra.Replicas...) t.Partitions = append(t.Partitions, pr) } r.Topics = []kmsg.AlterPartitionAssignmentsRequestTopic{t} alter = &r } if len(p.CreateAssignments) > 0 { r := kmsg.NewCreatePartitionsRequest() t := kmsg.NewCreatePartitionsRequestTopic() t.Topic = topic t.Count = int32(p.FinalPartitionCount) // Redpanda Cloud disallows explicit partition assignments via the Kafka API // (returning INVALID_REQUEST), the same restriction that applies to // AlterPartitionAssignments. 
Omit the Assignment list and let the broker // auto-place new partitions when rebalancing is disabled. if rebalancePartitions { for _, ca := range p.CreateAssignments { ta := kmsg.NewCreatePartitionsRequestTopicAssignment() ta.Replicas = append([]int32(nil), ca.Replicas...) t.Assignment = append(t.Assignment, ta) } } r.Topics = []kmsg.CreatePartitionsRequestTopic{t} create = &r } return alter, create } // ----------------------------------------------------------------------------- // Utils // ----------------------------------------------------------------------------- // indexLeaders builds broker -> list of partition IDs it currently leads (from view). func indexLeaders(brokerIDs []int32, view map[int32][]int32) map[int32][]int32 { m := make(map[int32][]int32, len(brokerIDs)) for _, id := range brokerIDs { m[id] = nil } for pid, reps := range view { if len(reps) > 0 { m[reps[0]] = append(m[reps[0]], pid) } } return m } // brokersMissingLeadership returns brokers that lead zero partitions. func brokersMissingLeadership(brokerIDs []int32, leadersByBroker map[int32][]int32) []int32 { var out []int32 for _, id := range brokerIDs { if len(leadersByBroker[id]) == 0 { out = append(out, id) } } return out } // donorBrokers returns brokers that currently lead more than their soft target, // sorted by largest surplus first; ties broken by broker ID ascending. func donorBrokers(brokerIDs []int32, leadersByBroker map[int32][]int32, target map[int32]int) []int32 { var donors []int32 for _, id := range brokerIDs { if len(leadersByBroker[id]) > target[id] { donors = append(donors, id) } } sort.Slice(donors, func(i, j int) bool { surplusI := len(leadersByBroker[donors[i]]) - target[donors[i]] surplusJ := len(leadersByBroker[donors[j]]) - target[donors[j]] if surplusI != surplusJ { return surplusI > surplusJ } return donors[i] < donors[j] }) return donors } // violatesRackDiversity returns true if a partition’s replicas do not use as // many unique racks as they could (bounded by min(len(reps), numRacks)). func violatesRackDiversity(reps []int32, numRacks int, brokers map[int32]Broker) bool { if len(reps) <= 1 { return false } seen := map[string]struct{}{} for _, r := range reps { seen[brokers[r].Rack] = struct{}{} } maxUnique := min(len(reps), numRacks) return len(seen) < maxUnique } // victimIndex returns the index of a replica that sits on a rack appearing more // than once within the replica set (i.e., a duplicate-rack candidate). This is // used when we need to swap in a new broker to keep/restore diversity. If none // exists, we fall back to the last replica (a stable, simple choice that avoids // touching the leader at index 0). func victimIndex(reps []int32, brokers map[int32]Broker) int { rc := map[string]int{} for _, r := range reps { rc[brokers[r].Rack]++ } for i, r := range reps { if rc[brokers[r].Rack] > 1 { return i } } return len(reps) - 1 // fallback: last (never the leader) } // remove filters v out of a slice in place (reusing the backing array) while // preserving the relative order of the remaining elements. Used for maintaining // the leaders map in ensureLeaderCoverage. func remove(xs []int32, v int32) []int32 { out := xs[:0] for _, x := range xs { if x != v { out = append(out, x) } } return out } // putFirst moves id to index 0 while preserving the relative order of the // remaining elements. This models changing the preferred leader.
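// For example, putFirst([]int32{3, 1, 2}, 2) returns [2 3 1].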
func putFirst(reps []int32, id int32) []int32 { out := make([]int32, 0, len(reps)) out = append(out, id) for _, r := range reps { if r != id { out = append(out, r) } } return out } // contains reports whether v is present in xs. func contains(xs []int32, v int32) bool { for _, x := range xs { if x == v { return true } } return false } // evenSplit returns a soft target leader count per broker such that totals sum // to n. The remainder (+1) is assigned to the lowest broker IDs for stability. func evenSplit(n int, ids []int32) map[int32]int { m := make(map[int32]int, len(ids)) if len(ids) == 0 { return m } base := n / len(ids) rem := n % len(ids) for i, id := range ids { m[id] = base if i < rem { m[id]++ } } return m } // leastLoadedLeader returns the broker with the smallest number of preferred // leader assignments (ties broken by smaller broker ID). func leastLoadedLeader(leaders map[int32]int, ids []int32) int32 { best, bestCnt := ids[0], 1<<30 for _, id := range ids { if leaders[id] < bestCnt || (leaders[id] == bestCnt && id < best) { bestCnt, best = leaders[id], id } } return best } func equalInt32s(a, b []int32) bool { if len(a) != len(b) { return false } for i := range a { if a[i] != b[i] { return false } } return true } ================================================ FILE: e2e/partition_planner_test.go ================================================ package e2e import ( "sort" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) // buildMeta constructs a MetadataResponse for tests. // brokers: brokerID -> rack label ("" means no rack/unknown). // partitions: list of replica lists where index 0 is the preferred leader. func buildMeta(topic string, brokers map[int32]string, partitions [][]int32) *kmsg.MetadataResponse { // Brokers bs := make([]kmsg.MetadataResponseBroker, 0, len(brokers)) ids := make([]int32, 0, len(brokers)) for id := range brokers { ids = append(ids, id) } sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) for _, id := range ids { rack := brokers[id] // copy for address stability bs = append(bs, kmsg.MetadataResponseBroker{ NodeID: id, Rack: &rack, // empty string is allowed and treated as one "rack" bucket by planner }) } // Partitions ps := make([]kmsg.MetadataResponseTopicPartition, 0, len(partitions)) for i, reps := range partitions { cp := append([]int32(nil), reps...) leader := int32(-1) if len(reps) > 0 { leader = reps[0] // In tests, assume preferred leader is actual leader } ps = append(ps, kmsg.MetadataResponseTopicPartition{ Partition: int32(i), Leader: leader, Replicas: cp, }) } return &kmsg.MetadataResponse{ Brokers: bs, Topics: []kmsg.MetadataResponseTopic{ { Topic: kmsg.StringPtr(topic), Partitions: ps, }, }, } } // applyPlan returns the final predictive assignments after applying the plan // to the given metadata snapshot (without mutating meta). func applyPlan(meta *kmsg.MetadataResponse, plan *Plan) map[int32][]int32 { final := map[int32][]int32{} for _, p := range meta.Topics[0].Partitions { final[p.Partition] = append([]int32(nil), p.Replicas...) } for _, ra := range plan.Reassignments { final[ra.Partition] = append([]int32(nil), ra.Replicas...) } nextID := int32(len(meta.Topics[0].Partitions)) for _, ca := range plan.CreateAssignments { final[nextID] = append([]int32(nil), ca.Replicas...) 
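// Staged creates receive sequential IDs after the existing partitions,
// mirroring how Kafka numbers partitions added via CreatePartitions.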
nextID++ } return final } func countLeaders(assigns map[int32][]int32) map[int32]int { m := map[int32]int{} for _, reps := range assigns { if len(reps) > 0 { m[reps[0]]++ } } return m } func assertNoDuplicates(t *testing.T, reps []int32) { t.Helper() seen := map[int32]struct{}{} for _, r := range reps { _, dup := seen[r] assert.Falsef(t, dup, "duplicate broker in replica set: %v", reps) seen[r] = struct{}{} } } func max(a, b int) int { if a > b { return a } return b } // --- tests ----------------------------------------------------------------- func TestPartitionPlanner_Plan(t *testing.T) { type tc struct { name string brokers map[int32]string // brokerID -> rack ("" for no rack) partitions [][]int32 // ordered replicas (index 0 = preferred leader) cfg EndToEndTopicConfig // uses ReplicationFactor & PartitionsPerBroker check func(t *testing.T, meta *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) } tests := []tc{ { name: "single broker creates one partition", brokers: map[int32]string{1: ""}, partitions: nil, // empty topic cfg: EndToEndTopicConfig{ ReplicationFactor: 1, PartitionsPerBroker: 1, }, check: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { require.NotNil(t, plan) assert.Equal(t, 1, len(plan.CreateAssignments), "should create exactly one partition") assert.Equal(t, 1, plan.FinalPartitionCount) for pid, reps := range final { assert.Lenf(t, reps, 1, "pid %d must have RF=1", pid) } leaders := countLeaders(final) assert.Equal(t, 1, leaders[1], "broker 1 should lead one partition") }, }, { name: "three brokers, no racks, RF grows to 3; ensure coverage and count", brokers: map[int32]string{ 1: "", 2: "", 3: "", }, // start with 2 partitions at RF=2 both led by broker with id 1 (skewed) partitions: [][]int32{ {1, 2}, {1, 2}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, }, check: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { assert.Equal(t, 3, plan.FinalPartitionCount, "desired should be max(cur=2, ppb*brokers=3)=3") for pid, reps := range final { assert.Lenf(t, reps, 3, "pid %d must have RF=3", pid) assertNoDuplicates(t, reps) } leaders := countLeaders(final) assert.GreaterOrEqual(t, leaders[1], 1) assert.GreaterOrEqual(t, leaders[2], 1) assert.GreaterOrEqual(t, leaders[3], 1) }, }, { name: "rack diversity improves on same-RF partition (2 racks, RF=2)", brokers: map[int32]string{ 1: "a", 2: "a", 3: "b", }, // single partition with both replicas on rack "a" -> re-pick to include "b" partitions: [][]int32{ {1, 2}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 1, // desired total becomes 3, but p0 should be improved first }, check: func(t *testing.T, meta *kmsg.MetadataResponse, _ *Plan, final map[int32][]int32) { reps := final[0] require.Len(t, reps, 2) assertNoDuplicates(t, reps) state := BuildState(meta) assert.False(t, violatesRackDiversity(reps, state.NumRacks, state.Brokers), "p0 should span both racks a/b") }, }, { name: "shrink RF preserves current leader", brokers: map[int32]string{ 1: "", 2: "", 3: "", }, // RF=3 currently, leader is 2; configured RF=2 -> leader must remain 2 partitions: [][]int32{ {2, 1, 3}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 1, }, check: func(t *testing.T, _ *kmsg.MetadataResponse, _ *Plan, final map[int32][]int32) { reps := final[0] require.Len(t, reps, 2) assert.Equal(t, int32(2), reps[0], "leader 2 should be preserved after shrink") assertNoDuplicates(t, reps) }, }, { name: 
"rotate-if-replica covers missing brokers without extra swaps", brokers: map[int32]string{ 1: "", 2: "", 3: "", }, // 2 partitions, both led by 1; 2 and 3 are replicas only in separate partitions. partitions: [][]int32{ {1, 2}, {1, 3}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 1, // desired total 3 -> one create expected }, check: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { assert.Equal(t, 1, len(plan.CreateAssignments), "one create due to partition target") leaders := countLeaders(final) assert.GreaterOrEqual(t, leaders[1], 1) assert.GreaterOrEqual(t, leaders[2], 1) assert.GreaterOrEqual(t, leaders[3], 1) for pid, reps := range final { assert.Lenf(t, reps, 2, "pid %d must have RF=2", pid) assertNoDuplicates(t, reps) } }, }, { name: "replace-duplicate-and-rotate when target broker is not a replica anywhere", brokers: map[int32]string{ 1: "a", 2: "a", 3: "b", }, // 2 partitions both {1,2}; broker 3 is nowhere; donors exist (1 leads 2), // and duplicates exist (rack "a" twice) so we can safely swap in 3 and rotate. partitions: [][]int32{ {1, 2}, {1, 2}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 1, // desired total 3; coverage for 3 should be via swap+rotate }, check: func(t *testing.T, _ *kmsg.MetadataResponse, _ *Plan, final map[int32][]int32) { leaders := countLeaders(final) assert.GreaterOrEqual(t, leaders[3], 1, "broker 3 should lead at least one partition") for pid, reps := range final { assert.Lenf(t, reps, 2, "pid %d RF=2", pid) assertNoDuplicates(t, reps) } }, }, { name: "even split leaders when scaling partitions (ppb=2)", brokers: map[int32]string{ 1: "", 2: "", 3: "", }, // Start with 3 partitions, each broker already leads one partitions: [][]int32{ {1, 2}, {2, 1}, {3, 1}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 2, // desired = 2 * 3 = 6 }, check: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { assert.Equal(t, 6, plan.FinalPartitionCount) leaders := countLeaders(final) assert.Equal(t, 2, leaders[1]) assert.Equal(t, 2, leaders[2]) assert.Equal(t, 2, leaders[3]) }, }, { name: "one defined rack for all brokers -> no diversity churn; no ops needed", brokers: map[int32]string{ 1: "a", 2: "a", 3: "a", }, // Already meets RF and coverage and desired count == 3 partitions: [][]int32{ {1, 2}, {2, 3}, {3, 1}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 1, }, check: func(t *testing.T, meta *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { assert.Equal(t, 0, len(plan.Reassignments), "no reassignments expected") assert.Equal(t, 0, len(plan.CreateAssignments), "no creates expected") state := BuildState(meta) // Diversity can't be improved (NumRacks=1) for pid, reps := range final { assert.Falsef(t, violatesRackDiversity(reps, state.NumRacks, state.Brokers), "pid %d should not violate with NumRacks=1", pid) } }, }, { name: "broker present but leading nothing (restarting?) gets coverage (via swap or create)", brokers: map[int32]string{ 1: "a", 2: "b", 3: "c", }, // Two partitions led by 1 and 2; broker 3 has no replicas/leadership. 
partitions: [][]int32{ {1, 2}, {2, 1}, }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 1, // desired = max(2, 3)=3 -> at least one create or a swap+rotate + create for count }, check: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { assert.Equal(t, 3, plan.FinalPartitionCount) leaders := countLeaders(final) assert.GreaterOrEqual(t, leaders[1], 1) assert.GreaterOrEqual(t, leaders[2], 1) assert.GreaterOrEqual(t, leaders[3], 1) for pid, reps := range final { assert.Lenf(t, reps, 2, "pid %d RF=2", pid) assertNoDuplicates(t, reps) } }, }, { name: "phase 3 accounts for leaders created in phase 2 - no over-assignment", brokers: map[int32]string{ 1: "a", 2: "b", 3: "c", 4: "d", }, // Start with 2 partitions, both led by broker 1 // Brokers 2,3,4 have no leadership -> phase 2 will create partitions for them // Then phase 3 should NOT over-assign broker 1 when creating additional partitions partitions: [][]int32{ {1, 2}, // broker 1 leads {1, 3}, // broker 1 leads (over-represented) }, cfg: EndToEndTopicConfig{ ReplicationFactor: 2, PartitionsPerBroker: 2, // 4*2=8 total desired, currently have 2, so need 6 more }, check: func(t *testing.T, _ *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { // Should create 6 new partitions (3 for coverage in phase 2, 3 more for count in phase 3) assert.Equal(t, 8, plan.FinalPartitionCount, "should reach desired partition count of 8") assert.Equal(t, 6, len(plan.CreateAssignments), "should create 6 new partitions") leaders := countLeaders(final) // Each broker should lead exactly 2 partitions (8 total / 4 brokers = 2 each) // This test will fail if phase 3 doesn't account for leaders created in phase 2 // because it will see broker 1 as leading 2 partitions and think it needs 0 more, // while it actually leads 2 from existing + potentially more from phase 2 for brokerID := int32(1); brokerID <= 4; brokerID++ { assert.Equal(t, 2, leaders[brokerID], "broker %d should lead exactly 2 partitions (even distribution), but leads %d", brokerID, leaders[brokerID]) } }, }, { name: "uses actual leader not preferred leader when they differ", brokers: map[int32]string{ 1: "a", 2: "b", 3: "c", }, // This case simulates when the actual leader differs from preferred leader (replicas[0]) // We'll manually construct metadata where leader != replicas[0] partitions: [][]int32{ {1, 2, 3}, // preferred leader is 1, but we'll set actual leader to 2 in buildMetaWithLeader {2, 3, 1}, // preferred leader is 2, but we'll set actual leader to 3 in buildMetaWithLeader }, cfg: EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, // 3*1=3 total desired, have 2, need 1 more RebalancePartitions: true, }, check: func(t *testing.T, meta *kmsg.MetadataResponse, plan *Plan, final map[int32][]int32) { // With our manually set leaders (2, 3), broker 1 has no leadership // The planner should recognize this and either rotate leadership to broker 1 // or create a new partition led by broker 1 leaders := countLeaders(final) assert.GreaterOrEqual(t, leaders[1], 1, "broker 1 should lead at least one partition") assert.GreaterOrEqual(t, leaders[2], 1, "broker 2 should lead at least one partition") assert.GreaterOrEqual(t, leaders[3], 1, "broker 3 should lead at least one partition") }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var meta *kmsg.MetadataResponse if tt.name == "uses actual leader not preferred leader when they differ" { // Special case: manually set leaders to differ from preferred leaders 
meta = buildMeta("probe", tt.brokers, tt.partitions) // Override the leaders: partition 0 actual leader = 2, partition 1 actual leader = 3 meta.Topics[0].Partitions[0].Leader = 2 // preferred is 1, actual is 2 meta.Topics[0].Partitions[1].Leader = 3 // preferred is 2, actual is 3 } else { meta = buildMeta("probe", tt.brokers, tt.partitions) } planner := NewPartitionPlanner(tt.cfg, zap.NewNop()) plan, err := planner.Plan(meta) require.NoError(t, err, "Plan() should not error") require.NotNil(t, plan, "Plan() returned nil plan") // Sanity: final >= max(current, ppb*brokers) expectedMin := max(len(meta.Topics[0].Partitions), tt.cfg.PartitionsPerBroker*len(tt.brokers)) assert.GreaterOrEqual(t, plan.FinalPartitionCount, expectedMin, "final partition count must meet lower bound") // Apply and enforce universal invariants. final := applyPlan(meta, plan) for pid, reps := range final { assert.Lenf(t, reps, tt.cfg.ReplicationFactor, "pid %d RF mismatch", pid) assertNoDuplicates(t, reps) } // Scenario-specific checks. tt.check(t, meta, plan, final) }) } } func TestPartitionPlanner_Plan_Deterministic(t *testing.T) { // Test that the same input produces identical plans across multiple runs brokers := map[int32]string{ 10: "rack1", 20: "rack2", 30: "rack3", 40: "rack1", 50: "rack2", 60: "rack3", } // Many partitions with suboptimal RF to force fixReplicationAndRack to iterate over map partitions := [][]int32{ {10}, {20}, {30}, {40}, {50}, {60}, // RF=1, needs growth to 3 {10, 20}, {20, 30}, {30, 40}, {40, 50}, // RF=2, needs growth to 3 {50, 60, 10}, {60, 10, 20}, {10, 30, 40}, // RF=3, may need rack fixes } cfg := EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 3, // 6*3=18 total desired, have 13, need 5 more } meta := buildMeta("probe", brokers, partitions) var plans []*Plan // Run the same plan many times to increase chance of hitting different map iteration orders for i := 0; i < 10; i++ { planner := NewPartitionPlanner(cfg, zap.NewNop()) plan, err := planner.Plan(meta) require.NoError(t, err, "Plan() should not error on run %d", i) require.NotNil(t, plan, "Plan() returned nil plan on run %d", i) plans = append(plans, plan) } // All plans should be identical firstPlan := plans[0] for i := 1; i < len(plans); i++ { assert.Equal(t, len(firstPlan.Reassignments), len(plans[i].Reassignments), "run %d: reassignment count should be identical", i) assert.Equal(t, len(firstPlan.CreateAssignments), len(plans[i].CreateAssignments), "run %d: create count should be identical", i) assert.Equal(t, firstPlan.FinalPartitionCount, plans[i].FinalPartitionCount, "run %d: final partition count should be identical", i) // Build maps for comparison reassign1 := make(map[int32][]int32) for _, r := range firstPlan.Reassignments { reassign1[r.Partition] = r.Replicas } reassign2 := make(map[int32][]int32) for _, r := range plans[i].Reassignments { reassign2[r.Partition] = r.Replicas } assert.Equal(t, reassign1, reassign2, "run %d: reassignments should be identical", i) // Create assignments order matters for determinism assert.Equal(t, firstPlan.CreateAssignments, plans[i].CreateAssignments, "run %d: create assignments should be identical", i) } } // TestActualLeaderCoverageSkipsPreferredRebalancing tests the fix for the bug where // ensureLeaderCoverage would trigger unnecessary reassignments when actual leader // coverage was perfect but preferred leader coverage was unbalanced. 
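//
// The guard under test sits at the top of ensureLeaderCoverage: when every
// broker already holds actual leadership of at least one partition, the phase
// returns early without staging any reassignments.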
func TestActualLeaderCoverageSkipsPreferredRebalancing(t *testing.T) { // Simulate the exact scenario from the bug report: // - All brokers in same rack (no rack diversity benefit possible) // - Actual leaders perfectly distributed: broker 0→p1, broker 1→p0, broker 2→p2 // - But preferred leaders (replicas[0]) unbalanced: broker 1→p0&p2, broker 0→p1, broker 2→none brokers := map[int32]string{ 0: "europe-west1-b", 1: "europe-west1-b", 2: "europe-west1-b", } // Build metadata with specific replica assignments matching the bug report meta := buildMeta("probe", brokers, [][]int32{ {1, 2, 0}, // partition 0: preferred leader = 1 {0, 1, 2}, // partition 1: preferred leader = 0 {1, 2, 0}, // partition 2: preferred leader = 1 }) // Override actual leaders to match the bug report scenario meta.Topics[0].Partitions[0].Leader = 1 // p0: preferred=1, actual=1 (same) meta.Topics[0].Partitions[1].Leader = 0 // p1: preferred=0, actual=0 (same) meta.Topics[0].Partitions[2].Leader = 2 // p2: preferred=1, actual=2 (DIFFERENT!) cfg := EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, // 3*1=3 total desired, have 3, perfect } planner := NewPartitionPlanner(cfg, zap.NewNop()) plan, err := planner.Plan(meta) require.NoError(t, err, "Plan() should not error") require.NotNil(t, plan, "Plan() returned nil plan") // This is the key assertion: should have ZERO reassignments because: // 1. All brokers in same rack → no rack diversity violations possible // 2. Actual leader coverage already perfect (each broker leads exactly 1 partition) // 3. RF and partition count already correct assert.Equal(t, 0, len(plan.Reassignments), "should have no reassignments when actual coverage is perfect") assert.Equal(t, 0, len(plan.CreateAssignments), "should have no creates when partition count is perfect") assert.Equal(t, 3, plan.FinalPartitionCount, "should maintain existing partition count") // Verify the fix: no changes to replica assignments final := applyPlan(meta, plan) assert.Equal(t, []int32{1, 2, 0}, final[0], "partition 0 replicas should be unchanged") assert.Equal(t, []int32{0, 1, 2}, final[1], "partition 1 replicas should be unchanged") assert.Equal(t, []int32{1, 2, 0}, final[2], "partition 2 replicas should be unchanged") // Verify all partitions still have correct RF and no duplicates for pid, reps := range final { assert.Lenf(t, reps, 3, "pid %d must have RF=3", pid) assertNoDuplicates(t, reps) } } // TestPlan_ToRequests_RebalancePartitions verifies that ToRequests omits explicit // replica assignments from the CreatePartitions request when RebalancePartitions // is false, but still sets the correct total Count. func TestPlan_ToRequests_RebalancePartitions(t *testing.T) { brokers := map[int32]string{1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"} // Topic currently has 3 partitions; 6 brokers → planner will want 6 total. 
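// (ComputeDesired: max(cur=3, ppb=1 * 6 brokers) = 6, so three creates are expected.)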
partitions := [][]int32{ {1, 2, 3}, {2, 3, 4}, {3, 4, 5}, } meta := buildMeta("probe", brokers, partitions) t.Run("rebalancePartitions=true includes explicit assignments", func(t *testing.T) { cfg := EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, RebalancePartitions: true, } plan, err := NewPartitionPlanner(cfg, zap.NewNop()).Plan(meta) require.NoError(t, err) _, createReq := plan.ToRequests("probe", true) require.NotNil(t, createReq, "should have a CreatePartitions request") topic := createReq.Topics[0] assert.Equal(t, int32(plan.FinalPartitionCount), topic.Count) assert.NotEmpty(t, topic.Assignment, "assignments must be present when rebalancePartitions=true") assert.Equal(t, len(plan.CreateAssignments), len(topic.Assignment), "one assignment entry per new partition") }) t.Run("rebalancePartitions=false omits assignments", func(t *testing.T) { cfg := EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, RebalancePartitions: false, } plan, err := NewPartitionPlanner(cfg, zap.NewNop()).Plan(meta) require.NoError(t, err) // Planner should still compute create assignments (used for logging / count), // but ToRequests must NOT include them in the wire request. require.NotEmpty(t, plan.CreateAssignments, "planner should still compute assignments for count tracking") _, createReq := plan.ToRequests("probe", false) require.NotNil(t, createReq, "should still produce a CreatePartitions request") topic := createReq.Topics[0] assert.Equal(t, int32(plan.FinalPartitionCount), topic.Count, "Count must reflect the desired total even without explicit assignments") assert.Empty(t, topic.Assignment, "assignments must be absent when rebalancePartitions=false") }) t.Run("no creates needed produces nil create request regardless of flag", func(t *testing.T) { // Already-optimal topic: 3 brokers, 3 partitions, each broker leads one. optMeta := buildMeta("probe", map[int32]string{1: "", 2: "", 3: ""}, [][]int32{{1, 2, 3}, {2, 3, 1}, {3, 1, 2}}, ) for _, rebalance := range []bool{true, false} { cfg := EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, RebalancePartitions: rebalance, } plan, err := NewPartitionPlanner(cfg, zap.NewNop()).Plan(optMeta) require.NoError(t, err) assert.Empty(t, plan.CreateAssignments) _, createReq := plan.ToRequests("probe", rebalance) assert.Nil(t, createReq, "no CreatePartitions request when nothing to create (rebalance=%v)", rebalance) } }) } func TestMinimalReassignmentsWhenActualLeadersDivergeFromPreferred(t *testing.T) { // Scenario: all partitions have same preferred leader (broker 0), but actual // leaders are distributed. Algorithm should recognize brokers with actual // leadership and only fix gaps, not realign everything to preferred. 
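// Such divergence can occur in practice, e.g. after a rolling restart, when
// leadership fails over to another in-sync replica while replicas[0] still
// names the old preferred leader.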
brokers := map[int32]string{ 0: "rack-a", 1: "rack-b", 2: "rack-c", } meta := buildMeta("_redpanda_e2e_probe", brokers, [][]int32{ {0, 1, 2}, // partition 0: preferred leader = 0 {0, 1, 2}, // partition 1: preferred leader = 0 {0, 1, 2}, // partition 2: preferred leader = 0 }) // Set actual leaders to show divergence from preferred meta.Topics[0].Partitions[0].Leader = 0 // p0: preferred=0, actual=0 (match) meta.Topics[0].Partitions[1].Leader = 1 // p1: preferred=0, actual=1 (DIVERGED) meta.Topics[0].Partitions[2].Leader = 0 // p2: preferred=0, actual=0 (match) cfg := EndToEndTopicConfig{ ReplicationFactor: 3, PartitionsPerBroker: 1, } planner := NewPartitionPlanner(cfg, zap.NewNop()) plan, err := planner.Plan(meta) require.NoError(t, err) require.NotNil(t, plan) // Key assertion: should have ONLY ONE reassignment // Broker 1 already has actual leadership (p1), even though preferred leader of // p1 is broker 0. Only broker 2 is missing from actual leadership, so we only // need to fix that one gap. assert.Equal(t, 1, len(plan.Reassignments), "should need only ONE reassignment since broker 1 already has actual leadership") assert.Equal(t, 0, len(plan.CreateAssignments), "should have no creates") // Verify the reassignment gives broker 2 preferred leadership require.Len(t, plan.Reassignments, 1) reassignment := plan.Reassignments[0] assert.Equal(t, int32(2), reassignment.Replicas[0], "reassignment should give broker 2 preferred leadership") // Verify all partitions still have correct RF and no duplicates final := applyPlan(meta, plan) for pid, reps := range final { assert.Lenf(t, reps, 3, "pid %d must have RF=3", pid) assertNoDuplicates(t, reps) } } ================================================ FILE: e2e/producer.go ================================================ package e2e import ( "context" "encoding/json" "strconv" "time" "github.com/google/uuid" "github.com/twmb/franz-go/pkg/kgo" "go.uber.org/zap" ) // produceMessagesToAllPartitions sends an EndToEndMessage to every partition on the given topic func (s *Service) produceMessagesToAllPartitions(ctx context.Context) { for i := 0; i < s.partitionCount; i++ { s.produceMessage(ctx, i) } } // produceMessage produces an end to end record to a single given partition. If it succeeds producing the record // it will add it to the message tracker. If producing fails a message will be logged and the respective metrics // will be incremented. func (s *Service) produceMessage(ctx context.Context, partition int) { topicName := s.config.TopicManagement.Name record, msg := createEndToEndRecord(s.minionID, topicName, partition) startTime := time.Now() // This childCtx will ensure that we will abort our efforts to produce (including retries) when we exceed // the SLA for producers. childCtx, cancel := context.WithTimeout(ctx, s.config.Producer.AckSla+2*time.Second) pID := strconv.Itoa(partition) s.messagesProducedInFlight.WithLabelValues(pID).Inc() s.messageTracker.addToTracker(msg) s.client.TryProduce(childCtx, record, func(r *kgo.Record, err error) { defer cancel() ackDuration := time.Since(startTime) s.messagesProducedInFlight.WithLabelValues(pID).Dec() s.messagesProducedTotal.WithLabelValues(pID).Inc() // We add 0 in order to ensure that the "failed" metric series for that partition id are initialized as well. 
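// A child series of a prometheus *Vec is only exported once it has been
// touched; Add(0) materializes the series at an explicit zero so dashboards
// and rate() queries see a value instead of a missing series.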
s.messagesProducedFailed.WithLabelValues(pID).Add(0) s.lostMessages.WithLabelValues(pID).Add(0) if err != nil { s.messagesProducedFailed.WithLabelValues(pID).Inc() _ = s.messageTracker.removeFromTracker(msg.MessageID) s.logger.Info("failed to produce message to end-to-end topic", zap.String("topic_name", r.Topic), zap.Int32("partition", r.Partition), zap.Error(err)) return } else { // Update the message's state. If this message expires and is marked as successfully produced we will // report this as a lost message, which would indicate that the producer was told that the message got // produced successfully, but it got lost somewhere. // We need to use updateItemIfExists() because it's possible that the message has already been consumed // before we have received the message here (because we were awaiting the produce ack). msg.state = EndToEndMessageStateProducedSuccessfully msg.produceLatency = ackDuration.Seconds() // TODO: Enable again as soon as https://github.com/ReneKroon/ttlcache/issues/60 is fixed // Because we cannot update cache items in an atomic fashion we currently can't use this method // as this would cause a race condition which ends up in records being reported as lost/expired. // s.messageTracker.updateItemIfExists(msg) } s.produceLatency.WithLabelValues(pID).Observe(ackDuration.Seconds()) }) } func createEndToEndRecord(minionID string, topicName string, partition int) (*kgo.Record, *EndToEndMessage) { message := &EndToEndMessage{ MinionID: minionID, MessageID: uuid.NewString(), Timestamp: time.Now().UnixNano(), partition: partition, state: EndToEndMessageStateCreated, } mjson, err := json.Marshal(message) if err != nil { // Should never happen since the struct is so simple, // but if it does, something is completely broken anyway panic("cannot serialize EndToEndMessage") } record := &kgo.Record{ Topic: topicName, Value: mjson, Partition: int32(partition), // we set partition for producing so our customPartitioner can make use of it } return record, message } ================================================ FILE: e2e/service.go ================================================ package e2e import ( "context" "fmt" "strings" "time" "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "github.com/twmb/franz-go/pkg/kgo" "go.uber.org/zap" "github.com/cloudhut/kminion/v2/kafka" ) type Service struct { // General config Config logger *zap.Logger kafkaSvc *kafka.Service // creates kafka client for us client *kgo.Client // Service minionID string // unique identifier, reported in metrics, in case multiple instances run at the same time groupId string // our own consumer group groupTracker *groupTracker // tracks consumer groups starting with the kminion prefix and deletes them if they are unused for some time messageTracker *messageTracker // tracks successfully produced messages, clientHooks *clientHooks // logs broker events, tracks the coordinator (i.e. 
which broker last responded to our offset commit) partitionCount int // number of partitions of our test topic, used to send messages to all partitions // Metrics messagesProducedInFlight *prometheus.GaugeVec messagesProducedTotal *prometheus.CounterVec messagesProducedFailed *prometheus.CounterVec messagesReceived *prometheus.CounterVec offsetCommitsTotal *prometheus.CounterVec offsetCommitsFailedTotal *prometheus.CounterVec lostMessages *prometheus.CounterVec produceLatency *prometheus.HistogramVec roundtripLatency *prometheus.HistogramVec offsetCommitLatency *prometheus.HistogramVec } // NewService creates a new instance of the e2e monitoring service (wow) func NewService(ctx context.Context, cfg Config, logger *zap.Logger, kafkaSvc *kafka.Service, promRegisterer prometheus.Registerer) (*Service, error) { minionID := uuid.NewString() groupID := fmt.Sprintf("%v-%v", cfg.Consumer.GroupIdPrefix, minionID) // Producer options kgoOpts := []kgo.Opt{ kgo.ProduceRequestTimeout(3 * time.Second), kgo.RecordRetries(3), // We use the manual partitioner so that the records' partition id will be used as target partition kgo.RecordPartitioner(kgo.ManualPartitioner()), } if cfg.Producer.RequiredAcks == "all" { kgoOpts = append(kgoOpts, kgo.RequiredAcks(kgo.AllISRAcks())) } else { kgoOpts = append(kgoOpts, kgo.RequiredAcks(kgo.LeaderAck())) kgoOpts = append(kgoOpts, kgo.DisableIdempotentWrite()) } // Consumer configs kgoOpts = append(kgoOpts, kgo.ConsumerGroup(groupID), kgo.ConsumeTopics(cfg.TopicManagement.Name), kgo.Balancers(kgo.CooperativeStickyBalancer()), kgo.DisableAutoCommit(), kgo.ConsumeResetOffset(kgo.NewOffset().AtEnd()), ) // Prepare hooks hooks := newEndToEndClientHooks(logger) kgoOpts = append(kgoOpts, kgo.WithHooks(hooks)) // Create kafka service and check if client can successfully connect to Kafka cluster logger.Info("connecting to Kafka seed brokers, trying to fetch cluster metadata", zap.String("seed_brokers", strings.Join(kafkaSvc.Brokers(), ","))) client, err := kafkaSvc.CreateAndTestClient(ctx, logger, kgoOpts) if err != nil { return nil, fmt.Errorf("failed to create kafka client for e2e: %w", err) } logger.Info("successfully connected to kafka cluster") svc := &Service{ config: cfg, logger: logger.Named("e2e"), kafkaSvc: kafkaSvc, client: client, minionID: minionID, groupId: groupID, clientHooks: hooks, } svc.groupTracker = newGroupTracker(cfg, logger, client, groupID) svc.messageTracker = newMessageTracker(svc) makeCounterVec := func(name string, labelNames []string, help string) *prometheus.CounterVec { cv := prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: "end_to_end", Name: name, Help: help, }, labelNames) promRegisterer.MustRegister(cv) return cv } makeGaugeVec := func(name string, labelNames []string, help string) *prometheus.GaugeVec { gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: "end_to_end", Name: name, Help: help, }, labelNames) promRegisterer.MustRegister(gv) return gv } makeHistogramVec := func(name string, maxLatency time.Duration, labelNames []string, help string) *prometheus.HistogramVec { hv := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Subsystem: "end_to_end", Name: name, Help: help, Buckets: createHistogramBuckets(maxLatency), }, labelNames) promRegisterer.MustRegister(hv) return hv } // Low-level info // Users can construct alerts like "can't produce messages" themselves from those svc.messagesProducedInFlight = makeGaugeVec("messages_produced_in_flight", []string{"partition_id"}, "Number of messages that kminion's 
end-to-end test produced but has not received an answer for yet") svc.messagesProducedTotal = makeCounterVec("messages_produced_total", []string{"partition_id"}, "Number of all messages produced to Kafka. This counter will be incremented when we receive a response (failure/timeout or success) from Kafka") svc.messagesProducedFailed = makeCounterVec("messages_produced_failed_total", []string{"partition_id"}, "Number of messages failed to produce to Kafka because of a timeout or failure") svc.messagesReceived = makeCounterVec("messages_received_total", []string{"partition_id"}, "Number of *matching* messages kminion received. Every roundtrip message has a minionID (randomly generated on startup) and a timestamp. Kminion only considers a message a match if it arrives within the configured roundtrip SLA (and it matches the minionID)") svc.offsetCommitsTotal = makeCounterVec("offset_commits_total", []string{"coordinator_id"}, "Counts how many times kminion's end-to-end test has committed offsets") svc.offsetCommitsFailedTotal = makeCounterVec("offset_commits_failed_total", []string{"coordinator_id", "reason"}, "Number of offset commits that returned an error or timed out") svc.lostMessages = makeCounterVec("messages_lost_total", []string{"partition_id"}, "Number of messages that have been produced successfully but not received within the configured SLA duration") // Latency Histograms // More detailed info about how long stuff took // Since histograms also have an 'infinite' bucket, they can be used to detect small hiccups and "lost" messages svc.produceLatency = makeHistogramVec("produce_latency_seconds", cfg.Producer.AckSla, []string{"partition_id"}, "Time until we received an ack for a produced message") svc.roundtripLatency = makeHistogramVec("roundtrip_latency_seconds", cfg.Consumer.RoundtripSla, []string{"partition_id"}, "Time it took between sending (producing) and receiving (consuming) a message") svc.offsetCommitLatency = makeHistogramVec("offset_commit_latency_seconds", cfg.Consumer.CommitSla, []string{"coordinator_id"}, "Time Kafka took to respond to kminion's offset commit") return svc, nil } // Start starts the service (wow) func (s *Service) Start(ctx context.Context) error { // Ensure topic exists and is configured correctly if err := s.validateManagementTopic(ctx); err != nil { return fmt.Errorf("could not validate end-to-end topic: %w", err) } // finally start everything else (producing, consuming, continuous validation, consumer group tracking) go s.startReconciliation(ctx) // Start consumer and wait until we've received a response for the first poll // which would indicate that the consumer is ready. Only if the consumer is // ready do we want to start the e2e producer, to ensure that we will not miss // messages because the consumer wasn't ready. However, if this initialization // does not succeed within 30s we have to assume that something is wrong on the // consuming or producing side. KMinion is supposed to report these kinds of // issues and therefore this should not block KMinion from starting. initCh := make(chan bool, 1) s.logger.Info("initializing consumer and waiting until it has received the first record batch") go s.startConsumeMessages(ctx, initCh) // Produce an init message until the consumer has received at least one fetch initTicker := time.NewTicker(1 * time.Second) isInitialized := false // We send a first message immediately, but we'll keep sending more messages later // since the consumers start at the latest offset and may have missed this message.
initCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() s.sendInitMessage(initCtx, s.client, s.config.TopicManagement.Name) for !isInitialized { select { case <-initTicker.C: s.sendInitMessage(initCtx, s.client, s.config.TopicManagement.Name) case <-initCh: isInitialized = true s.logger.Info("consumer has been successfully initialized") case <-initCtx.Done(): // At this point we just assume the consumers are running fine. // The entire cluster may be down or producing fails. s.logger.Warn("initializing the consumers timed out, proceeding with the startup") isInitialized = true case <-ctx.Done(): return nil } } go s.startOffsetCommits(ctx) go s.startProducer(ctx) // keep track of groups, delete old unused groups if s.config.Consumer.DeleteStaleConsumerGroups { go s.groupTracker.start(ctx) } return nil } func (s *Service) sendInitMessage(ctx context.Context, client *kgo.Client, topicName string) { // Try to produce one record into each partition. This is important because // one or more partitions may be offline, while others may still be writable. for i := 0; i < s.partitionCount; i++ { client.TryProduce(ctx, &kgo.Record{ Key: []byte("init-message"), Value: nil, Topic: topicName, Partition: int32(i), }, nil) } } func (s *Service) startReconciliation(ctx context.Context) { if !s.config.TopicManagement.Enabled { return } validateTopicTicker := time.NewTicker(s.config.TopicManagement.ReconciliationInterval) for { select { case <-ctx.Done(): return case <-validateTopicTicker.C: err := s.validateManagementTopic(ctx) if err != nil { s.logger.Error("failed to validate end-to-end topic", zap.Error(err)) } } } } func (s *Service) startProducer(ctx context.Context) { produceTicker := time.NewTicker(s.config.ProbeInterval) for { select { case <-ctx.Done(): return case <-produceTicker.C: s.produceMessagesToAllPartitions(ctx) } } } func (s *Service) startOffsetCommits(ctx context.Context) { commitTicker := time.NewTicker(5 * time.Second) for { select { case <-ctx.Done(): return case <-commitTicker.C: s.commitOffsets(ctx) } } } ================================================ FILE: e2e/topic.go ================================================ package e2e import ( "context" "errors" "fmt" "sort" "time" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) // Check our end-to-end test topic and adapt accordingly if something does not match our expectations. // - does it exist? // // - is it configured correctly? // - does it have enough partitions? // - is the replicationFactor correct? // // - are assignments good? // - is each broker leading at least one partition? // - are replicas distributed correctly? func (s *Service) validateManagementTopic(ctx context.Context) error { s.logger.Debug("validating end-to-end topic...") meta, err := s.getTopicMetadata(ctx) if err != nil { return fmt.Errorf("validateManagementTopic cannot get metadata of e2e topic: %w", err) } typedErr := kerr.TypedErrorForCode(meta.Topics[0].ErrorCode) topicExists := false switch { case typedErr == nil: topicExists = true case errors.Is(typedErr, kerr.UnknownTopicOrPartition): // UnknownTopicOrPartition (Error code 3) means that the topic does not exist. // When the topic doesn't exist, continue to create it further down in the code. 
topicExists = false default: // If the topic (possibly) exists, but there's an error, then this should result in a failure. // Note that err is nil at this point (the metadata request itself succeeded), so we wrap the typed topic error. return fmt.Errorf("failed to get metadata for end-to-end topic: %w", typedErr) } // Create topic if it doesn't exist if !topicExists { if !s.config.TopicManagement.Enabled { return fmt.Errorf("the configured end-to-end topic does not exist. The topic will not be created " + "because topic management is disabled") } if err = s.createManagementTopic(ctx, meta); err != nil { return err } // Topic was just created with optimal assignments from the partition planner. // We can skip the validation/planning phase since the topic already has the correct // partition count and optimal replica assignments. We only need to update our // internal partition count tracking for KMinion's e2e monitoring operations. return s.updatePartitionCount(ctx) } // If topic management is disabled, skip validation and alteration of the existing topic. // This allows kminion to work on managed Kafka platforms (e.g., Confluent Cloud) that // block partition reassignment operations. if !s.config.TopicManagement.Enabled { topicMeta := meta.Topics[0] brokerIDs := make([]int32, len(meta.Brokers)) for i, broker := range meta.Brokers { brokerIDs[i] = broker.NodeID } s.logger.Info("topic management is disabled, skipping validation and alteration of existing topic", zap.String("topic", s.config.TopicManagement.Name), zap.Int("current_partitions", len(topicMeta.Partitions)), zap.Int("replication_factor", len(topicMeta.Partitions[0].Replicas))) // Log warnings if the topic configuration differs from expectations expectedPartitions := s.config.TopicManagement.PartitionsPerBroker * len(brokerIDs) if len(topicMeta.Partitions) != expectedPartitions { s.logger.Warn("topic partition count differs from expected configuration", zap.Int("current_partitions", len(topicMeta.Partitions)), zap.Int("expected_partitions", expectedPartitions), zap.Int("brokers", len(brokerIDs)), zap.Int("partitions_per_broker_config", s.config.TopicManagement.PartitionsPerBroker), zap.String("reason", "topic management is disabled, will not alter")) } // Check if each broker is leading at least one partition leaderCounts := make(map[int32]int) for _, partition := range topicMeta.Partitions { leaderCounts[partition.Leader]++ } brokersWithoutLeader := []int32{} for _, brokerID := range brokerIDs { if leaderCounts[brokerID] == 0 { brokersWithoutLeader = append(brokersWithoutLeader, brokerID) } } if len(brokersWithoutLeader) > 0 { s.logger.Warn("some brokers are not leading any partitions on the e2e topic", zap.Int32s("brokers_without_leader", brokersWithoutLeader), zap.String("reason", "topic management is disabled, will not alter"), zap.String("impact", "end-to-end monitoring may not cover all brokers")) } return s.updatePartitionCount(ctx) } // Topic already exists - use partition planner to validate and potentially fix assignments planner := NewPartitionPlanner(s.config.TopicManagement, s.logger) plan, err := planner.Plan(meta) if err != nil { return fmt.Errorf("failed to create partition plan: %w", err) } // Convert the plan to Kafka requests topicName := pointerStrToStr(meta.Topics[0].Topic) alterReq, createReq := plan.ToRequests(topicName, s.config.TopicManagement.RebalancePartitions) if s.config.TopicManagement.RebalancePartitions { if len(plan.Reassignments) > 0 { s.logPlannedReassignments(meta, plan, topicName) } err = s.executeAlterPartitionAssignments(ctx, alterReq) if err != nil { return fmt.Errorf("failed to alter partition assignments: 
%w", err) } } else if len(plan.Reassignments) > 0 { s.logger.Info("skipping partition reassignment because rebalancePartitions is disabled", zap.String("topic", topicName), zap.Int("skipped_reassignments", len(plan.Reassignments)), ) } if len(plan.CreateAssignments) > 0 { s.logPlannedCreations(meta, plan, topicName, s.config.TopicManagement.RebalancePartitions) } err = s.executeCreatePartitions(ctx, createReq) if err != nil { return fmt.Errorf("failed to create partitions: %w", err) } return s.updatePartitionCount(ctx) } // updatePartitionCount retrieves metadata to inform kminion about the updated // partition count of its e2e topic. It must be updated after topic validation // because the validation process may lead to the creation of new partitions. // This can occur when new brokers are added to the cluster. func (s *Service) updatePartitionCount(ctx context.Context) error { retryTicker := time.NewTicker(1 * time.Second) defer retryTicker.Stop() for { select { case <-ctx.Done(): return ctx.Err() case <-retryTicker.C: meta, err := s.getTopicMetadata(ctx) if err != nil { return fmt.Errorf("could not get topic metadata while updating partition count: %w", err) } typedErr := kerr.TypedErrorForCode(meta.Topics[0].ErrorCode) if typedErr == nil { s.partitionCount = len(meta.Topics[0].Partitions) s.logger.Debug("updatePartitionCount: successfully updated partition count", zap.Int("partition_count", s.partitionCount)) return nil } if !errors.Is(typedErr, kerr.UnknownTopicOrPartition) { return fmt.Errorf("unexpected error while updating partition count: %w", typedErr) } s.logger.Warn("updatePartitionCount: received UNKNOWN_TOPIC_OR_PARTITION error, possibly due to timing issue. Retrying...") // The UNKNOWN_TOPIC_OR_PARTITION error occurs occasionally even though the topic is created // in the validateManagementTopic function. It appears to be a timing issue where the topic metadata // is not immediately available after creation. In practice, waiting for a short period and then retrying // the operation resolves the issue. } } } func (s *Service) executeCreatePartitions(ctx context.Context, req *kmsg.CreatePartitionsRequest) error { if req == nil { return nil } res, err := req.RequestWith(ctx, s.client) if err != nil { return err } for _, topic := range res.Topics { err := kerr.ErrorForCode(topic.ErrorCode) if err != nil { return fmt.Errorf("inner Kafka error: %w", err) } } return nil } func (s *Service) executeAlterPartitionAssignments(ctx context.Context, req *kmsg.AlterPartitionAssignmentsRequest) error { if req == nil { return nil } res, err := req.RequestWith(ctx, s.client) if err != nil { return err } typedErr := kerr.TypedErrorForCode(res.ErrorCode) if typedErr != nil { s.logger.Error("alter partition assignments: failed to alter partition assignments", zap.Any("request_topics", req.Topics)) return fmt.Errorf("inner Kafka error: %w", typedErr) } for _, topic := range res.Topics { for _, partition := range topic.Partitions { typedErr = kerr.TypedErrorForCode(partition.ErrorCode) if typedErr != nil { return fmt.Errorf("inner Kafka partition error on partition '%v': %w", partition.Partition, typedErr) } } } return nil } // logPlannedReassignments logs current partition state and planned reassignment details. 
func (s *Service) logPlannedReassignments(meta *kmsg.MetadataResponse, plan *Plan, topicName string) { topicMeta := meta.Topics[0] // Log current partition state s.logger.Info("current partition assignments for e2e topic", zap.String("topic", topicName), zap.Int("current_partitions", len(topicMeta.Partitions)), zap.Int("brokers_available", len(meta.Brokers)), ) // Log each current partition assignment (sorted by partition ID) sortedPartitions := make([]kmsg.MetadataResponseTopicPartition, len(topicMeta.Partitions)) copy(sortedPartitions, topicMeta.Partitions) sort.Slice(sortedPartitions, func(i, j int) bool { return sortedPartitions[i].Partition < sortedPartitions[j].Partition }) for _, partition := range sortedPartitions { s.logger.Info("current partition assignment", zap.String("topic", topicName), zap.Int32("partition", partition.Partition), zap.Int32s("replicas", partition.Replicas), zap.Int32("leader", partition.Leader), ) } s.logger.Info("planned partition reassignments", zap.String("topic", topicName), zap.Int("reassignment_count", len(plan.Reassignments)), ) // Sort reassignments by partition ID for consistent logging sortedReassignments := make([]Reassignment, len(plan.Reassignments)) copy(sortedReassignments, plan.Reassignments) sort.Slice(sortedReassignments, func(i, j int) bool { return sortedReassignments[i].Partition < sortedReassignments[j].Partition }) for _, reassignment := range sortedReassignments { // Find current assignment for this partition var currentReplicas []int32 var currentLeader int32 = -1 for _, partition := range topicMeta.Partitions { if partition.Partition == reassignment.Partition { currentReplicas = partition.Replicas currentLeader = partition.Leader break } } s.logger.Info("partition reassignment", zap.String("topic", topicName), zap.Int32("partition", reassignment.Partition), zap.Int32s("current_replicas", currentReplicas), zap.Int32s("new_replicas", reassignment.Replicas), zap.Int32("current_leader", currentLeader), zap.Int32("new_leader", reassignment.Replicas[0]), ) } } // logPlannedCreations logs planned partition creation details. 
func (s *Service) logPlannedCreations(meta *kmsg.MetadataResponse, plan *Plan, topicName string, rebalancePartitions bool) { topicMeta := meta.Topics[0] s.logger.Info("planned partition creations", zap.String("topic", topicName), zap.Int("creation_count", len(plan.CreateAssignments)), zap.Int("current_partitions", len(topicMeta.Partitions)), zap.Int("final_partitions", plan.FinalPartitionCount), ) if !rebalancePartitions { s.logger.Info("partition assignments will be auto-placed by broker (rebalancePartitions is disabled)", zap.String("topic", topicName), ) return } nextPartitionID := int32(len(topicMeta.Partitions)) for i, creation := range plan.CreateAssignments { s.logger.Info("new partition creation", zap.String("topic", topicName), zap.Int32("new_partition", nextPartitionID+int32(i)), zap.Int32s("replicas", creation.Replicas), zap.Int32("leader", creation.Replicas[0]), ) } } func (s *Service) createManagementTopic(ctx context.Context, allMeta *kmsg.MetadataResponse) error { topicCfg := s.config.TopicManagement brokerCount := len(allMeta.Brokers) totalPartitions := brokerCount * topicCfg.PartitionsPerBroker s.logger.Info("e2e topic does not exist, creating it...", zap.String("topic_name", topicCfg.Name), zap.Int("partitions_per_broker", topicCfg.PartitionsPerBroker), zap.Int("replication_factor", topicCfg.ReplicationFactor), zap.Int("broker_count", brokerCount), zap.Int("total_partitions", totalPartitions), ) // Use partition planner to determine optimal assignments for the new topic. // The metadata already contains broker info, and since the topic doesn't exist, // meta.Topics[0].Partitions will be empty, which is exactly what we want. planner := NewPartitionPlanner(topicCfg, s.logger) plan, err := planner.Plan(allMeta) if err != nil { return fmt.Errorf("failed to create partition plan for new topic: %w", err) } // Create topic with specific replica assignments from the planner topic := kmsg.NewCreateTopicsRequestTopic() topic.Topic = topicCfg.Name topic.NumPartitions = -1 // Must be -1 when using ReplicaAssignment topic.ReplicationFactor = -1 // Must be -1 when using ReplicaAssignment topic.Configs = createTopicConfig(topicCfg) // Convert planner's CreateAssignments to Kafka's ReplicaAssignment format for i, assignment := range plan.CreateAssignments { replica := kmsg.NewCreateTopicsRequestTopicReplicaAssignment() replica.Partition = int32(i) replica.Replicas = append([]int32(nil), assignment.Replicas...) 
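// Note: append([]int32(nil), assignment.Replicas...) above copies the planner's
// replica slice, so the request owns its own backing array instead of aliasing the
// planner's internal state.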
topic.ReplicaAssignment = append(topic.ReplicaAssignment, replica) } req := kmsg.NewCreateTopicsRequest() req.Topics = []kmsg.CreateTopicsRequestTopic{topic} res, err := req.RequestWith(ctx, s.client) if err != nil { return fmt.Errorf("failed to create e2e topic: %w", err) } if len(res.Topics) > 0 { err := kerr.ErrorForCode(res.Topics[0].ErrorCode) if err != nil { return fmt.Errorf("failed to create e2e topic: %w", err) } } return nil } func (s *Service) getTopicMetadata(ctx context.Context) (*kmsg.MetadataResponse, error) { topicReq := kmsg.NewMetadataRequestTopic() topicName := s.config.TopicManagement.Name topicReq.Topic = &topicName req := kmsg.NewMetadataRequest() req.Topics = []kmsg.MetadataRequestTopic{topicReq} return req.RequestWith(ctx, s.client) } func (s *Service) getTopicsConfigs(ctx context.Context, configNames []string) (*kmsg.DescribeConfigsResponse, error) { req := kmsg.NewDescribeConfigsRequest() req.IncludeDocumentation = false req.IncludeSynonyms = false req.Resources = []kmsg.DescribeConfigsRequestResource{ { ResourceType: kmsg.ConfigResourceTypeTopic, ResourceName: s.config.TopicManagement.Name, ConfigNames: configNames, }, } return req.RequestWith(ctx, s.client) } func createTopicConfig(cfgTopic EndToEndTopicConfig) []kmsg.CreateTopicsRequestTopicConfig { topicConfig := func(name string, value interface{}) kmsg.CreateTopicsRequestTopicConfig { prop := kmsg.NewCreateTopicsRequestTopicConfig() prop.Name = name valStr := fmt.Sprintf("%v", value) prop.Value = &valStr return prop } minISR := 1 if cfgTopic.ReplicationFactor >= 3 { // Only with 3+ replicas does it make sense to require acks from 2 brokers // todo: think about whether we should change how 'producer.requiredAcks' works. // we probably don't even need this configured on the topic directly... minISR = 2 } // Even though kminion's end-to-end feature does not require any // real persistence beyond a few minutes, it might be good to keep messages // around a bit for debugging. return []kmsg.CreateTopicsRequestTopicConfig{ topicConfig("cleanup.policy", "delete"), topicConfig("segment.ms", (time.Hour * 12).Milliseconds()), // new segment every 12h topicConfig("retention.ms", (time.Hour * 24).Milliseconds()), // discard segments older than 24h topicConfig("min.insync.replicas", minISR), } } ================================================ FILE: e2e/utils.go ================================================ package e2e import ( "context" "math" "time" "github.com/prometheus/client_golang/prometheus" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" )
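// Worked example of the bucket math in createHistogramBuckets below, for a
// hypothetical 1s SLA: maxLatency = 1s gives math.Logb(1000/10) = 6 and
// count = 6+3 = 9, so prometheus.ExponentialBuckets(0.005, 2, 9) yields buckets at
// 0.005, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64 and 1.28 seconds. The largest
// explicit bucket just exceeds the SLA, and Prometheus adds an implicit +Inf bucket.

// createHistogramBuckets creates the buckets for the histogram; the bucket count is
// derived from the given upper bucket size (the configured SLA).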
func createHistogramBuckets(maxLatency time.Duration) []float64 { // Buckets grow exponentially with base 2, starting at 5ms (0.005s). The count is chosen so that // the largest explicit bucket just exceeds maxLatency: Logb (the binary log) of maxLatency-in-ms/10 // is roughly the number of doublings needed from the 20ms bucket, +2 covers the 5ms and 10ms buckets // below the 20ms base, and another +1 compensates for the integer truncation of the log. latencyCount := math.Logb(float64(maxLatency.Milliseconds() / 10)) count := int(latencyCount) + 3 bucket := prometheus.ExponentialBuckets(0.005, 2, count) return bucket } func containsStr(ar []string, x string) (bool, int) { for i, item := range ar { if item == x { return true, i } } return false, -1 } // logCommitErrors logs all errors in the commit response and returns a well-formatted error code if there was one func (s *Service) logCommitErrors(r *kmsg.OffsetCommitResponse, err error) string { if err != nil { if err == context.DeadlineExceeded { s.logger.Warn("offset commit failed because SLA has been exceeded") return "OFFSET_COMMIT_SLA_EXCEEDED" } s.logger.Warn("offset commit failed", zap.Error(err)) return "RESPONSE_ERROR" } lastErrCode := "" for _, t := range r.Topics { for _, p := range t.Partitions { typedErr := kerr.TypedErrorForCode(p.ErrorCode) if typedErr == nil { continue } s.logger.Warn("error committing partition offset", zap.String("topic", t.Topic), zap.Int32("partition_id", p.Partition), zap.Error(typedErr), ) lastErrCode = typedErr.Message } } return lastErrCode } // brokerMetadataByBrokerID returns a map of all broker metadata keyed by their BrokerID func brokerMetadataByBrokerID(meta []kmsg.MetadataResponseBroker) map[int32]kmsg.MetadataResponseBroker { res := make(map[int32]kmsg.MetadataResponseBroker) for _, broker := range meta { res[broker.NodeID] = broker } return res } // brokerMetadataByRackID returns a map of all broker metadata keyed by their Rack identifier func brokerMetadataByRackID(meta []kmsg.MetadataResponseBroker) map[string][]kmsg.MetadataResponseBroker { res := make(map[string][]kmsg.MetadataResponseBroker) for _, broker := range meta { rackID := "" if broker.Rack != nil { rackID = *broker.Rack } res[rackID] = append(res[rackID], broker) } return res } func pointerStrToStr(str *string) string { if str == nil { return "" } return *str } func safeUnwrap(err error) string { if err == nil { return "" } return err.Error() } func isInArray(num int16, arr []int16) bool { for _, n := range arr { if num == n { return true } } return false } ================================================ FILE: go.mod ================================================ module github.com/cloudhut/kminion/v2 go 1.26 require ( github.com/google/uuid v1.6.0 github.com/jcmturner/gokrb5/v8 v8.4.4 github.com/jellydator/ttlcache/v2 v2.11.1 github.com/knadh/koanf v1.5.0 github.com/mitchellh/mapstructure v1.5.0 github.com/orcaman/concurrent-map v1.0.0 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.23.2 github.com/stretchr/testify v1.11.1 github.com/twmb/franz-go v1.20.6 github.com/twmb/franz-go/pkg/kadm v1.17.1 github.com/twmb/franz-go/pkg/kmsg v1.12.0 github.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0 go.uber.org/atomic v1.11.0 go.uber.org/zap v1.27.1 golang.org/x/sync v0.19.0 ) require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/hashicorp/go-uuid
v1.0.3 // indirect github.com/jcmturner/aescts/v2 v2.0.0 // indirect github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect github.com/jcmturner/gofork v1.7.6 // indirect github.com/jcmturner/rpc/v2 v2.0.3 // indirect github.com/klauspost/compress v1.18.3 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pelletier/go-toml v1.9.1 // indirect github.com/pierrec/lz4/v4 v4.1.25 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.67.5 // indirect github.com/prometheus/procfs v0.19.2 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect golang.org/x/crypto v0.47.0 // indirect golang.org/x/net v0.49.0 // indirect golang.org/x/sys v0.40.0 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) ================================================ FILE: go.sum ================================================ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/aws/aws-sdk-go-v2 v1.9.2/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4= github.com/aws/aws-sdk-go-v2/config v1.8.3/go.mod h1:4AEiLtAb8kLs7vgw2ZV3p2VZ1+hBavOc84hqxVNpCyw= github.com/aws/aws-sdk-go-v2/credentials v1.4.3/go.mod h1:FNNC6nQZQUuyhq5aE5c7ata8o9e4ECGmS4lAXC7o1mQ= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.6.0/go.mod h1:gqlclDEZp4aqJOancXK6TN24aKhT0W0Ae9MHk3wzTMM= github.com/aws/aws-sdk-go-v2/internal/ini v1.2.4/go.mod h1:ZcBrrI3zBKlhGFNYWvju0I3TR93I7YIgAfy82Fh4lcQ= github.com/aws/aws-sdk-go-v2/service/appconfig v1.4.2/go.mod h1:FZ3HkCe+b10uFZZkFdvf98LHW21k49W8o8J366lqVKY= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.3.2/go.mod h1:72HRZDLMtmVQiLG2tLfQcaWLCssELvGl+Zf2WVxMmR8= github.com/aws/aws-sdk-go-v2/service/sso v1.4.2/go.mod h1:NBvT9R1MEF+Ud6ApJKM0G+IkPchKS7p7c2YPKwHmBOk= github.com/aws/aws-sdk-go-v2/service/sts v1.7.2/go.mod h1:8EzeIqfWt2wWT4rJVu3f21TfrhJ8AEMzVybRNSb/b4g= github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= 
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-ldap/ldap v3.0.2+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-stack/stack v1.8.0/go.mod 
h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.0.2-0.20181118220953-042da051cf31/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= 
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/consul/api v1.13.0/go.mod h1:ZlVrynguJKcYr54zGaDbaL3fOvKC9m72FhPvA8T35KQ= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI= github.com/hashicorp/go-hclog v0.8.0/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY= github.com/hashicorp/go-retryablehttp v0.5.4/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.1/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q= github.com/hashicorp/vault/sdk v0.1.13/go.mod 
h1:B+hVj7TpuQY1Y/GPbCpffmgd+tSEwvhkWnjtSYCaS2M= github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= github.com/hjson/hjson-go/v4 v4.0.0 h1:wlm6IYYqHjOdXH1gHev4VoXCaW20HdQAGCxdOEEg2cs= github.com/hjson/hjson-go/v4 v4.0.0/go.mod h1:KaYt3bTw3zhBjYqnXkYywcYctk0A2nxeEFTse3rH13E= github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= github.com/jcmturner/gokrb5/v8 v8.4.3/go.mod h1:dqRwJGXznQrzw6cWmyo6kH+E7jksEQG/CyVWsJEsJO0= github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/jellydator/ttlcache/v2 v2.11.1 h1:AZGME43Eh2Vv3giG6GeqeLeFXxwxn1/qHItqWZl6U64= github.com/jellydator/ttlcache/v2 v2.11.1/go.mod h1:RtE5Snf0/57e+2cLWFYWCCsLas2Hy3c5Z4n14XmSvTI= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw= github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/knadh/koanf v1.5.0 h1:q2TSd/3Pyc/5yP9ldIrSdIz26MCcyNQzW0pEAugLPNs= github.com/knadh/koanf v1.5.0/go.mod h1:Hgyjp4y8v44hpZtPzs7JZfRAW5AhN7KfZcwv1RYggDs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod 
h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v0.0.0-20171004221916-a61a99592b77/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= 
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/npillmayer/nestext v0.1.3/go.mod h1:h2lrijH8jpicr25dFY+oAJLyzlya6jhnuG+zWp9L0Uk= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/orcaman/concurrent-map v1.0.0 h1:I/2A2XPCb4IuQWcQhBhSwGfiuybl/J0ev9HDbW65HOY= github.com/orcaman/concurrent-map v1.0.0/go.mod h1:Lu3tH6HLW3feq74c2GC+jIMS/K2CFcDWnWD9XkenwhI= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= github.com/pelletier/go-toml v1.9.1 h1:a6qW1EVNZWH9WGI6CsYdD8WAylkoXBS5yv0XHlh17Tc= github.com/pelletier/go-toml v1.9.1/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0= github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model 
v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= github.com/rhnvrm/simples3 v0.6.1/go.mod h1:Y+3vYm2V7Y4VijFoJHHTrja6OgPrJ2cBti8dPGkC3sA= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/twmb/franz-go v1.7.0/go.mod h1:PMze0jNfNghhih2XHbkmTFykbMF5sJqmNJB31DOOzro= github.com/twmb/franz-go v1.20.6 h1:TpQTt4QcixJ1cHEmQGPOERvTzo99s8jAutmS7rbSD6w= github.com/twmb/franz-go v1.20.6/go.mod h1:u+FzH2sInp7b9HNVv2cZN8AxdXy6y/AQ1Bkptu4c0FM= github.com/twmb/franz-go/pkg/kadm v1.17.1 h1:Bt02Y/RLgnFO2NP2HVP1kd2TFtGRiJZx+fSArjZDtpw= github.com/twmb/franz-go/pkg/kadm v1.17.1/go.mod h1:s4duQmrDbloVW9QTMXhs6mViTepze7JLG43xwPcAeTg= github.com/twmb/franz-go/pkg/kmsg v1.2.0/go.mod h1:SxG/xJKhgPu25SamAq0rrucfp7lbzCpEXOC+vH/ELrY= github.com/twmb/franz-go/pkg/kmsg v1.12.0 h1:CbatD7ers1KzDNgJqPbKOq0Bz/WLBdsTH75wgzeVaPc= github.com/twmb/franz-go/pkg/kmsg v1.12.0/go.mod h1:+DPt4NC8RmI6hqb8G09+3giKObE6uD2Eya6CfqBpeJY= github.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0 h1:alKdbddkPw3rDh+AwmUEwh6HNYgTvDSFIe/GWYRR9RM= github.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0/go.mod h1:k8BoBjyUbFj34f0rRbn+Ky12sZFAPbmShrg0karAIMo= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/etcd/api/v3 v3.5.4/go.mod h1:5GB2vv4A4AOn3yk7MftYGHkUfGtDHnEraIjym4dYz5A= go.etcd.io/etcd/client/pkg/v3 v3.5.4/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v3 v3.5.4/go.mod h1:ZaRkVgBZC+L+dLCjTcF1hRXpgZXQPOvnA/Ak/gq3kiY= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc= go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto 
v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod 
h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220725212005-46097bf591d3/go.mod h1:AaygXjzTFtRAg2ttMY5RMuhpJ3cNnI0XpyFJD1iQRSM= golang.org/x/net v0.0.0-20220812174116-3211cb980234/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190129075346-302c3dd5f1cc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.2/go.mod 
h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210112230658-8b4aab62c064/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190404172233-64821d5d2107/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc 
v1.22.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d/go.mod h1:cuepJuh7vyXfUyUwEgHQXw849cJrilpS5NeIjOWESAw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= ================================================ FILE: kafka/client_config_helper.go ================================================ package kafka import ( "context" "crypto/tls" "crypto/x509" "encoding/pem" "fmt" "io/ioutil" "net" "time" "github.com/jcmturner/gokrb5/v8/client" "github.com/jcmturner/gokrb5/v8/keytab" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/sasl" "github.com/twmb/franz-go/pkg/sasl/kerberos" "github.com/twmb/franz-go/pkg/sasl/oauth" "github.com/twmb/franz-go/pkg/sasl/plain" "github.com/twmb/franz-go/pkg/sasl/scram" "go.uber.org/zap" krbconfig "github.com/jcmturner/gokrb5/v8/config" ) // NewKgoConfig creates a new Config for the Kafka Client as exposed by the franz-go library. // If TLS certificates can't be read an error will be returned. // logger is only used to print warnings about TLS. func NewKgoConfig(cfg Config, logger *zap.Logger) ([]kgo.Opt, error) { opts := []kgo.Opt{ kgo.SeedBrokers(cfg.Brokers...), kgo.ClientID(cfg.ClientID), kgo.FetchMaxBytes(5 * 1000 * 1000), // 5MB kgo.MaxConcurrentFetches(10), // Allow metadata to be refreshed more often than 5s (default) if needed. // That will mitigate issues with unknown partitions shortly after creating // them. kgo.MetadataMinAge(time.Second), } // Create Logger kgoLogger := KgoZapLogger{ logger: logger.Sugar(), } opts = append(opts, kgo.WithLogger(kgoLogger)) // Add Rack Awareness if configured if cfg.RackID != "" { opts = append(opts, kgo.Rack(cfg.RackID)) } // Configure SASL if cfg.SASL.Enabled { // SASL Plain if cfg.SASL.Mechanism == "PLAIN" { mechanism := plain.Auth{ User: cfg.SASL.Username, Pass: cfg.SASL.Password, }.AsMechanism() opts = append(opts, kgo.SASL(mechanism)) } // SASL SCRAM if cfg.SASL.Mechanism == "SCRAM-SHA-256" || cfg.SASL.Mechanism == "SCRAM-SHA-512" { var mechanism sasl.Mechanism scramAuth := scram.Auth{ User: cfg.SASL.Username, Pass: cfg.SASL.Password, } if cfg.SASL.Mechanism == "SCRAM-SHA-256" { mechanism = scramAuth.AsSha256Mechanism() } if cfg.SASL.Mechanism == "SCRAM-SHA-512" { mechanism = scramAuth.AsSha512Mechanism() } opts = append(opts, kgo.SASL(mechanism)) } // Kerberos if cfg.SASL.Mechanism == "GSSAPI" { var krbClient *client.Client kerbCfg, err := krbconfig.Load(cfg.SASL.GSSAPI.KerberosConfigPath) if err != nil { return nil, fmt.Errorf("failed to create kerberos config from specified config filepath: %w", err) } switch cfg.SASL.GSSAPI.AuthType { case "USER_AUTH": krbClient = client.NewWithPassword( cfg.SASL.GSSAPI.Username, cfg.SASL.GSSAPI.Realm, cfg.SASL.GSSAPI.Password, kerbCfg, client.DisablePAFXFAST(!cfg.SASL.GSSAPI.EnableFast)) case "KEYTAB_AUTH": ktb, err := keytab.Load(cfg.SASL.GSSAPI.KeyTabPath) if err != nil { return nil, fmt.Errorf("failed to load keytab: %w", err) } krbClient = client.NewWithKeytab( cfg.SASL.GSSAPI.Username, cfg.SASL.GSSAPI.Realm, ktb, kerbCfg, client.DisablePAFXFAST(!cfg.SASL.GSSAPI.EnableFast)) } if krbClient == nil { return nil, fmt.Errorf("kafka.sasl.gssapi.authType must be one of USER_AUTH or KEYTAB_AUTH") } kerberosMechanism := kerberos.Auth{ Client: krbClient, Service: cfg.SASL.GSSAPI.ServiceName, PersistAfterAuth: true, }.AsMechanism() opts = append(opts,
kgo.SASL(kerberosMechanism)) } // OAuthBearer if cfg.SASL.Mechanism == "OAUTHBEARER" { mechanism := oauth.Oauth(func(ctx context.Context) (oauth.Auth, error) { token, err := cfg.SASL.OAuthBearer.getToken(ctx) return oauth.Auth{ Zid: cfg.SASL.OAuthBearer.ClientID, Token: token, }, err }) opts = append(opts, kgo.SASL(mechanism)) } } // Configure TLS var caCertPool *x509.CertPool if cfg.TLS.Enabled { // Root CA if cfg.TLS.CaFilepath != "" || len(cfg.TLS.Ca) > 0 { ca := []byte(cfg.TLS.Ca) if cfg.TLS.CaFilepath != "" { caBytes, err := ioutil.ReadFile(cfg.TLS.CaFilepath) if err != nil { return nil, fmt.Errorf("failed to load ca cert: %w", err) } ca = caBytes } caCertPool = x509.NewCertPool() isSuccessful := caCertPool.AppendCertsFromPEM(ca) if !isSuccessful { logger.Warn("failed to append ca file to cert pool, is this a valid PEM format?") } } // If configured load TLS cert & key - Mutual TLS var certificates []tls.Certificate hasCertFile := cfg.TLS.CertFilepath != "" || len(cfg.TLS.Cert) > 0 hasKeyFile := cfg.TLS.KeyFilepath != "" || len(cfg.TLS.Key) > 0 if hasCertFile || hasKeyFile { cert := []byte(cfg.TLS.Cert) privateKey := []byte(cfg.TLS.Key) // 1. Read certificates if cfg.TLS.CertFilepath != "" { certBytes, err := ioutil.ReadFile(cfg.TLS.CertFilepath) if err != nil { return nil, fmt.Errorf("failed to read TLS certificate: %w", err) } cert = certBytes } if cfg.TLS.KeyFilepath != "" { keyBytes, err := ioutil.ReadFile(cfg.TLS.KeyFilepath) if err != nil { return nil, fmt.Errorf("failed to read TLS key: %w", err) } privateKey = keyBytes } // 2. Check if private key needs to be decrypted. Decrypt it if passphrase is given, otherwise return error pemBlock, _ := pem.Decode(privateKey) if pemBlock == nil { return nil, fmt.Errorf("no valid private key found") } if x509.IsEncryptedPEMBlock(pemBlock) { decryptedKey, err := x509.DecryptPEMBlock(pemBlock, []byte(cfg.TLS.Passphrase)) if err != nil { return nil, fmt.Errorf("private key is encrypted, but could not decrypt it: %s", err) } // If private key was encrypted we can overwrite the original contents now with the decrypted version privateKey = pem.EncodeToMemory(&pem.Block{Type: pemBlock.Type, Bytes: decryptedKey}) } tlsCert, err := tls.X509KeyPair(cert, privateKey) if err != nil { return nil, fmt.Errorf("cannot parse pem: %s", err) } certificates = []tls.Certificate{tlsCert} } tlsDialer := &tls.Dialer{ NetDialer: &net.Dialer{Timeout: 10 * time.Second}, Config: &tls.Config{ InsecureSkipVerify: cfg.TLS.InsecureSkipTLSVerify, Certificates: certificates, RootCAs: caCertPool, }, } opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) } return opts, nil } ================================================ FILE: kafka/client_logger.go ================================================ package kafka import ( "github.com/twmb/franz-go/pkg/kgo" "go.uber.org/zap" ) type KgoZapLogger struct { logger *zap.SugaredLogger } // Level implements the kgo.Logger interface. It returns the log level to log at. // We pin this to debug as the zap logger decides what to actually send to the output stream. func (k KgoZapLogger) Level() kgo.LogLevel { return kgo.LogLevelDebug } // Log implements the kgo.Logger interface func (k KgoZapLogger) Log(level kgo.LogLevel, msg string, keyvals ...interface{}) { switch level { case kgo.LogLevelDebug: k.logger.Debugw(msg, keyvals...) case kgo.LogLevelInfo: k.logger.Infow(msg, keyvals...) case kgo.LogLevelWarn: k.logger.Warnw(msg, keyvals...) case kgo.LogLevelError: k.logger.Errorw(msg, keyvals...)
} } ================================================ FILE: kafka/config.go ================================================ package kafka import "fmt" type Config struct { // General Brokers []string `koanf:"brokers"` ClientID string `koanf:"clientId"` RackID string `koanf:"rackId"` TLS TLSConfig `koanf:"tls"` SASL SASLConfig `koanf:"sasl"` RetryInitConnection bool `koanf:"retryInitConnection"` } func (c *Config) SetDefaults() { c.ClientID = "kminion" c.TLS.SetDefaults() c.SASL.SetDefaults() } func (c *Config) Validate() error { if len(c.Brokers) == 0 { return fmt.Errorf("no seed brokers specified, at least one must be configured") } err := c.TLS.Validate() if err != nil { return fmt.Errorf("failed to validate TLS config: %w", err) } err = c.SASL.Validate() if err != nil { return fmt.Errorf("failed to validate SASL config: %w", err) } return nil } ================================================ FILE: kafka/config_sasl.go ================================================ package kafka import "fmt" const ( SASLMechanismPlain = "PLAIN" SASLMechanismScramSHA256 = "SCRAM-SHA-256" SASLMechanismScramSHA512 = "SCRAM-SHA-512" SASLMechanismGSSAPI = "GSSAPI" SASLMechanismOAuthBearer = "OAUTHBEARER" ) // SASLConfig for Kafka Client type SASLConfig struct { Enabled bool `koanf:"enabled"` Username string `koanf:"username"` Password string `koanf:"password"` Mechanism string `koanf:"mechanism"` // SASL Mechanisms that require more configuration than username & password GSSAPI SASLGSSAPIConfig `koanf:"gssapi"` OAuthBearer OAuthBearerConfig `koanf:"oauth"` } // SetDefaults for SASL Config func (c *SASLConfig) SetDefaults() { c.Enabled = false c.Mechanism = SASLMechanismPlain c.GSSAPI.SetDefaults() } // Validate SASL config input func (c *SASLConfig) Validate() error { if !c.Enabled { return nil } switch c.Mechanism { case SASLMechanismPlain, SASLMechanismScramSHA256, SASLMechanismScramSHA512, SASLMechanismGSSAPI: // Valid and supported case SASLMechanismOAuthBearer: return c.OAuthBearer.Validate() default: return fmt.Errorf("given sasl mechanism '%v' is invalid", c.Mechanism) } return nil } ================================================ FILE: kafka/config_sasl_gssapi.go ================================================ package kafka // SASLGSSAPIConfig represents the Kafka Kerberos config type SASLGSSAPIConfig struct { AuthType string `koanf:"authType"` KeyTabPath string `koanf:"keyTabPath"` KerberosConfigPath string `koanf:"kerberosConfigPath"` ServiceName string `koanf:"serviceName"` Username string `koanf:"username"` Password string `koanf:"password"` Realm string `koanf:"realm"` // EnableFAST enables FAST, which is a pre-authentication framework for Kerberos. // It includes a mechanism for tunneling pre-authentication exchanges using armoured KDC messages. // FAST provides increased resistance to passive password guessing attacks. 
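//
// As an illustration only (all values below are hypothetical and not taken from this repository),
// a keytab-based GSSAPI setup would populate this struct roughly like so:
//
//	cfg := SASLGSSAPIConfig{
//		AuthType:           "KEYTAB_AUTH", // or "USER_AUTH" together with Password
//		KeyTabPath:         "/etc/krb5.keytab",
//		KerberosConfigPath: "/etc/krb5.conf",
//		ServiceName:        "kafka",
//		Username:           "kminion",
//		Realm:              "EXAMPLE.COM",
//		EnableFast:         true,
//	}
//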
EnableFast bool `koanf:"enableFast"` } func (s *SASLGSSAPIConfig) SetDefaults() { s.EnableFast = true } ================================================ FILE: kafka/config_sasl_oauthbearer.go ================================================ package kafka import ( "context" "encoding/base64" "encoding/json" "fmt" "net/http" "net/url" "strings" ) type OAuthBearerConfig struct { TokenEndpoint string `koanf:"tokenEndpoint"` ClientID string `koanf:"clientId"` ClientSecret string `koanf:"clientSecret"` Scope string `koanf:"scope"` } func (c *OAuthBearerConfig) Validate() error { if c.TokenEndpoint == "" { return fmt.Errorf("OAuthBearer token endpoint is not specified") } if c.ClientID == "" || c.ClientSecret == "" { return fmt.Errorf("OAuthBearer client credentials are not specified") } return nil } // same as AcquireToken in Console https://github.com/redpanda-data/console/blob/master/backend/pkg/config/kafka_sasl_oauth.go#L56 func (c *OAuthBearerConfig) getToken(ctx context.Context) (string, error) { authHeaderValue := base64.StdEncoding.EncodeToString([]byte(c.ClientID + ":" + c.ClientSecret)) queryParams := url.Values{ "grant_type": []string{"client_credentials"}, "scope": []string{c.Scope}, } req, err := http.NewRequestWithContext(ctx, "POST", c.TokenEndpoint, strings.NewReader(queryParams.Encode())) if err != nil { return "", fmt.Errorf("failed to create HTTP request: %w", err) } req.URL.RawQuery = queryParams.Encode() req.Header.Set("Authorization", "Basic "+authHeaderValue) req.Header.Set("Content-Type", "application/x-www-form-urlencoded") client := &http.Client{} resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("HTTP request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("token request failed with status code %d", resp.StatusCode) } var tokenResponse map[string]interface{} decoder := json.NewDecoder(resp.Body) if err := decoder.Decode(&tokenResponse); err != nil { return "", fmt.Errorf("failed to parse token response: %w", err) } accessToken, ok := tokenResponse["access_token"].(string) if !ok { return "", fmt.Errorf("access_token not found in token response") } return accessToken, nil } ================================================ FILE: kafka/config_tls.go ================================================ package kafka import "fmt" // TLSConfig to connect to Kafka via TLS type TLSConfig struct { Enabled bool `koanf:"enabled"` CaFilepath string `koanf:"caFilepath"` CertFilepath string `koanf:"certFilepath"` KeyFilepath string `koanf:"keyFilepath"` Ca string `koanf:"ca"` Cert string `koanf:"cert"` Key string `koanf:"key"` Passphrase string `koanf:"passphrase"` InsecureSkipTLSVerify bool `koanf:"insecureSkipTlsVerify"` } func (c *TLSConfig) SetDefaults() { c.Enabled = false } func (c *TLSConfig) Validate() error { if len(c.CaFilepath) > 0 && len(c.Ca) > 0 { return fmt.Errorf("config keys 'caFilepath' and 'ca' are both set. only one can be used at the same time") } if len(c.CertFilepath) > 0 && len(c.Cert) > 0 { return fmt.Errorf("config keys 'certFilepath' and 'cert' are both set. only one can be used at the same time") } if len(c.KeyFilepath) > 0 && len(c.Key) > 0 { return fmt.Errorf("config keys 'keyFilepath' and 'key' are both set. 
only one can be used at the same time") } return nil } ================================================ FILE: kafka/service.go ================================================ package kafka import ( "context" "fmt" "time" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "github.com/twmb/franz-go/pkg/kversion" "go.uber.org/zap" ) type Service struct { cfg Config logger *zap.Logger } func NewService(cfg Config, logger *zap.Logger) *Service { return &Service{ cfg: cfg, logger: logger.Named("kafka_service"), } } // CreateAndTestClient creates a client with the services default settings // logger: will be used to log connections, errors, warnings about tls config, ... func (s *Service) CreateAndTestClient(ctx context.Context, l *zap.Logger, opts []kgo.Opt) (*kgo.Client, error) { logger := l.Named("kgo_client") // Config with default options kgoOpts, err := NewKgoConfig(s.cfg, logger) if err != nil { return nil, fmt.Errorf("failed to create a valid kafka Client config: %w", err) } // Append user (the service calling this method) provided options kgoOpts = append(kgoOpts, opts...) // Create kafka client client, err := kgo.NewClient(kgoOpts...) if err != nil { return nil, fmt.Errorf("failed to create kafka Client: %w", err) } // Test connection for { err = s.testConnection(client, ctx) if err == nil { break } if !s.cfg.RetryInitConnection { return nil, fmt.Errorf("failed to test connectivity to Kafka cluster %w", err) } logger.Warn("failed to test connectivity to Kafka cluster, retrying in 5 seconds", zap.Error(err)) time.Sleep(time.Second * 5) } return client, nil } // Brokers returns list of brokers this service is connecting to func (s *Service) Brokers() []string { return s.cfg.Brokers } // testConnection tries to fetch Broker metadata and prints some information if connection succeeds. An error will be // returned if connecting fails. func (s *Service) testConnection(client *kgo.Client, ctx context.Context) error { connectCtx, cancel := context.WithTimeout(ctx, 15*time.Second) defer cancel() req := kmsg.MetadataRequest{ Topics: nil, } res, err := req.RequestWith(connectCtx, client) if err != nil { return fmt.Errorf("failed to request metadata: %w", err) } // Request versions in order to guess Kafka Cluster version versionsReq := kmsg.NewApiVersionsRequest() versionsRes, err := versionsReq.RequestWith(connectCtx, client) if err != nil { return fmt.Errorf("failed to request api versions: %w", err) } err = kerr.ErrorForCode(versionsRes.ErrorCode) if err != nil { return fmt.Errorf("failed to request api versions. 
Inner kafka error: %w", err) } versions := kversion.FromApiVersionsResponse(versionsRes) s.logger.Debug("successfully connected to kafka cluster", zap.Int("advertised_broker_count", len(res.Brokers)), zap.Int("topic_count", len(res.Topics)), zap.Int32("controller_id", res.ControllerID), zap.String("kafka_version", versions.VersionGuess())) return nil } ================================================ FILE: logging/config.go ================================================ package logging import ( "fmt" "go.uber.org/zap" ) type Config struct { Level string `koanf:"level"` } func (c *Config) SetDefaults() { c.Level = "info" } func (c *Config) Validate() error { level := zap.NewAtomicLevel() err := level.UnmarshalText([]byte(c.Level)) if err != nil { return fmt.Errorf("failed to parse logger level: %w", err) } return nil } ================================================ FILE: logging/logger.go ================================================ package logging import ( "os" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "go.uber.org/zap/zapcore" "go.uber.org/zap" ) // NewLogger creates a preconfigured global logger and configures the global zap logger func NewLogger(cfg Config, metricsNamespace string) *zap.Logger { encoderCfg := zap.NewProductionEncoderConfig() encoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder // Parse log level text to zap.LogLevel. Error check isn't required because the input is already validated. level := zap.NewAtomicLevel() _ = level.UnmarshalText([]byte(cfg.Level)) core := zapcore.NewCore( zapcore.NewJSONEncoder(encoderCfg), zapcore.Lock(os.Stdout), level, ) core = zapcore.RegisterHooks(core, prometheusHook(metricsNamespace)) logger := zap.New(core) zap.ReplaceGlobals(logger) return logger } // prometheusHook is a hook for the zap library which exposes Prometheus counters for various log levels. func prometheusHook(metricsNamespace string) func(zapcore.Entry) error { messageCounterVec := promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: metricsNamespace, Name: "log_messages_total", Help: "Total number of log messages by log level emitted by KMinion.", }, []string{"level"}) // Initialize counters for all supported log levels so that they expose 0 for each level on startup supportedLevels := []zapcore.Level{ zapcore.DebugLevel, zapcore.InfoLevel, zapcore.WarnLevel, zapcore.ErrorLevel, zapcore.FatalLevel, zapcore.PanicLevel, } for _, level := range supportedLevels { messageCounterVec.WithLabelValues(level.String()) } return func(entry zapcore.Entry) error { messageCounterVec.WithLabelValues(entry.Level.String()).Inc() return nil } } ================================================ FILE: main.go ================================================ package main import ( "context" "errors" "fmt" "net" "net/http" "os" "os/signal" "strconv" "github.com/cloudhut/kminion/v2/e2e" "github.com/cloudhut/kminion/v2/kafka" "github.com/cloudhut/kminion/v2/logging" "github.com/cloudhut/kminion/v2/minion" "github.com/cloudhut/kminion/v2/prometheus" promclient "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "go.uber.org/zap" ) var ( // ------------------------------------------------------------------------ // Below parameters are set at build time using ldflags. // ------------------------------------------------------------------------ // version is KMinion's SemVer version (for example: v1.0.0). 
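//
// A build would inject them roughly like this (illustrative values; the real invocation
// lives in this repository's GoReleaser/CI configuration, which is outside this excerpt):
//
//	go build -ldflags "-X main.version=v1.0.0 -X main.builtAt=2021-01-01 -X main.commit=abc1234"
//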
version = "development" // builtAt is a string that represent a human-readable date when the binary was built. builtAt = "N/A" // commit is a string that represents the last git commit for this build. commit = "N/A" ) func main() { startupLogger, err := zap.NewProduction() if err != nil { panic(fmt.Errorf("failed to create startup logger: %w", err)) } cfg, err := newConfig(startupLogger) if err != nil { startupLogger.Fatal("failed to parse config", zap.Error(err)) } logger := logging.NewLogger(cfg.Logger, cfg.Exporter.Namespace).Named("main") if err != nil { startupLogger.Fatal("failed to create new logger", zap.Error(err)) } logger.Info("started kminion", zap.String("version", version), zap.String("built_at", builtAt)) // Setup context that stops when the application receives an interrupt signal ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) defer stop() wrappedRegisterer := promclient.WrapRegistererWithPrefix(cfg.Exporter.Namespace+"_", promclient.DefaultRegisterer) // Create kafka service kafkaSvc := kafka.NewService(cfg.Kafka, logger) // Create minion service // Prometheus exporter only talks to the minion service which // issues all the requests to Kafka and wraps the interface accordingly. minionSvc, err := minion.NewService(cfg.Minion, logger, kafkaSvc, cfg.Exporter.Namespace, ctx) if err != nil { logger.Fatal("failed to setup minion service", zap.Error(err)) } err = minionSvc.Start(ctx) if err != nil { logger.Fatal("failed to start minion service", zap.Error(err)) } // Create end to end testing service if cfg.Minion.EndToEnd.Enabled { e2eService, err := e2e.NewService( ctx, cfg.Minion.EndToEnd, logger, kafkaSvc, wrappedRegisterer, ) if err != nil { logger.Fatal("failed to create end-to-end monitoring service: %w", zap.Error(err)) } if err = e2eService.Start(ctx); err != nil { logger.Fatal("failed to start end-to-end monitoring service", zap.Error(err)) } } // The Prometheus exporter that implements the Prometheus collector interface exporter, err := prometheus.NewExporter(cfg.Exporter, logger, minionSvc) if err != nil { logger.Fatal("failed to setup prometheus exporter", zap.Error(err)) } exporter.InitializeMetrics() promclient.MustRegister(exporter) http.Handle("/metrics", promhttp.InstrumentMetricHandler( promclient.DefaultRegisterer, promhttp.HandlerFor( promclient.DefaultGatherer, promhttp.HandlerOpts{}, ), ), ) http.Handle("/ready", minionSvc.HandleIsReady()) // Start HTTP server address := net.JoinHostPort(cfg.Exporter.Host, strconv.Itoa(cfg.Exporter.Port)) srv := &http.Server{Addr: address} go func() { <-ctx.Done() if err := srv.Shutdown(context.Background()); err != nil { logger.Error("error stopping HTTP server", zap.Error(err)) os.Exit(1) } }() logger.Info("listening on address", zap.String("listen_address", address)) if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { logger.Error("error starting HTTP server", zap.Error(err)) os.Exit(1) } logger.Info("kminion stopped") } ================================================ FILE: minion/client_hooks.go ================================================ package minion import ( "net" "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/twmb/franz-go/pkg/kgo" "go.uber.org/zap" ) // clientHooks implements the various hook interfaces from the franz-go (kafka) library. We can use these hooks to // log additional information, collect Prometheus metrics and similar. 
type clientHooks struct { logger *zap.Logger requestSentCount prometheus.Counter bytesSent prometheus.Counter requestsReceivedCount prometheus.Counter bytesReceived prometheus.Counter } func newMinionClientHooks(logger *zap.Logger, metricsNamespace string) *clientHooks { requestSentCount := promauto.NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: "kafka", Name: "requests_sent_total"}) bytesSent := promauto.NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: "kafka", Name: "sent_bytes", }) requestsReceivedCount := promauto.NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: "kafka", Name: "requests_received_total"}) bytesReceived := promauto.NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: "kafka", Name: "received_bytes", }) return &clientHooks{ logger: logger, requestSentCount: requestSentCount, bytesSent: bytesSent, requestsReceivedCount: requestsReceivedCount, bytesReceived: bytesReceived, } } func (c clientHooks) OnBrokerConnect(meta kgo.BrokerMetadata, dialDur time.Duration, _ net.Conn, err error) { if err != nil { c.logger.Debug("kafka connection failed", zap.String("broker_host", meta.Host), zap.Error(err)) return } c.logger.Debug("kafka connection succeeded", zap.String("host", meta.Host), zap.Duration("dial_duration", dialDur)) } func (c clientHooks) OnBrokerDisconnect(meta kgo.BrokerMetadata, _ net.Conn) { c.logger.Debug("kafka broker disconnected", zap.String("host", meta.Host)) } // OnBrokerRead is passed the broker metadata, the key for the response that // was read, the number of bytes read, how long the Client waited // before reading the response, how long it took to read the response, // and any error. // // The bytes read does not count any tls overhead. // OnRead is called after a read from a broker. func (c clientHooks) OnBrokerRead(_ kgo.BrokerMetadata, _ int16, bytesRead int, _, _ time.Duration, _ error) { c.requestsReceivedCount.Inc() c.bytesReceived.Add(float64(bytesRead)) } // OnBrokerWrite is passed the broker metadata, the key for the request that // was written, the number of bytes written, how long the request // waited before being written, how long it took to write the request, // and any error. // // The bytes written does not count any tls overhead. // OnWrite is called after a write to a broker.
func (c clientHooks) OnBrokerWrite(_ kgo.BrokerMetadata, _ int16, bytesWritten int, _, _ time.Duration, _ error) { c.requestSentCount.Inc() c.bytesSent.Add(float64(bytesWritten)) } ================================================ FILE: minion/config.go ================================================ package minion import ( "fmt" "github.com/cloudhut/kminion/v2/e2e" ) type Config struct { ConsumerGroups ConsumerGroupConfig `koanf:"consumerGroups"` Topics TopicConfig `koanf:"topics"` LogDirs LogDirsConfig `koanf:"logDirs"` EndToEnd e2e.Config `koanf:"endToEnd"` } func (c *Config) SetDefaults() { c.ConsumerGroups.SetDefaults() c.Topics.SetDefaults() c.LogDirs.SetDefaults() c.EndToEnd.SetDefaults() } func (c *Config) Validate() error { err := c.ConsumerGroups.Validate() if err != nil { return fmt.Errorf("failed to validate consumer group config: %w", err) } err = c.Topics.Validate() if err != nil { return fmt.Errorf("failed to validate topic config: %w", err) } err = c.LogDirs.Validate() if err != nil { return fmt.Errorf("failed to validate log dirs config: %w", err) } err = c.EndToEnd.Validate() if err != nil { return fmt.Errorf("failed to validate endToEnd config: %w", err) } return nil } ================================================ FILE: minion/config_consumer_group.go ================================================ package minion import ( "fmt" ) const ( ConsumerGroupScrapeModeOffsetsTopic string = "offsetsTopic" ConsumerGroupScrapeModeAdminAPI string = "adminApi" ConsumerGroupGranularityTopic string = "topic" ConsumerGroupGranularityPartition string = "partition" ) type ConsumerGroupConfig struct { // Enabled specifies whether consumer groups shall be scraped and exported or not. Enabled bool `koanf:"enabled"` // Mode specifies whether we export consumer group offsets using the Admin API or by consuming the internal // __consumer_offsets topic. ScrapeMode string `koanf:"scrapeMode"` // Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and // you aren't interested in per partition lags you could choose "topic" where all partition lags will be summed // and only topic lags will be exported. Granularity string `koanf:"granularity"` // AllowedGroupIDs are regex strings of group ids that shall be exported AllowedGroupIDs []string `koanf:"allowedGroups"` // IgnoredGroupIDs are regex strings of group ids that shall be ignored/skipped when exporting metrics. Ignored groups // take precedence over allowed groups. IgnoredGroupIDs []string `koanf:"ignoredGroups"` } func (c *ConsumerGroupConfig) SetDefaults() { c.Enabled = true c.ScrapeMode = ConsumerGroupScrapeModeAdminAPI c.Granularity = ConsumerGroupGranularityPartition c.AllowedGroupIDs = []string{"/.*/"} } func (c *ConsumerGroupConfig) Validate() error { switch c.ScrapeMode { case ConsumerGroupScrapeModeOffsetsTopic, ConsumerGroupScrapeModeAdminAPI: default: return fmt.Errorf("invalid scrape mode '%v' specified. Valid modes are '%v' or '%v'", c.ScrapeMode, ConsumerGroupScrapeModeOffsetsTopic, ConsumerGroupScrapeModeAdminAPI) } switch c.Granularity { case ConsumerGroupGranularityTopic, ConsumerGroupGranularityPartition: default: return fmt.Errorf("invalid consumer group granularity '%v' specified.
Valid modes are '%v' or '%v'", c.Granularity, ConsumerGroupGranularityTopic, ConsumerGroupGranularityPartition) } // Check if all group strings are valid regex or literals for _, groupID := range c.AllowedGroupIDs { _, err := compileRegex(groupID) if err != nil { return fmt.Errorf("allowed group string '%v' is not valid regex", groupID) } } for _, groupID := range c.IgnoredGroupIDs { _, err := compileRegex(groupID) if err != nil { return fmt.Errorf("ignored group string '%v' is not valid regex", groupID) } } return nil } ================================================ FILE: minion/config_log_dirs.go ================================================ package minion type LogDirsConfig struct { // Enabled specifies whether log dirs shall be scraped and exported or not. This should be disabled for clusters prior // to version 1.0.0 as describing log dirs was not supported back then. Enabled bool `koanf:"enabled"` } // Validate if provided LogDirsConfig is valid. func (c *LogDirsConfig) Validate() error { return nil } // SetDefaults for topic config func (c *LogDirsConfig) SetDefaults() { c.Enabled = true } ================================================ FILE: minion/config_topic_config.go ================================================ package minion import ( "fmt" ) const ( TopicGranularityTopic string = "topic" TopicGranularityPartition string = "partition" ) type TopicConfig struct { // Enabled can be set to false in order to not collect any topic metrics at all. Enabled bool `koanf:"enabled"` // Granularity can be per topic or per partition. If you want to reduce the number of exported metric series and // you aren't interested in per partition metrics you could choose "topic". Granularity string `koanf:"granularity"` // AllowedTopics are regex strings of topic names whose topic metrics that shall be exported. AllowedTopics []string `koanf:"allowedTopics"` // IgnoredTopics are regex strings of topic names that shall be ignored/skipped when exporting metrics. Ignored topics // take precedence over allowed topics. IgnoredTopics []string `koanf:"ignoredTopics"` // InfoMetric configures how the kafka_topic_info metric is populated InfoMetric InfoMetricConfig `koanf:"infoMetric"` } type InfoMetricConfig struct { // ConfigKeys configures optional topic configuration keys that should be exported // as prometheus metric labels. // By default only "cleanup.policy" is exported ConfigKeys []string `koanf:"configKeys"` } // Validate if provided TopicConfig is valid. 
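// Both the topic and the consumer group filters use the same allow/ignore convention: each entry is
// compiled by compileRegex (defined in minion/utils.go, outside this excerpt), and ignored entries
// take precedence over allowed ones. A minimal sketch of that precedence rule, assuming the
// expressions have already been compiled into *regexp.Regexp slices:
//
//	func isTopicAllowed(topic string, allowed, ignored []*regexp.Regexp) bool {
//		for _, re := range ignored {
//			if re.MatchString(topic) {
//				return false // ignore list wins over allow list
//			}
//		}
//		for _, re := range allowed {
//			if re.MatchString(topic) {
//				return true
//			}
//		}
//		return false
//	}
//
// Validate if provided TopicConfig is valid.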
func (c *TopicConfig) Validate() error { switch c.Granularity { case TopicGranularityPartition, TopicGranularityTopic: default: return fmt.Errorf("given granularity '%v' is invalid", c.Granularity) } // Check whether each provided string is valid regex for _, topic := range c.AllowedTopics { _, err := compileRegex(topic) if err != nil { return fmt.Errorf("allowed topic string '%v' is not valid regex", topic) } } for _, topic := range c.IgnoredTopics { _, err := compileRegex(topic) if err != nil { return fmt.Errorf("ignored topic string '%v' is not valid regex", topic) } } return nil } // SetDefaults for topic config func (c *TopicConfig) SetDefaults() { c.Enabled = true c.Granularity = TopicGranularityPartition c.AllowedTopics = []string{"/.*/"} c.InfoMetric = InfoMetricConfig{ConfigKeys: []string{"cleanup.policy"}} } ================================================ FILE: minion/consumer_group_offsets.go ================================================ package minion import ( "context" "fmt" "sync" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" "golang.org/x/sync/errgroup" ) // ListAllConsumerGroupOffsetsInternal returns a map from the in memory storage. The map value is the offset commit // value and is grouped by group id, topic, partition id as keys of the nested maps. func (s *Service) ListAllConsumerGroupOffsetsInternal() map[string]map[string]map[int32]OffsetCommit { return s.storage.getGroupOffsets() } // ListAllConsumerGroupOffsetsAdminAPI returns all consumer group offsets using Kafka's Admin API. func (s *Service) ListAllConsumerGroupOffsetsAdminAPI(ctx context.Context) (map[string]*kmsg.OffsetFetchResponse, error) { groupsRes, err := s.listConsumerGroupsCached(ctx) if err != nil { return nil, fmt.Errorf("failed to list consumer groups: %w", err) } groupIDs := make([]string, len(groupsRes.Groups)) for i, group := range groupsRes.Groups { groupIDs[i] = group.Group } return s.listConsumerGroupOffsetsBulk(ctx, groupIDs) } // listConsumerGroupOffsetsBulk returns a map which has the Consumer group name as key func (s *Service) listConsumerGroupOffsetsBulk(ctx context.Context, groups []string) (map[string]*kmsg.OffsetFetchResponse, error) { eg, _ := errgroup.WithContext(ctx) mutex := sync.Mutex{} res := make(map[string]*kmsg.OffsetFetchResponse) f := func(group string) func() error { return func() error { offsets, err := s.listConsumerGroupOffsets(ctx, group) if err != nil { s.logger.Warn("failed to fetch consumer group offsets, inner kafka error", zap.String("consumer_group", group), zap.Error(err)) return nil } mutex.Lock() res[group] = offsets mutex.Unlock() return nil } } for _, group := range groups { eg.Go(f(group)) } if err := eg.Wait(); err != nil { return nil, err } return res, nil } // listConsumerGroupOffsets returns the committed group offsets for a single group func (s *Service) listConsumerGroupOffsets(ctx context.Context, group string) (*kmsg.OffsetFetchResponse, error) { req := kmsg.NewOffsetFetchRequest() req.Group = group req.Topics = nil res, err := req.RequestWith(ctx, s.client) if err != nil { return nil, fmt.Errorf("failed to request group offsets for group '%v': %w", group, err) } return res, nil } ================================================ FILE: minion/describe_consumer_groups.go ================================================ package minion import ( "context" "fmt" "time" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) type DescribeConsumerGroupsResponse struct {
BrokerMetadata kgo.BrokerMetadata Groups *kmsg.DescribeGroupsResponse } func (s *Service) listConsumerGroupsCached(ctx context.Context) (*kmsg.ListGroupsResponse, error) { reqId := ctx.Value("requestId").(string) key := "list-consumer-groups-" + reqId if cachedRes, exists := s.getCachedItem(key); exists { return cachedRes.(*kmsg.ListGroupsResponse), nil } res, err, _ := s.requestGroup.Do(key, func() (interface{}, error) { res, err := s.listConsumerGroups(ctx) if err != nil { return nil, err } s.setCachedItem(key, res, 120*time.Second) return res, nil }) if err != nil { return nil, err } return res.(*kmsg.ListGroupsResponse), nil } func (s *Service) listConsumerGroups(ctx context.Context) (*kmsg.ListGroupsResponse, error) { listReq := kmsg.NewListGroupsRequest() res, err := listReq.RequestWith(ctx, s.client) if err != nil { return nil, fmt.Errorf("failed to list consumer groups: %w", err) } err = kerr.ErrorForCode(res.ErrorCode) if err != nil { return nil, fmt.Errorf("failed to list consumer groups. inner kafka error: %w", err) } return res, nil } func (s *Service) DescribeConsumerGroups(ctx context.Context) ([]DescribeConsumerGroupsResponse, error) { listRes, err := s.listConsumerGroupsCached(ctx) if err != nil { return nil, err } groupIDs := make([]string, len(listRes.Groups)) for i, group := range listRes.Groups { groupIDs[i] = group.Group } describeReq := kmsg.NewDescribeGroupsRequest() describeReq.Groups = groupIDs describeReq.IncludeAuthorizedOperations = false shardedResp := s.client.RequestSharded(ctx, &describeReq) describedGroups := make([]DescribeConsumerGroupsResponse, 0) for _, kresp := range shardedResp { if kresp.Err != nil { s.logger.Warn("broker failed to respond to the describe groups request", zap.Int32("broker_id", kresp.Meta.NodeID), zap.Error(kresp.Err)) continue } res := kresp.Resp.(*kmsg.DescribeGroupsResponse) describedGroups = append(describedGroups, DescribeConsumerGroupsResponse{ BrokerMetadata: kresp.Meta, Groups: res, }) } return describedGroups, nil } ================================================ FILE: minion/describe_topic_config.go ================================================ package minion import ( "context" "fmt" "github.com/pkg/errors" "github.com/twmb/franz-go/pkg/kmsg" ) func (s *Service) GetTopicConfigs(ctx context.Context) (*kmsg.DescribeConfigsResponse, error) { metadata, err := s.GetMetadataCached(ctx) if err != nil { return nil, errors.Wrap(err, "failed to get metadata") } req := kmsg.NewDescribeConfigsRequest() for _, topic := range metadata.Topics { resourceReq := kmsg.NewDescribeConfigsRequestResource() resourceReq.ResourceType = kmsg.ConfigResourceTypeTopic resourceReq.ResourceName = *topic.Topic req.Resources = append(req.Resources, resourceReq) } res, err := req.RequestWith(ctx, s.client) if err != nil { return nil, fmt.Errorf("failed to describe topic configs: %w", err) } return res, nil } ================================================ FILE: minion/list_offsets.go ================================================ package minion import ( "context" "errors" "fmt" "time" "github.com/twmb/franz-go/pkg/kadm" "go.uber.org/zap" ) func (s *Service) ListEndOffsetsCached(ctx context.Context) (kadm.ListedOffsets, error) { return s.listOffsetsCached(ctx, "end") } func (s *Service) ListStartOffsetsCached(ctx context.Context) (kadm.ListedOffsets, error) { return s.listOffsetsCached(ctx, "start") } func (s *Service) listOffsetsCached(ctx context.Context, offsetType string) (kadm.ListedOffsets, error) { reqId := ctx.Value("requestId").(string) key :=
fmt.Sprintf("partition-%s-offsets-%s", offsetType, reqId) if cachedRes, exists := s.getCachedItem(key); exists { return cachedRes.(kadm.ListedOffsets), nil } var listFunc func(context.Context) (kadm.ListedOffsets, error) switch offsetType { case "end": listFunc = s.ListEndOffsets case "start": listFunc = s.ListStartOffsets default: return nil, fmt.Errorf("invalid offset type: %s", offsetType) } res, err, _ := s.requestGroup.Do(key, func() (interface{}, error) { offsets, err := listFunc(ctx) if err != nil { return nil, err } s.setCachedItem(key, offsets, 120*time.Second) return offsets, nil }) if err != nil { return nil, err } return res.(kadm.ListedOffsets), nil } // ListEndOffsets fetches the high water mark for all topic partitions. func (s *Service) ListEndOffsets(ctx context.Context) (kadm.ListedOffsets, error) { return s.listOffsetsInternal(ctx, s.admClient.ListEndOffsets, "end") } // ListStartOffsets fetches the low water mark for all topic partitions. func (s *Service) ListStartOffsets(ctx context.Context) (kadm.ListedOffsets, error) { return s.listOffsetsInternal(ctx, s.admClient.ListStartOffsets, "start") } type listOffsetsFunc func(context.Context, ...string) (kadm.ListedOffsets, error) func (s *Service) listOffsetsInternal(ctx context.Context, listFunc listOffsetsFunc, offsetType string) (kadm.ListedOffsets, error) { listedOffsets, err := listFunc(ctx) if err != nil { var se *kadm.ShardErrors if !errors.As(err, &se) { return nil, fmt.Errorf("failed to list %s offsets: %w", offsetType, err) } if se.AllFailed { return nil, fmt.Errorf("failed to list %s offsets, all shard responses failed: %w", offsetType, err) } s.logger.Info(fmt.Sprintf("failed to list %s offset from some shards", offsetType), zap.Int("failed_shards", len(se.Errs))) for _, shardErr := range se.Errs { s.logger.Warn(fmt.Sprintf("shard error for listing %s offsets", offsetType), zap.Int32("broker_id", shardErr.Broker.NodeID), zap.Error(shardErr.Err)) } } // Log inner errors before returning them. We do that inside of this function to avoid duplicate logging as the response // are cached for each scrape anyways. // // Create two metrics to aggregate error logs in few messages. Logging one message per occured partition error // is too much. Typical errors are LEADER_NOT_AVAILABLE etc. 
errorCountByErrCode := make(map[error]int) errorCountByTopic := make(map[string]int) // Iterate on all partitions listedOffsets.Each(func(offset kadm.ListedOffset) { if offset.Err != nil { errorCountByTopic[offset.Topic]++ errorCountByErrCode[offset.Err]++ } }) // Print log line for each error type for err, count := range errorCountByErrCode { s.logger.Warn(fmt.Sprintf("failed to list some partitions %s watermarks", offsetType), zap.Error(err), zap.Int("error_count", count)) } if len(errorCountByTopic) > 0 { s.logger.Warn(fmt.Sprintf("some topics had one or more partitions whose %s watermarks could not be fetched from Kafka", offsetType), zap.Int("topics_with_errors", len(errorCountByTopic))) } return listedOffsets, nil } ================================================ FILE: minion/log_dirs.go ================================================ package minion import ( "context" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" ) type LogDirResponseShard struct { Err error Broker kgo.BrokerMetadata LogDirs *kmsg.DescribeLogDirsResponse } func (s *Service) DescribeLogDirs(ctx context.Context) []LogDirResponseShard { req := kmsg.NewDescribeLogDirsRequest() req.Topics = nil // Describe all topics responses := s.client.RequestSharded(ctx, &req) res := make([]LogDirResponseShard, len(responses)) for i, responseShard := range responses { logDirs, ok := responseShard.Resp.(*kmsg.DescribeLogDirsResponse) if !ok { logDirs = &kmsg.DescribeLogDirsResponse{} } res[i] = LogDirResponseShard{ Err: responseShard.Err, Broker: responseShard.Meta, LogDirs: logDirs, } } return res } ================================================ FILE: minion/metadata.go ================================================ package minion import ( "context" "fmt" "time" "github.com/twmb/franz-go/pkg/kmsg" ) func (s *Service) GetMetadataCached(ctx context.Context) (*kmsg.MetadataResponse, error) { reqId := ctx.Value("requestId").(string) key := "metadata-" + reqId if cachedRes, exists := s.getCachedItem(key); exists { return cachedRes.(*kmsg.MetadataResponse), nil } res, err, _ := s.requestGroup.Do(key, func() (interface{}, error) { metadata, err := s.GetMetadata(ctx) if err != nil { return nil, err } s.setCachedItem(key, metadata, 120*time.Second) return metadata, nil }) if err != nil { return nil, err } return res.(*kmsg.MetadataResponse), nil } func (s *Service) GetMetadata(ctx context.Context) (*kmsg.MetadataResponse, error) { req := kmsg.NewMetadataRequest() req.Topics = nil res, err := req.RequestWith(ctx, s.client) if err != nil { return nil, fmt.Errorf("failed to request metadata: %w", err) } return res, nil } ================================================ FILE: minion/offset_consumer.go ================================================ package minion import ( "context" "fmt" "time" "github.com/twmb/franz-go/pkg/kbin" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/zap" ) // startConsumingOffsets consumes the __consumer_offsets topic and forwards the kafka messages to their respective // methods where they'll be decoded and further processed. 
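//
// Note that this consumer only runs when consumerGroups.scrapeMode is set to "offsetsTopic"
// (see Service.Start in minion/service.go); in the default "adminApi" mode the group offsets
// are instead fetched on demand via the Admin API.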
func (s *Service) startConsumingOffsets(ctx context.Context) { client := s.client s.logger.Info("starting to consume messages from offsets topic") go s.checkIfConsumerLagIsCaughtUp(ctx) for { select { case <-ctx.Done(): return default: fetches := client.PollFetches(ctx) errors := fetches.Errors() for _, err := range errors { // Log all errors and continue afterwards as we might get errors and still have some fetch results s.logger.Error("failed to fetch records from kafka", zap.String("topic", err.Topic), zap.Int32("partition", err.Partition), zap.Error(err.Err)) } iter := fetches.RecordIter() for !iter.Done() { record := iter.Next() s.storage.markRecordConsumed(record) err := s.decodeOffsetRecord(record) if err != nil { s.logger.Warn("failed to decode offset record", zap.Error(err)) } } } } } // checkIfConsumerLagIsCaughtUp fetches the newest partition offsets for all partitions in the __consumer_offsets // topic and compares these against the last consumed messages from our offset consumer. If the consumed offsets are // higher than the partition offsets this means we caught up the initial lag and can mark our storage as ready. A ready // store will start to expose consumer group offsets. func (s *Service) checkIfConsumerLagIsCaughtUp(ctx context.Context) { for { time.Sleep(12 * time.Second) s.logger.Debug("checking if lag in consumer offsets topic is caught up") // 1. Get topic high watermarks for the __consumer_offsets topic metadataReq := kmsg.NewMetadataRequest() metadataReqTopic := kmsg.NewMetadataRequestTopic() topicName := "__consumer_offsets" metadataReqTopic.Topic = &topicName metadataReq.Topics = []kmsg.MetadataRequestTopic{metadataReqTopic} res, err := metadataReq.RequestWith(ctx, s.client) if err != nil { s.logger.Warn("failed to check if consumer lag on offsets topic is caught up because metadata request failed", zap.Error(err)) continue } // 2. Request high watermarks for consumer offset partitions topicReqs := make([]kmsg.ListOffsetsRequestTopic, len(res.Topics)) for i, topic := range res.Topics { req := kmsg.NewListOffsetsRequestTopic() req.Topic = *topic.Topic partitionReqs := make([]kmsg.ListOffsetsRequestTopicPartition, len(topic.Partitions)) for j, partition := range topic.Partitions { partitionReqs[j] = kmsg.NewListOffsetsRequestTopicPartition() partitionReqs[j].Partition = partition.Partition partitionReqs[j].Timestamp = -1 // Newest } req.Partitions = partitionReqs topicReqs[i] = req } offsetReq := kmsg.NewListOffsetsRequest() offsetReq.Topics = topicReqs highMarksRes, err := offsetReq.RequestWith(ctx, s.client) if err != nil { s.logger.Warn("failed to check if consumer lag on offsets topic is caught up because high watermark request failed", zap.Error(err)) continue } if len(highMarksRes.Topics) != 1 { s.logger.Error("expected exactly one topic response for high water mark request") continue } // 3. Check if high watermarks have been consumed. To avoid a race condition here we will wait some time before // comparing, so that the consumer has enough time to catch up to the new high watermarks we just fetched.
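// For reference, the loop below derives the offset of the last record in a partition as
// partition.Offset - 1 and computes the partition's lag as that value minus the highest
// offset our own consumer has recorded for it, floored at zero.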
time.Sleep(3 * time.Second) consumedOffsets := s.storage.getConsumedOffsets() topicRes := highMarksRes.Topics[0] isReady := true type laggingPartition struct { Name string Id int32 Lag int64 } var partitionsLagging []laggingPartition totalLag := int64(0) for _, partition := range topicRes.Partitions { err := kerr.ErrorForCode(partition.ErrorCode) if err != nil { s.logger.Warn("failed to check if consumer lag on offsets topic is caught up because high "+ "watermark request failed, with an inner error", zap.Error(err)) } highWaterMark := partition.Offset - 1 consumedOffset := consumedOffsets[partition.Partition] partitionLag := highWaterMark - consumedOffset if partitionLag < 0 { partitionLag = 0 } if partitionLag > 0 { partitionsLagging = append(partitionsLagging, laggingPartition{ Name: topicRes.Topic, Id: partition.Partition, Lag: partitionLag, }) totalLag += partitionLag s.logger.Debug("consumer_offsets topic lag has not been caught up yet", zap.Int32("partition_id", partition.Partition), zap.Int64("high_water_mark", highWaterMark), zap.Int64("consumed_offset", consumedOffset), zap.Int64("partition_lag", partitionLag)) isReady = false continue } } if isReady { s.logger.Info("successfully consumed all consumer offsets. consumer group lags will be exported from now on") s.storage.setReadyState(true) return } else { s.logger.Info("catching up the message lag on consumer offsets", zap.Int("lagging_partitions_count", len(partitionsLagging)), zap.Any("lagging_partitions", partitionsLagging), zap.Int64("total_lag", totalLag)) } } } // decodeOffsetRecord decodes all messages in the consumer offsets topic by routing records to the correct decoding // method. func (s *Service) decodeOffsetRecord(record *kgo.Record) error { if len(record.Key) < 2 { return fmt.Errorf("offset commit key is supposed to be at least 2 bytes long") } messageVer := (&kbin.Reader{Src: record.Key}).Int16() switch messageVer { case 0, 1: err := s.decodeOffsetCommit(record) if err != nil { return err } case 2: err := s.decodeOffsetMetadata(record) if err != nil { return err } } return nil } // decodeOffsetMetadata decodes group metadata records, which include the following information: // - group // - protocolType (connect/consumer/...) // - generation // - protocol // - currentStateTimestamp // - groupMembers (member metadata such as: memberId, groupInstanceId, clientId, clientHost, rebalanceTimeout, ...)
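//
// (The routing in decodeOffsetRecord above relies on the first two bytes of every record key in
// __consumer_offsets being a big-endian int16 schema version: versions 0 and 1 identify offset
// commit records, version 2 identifies group metadata records.)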
func (s *Service) decodeOffsetMetadata(record *kgo.Record) error { childLogger := s.logger.With( zap.String("topic", record.Topic), zap.Int32("partition_id", record.Partition), zap.Int64("offset", record.Offset)) metadataKey := kmsg.NewGroupMetadataKey() err := metadataKey.ReadFrom(record.Key) if err != nil { childLogger.Warn("failed to decode offset metadata key", zap.Error(err)) return fmt.Errorf("failed to decode offset metadata key: %w", err) } if record.Value == nil { return nil } metadataValue := kmsg.NewGroupMetadataValue() err = metadataValue.ReadFrom(record.Value) if err != nil { childLogger.Warn("failed to decode offset metadata value", zap.Error(err)) return fmt.Errorf("failed to decode offset metadata value: %w", err) } return nil } // decodeOffsetCommit decodes group offset commits, which include the following information: // - group, topic, partition // - offset // - leaderEpoch // - metadata (user specified string for each offset commit) // - commitTimestamp // - expireTimestamp (only version 1 offset commits / deprecated) func (s *Service) decodeOffsetCommit(record *kgo.Record) error { childLogger := s.logger.With( zap.String("topic", record.Topic), zap.Int32("partition_id", record.Partition), zap.Int64("offset", record.Offset)) offsetCommitKey := kmsg.NewOffsetCommitKey() err := offsetCommitKey.ReadFrom(record.Key) if err != nil { childLogger.Warn("failed to decode offset commit key", zap.Error(err)) return fmt.Errorf("failed to decode offset commit key: %w", err) } if record.Value == nil { // Tombstone - The group offset is expired or no longer valid (e.g. because the topic has been deleted) s.storage.deleteOffsetCommit(offsetCommitKey) return nil } offsetCommitValue := kmsg.NewOffsetCommitValue() err = offsetCommitValue.ReadFrom(record.Value) if err != nil { childLogger.Warn("failed to decode offset commit value", zap.Error(err)) return fmt.Errorf("failed to decode offset commit value: %w", err) } s.storage.addOffsetCommit(offsetCommitKey, offsetCommitValue) return nil } func (s *Service) GetNumberOfOffsetRecordsConsumed() float64 { return s.storage.getNumberOfConsumedRecords() } ================================================ FILE: minion/service.go ================================================ package minion import ( "context" "encoding/json" "fmt" "net/http" "regexp" "strings" "sync" "time" "github.com/twmb/franz-go/pkg/kadm" "github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" "github.com/twmb/franz-go/pkg/kversion" "go.uber.org/zap" "golang.org/x/sync/singleflight" "github.com/cloudhut/kminion/v2/kafka" ) type Service struct { Cfg Config logger *zap.Logger // requestGroup is used to deduplicate multiple concurrent requests to kafka requestGroup *singleflight.Group cache map[string]interface{} cacheLock sync.RWMutex AllowedGroupIDsExpr []*regexp.Regexp IgnoredGroupIDsExpr []*regexp.Regexp AllowedTopicsExpr []*regexp.Regexp IgnoredTopicsExpr []*regexp.Regexp client *kgo.Client admClient *kadm.Client storage *Storage } func NewService(cfg Config, logger *zap.Logger, kafkaSvc *kafka.Service, metricsNamespace string, ctx context.Context) (*Service, error) { storage, err := newStorage(logger) if err != nil { return nil, fmt.Errorf("failed to create storage: %w", err) } // Kafka client minionHooks := newMinionClientHooks(logger.Named("kafka_hooks"), metricsNamespace) kgoOpts := []kgo.Opt{ kgo.WithHooks(minionHooks), } if cfg.ConsumerGroups.Enabled && cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeOffsetsTopic { kgoOpts = append(kgoOpts,

// decodeOffsetMetadata decodes the group metadata, which includes the following information:
// - group
// - protocolType (connect/consumer/...)
// - generation
// - protocol
// - currentStateTimestamp
// - groupMembers (member metadata such as: memberId, groupInstanceId, clientId, clientHost, rebalanceTimeout, ...)
func (s *Service) decodeOffsetMetadata(record *kgo.Record) error {
	childLogger := s.logger.With(
		zap.String("topic", record.Topic),
		zap.Int32("partition_id", record.Partition),
		zap.Int64("offset", record.Offset))

	metadataKey := kmsg.NewGroupMetadataKey()
	err := metadataKey.ReadFrom(record.Key)
	if err != nil {
		childLogger.Warn("failed to decode offset metadata key", zap.Error(err))
		return fmt.Errorf("failed to decode offset metadata key: %w", err)
	}

	if record.Value == nil {
		return nil
	}
	metadataValue := kmsg.NewGroupMetadataValue()
	err = metadataValue.ReadFrom(record.Value)
	if err != nil {
		childLogger.Warn("failed to decode offset metadata value", zap.Error(err))
		return fmt.Errorf("failed to decode offset metadata value: %w", err)
	}

	return nil
}

// decodeOffsetCommit decodes group offsets, which include the following information:
// - group, topic, partition
// - offset
// - leaderEpoch
// - metadata (user specified string for each offset commit)
// - commitTimestamp
// - expireTimestamp (only version 1 offset commits / deprecated)
func (s *Service) decodeOffsetCommit(record *kgo.Record) error {
	childLogger := s.logger.With(
		zap.String("topic", record.Topic),
		zap.Int32("partition_id", record.Partition),
		zap.Int64("offset", record.Offset))
	offsetCommitKey := kmsg.NewOffsetCommitKey()
	err := offsetCommitKey.ReadFrom(record.Key)
	if err != nil {
		childLogger.Warn("failed to decode offset commit key", zap.Error(err))
		return fmt.Errorf("failed to decode offset commit key: %w", err)
	}

	if record.Value == nil {
		// Tombstone - the group offset is expired or no longer valid (e.g. because the topic has been deleted)
		s.storage.deleteOffsetCommit(offsetCommitKey)
		return nil
	}

	offsetCommitValue := kmsg.NewOffsetCommitValue()
	err = offsetCommitValue.ReadFrom(record.Value)
	if err != nil {
		childLogger.Warn("failed to decode offset commit value", zap.Error(err))
		return fmt.Errorf("failed to decode offset commit value: %w", err)
	}
	s.storage.addOffsetCommit(offsetCommitKey, offsetCommitValue)

	return nil
}
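
// For illustration (hypothetical names, not from the source): when the offsets of group
// "billing" on topic "orders", partition 3 expire, the broker writes a record whose key
// still decodes to {Group: "billing", Topic: "orders", Partition: 3} but whose value is
// nil. The nil-value branch above treats this as a log compaction tombstone and removes
// the corresponding entry from storage instead of decoding a value.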

func (s *Service) GetNumberOfOffsetRecordsConsumed() float64 {
	return s.storage.getNumberOfConsumedRecords()
}

================================================
FILE: minion/service.go
================================================
package minion

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/twmb/franz-go/pkg/kadm"
	"github.com/twmb/franz-go/pkg/kgo"
	"github.com/twmb/franz-go/pkg/kmsg"
	"github.com/twmb/franz-go/pkg/kversion"
	"go.uber.org/zap"
	"golang.org/x/sync/singleflight"

	"github.com/cloudhut/kminion/v2/kafka"
)

type Service struct {
	Cfg    Config
	logger *zap.Logger

	// requestGroup is used to deduplicate multiple concurrent requests to kafka
	requestGroup *singleflight.Group
	cache        map[string]interface{}
	cacheLock    sync.RWMutex

	AllowedGroupIDsExpr []*regexp.Regexp
	IgnoredGroupIDsExpr []*regexp.Regexp
	AllowedTopicsExpr   []*regexp.Regexp
	IgnoredTopicsExpr   []*regexp.Regexp

	client    *kgo.Client
	admClient *kadm.Client

	storage *Storage
}

func NewService(cfg Config, logger *zap.Logger, kafkaSvc *kafka.Service, metricsNamespace string, ctx context.Context) (*Service, error) {
	storage, err := newStorage(logger)
	if err != nil {
		return nil, fmt.Errorf("failed to create storage: %w", err)
	}

	// Kafka client
	minionHooks := newMinionClientHooks(logger.Named("kafka_hooks"), metricsNamespace)
	kgoOpts := []kgo.Opt{
		kgo.WithHooks(minionHooks),
	}
	if cfg.ConsumerGroups.Enabled && cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeOffsetsTopic {
		kgoOpts = append(kgoOpts,
			kgo.ConsumeResetOffset(kgo.NewOffset().AtStart()),
			kgo.ConsumeTopics("__consumer_offsets"))
	}

	logger.Info("connecting to Kafka seed brokers, trying to fetch cluster metadata",
		zap.String("seed_brokers", strings.Join(kafkaSvc.Brokers(), ",")))
	client, err := kafkaSvc.CreateAndTestClient(ctx, logger, kgoOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to create kafka client: %w", err)
	}
	logger.Info("successfully connected to kafka cluster")

	// Compile regexes. We can ignore the errors here because the expressions have already been validated.
	allowedGroupIDsExpr, _ := compileRegexes(cfg.ConsumerGroups.AllowedGroupIDs)
	ignoredGroupIDsExpr, _ := compileRegexes(cfg.ConsumerGroups.IgnoredGroupIDs)
	allowedTopicsExpr, _ := compileRegexes(cfg.Topics.AllowedTopics)
	ignoredTopicsExpr, _ := compileRegexes(cfg.Topics.IgnoredTopics)

	service := &Service{
		Cfg:    cfg,
		logger: logger.Named("minion_service"),

		requestGroup: &singleflight.Group{},
		cache:        make(map[string]interface{}),
		cacheLock:    sync.RWMutex{},

		AllowedGroupIDsExpr: allowedGroupIDsExpr,
		IgnoredGroupIDsExpr: ignoredGroupIDsExpr,
		AllowedTopicsExpr:   allowedTopicsExpr,
		IgnoredTopicsExpr:   ignoredTopicsExpr,

		client:    client,
		admClient: kadm.NewClient(client),

		storage: storage,
	}

	return service, nil
}

func (s *Service) Start(ctx context.Context) error {
	err := s.ensureCompatibility(ctx)
	if err != nil {
		return fmt.Errorf("failed to check feature compatibility against Kafka: %w", err)
	}

	if s.Cfg.ConsumerGroups.Enabled && s.Cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeOffsetsTopic {
		go s.startConsumingOffsets(ctx)
	}

	return nil
}

func (s *Service) isReady() bool {
	if s.Cfg.ConsumerGroups.ScrapeMode == ConsumerGroupScrapeModeAdminAPI {
		return true
	}

	return s.storage.isReady()
}

func (s *Service) HandleIsReady() http.HandlerFunc {
	type response struct {
		StatusCode int `json:"statusCode"`
	}

	return func(w http.ResponseWriter, r *http.Request) {
		status := http.StatusOK
		if !s.isReady() {
			status = http.StatusServiceUnavailable
		}
		res := response{StatusCode: status}
		resJson, _ := json.Marshal(res)
		w.WriteHeader(status)
		w.Write(resJson)
	}
}

// ensureCompatibility checks whether the options as configured are available in the connected cluster. For example
// we will check if the target Kafka's API versions support the LogDirs request. If that's not the case we will
// disable the option and print a warning message.
func (s *Service) ensureCompatibility(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, 15*time.Second)
	defer cancel()
	versionsRes, err := s.GetAPIVersions(ctx)
	if err != nil {
		return fmt.Errorf("kafka api versions couldn't be fetched: %w", err)
	}
	versions := kversion.FromApiVersionsResponse(versionsRes)

	// Check Describe Log Dirs
	if s.Cfg.LogDirs.Enabled {
		k := kmsg.NewDescribeLogDirsRequest()
		isSupported := versions.HasKey(k.Key())
		if !isSupported {
			s.logger.Warn("describing log dirs is enabled, but it is not supported because your Kafka cluster " +
				"version is too old. The feature will be disabled.")
			s.Cfg.LogDirs.Enabled = false
		}
	}

	return nil
}

func (s *Service) getCachedItem(key string) (interface{}, bool) {
	s.cacheLock.RLock()
	defer s.cacheLock.RUnlock()

	val, exists := s.cache[key]
	return val, exists
}

// setCachedItem stores the given value and schedules its expiry after the given timeout.
func (s *Service) setCachedItem(key string, val interface{}, timeout time.Duration) {
	s.cacheLock.Lock()
	defer s.cacheLock.Unlock()

	go func() {
		time.Sleep(timeout)
		s.deleteCachedItem(key)
	}()

	s.cache[key] = val
}

func (s *Service) deleteCachedItem(key string) {
	s.cacheLock.Lock()
	defer s.cacheLock.Unlock()

	delete(s.cache, key)
}
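
// For illustration only (a minimal sketch, not part of the original source): the read
// path of this service typically combines getCachedItem/setCachedItem with the
// singleflight requestGroup, so that concurrent Prometheus scrapes trigger at most one
// Kafka request per cache key. fetchMetadata is a hypothetical helper standing in for
// the real request methods elsewhere in this package.
//
//	func (s *Service) getMetadataCachedSketch(ctx context.Context) (interface{}, error) {
//		const key = "metadata"
//		if v, ok := s.getCachedItem(key); ok {
//			return v, nil
//		}
//		v, err, _ := s.requestGroup.Do(key, func() (interface{}, error) {
//			res, err := s.fetchMetadata(ctx) // hypothetical Kafka request
//			if err != nil {
//				return nil, err
//			}
//			s.setCachedItem(key, res, 5*time.Second)
//			return res, nil
//		})
//		return v, err
//	}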

================================================
FILE: minion/storage.go
================================================
package minion

import (
	"fmt"
	"strconv"
	"time"

	cmap "github.com/orcaman/concurrent-map"
	"github.com/twmb/franz-go/pkg/kgo"
	"github.com/twmb/franz-go/pkg/kmsg"
	"go.uber.org/atomic"
	"go.uber.org/zap"
)

// Storage stores the current state of all consumer group information that has been consumed using the offset consumer.
type Storage struct {
	logger *zap.Logger

	// offsetCommits is a map of all consumer offsets.
	// A unique key in the format "group:topic:partition" is used as map key.
	// Value is of type OffsetCommit
	offsetCommits cmap.ConcurrentMap

	// progressTracker is a map that tracks what offsets in each partition have already been consumed
	progressTracker cmap.ConcurrentMap
	isReadyBool     *atomic.Bool

	// Number of consumed records (used for a Prometheus metric)
	consumedRecords *atomic.Float64
}

// OffsetCommit is used as value for the offsetCommits map
type OffsetCommit struct {
	Key   kmsg.OffsetCommitKey
	Value kmsg.OffsetCommitValue

	// CommitCount is the number of offset commits for this group-topic-partition combination
	CommitCount int

	// ExpireTimestamp is a timestamp that indicates when this offset commit will expire on the Kafka cluster
	ExpireTimestamp time.Time
}

func newStorage(logger *zap.Logger) (*Storage, error) {
	return &Storage{
		logger:          logger.Named("storage"),
		offsetCommits:   cmap.New(),
		progressTracker: cmap.New(),
		isReadyBool:     atomic.NewBool(false),
		consumedRecords: atomic.NewFloat64(0),
	}, nil
}

func (s *Storage) isReady() bool {
	return s.isReadyBool.Load()
}

func (s *Storage) setReadyState(isReady bool) {
	s.isReadyBool.Store(isReady)
}

// markRecordConsumed stores the latest consumed offset for each partition. This is necessary in order to figure out
// whether we have caught up on the message lag when starting KMinion, as we start consuming from the very oldest
// offset commit.
func (s *Storage) markRecordConsumed(rec *kgo.Record) {
	key := fmt.Sprintf("%v", rec.Partition)
	s.progressTracker.Set(key, rec.Offset)
	s.consumedRecords.Add(1)
}

// addOffsetCommit stores an offset commit under a flat "unique key". For performance reasons we do this because
// writes happen way more frequently than reads (Prometheus scraping the endpoint). Hence we group everything by group
// or topic on the read path as needed, instead of writing it into nested maps like a map[GroupID]map[Topic]map[Partition].
func (s *Storage) addOffsetCommit(key kmsg.OffsetCommitKey, value kmsg.OffsetCommitValue) {
	uniqueKey := encodeOffsetCommitKey(key)

	commitCount := 0
	commitInterface, exists := s.offsetCommits.Get(uniqueKey)
	if exists {
		offsetCommit := commitInterface.(OffsetCommit)
		commitCount = offsetCommit.CommitCount
	}

	timeDay := 24 * time.Hour
	commit := OffsetCommit{
		Key:         key,
		Value:       value,
		CommitCount: commitCount + 1,
		// The 7 day expiry mirrors Kafka's default offset retention (offsets.retention.minutes).
		ExpireTimestamp: time.Unix(0, value.CommitTimestamp*int64(time.Millisecond)).Add(7 * timeDay),
	}
	s.offsetCommits.Set(uniqueKey, commit)
}
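
// For illustration (hypothetical names, not from the source): a commit by group
// "billing" for topic "orders", partition 3 is stored under the flat key
// "billing:orders:3" (see encodeOffsetCommitKey below). The key is only ever used as a
// map key and never parsed back, and Kafka topic names cannot contain ':', so the
// encoding stays unambiguous; getGroupOffsets rebuilds the nested
// group -> topic -> partition view on the read path.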

func (s *Storage) getConsumedOffsets() map[int32]int64 {
	offsetsByPartition := make(map[int32]int64)
	offsets := s.progressTracker.Items()
	for partitionIDStr, offset := range offsets {
		val := offset.(int64)
		partitionID, _ := strconv.ParseInt(partitionIDStr, 10, 32)
		offsetsByPartition[int32(partitionID)] = val
	}

	return offsetsByPartition
}

func (s *Storage) getNumberOfConsumedRecords() float64 {
	return s.consumedRecords.Load()
}

func (s *Storage) getGroupOffsets() map[string]map[string]map[int32]OffsetCommit {
	// Offsets by group, topic, partition
	offsetsByGroup := make(map[string]map[string]map[int32]OffsetCommit)
	if !s.isReady() {
		s.logger.Info("tried to fetch consumer group offsets, but haven't consumed the whole topic yet")
		return offsetsByGroup
	}

	offsets := s.offsetCommits.Items()
	for _, offset := range offsets {
		val := offset.(OffsetCommit)

		// Initialize inner maps as necessary
		if _, exists := offsetsByGroup[val.Key.Group]; !exists {
			offsetsByGroup[val.Key.Group] = make(map[string]map[int32]OffsetCommit)
		}
		if _, exists := offsetsByGroup[val.Key.Group][val.Key.Topic]; !exists {
			offsetsByGroup[val.Key.Group][val.Key.Topic] = make(map[int32]OffsetCommit)
		}
		offsetsByGroup[val.Key.Group][val.Key.Topic][val.Key.Partition] = val
	}

	return offsetsByGroup
}

func (s *Storage) deleteOffsetCommit(key kmsg.OffsetCommitKey) {
	uniqueKey := encodeOffsetCommitKey(key)
	s.offsetCommits.Remove(uniqueKey)
}

func encodeOffsetCommitKey(key kmsg.OffsetCommitKey) string {
	return fmt.Sprintf("%v:%v:%v", key.Group, key.Topic, key.Partition)
}

================================================
FILE: minion/utils.go
================================================
package minion

import (
	"fmt"
	"regexp"
	"strings"
)

func (s *Service) IsGroupAllowed(groupName string) bool {
	isAllowed := false
	for _, regex := range s.AllowedGroupIDsExpr {
		if regex.MatchString(groupName) {
			isAllowed = true
			break
		}
	}

	for _, regex := range s.IgnoredGroupIDsExpr {
		if regex.MatchString(groupName) {
			isAllowed = false
			break
		}
	}
	return isAllowed
}

func (s *Service) IsTopicAllowed(topicName string) bool {
	isAllowed := false
	for _, regex := range s.AllowedTopicsExpr {
		if regex.MatchString(topicName) {
			isAllowed = true
			break
		}
	}

	for _, regex := range s.IgnoredTopicsExpr {
		if regex.MatchString(topicName) {
			isAllowed = false
			break
		}
	}
	return isAllowed
}

func compileRegex(expr string) (*regexp.Regexp, error) {
	if strings.HasPrefix(expr, "/") && strings.HasSuffix(expr, "/") {
		substr := expr[1 : len(expr)-1]
		regex, err := regexp.Compile(substr)
		if err != nil {
			return nil, err
		}
		return regex, nil
	}

	// If the input is not marked as a regex (enclosing slashes), we anchor it with ^ and $ so that it has to match
	// the whole string.
	regex, err := regexp.Compile("^" + expr + "$")
	if err != nil {
		return nil, err
	}
	return regex, nil
}

func compileRegexes(expr []string) ([]*regexp.Regexp, error) {
	compiledExpressions := make([]*regexp.Regexp, len(expr))
	for i, exprStr := range expr {
		regex, err := compileRegex(exprStr)
		if err != nil {
			return nil, fmt.Errorf("failed to compile expression string '%v': %w", exprStr, err)
		}
		compiledExpressions[i] = regex
	}
	return compiledExpressions, nil
}
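
// For illustration (hypothetical expressions, not from the source): the allow/ignore
// lists accept either plain strings or regexes marked by enclosing slashes.
//
//	r, _ := compileRegex("payments")      // compiled as ^payments$, matches only "payments"
//	r.MatchString("payments-v2")          // false
//	r, _ = compileRegex("/^payments-.*/") // slashes stripped, compiled as ^payments-.*
//	r.MatchString("payments-v2")          // true
//
// Note that the plain form is anchored but not escaped, so regex metacharacters in it
// are still interpreted.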

================================================
FILE: minion/versions.go
================================================
package minion

import (
	"context"
	"fmt"

	"github.com/twmb/franz-go/pkg/kerr"
	"github.com/twmb/franz-go/pkg/kmsg"
	"github.com/twmb/franz-go/pkg/kversion"
)

func (s *Service) GetClusterVersion(ctx context.Context) (string, error) {
	res, err := s.GetAPIVersions(ctx)
	if err != nil {
		return "", err
	}
	versions := kversion.FromApiVersionsResponse(res)
	return versions.VersionGuess(), nil
}

func (s *Service) GetAPIVersions(ctx context.Context) (*kmsg.ApiVersionsResponse, error) {
	versionsReq := kmsg.NewApiVersionsRequest()
	versionsReq.ClientSoftwareName = "kminion"
	versionsReq.ClientSoftwareVersion = "v2"
	res, err := versionsReq.RequestWith(ctx, s.client)
	if err != nil {
		return nil, fmt.Errorf("failed to request api versions: %w", err)
	}

	err = kerr.ErrorForCode(res.ErrorCode)
	if err != nil {
		return nil, fmt.Errorf("failed to request api versions. Inner kafka error: %w", err)
	}

	return res, nil
}

================================================
FILE: prometheus/collect_broker_info.go
================================================
package prometheus

import (
	"context"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"
)

func (e *Exporter) collectBrokerInfo(ctx context.Context, ch chan<- prometheus.Metric) bool {
	metadata, err := e.minionSvc.GetMetadataCached(ctx)
	if err != nil {
		e.logger.Error("failed to get kafka metadata", zap.Error(err))
		return false
	}

	for _, broker := range metadata.Brokers {
		rack := ""
		if broker.Rack != nil {
			rack = *broker.Rack
		}
		isController := metadata.ControllerID == broker.NodeID

		ch <- prometheus.MustNewConstMetric(
			e.brokerInfo,
			prometheus.GaugeValue,
			1,
			strconv.Itoa(int(broker.NodeID)),
			broker.Host,
			strconv.Itoa(int(broker.Port)),
			rack,
			strconv.FormatBool(isController),
		)
	}
	return true
}

================================================
FILE: prometheus/collect_cluster_info.go
================================================
package prometheus

import (
	"context"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"
)

func (e *Exporter) collectClusterInfo(ctx context.Context, ch chan<- prometheus.Metric) bool {
	version, err := e.minionSvc.GetClusterVersion(ctx)
	if err != nil {
		e.logger.Error("failed to get kafka cluster version", zap.Error(err))
		return false
	}

	metadata, err := e.minionSvc.GetMetadataCached(ctx)
	if err != nil {
		e.logger.Error("failed to get kafka metadata", zap.Error(err))
		return false
	}
	brokerCount := len(metadata.Brokers)

	clusterID := ""
	if metadata.ClusterID != nil {
		clusterID = *metadata.ClusterID
	}

	ch <- prometheus.MustNewConstMetric(
		e.clusterInfo,
		prometheus.GaugeValue,
		1,
		version,
		strconv.Itoa(brokerCount),
		strconv.Itoa(int(metadata.ControllerID)),
		clusterID,
	)
	return true
}

================================================
FILE: prometheus/collect_consumer_group_lags.go
================================================
package prometheus

import (
	"context"
	"math"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/twmb/franz-go/pkg/kadm"
	"github.com/twmb/franz-go/pkg/kerr"
	"go.uber.org/zap"

	"github.com/cloudhut/kminion/v2/minion"
)

type waterMark struct {
	TopicName     string
	PartitionID   int32
	LowWaterMark  int64
	HighWaterMark int64
}

func (e *Exporter) collectConsumerGroupLags(ctx context.Context, ch chan<- prometheus.Metric) bool {
	if !e.minionSvc.Cfg.ConsumerGroups.Enabled {
		return true
	}

	// Low watermarks (at the moment they are not needed at all; they could be used to calculate the lag on partitions
	// that don't have any active offsets)
	lowWaterMarks, err := e.minionSvc.ListStartOffsetsCached(ctx)
	if err != nil {
		e.logger.Error("failed to fetch low water marks", zap.Error(err))
		return false
	}
	// High watermarks
	highWaterMarks, err := e.minionSvc.ListEndOffsetsCached(ctx)
	if err != nil {
		e.logger.Error("failed to fetch high water marks", zap.Error(err))
		return false
	}
	waterMarksByTopic := e.waterMarksByTopic(lowWaterMarks, highWaterMarks)

	// We have two different options to get consumer group offsets - either via the AdminAPI or by consuming the
	// __consumer_offsets topic.
	if e.minionSvc.Cfg.ConsumerGroups.ScrapeMode == minion.ConsumerGroupScrapeModeAdminAPI {
		return e.collectConsumerGroupLagsAdminAPI(ctx, ch, waterMarksByTopic)
	}
	return e.collectConsumerGroupLagsOffsetTopic(ctx, ch, waterMarksByTopic)
}

func (e *Exporter) collectConsumerGroupLagsOffsetTopic(_ context.Context, ch chan<- prometheus.Metric, marks map[string]map[int32]waterMark) bool {
	offsets := e.minionSvc.ListAllConsumerGroupOffsetsInternal()
	for groupName, group := range offsets {
		if !e.minionSvc.IsGroupAllowed(groupName) {
			continue
		}
		offsetCommits := 0

		for topicName, topic := range group {
			topicLag := float64(0)
			topicOffsetSum := float64(0)
			for partitionID, partition := range topic {
				childLogger := e.logger.With(
					zap.String("consumer_group", groupName),
					zap.String("topic_name", topicName),
					zap.Int32("partition_id", partitionID),
					zap.Int64("group_offset", partition.Value.Offset))

				topicMark, exists := marks[topicName]
				if !exists {
					childLogger.Warn("consumer group has committed offsets on a topic we don't have watermarks for")
					break // We can stop trying to find any other offsets for that topic, so let's quit this loop
				}
				partitionMark, exists := topicMark[partitionID]
				if !exists {
					childLogger.Warn("consumer group has committed offsets on a partition we don't have watermarks for")
					continue
				}

				lag := float64(partitionMark.HighWaterMark - partition.Value.Offset)
				// Lag might be negative because we fetch the partition watermarks before we fetch the group offsets;
				// this is effectively a race condition. Negative lags do not make sense, so use at least 0 as lag.
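				// For illustration (hypothetical numbers, not from the source): if the high watermark was fetched as
				// 100 and the group then commits offset 103 before its offsets are read, the raw lag would be
				// 100 - 103 = -3; the clamp below reports it as 0.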
				lag = math.Max(0, lag)
				topicLag += lag
				topicOffsetSum += float64(partition.Value.Offset)

				// Offset commit count for this consumer group
				offsetCommits += partition.CommitCount

				if e.minionSvc.Cfg.ConsumerGroups.Granularity == minion.ConsumerGroupGranularityTopic {
					continue
				}
				ch <- prometheus.MustNewConstMetric(
					e.consumerGroupTopicPartitionLag,
					prometheus.GaugeValue,
					lag,
					groupName,
					topicName,
					strconv.Itoa(int(partitionID)),
				)
			}

			ch <- prometheus.MustNewConstMetric(
				e.consumerGroupTopicLag,
				prometheus.GaugeValue,
				topicLag,
				groupName,
				topicName,
			)
			ch <- prometheus.MustNewConstMetric(
				e.consumerGroupTopicOffsetSum,
				prometheus.GaugeValue,
				topicOffsetSum,
				groupName,
				topicName,
			)
		}
		ch <- prometheus.MustNewConstMetric(
			e.offsetCommits,
			prometheus.CounterValue,
			float64(offsetCommits),
			groupName,
		)
	}
	return true
}

func (e *Exporter) collectConsumerGroupLagsAdminAPI(ctx context.Context, ch chan<- prometheus.Metric, marks map[string]map[int32]waterMark) bool {
	isOk := true

	groupOffsets, err := e.minionSvc.ListAllConsumerGroupOffsetsAdminAPI(ctx)
	if err != nil {
		e.logger.Error("failed to fetch consumer group offsets via the admin API", zap.Error(err))
		return false
	}

	for groupName, offsetRes := range groupOffsets {
		if !e.minionSvc.IsGroupAllowed(groupName) {
			continue
		}

		err = kerr.ErrorForCode(offsetRes.ErrorCode)
		if err != nil {
			e.logger.Warn("failed to get offsets from consumer group, inner kafka error",
				zap.String("consumer_group", groupName),
				zap.Error(err))
			isOk = false
			continue
		}

		for _, topic := range offsetRes.Topics {
			topicLag := float64(0)
			topicOffsetSum := float64(0)
			for _, partition := range topic.Partitions {
				err := kerr.ErrorForCode(partition.ErrorCode)
				if err != nil {
					e.logger.Warn("failed to get consumer group offsets for a partition, inner kafka error",
						zap.String("consumer_group", groupName),
						zap.Error(err))
					isOk = false
					continue
				}
				childLogger := e.logger.With(
					zap.String("consumer_group", groupName),
					zap.String("topic_name", topic.Topic),
					zap.Int32("partition_id", partition.Partition),
					zap.Int64("group_offset", partition.Offset))

				topicMark, exists := marks[topic.Topic]
				if !exists {
					childLogger.Warn("consumer group has committed offsets on a topic we don't have watermarks for")
					isOk = false
					break // We can stop trying to find any other offsets for that topic, so let's quit this loop
				}
				partitionMark, exists := topicMark[partition.Partition]
				if !exists {
					childLogger.Warn("consumer group has committed offsets on a partition we don't have watermarks for")
					isOk = false
					continue
				}
				lag := float64(partitionMark.HighWaterMark - partition.Offset)
				// Negative lag can occur due to the same watermark/offset fetch-order race described above; clamp it
				// to at least 0.
				lag = math.Max(0, lag)

				topicLag += lag
				topicOffsetSum += float64(partition.Offset)

				if e.minionSvc.Cfg.ConsumerGroups.Granularity == minion.ConsumerGroupGranularityTopic {
					continue
				}
				ch <- prometheus.MustNewConstMetric(
					e.consumerGroupTopicPartitionLag,
					prometheus.GaugeValue,
					lag,
					groupName,
					topic.Topic,
					strconv.Itoa(int(partition.Partition)),
				)
			}
			ch <- prometheus.MustNewConstMetric(
				e.consumerGroupTopicLag,
				prometheus.GaugeValue,
				topicLag,
				groupName,
				topic.Topic,
			)
			ch <- prometheus.MustNewConstMetric(
				e.consumerGroupTopicOffsetSum,
				prometheus.GaugeValue,
				topicOffsetSum,
				groupName,
				topic.Topic,
			)
		}
	}
	return isOk
}

func (e *Exporter) waterMarksByTopic(lowMarks kadm.ListedOffsets, highMarks kadm.ListedOffsets) map[string]map[int32]waterMark {
	type partitionID = int32
	type topicName = string
	waterMarks := make(map[topicName]map[partitionID]waterMark)

	for topic, lowMarksByPartitionID := range lowMarks {
		_, exists := waterMarks[topic]
		if !exists {
			waterMarks[topic] = make(map[partitionID]waterMark)
		}

		for _, lowOffset := range lowMarksByPartitionID {
			if lowOffset.Err != nil {
				e.logger.Debug("failed to get partition low water mark, inner kafka error",
					zap.String("topic_name", lowOffset.Topic),
					zap.Int32("partition_id", lowOffset.Partition),
					zap.Error(lowOffset.Err))
				continue
			}

			highOffset, exists := highMarks.Lookup(lowOffset.Topic, lowOffset.Partition)
			if !exists {
				e.logger.Error("got low water marks for a topic's partition but no high watermarks",
					zap.String("topic_name", lowOffset.Topic),
					zap.Int32("partition_id", lowOffset.Partition),
					zap.Int64("offset", lowOffset.Offset))
				delete(waterMarks, lowOffset.Topic)
				break // Topic watermarks are invalid -> delete & skip this topic
			}
			if highOffset.Err != nil {
				e.logger.Debug("failed to get partition high water mark, inner kafka error",
					zap.String("topic_name", lowOffset.Topic),
					zap.Int32("partition_id", lowOffset.Partition),
					zap.Error(highOffset.Err))
				continue
			}

			waterMarks[lowOffset.Topic][lowOffset.Partition] = waterMark{
				TopicName:     lowOffset.Topic,
				PartitionID:   lowOffset.Partition,
				LowWaterMark:  lowOffset.Offset,
				HighWaterMark: highOffset.Offset,
			}
		}
	}

	return waterMarks
}

================================================
FILE: prometheus/collect_consumer_groups.go
================================================
package prometheus

import (
	"context"
	"fmt"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/twmb/franz-go/pkg/kerr"
	"github.com/twmb/franz-go/pkg/kmsg"
	"go.uber.org/zap"
)

func (e *Exporter) collectConsumerGroups(ctx context.Context, ch chan<- prometheus.Metric) bool {
	if !e.minionSvc.Cfg.ConsumerGroups.Enabled {
		return true
	}
	groups, err := e.minionSvc.DescribeConsumerGroups(ctx)
	if err != nil {
		e.logger.Error("failed to collect consumer groups, because Kafka request failed", zap.Error(err))
		return false
	}

	// The list of groups may be incomplete due to group coordinators that might fail to respond. We do log an error
	// message in that case (in the kafka request method) and those groups will not be included in this list.
	for _, grp := range groups {
		coordinator := grp.BrokerMetadata.NodeID
		for _, group := range grp.Groups.Groups {
			err := kerr.ErrorForCode(group.ErrorCode)
			if err != nil {
				e.logger.Warn("failed to describe consumer group, internal kafka error",
					zap.Error(err),
					zap.String("group_id", group.Group),
				)
				continue
			}
			if !e.minionSvc.IsGroupAllowed(group.Group) {
				continue
			}
			state := 0
			if group.State == "Stable" {
				state = 1
			}
			ch <- prometheus.MustNewConstMetric(
				e.consumerGroupInfo,
				prometheus.GaugeValue,
				float64(state),
				group.Group,
				group.Protocol,
				group.ProtocolType,
				group.State,
				strconv.FormatInt(int64(coordinator), 10),
			)

			// total number of members in consumer groups
			ch <- prometheus.MustNewConstMetric(
				e.consumerGroupMembers,
				prometheus.GaugeValue,
				float64(len(group.Members)),
				group.Group,
			)

			// iterate all members and build two maps:
			// - {topic -> number-of-consumers}
			// - {topic -> number-of-partitions-assigned}
			topicConsumers := make(map[string]int)
			topicPartitionsAssigned := make(map[string]int)
			membersWithEmptyAssignment := 0
			failedAssignmentsDecode := 0
			for _, member := range group.Members {
				if len(member.MemberAssignment) == 0 {
					membersWithEmptyAssignment++
					continue
				}
				kassignment, err := decodeMemberAssignments(group.ProtocolType, member)
				if err != nil {
					e.logger.Debug("failed to decode consumer group member assignment, internal kafka error",
						zap.Error(err),
						zap.String("group_id", group.Group),
						zap.String("client_id", member.ClientID),
						zap.String("member_id", member.MemberID),
						zap.String("client_host", member.ClientHost),
					)
					failedAssignmentsDecode++
					continue
				}
				if kassignment == nil {
					// This is expected for protocolTypes that don't provide valuable information
					continue
				}

				if len(kassignment.Topics) == 0 {
					membersWithEmptyAssignment++
				}
				for _, topic := range kassignment.Topics {
					topicConsumers[topic.Topic]++
					topicPartitionsAssigned[topic.Topic] += len(topic.Partitions)
				}
			}

			if failedAssignmentsDecode > 0 {
				e.logger.Error("failed to decode consumer group member assignments, internal kafka error",
					zap.String("group_id", group.Group),
					zap.Int("assignment_decode_failures", failedAssignmentsDecode),
				)
			}

			// number of members with no assignment in a stable consumer group
			if membersWithEmptyAssignment > 0 {
				ch <- prometheus.MustNewConstMetric(
					e.consumerGroupMembersEmpty,
					prometheus.GaugeValue,
					float64(membersWithEmptyAssignment),
					group.Group,
				)
			}
			// number of members in consumer groups for each topic
			for topicName, consumers := range topicConsumers {
				ch <- prometheus.MustNewConstMetric(
					e.consumerGroupTopicMembers,
					prometheus.GaugeValue,
					float64(consumers),
					group.Group,
					topicName,
				)
			}
			// number of partitions assigned in consumer groups for each topic
			for topicName, partitions := range topicPartitionsAssigned {
				ch <- prometheus.MustNewConstMetric(
					e.consumerGroupAssignedTopicPartitions,
					prometheus.GaugeValue,
					float64(partitions),
					group.Group,
					topicName,
				)
			}
		}
	}
	return true
}

func decodeMemberAssignments(protocolType string, member kmsg.DescribeGroupsResponseGroupMember) (*kmsg.ConsumerMemberAssignment, error) {
	switch protocolType {
	case "consumer":
		a := kmsg.NewConsumerMemberAssignment()
		if err := a.ReadFrom(member.MemberAssignment); err != nil {
			return nil, fmt.Errorf("failed to decode member assignment: %w", err)
		}
		return &a, nil
	case "connect":
		return nil, nil
	default:
		return nil, nil
	}
}
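
// For illustration (hypothetical assignments, not from the source): in a "consumer"
// protocol group where member A is assigned partitions 0-2 of topic "orders" and member
// B partitions 3-5, the loop above yields topicConsumers["orders"] = 2 and
// topicPartitionsAssigned["orders"] = 6. For the "connect" protocol type (and any other
// unknown protocol type) decodeMemberAssignments deliberately returns nil, so only the
// member count metrics are emitted for such groups.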
"github.com/prometheus/client_golang/prometheus" ) func (e *Exporter) collectExporterMetrics(_ context.Context, ch chan<- prometheus.Metric) bool { recordsConsumed := e.minionSvc.GetNumberOfOffsetRecordsConsumed() ch <- prometheus.MustNewConstMetric( e.offsetConsumerRecordsConsumed, prometheus.CounterValue, recordsConsumed, ) return true } ================================================ FILE: prometheus/collect_log_dirs.go ================================================ package prometheus import ( "context" "github.com/prometheus/client_golang/prometheus" "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kgo" "go.uber.org/zap" "strconv" ) func (e *Exporter) collectLogDirs(ctx context.Context, ch chan<- prometheus.Metric) bool { if !e.minionSvc.Cfg.LogDirs.Enabled { return true } isOk := true sizeByBroker := make(map[kgo.BrokerMetadata]int64) sizeByTopicName := make(map[string]int64) logDirsSharded := e.minionSvc.DescribeLogDirs(ctx) for _, logDirRes := range logDirsSharded { childLogger := e.logger.With(zap.String("broker_address", logDirRes.Broker.Host), zap.String("broker_id", strconv.Itoa(int(logDirRes.Broker.NodeID)))) if logDirRes.Err != nil { childLogger.Error("failed to describe a broker's log dirs", zap.Error(logDirRes.Err)) isOk = false continue } for _, dir := range logDirRes.LogDirs.Dirs { err := kerr.ErrorForCode(dir.ErrorCode) if err != nil { childLogger.Error("failed to describe a broker's log dir", zap.String("log_dir", dir.Dir), zap.Error(err)) isOk = false continue } for _, topic := range dir.Topics { topicSize := int64(0) for _, partition := range topic.Partitions { topicSize += partition.Size } sizeByTopicName[topic.Topic] += topicSize sizeByBroker[logDirRes.Broker] += topicSize } } } // Report the total log dir size per broker for broker, size := range sizeByBroker { rackID := "" if broker.Rack != nil { rackID = *broker.Rack } ch <- prometheus.MustNewConstMetric( e.brokerLogDirSize, prometheus.GaugeValue, float64(size), strconv.Itoa(int(broker.NodeID)), broker.Host, strconv.Itoa(int(broker.Port)), rackID, ) } // If one of the log dir responses returned an error we can not reliably report the topic log dirs, as there might // be additional data on the brokers that failed to respond. 
	if !isOk {
		return false
	}

	// Report the total log dir size per topic
	for topicName, size := range sizeByTopicName {
		ch <- prometheus.MustNewConstMetric(
			e.topicLogDirSize,
			prometheus.GaugeValue,
			float64(size),
			topicName,
		)
	}

	return isOk
}

================================================
FILE: prometheus/collect_topic_info.go
================================================
package prometheus

import (
	"context"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/twmb/franz-go/pkg/kerr"
	"go.uber.org/zap"
)

func (e *Exporter) collectTopicInfo(ctx context.Context, ch chan<- prometheus.Metric) bool {
	if !e.minionSvc.Cfg.Topics.Enabled {
		return true
	}

	metadata, err := e.minionSvc.GetMetadataCached(ctx)
	if err != nil {
		e.logger.Error("failed to get metadata", zap.Error(err))
		return false
	}

	topicConfigs, err := e.minionSvc.GetTopicConfigs(ctx)
	if err != nil {
		e.logger.Error("failed to get topic configs", zap.Error(err))
		return false
	}

	isOk := true

	// configsByTopic is indexed by topic name and config resource name (inner key)
	configsByTopic := make(map[string]map[string]string)
	for _, resource := range topicConfigs.Resources {
		configsByTopic[resource.ResourceName] = make(map[string]string)
		typedErr := kerr.TypedErrorForCode(resource.ErrorCode)
		if typedErr != nil {
			isOk = false
			e.logger.Warn("failed to get topic config of a specific topic",
				zap.String("topic_name", resource.ResourceName),
				zap.Error(typedErr))
			continue
		}

		for _, config := range resource.Configs {
			confVal := "nil"
			if config.Value != nil {
				confVal = *config.Value
			}
			configsByTopic[resource.ResourceName][config.Name] = confVal
		}
	}

	for _, topic := range metadata.Topics {
		topicName := *topic.Topic
		if !e.minionSvc.IsTopicAllowed(topicName) {
			continue
		}
		typedErr := kerr.TypedErrorForCode(topic.ErrorCode)
		if typedErr != nil {
			isOk = false
			e.logger.Warn("failed to get metadata of a specific topic",
				zap.String("topic_name", topicName),
				zap.Error(typedErr))
			continue
		}
		partitionCount := len(topic.Partitions)
		replicationFactor := -1
		if partitionCount > 0 {
			// The partition list should never be empty here, but we check it anyway so that we don't cause a panic.
			replicationFactor = len(topic.Partitions[0].Replicas)
		}
		var labelsValues []string
		labelsValues = append(labelsValues, topicName)
		labelsValues = append(labelsValues, strconv.Itoa(partitionCount))
		labelsValues = append(labelsValues, strconv.Itoa(replicationFactor))
		for _, key := range e.minionSvc.Cfg.Topics.InfoMetric.ConfigKeys {
			labelsValues = append(labelsValues, getOrDefault(configsByTopic[topicName], key, "N/A"))
		}
		ch <- prometheus.MustNewConstMetric(
			e.topicInfo,
			prometheus.GaugeValue,
			float64(1),
			labelsValues...,
		)
	}

	return isOk
}

func getOrDefault(m map[string]string, key string, defaultValue string) string {
	if value, exists := m[key]; exists {
		return value
	}
	return defaultValue
}

================================================
FILE: prometheus/collect_topic_partition_offsets.go
================================================
package prometheus

import (
	"context"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"

	"github.com/cloudhut/kminion/v2/minion"
)

func (e *Exporter) collectTopicPartitionOffsets(ctx context.Context, ch chan<- prometheus.Metric) bool {
	if !e.minionSvc.Cfg.Topics.Enabled {
		return true
	}

	isOk := true

	// Low watermarks
	lowWaterMarks, err := e.minionSvc.ListStartOffsetsCached(ctx)
	if err != nil {
		e.logger.Error("failed to fetch low water marks", zap.Error(err))
		return false
	}
	// High watermarks
	highWaterMarks, err := e.minionSvc.ListEndOffsetsCached(ctx)
	if err != nil {
		e.logger.Error("failed to fetch high water marks", zap.Error(err))
		return false
	}

	// Process low watermarks
	for topicName, partitions := range lowWaterMarks {
		if !e.minionSvc.IsTopicAllowed(topicName) {
			continue
		}

		waterMarkSum := int64(0)
		hasErrors := false
		for _, offset := range partitions {
			if offset.Err != nil {
				hasErrors = true
				isOk = false
				continue
			}
			waterMarkSum += offset.Offset
			// Skip the per-partition metric if partition metrics shall not be exposed
			if e.minionSvc.Cfg.Topics.Granularity == minion.TopicGranularityTopic {
				continue
			}
			ch <- prometheus.MustNewConstMetric(
				e.partitionLowWaterMark,
				prometheus.GaugeValue,
				float64(offset.Offset),
				topicName,
				strconv.Itoa(int(offset.Partition)),
			)
		}
		// We only want to report the sum of all partition marks if we received watermarks from all partitions
		if !hasErrors {
			ch <- prometheus.MustNewConstMetric(
				e.topicLowWaterMarkSum,
				prometheus.GaugeValue,
				float64(waterMarkSum),
				topicName,
			)
		}
	}

	// Process high watermarks
	for topicName, partitions := range highWaterMarks {
		if !e.minionSvc.IsTopicAllowed(topicName) {
			continue
		}

		waterMarkSum := int64(0)
		hasErrors := false
		for _, offset := range partitions {
			if offset.Err != nil {
				hasErrors = true
				isOk = false
				continue
			}
			waterMarkSum += offset.Offset
			// Skip the per-partition metric if partition metrics shall not be exposed
			if e.minionSvc.Cfg.Topics.Granularity == minion.TopicGranularityTopic {
				continue
			}
			ch <- prometheus.MustNewConstMetric(
				e.partitionHighWaterMark,
				prometheus.GaugeValue,
				float64(offset.Offset),
				topicName,
				strconv.Itoa(int(offset.Partition)),
			)
		}
		// We only want to report the sum of all partition marks if we received watermarks from all partitions
		if !hasErrors {
			ch <- prometheus.MustNewConstMetric(
				e.topicHighWaterMarkSum,
				prometheus.GaugeValue,
				float64(waterMarkSum),
				topicName,
			)
		}
	}

	return isOk
}

================================================
FILE: prometheus/config.go
================================================
package prometheus

type Config struct {
	Host      string `koanf:"host"`
	Port      int    `koanf:"port"`
	Namespace string `koanf:"namespace"`
}

func (c *Config) SetDefaults() {
	c.Port = 8080
	c.Namespace = "kminion"
}

================================================
FILE: prometheus/exporter.go
================================================
package prometheus

import (
	"context"
	"os"
	"strings"
	"time"

	uuid2 "github.com/google/uuid"
	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"

	"github.com/cloudhut/kminion/v2/minion"
)

// Exporter is the Prometheus exporter that implements the prometheus.Collector interface
type Exporter struct {
	cfg       Config
	logger    *zap.Logger
	minionSvc *minion.Service

	// Exporter metrics
	exporterUp                    *prometheus.Desc
	offsetConsumerRecordsConsumed *prometheus.Desc

	// Kafka metrics
	// General
	clusterInfo *prometheus.Desc
	brokerInfo  *prometheus.Desc

	// Log Dir Sizes
	brokerLogDirSize *prometheus.Desc
	topicLogDirSize  *prometheus.Desc

	// Topic / Partition
	topicInfo              *prometheus.Desc
	topicHighWaterMarkSum  *prometheus.Desc
	partitionHighWaterMark *prometheus.Desc
	topicLowWaterMarkSum   *prometheus.Desc
	partitionLowWaterMark  *prometheus.Desc

	// Consumer Groups
	consumerGroupInfo                    *prometheus.Desc
	consumerGroupMembers                 *prometheus.Desc
	consumerGroupMembersEmpty            *prometheus.Desc
	consumerGroupTopicMembers            *prometheus.Desc
	consumerGroupAssignedTopicPartitions *prometheus.Desc
	consumerGroupTopicOffsetSum          *prometheus.Desc
	consumerGroupTopicPartitionLag       *prometheus.Desc
	consumerGroupTopicLag                *prometheus.Desc
	offsetCommits                        *prometheus.Desc
}

func NewExporter(cfg Config, logger *zap.Logger, minionSvc *minion.Service) (*Exporter, error) {
	return &Exporter{cfg: cfg, logger: logger.Named("prometheus"), minionSvc: minionSvc}, nil
}

func (e *Exporter) InitializeMetrics() {
	// Exporter / internal metrics
	// Exporter up
	e.exporterUp = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "exporter", "up"),
		"Build info about this Prometheus Exporter. Gauge value is 0 if one or more scrapes have failed.",
		nil,
		map[string]string{"version": os.Getenv("VERSION")},
	)
	// OffsetConsumer records consumed
	e.offsetConsumerRecordsConsumed = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "exporter", "offset_consumer_records_consumed_total"),
		"The number of offset records that have been consumed by the internal offset consumer",
		[]string{},
		nil,
	)

	// Kafka metrics
	// Cluster info
	e.clusterInfo = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "cluster_info"),
		"Kafka cluster information",
		[]string{"cluster_version", "broker_count", "controller_id", "cluster_id"},
		nil,
	)
	// Broker Info
	e.brokerInfo = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "broker_info"),
		"Kafka broker information",
		[]string{"broker_id", "address", "port", "rack_id", "is_controller"},
		nil,
	)

	// LogDir sizes
	e.brokerLogDirSize = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "broker_log_dir_size_total_bytes"),
		"The summed size in bytes of all log dirs for a given broker",
		[]string{"broker_id", "address", "port", "rack_id"},
		nil,
	)
	e.topicLogDirSize = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "topic_log_dir_size_total_bytes"),
		"The summed size in bytes of partitions for a given topic. This includes the used space for replica partitions.",
		[]string{"topic_name"},
		nil,
	)

	// Topic / Partition metrics
	// Topic info
	var labels = []string{"topic_name", "partition_count", "replication_factor"}
	for _, key := range e.minionSvc.Cfg.Topics.InfoMetric.ConfigKeys {
		// Prometheus does not allow '.' in label keys
		labels = append(labels, strings.ReplaceAll(key, ".", "_"))
	}
	e.topicInfo = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "topic_info"),
		"Info labels for a given topic",
		labels,
		nil,
	)
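
	// For illustration (assuming the default namespace "kminion" from prometheus/config.go): BuildFQName joins its
	// parts with underscores, so the descriptor above is exposed as "kminion_kafka_topic_info", and a configured
	// info-metric config key such as "cleanup.policy" becomes the label "cleanup_policy".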

	// Partition Low Water Mark
	e.partitionLowWaterMark = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "topic_partition_low_water_mark"),
		"Partition Low Water Mark",
		[]string{"topic_name", "partition_id"},
		nil,
	)
	// Topic Low Water Mark Sum
	e.topicLowWaterMarkSum = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "topic_low_water_mark_sum"),
		"Sum of all the topic's partition low water marks",
		[]string{"topic_name"},
		nil,
	)
	// Partition High Water Mark
	e.partitionHighWaterMark = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "topic_partition_high_water_mark"),
		"Partition High Water Mark",
		[]string{"topic_name", "partition_id"},
		nil,
	)
	// Topic High Water Mark Sum
	e.topicHighWaterMarkSum = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "topic_high_water_mark_sum"),
		"Sum of all the topic's partition high water marks",
		[]string{"topic_name"},
		nil,
	)

	// Consumer Group Metrics
	// Group Info
	e.consumerGroupInfo = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_info"),
		"Consumer Group info metrics. It will report 1 if the group is in the stable state, otherwise 0.",
		[]string{"group_id", "protocol", "protocol_type", "state", "coordinator_id"},
		nil,
	)
	// Group Members
	e.consumerGroupMembers = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_members"),
		"Consumer Group member count metrics. It will report the number of members in the consumer group",
		[]string{"group_id"},
		nil,
	)
	// Group Empty Members
	e.consumerGroupMembersEmpty = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_empty_members"),
		"It will report the number of members in the consumer group with no partition assigned",
		[]string{"group_id"},
		nil,
	)
	// Group Topic Members
	e.consumerGroupTopicMembers = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_topic_members"),
		"It will report the number of members in the consumer group assigned on a given topic",
		[]string{"group_id", "topic_name"},
		nil,
	)
	// Group Topic Assigned Partitions
	e.consumerGroupAssignedTopicPartitions = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_topic_assigned_partitions"),
		"It will report the number of partitions assigned in the consumer group for a given topic",
		[]string{"group_id", "topic_name"},
		nil,
	)
	// Topic / Partition Offset Sum (useful for calculating the consumed messages / sec on a topic)
	e.consumerGroupTopicOffsetSum = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_topic_offset_sum"),
		"The sum of all committed group offsets across all partitions in a topic",
		[]string{"group_id", "topic_name"},
		nil,
	)
	// Partition Lag
	e.consumerGroupTopicPartitionLag = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_topic_partition_lag"),
		"The number of messages a consumer group is lagging behind the latest offset of a partition",
		[]string{"group_id", "topic_name", "partition_id"},
		nil,
	)
	// Topic Lag (sum of all partition lags)
	e.consumerGroupTopicLag = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_topic_lag"),
		"The number of messages a consumer group is lagging behind across all partitions in a topic",
		[]string{"group_id", "topic_name"},
		nil,
	)
	// Offset commits by group id
	e.offsetCommits = prometheus.NewDesc(
		prometheus.BuildFQName(e.cfg.Namespace, "kafka", "consumer_group_offset_commits_total"),
		"The number of offsets committed by a group",
		[]string{"group_id"},
		nil,
	)
}

// Describe implements the prometheus.Collector interface. It sends a subset of the possible descriptors of metrics
// collected by this Collector to the provided channel; the remaining metrics are built at collection time and are
// effectively unchecked.
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
	ch <- e.exporterUp
	ch <- e.clusterInfo
}

func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()

	// Attach a unique id which will be used for caching (and its invalidation) of the kafka requests
	uuid := uuid2.New()
	ctx = context.WithValue(ctx, "requestId", uuid.String())

	ok := e.collectClusterInfo(ctx, ch)
	ok = e.collectExporterMetrics(ctx, ch) && ok
	ok = e.collectBrokerInfo(ctx, ch) && ok
	ok = e.collectLogDirs(ctx, ch) && ok
	ok = e.collectConsumerGroups(ctx, ch) && ok
	ok = e.collectTopicPartitionOffsets(ctx, ch) && ok
	ok = e.collectConsumerGroupLags(ctx, ch) && ok
	ok = e.collectTopicInfo(ctx, ch) && ok

	if ok {
		ch <- prometheus.MustNewConstMetric(e.exporterUp, prometheus.GaugeValue, 1.0)
	} else {
		ch <- prometheus.MustNewConstMetric(e.exporterUp, prometheus.GaugeValue, 0.0)
	}
}
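
// For reference, a minimal sketch of how this collector is typically wired up (assumed
// wiring for illustration; the authoritative setup lives in main.go):
//
//	exporter, _ := NewExporter(cfg, logger, minionSvc)
//	exporter.InitializeMetrics()
//	prometheus.MustRegister(exporter)
//	http.Handle("/metrics", promhttp.Handler()) // github.com/prometheus/client_golang/prometheus/promhttp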