Repository: aws/aws-k8s-tester
Branch: main
Commit: 2b0b6c2b51b7
Files: 270
Total size: 1.4 MB

Directory structure:
gitextract_r74v3ht2/

├── .dockerignore
├── .github/
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       ├── build-neuron-ci.yaml
│       ├── build-nvidia-ci.yaml
│       ├── ci.yaml
│       ├── update-go-dependencies.yaml
│       ├── update-image-tags.yaml
│       ├── update-neuron-dependencies.yaml
│       └── update-nvidia-dependencies.yaml
├── .gitignore
├── .vscode/
│   └── settings.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Config
├── Dockerfile
├── LICENSE
├── Makefile
├── NOTICE
├── README.md
├── bmg.json
├── cmd/
│   ├── kubetest2-eksapi/
│   │   └── main.go
│   ├── kubetest2-eksapi-janitor/
│   │   └── main.go
│   ├── kubetest2-eksctl/
│   │   └── main.go
│   ├── kubetest2-tester-ginkgo-v1/
│   │   └── main.go
│   └── kubetest2-tester-multi/
│       └── main.go
├── external/
│   └── tools.go
├── go.mod
├── go.sum
├── hack/
│   ├── download-kubernetes-binaries.sh
│   ├── free-disk-space.sh
│   ├── update-go-dependencies.sh
│   ├── update-image-tags.sh
│   ├── update-neuron-dependencies.sh
│   └── update-nvidia-dependencies.sh
├── internal/
│   ├── awssdk/
│   │   └── config.go
│   ├── deployers/
│   │   ├── eksapi/
│   │   │   ├── addons.go
│   │   │   ├── ami_resolver.go
│   │   │   ├── ami_resolver_test.go
│   │   │   ├── auth_map_role.go
│   │   │   ├── auth_map_role_test.go
│   │   │   ├── aws.go
│   │   │   ├── cluster.go
│   │   │   ├── common.go
│   │   │   ├── common_test.go
│   │   │   ├── deployer.go
│   │   │   ├── infra.go
│   │   │   ├── janitor.go
│   │   │   ├── k8s.go
│   │   │   ├── kubeconfig.go
│   │   │   ├── logs.go
│   │   │   ├── logs_ssm_doc.json
│   │   │   ├── metrics.go
│   │   │   ├── node.go
│   │   │   ├── static_cluster.go
│   │   │   ├── templates/
│   │   │   │   ├── auth_map_role.yaml.template
│   │   │   │   ├── busybox_deployment.yaml.template
│   │   │   │   ├── cloudwatch-infra.yaml.template
│   │   │   │   ├── cloudwatch_agent_infra.yaml
│   │   │   │   ├── infra.yaml
│   │   │   │   ├── nvidia_static_cluster_nodepool.yaml.template
│   │   │   │   ├── templates.go
│   │   │   │   ├── templates_test.go
│   │   │   │   ├── unmanaged-nodegroup.yaml.template
│   │   │   │   ├── userdata_bootstrap.sh.mimepart.template
│   │   │   │   ├── userdata_bottlerocket.toml.template
│   │   │   │   └── userdata_nodeadm.yaml.mimepart.template
│   │   │   ├── userdata.go
│   │   │   ├── userdata_test.go
│   │   │   ├── vpccni.go
│   │   │   └── vpccni_test.go
│   │   └── eksctl/
│   │       ├── build.go
│   │       ├── cluster_config.go
│   │       ├── deployer.go
│   │       ├── down.go
│   │       └── up.go
│   ├── e2e/
│   │   ├── client.go
│   │   ├── conditions.go
│   │   ├── doc.go
│   │   ├── ec2.go
│   │   ├── health.go
│   │   ├── logs.go
│   │   ├── mpijobs/
│   │   │   ├── conditions.go
│   │   │   ├── conditions_test.go
│   │   │   └── types.go
│   │   └── resources.go
│   ├── metrics/
│   │   ├── cloudwatch.go
│   │   ├── noop.go
│   │   └── registry.go
│   ├── testers/
│   │   ├── ginkgov1/
│   │   │   ├── LICENSE.original
│   │   │   ├── README.md
│   │   │   ├── ginkgo.go
│   │   │   ├── kubectl/
│   │   │   │   └── kubectl.go
│   │   │   └── package.go
│   │   └── multi/
│   │       └── cmd.go
│   ├── util/
│   │   ├── cloudformation.go
│   │   ├── exec.go
│   │   ├── http.go
│   │   ├── http_test.go
│   │   ├── lang.go
│   │   ├── path.go
│   │   └── version.go
│   └── version.go
└── test/
    ├── cases/
    │   ├── disruptive/
    │   │   ├── graceful_reboot_test.go
    │   │   ├── graceful_shutdown_test.go
    │   │   └── main_test.go
    │   ├── dra/
    │   │   ├── dra_example_driver_test.go
    │   │   └── main_test.go
    │   ├── efa/
    │   │   ├── commons.go
    │   │   ├── main_test.go
    │   │   ├── pingpong_test.go
    │   │   └── unit_test.go
    │   ├── fips/
    │   │   ├── README.md
    │   │   ├── fips_test.go
    │   │   ├── main_test.go
    │   │   └── manifests/
    │   │       ├── registry-fips.yaml
    │   │       ├── registry-nonfips.yaml
    │   │       └── test-pods.yaml
    │   ├── netpol/
    │   │   ├── main_test.go
    │   │   └── np_test.go
    │   ├── neuron/
    │   │   ├── main_test.go
    │   │   ├── manifests/
    │   │   │   ├── multi-node-test-neuron.yaml
    │   │   │   └── single-node-test-neuronx.yaml
    │   │   └── neuron_test.go
    │   ├── neuron-dra/
    │   │   ├── main_test.go
    │   │   ├── neuron_dra_test.go
    │   │   ├── rcts/
    │   │   │   └── trn1/
    │   │   │       ├── rct-2-efas-4-neurons-wrong-match.yaml
    │   │   │       └── rct-all-efas-all-neurons.yaml
    │   │   ├── templates/
    │   │   │   └── nccom-test-mpijob.yaml.tmpl
    │   │   ├── testcases/
    │   │   │   └── trn1/
    │   │   │       ├── 2-efas-4-neurons-wrong-match.yaml
    │   │   │       └── all-efas-all-neurons.yaml
    │   │   └── topology.go
    │   ├── neuron-inference/
    │   │   ├── bert_inference_test.go
    │   │   ├── main_test.go
    │   │   ├── manifests/
    │   │   │   └── neuron-bert-inference.yaml
    │   │   └── vars.go
    │   ├── neuron-training/
    │   │   ├── bert_training_test.go
    │   │   ├── main_test.go
    │   │   ├── manifests/
    │   │   │   ├── bert-training.yaml
    │   │   │   └── training-comm-service.yaml
    │   │   └── vars.go
    │   ├── nvidia/
    │   │   ├── capabilities_test.go
    │   │   ├── containerd_test.go
    │   │   ├── main_test.go
    │   │   ├── manifests/
    │   │   │   ├── daemonset-containerd-check.yaml
    │   │   │   ├── job-hpc-benchmarks.yaml
    │   │   │   ├── job-unit-test-single-node.yaml
    │   │   │   ├── mpi-job-nccl-test-multi-node.yaml
    │   │   │   ├── mpi-job-pytorch-training-single-node.yaml
    │   │   │   └── nvidia-driver-capabilities-check.yaml
    │   │   ├── mpi_test.go
    │   │   └── unit_test.go
    │   ├── nvidia-dra/
    │   │   ├── main_test.go
    │   │   ├── nvidia_dra_test.go
    │   │   ├── rcts/
    │   │   │   └── p5/
    │   │   │       ├── rct-all-efas.yaml
    │   │   │       ├── rct-all-gpus.yaml
    │   │   │       └── rct-five-efas-one-gpu.yaml
    │   │   ├── templates/
    │   │   │   └── nccl-test-mpijob.yaml.tmpl
    │   │   ├── testcases/
    │   │   │   └── p5/
    │   │   │       ├── all-efas-all-gpus.yaml
    │   │   │       └── five-efas-one-gpu-negative-test.yaml
    │   │   └── topology.go
    │   ├── nvidia-inference/
    │   │   ├── bert_inference_test.go
    │   │   ├── main_test.go
    │   │   └── manifests/
    │   │       └── bert-inference.yaml
    │   ├── nvidia-training/
    │   │   ├── bert_training_test.go
    │   │   ├── main_test.go
    │   │   ├── manifests/
    │   │   │   └── bert-training.yaml
    │   │   └── vars.go
    │   ├── quick/
    │   │   ├── io_uring_test.go
    │   │   ├── limit_test.go
    │   │   ├── main_test.go
    │   │   ├── manifests/
    │   │   │   └── ulimit.yaml
    │   │   └── node_topology_test.go
    │   └── workload/
    │       ├── main_test.go
    │       └── workload_test.go
    ├── common/
    │   ├── dra.go
    │   ├── dra_features.go
    │   ├── dra_types.go
    │   ├── flags.go
    │   └── resources.go
    ├── images/
    │   ├── efa/
    │   │   ├── Dockerfile
    │   │   └── scripts/
    │   │       └── unit-test.sh
    │   ├── neuron/
    │   │   ├── Dockerfile
    │   │   ├── hack/
    │   │   │   └── install-test-resources.sh
    │   │   └── tests/
    │   │       ├── singleNodeTest.sh
    │   │       ├── testNeuronMlp.py
    │   │       ├── testNeuronParallelState.py
    │   │       └── testNeuronSingleAllReduce.py
    │   ├── neuron-inference/
    │   │   ├── Dockerfile
    │   │   └── infer.py
    │   ├── neuron-training/
    │   │   ├── Dockerfile
    │   │   └── train.py
    │   ├── nvidia/
    │   │   ├── Dockerfile
    │   │   └── gpu_unit_tests/
    │   │       ├── README.md
    │   │       ├── bash_unit
    │   │       ├── tests/
    │   │       │   ├── common.sh
    │   │       │   ├── test_basic.sh
    │   │       │   ├── test_sysinfo.sh
    │   │       │   └── test_sysinfo.sh.data/
    │   │       │       ├── g5.48xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       ├── g5.8xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       ├── g5g.2xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       ├── g6f.2xlarge/
    │   │       │       │   ├── efa_count.txt
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   ├── nvidia_smi_topo.txt
    │   │       │       │   └── nvidia_vgpu_license_status.txt
    │   │       │       ├── g6f.4xlarge/
    │   │       │       │   ├── efa_count.txt
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   ├── nvidia_smi_topo.txt
    │   │       │       │   └── nvidia_vgpu_license_status.txt
    │   │       │       ├── g6f.large/
    │   │       │       │   ├── efa_count.txt
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   ├── nvidia_smi_topo.txt
    │   │       │       │   └── nvidia_vgpu_license_status.txt
    │   │       │       ├── g6f.xlarge/
    │   │       │       │   ├── efa_count.txt
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   ├── nvidia_smi_topo.txt
    │   │       │       │   └── nvidia_vgpu_license_status.txt
    │   │       │       ├── p3.16xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       ├── p3.2xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       ├── p4d.24xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       ├── p4de.24xlarge/
    │   │       │       │   ├── gpu_count.txt
    │   │       │       │   ├── numa_topo.txt
    │   │       │       │   ├── nvidia_persistence_status.txt
    │   │       │       │   └── nvidia_smi_topo.txt
    │   │       │       └── p5.48xlarge/
    │   │       │           ├── gpu_count.txt
    │   │       │           ├── numa_topo.txt
    │   │       │           ├── nvidia_persistence_status.txt
    │   │       │           └── nvidia_smi_topo.txt
    │   │       └── unit_test
    │   ├── nvidia-inference/
    │   │   ├── Dockerfile
    │   │   ├── infer.py
    │   │   └── requirements.txt
    │   └── nvidia-training/
    │       ├── Dockerfile
    │       ├── requirements.txt
    │       └── train.py
    └── manifests/
        ├── assets/
        │   ├── cloudwatch-agent.yaml
        │   ├── dcgm-exporter.yaml
        │   ├── dranet.yaml
        │   ├── efa-device-plugin.yaml
        │   ├── k8s-neuron-device-plugin-rbac.yml
        │   ├── k8s-neuron-device-plugin.yml
        │   ├── mpi-operator.yaml
        │   └── nvidia-device-plugin.yaml
        ├── raw.go
        └── rendered.go

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
.git/
.github/
bin/
CHANGELOG/
Dockerfile
Makefile
aws-k8s-tester


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
*Issue #, if available:*

*Description of changes:*


By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.


================================================
FILE: .github/workflows/build-neuron-ci.yaml
================================================
# Builds the Neuron test images on pull requests that touch test/images/neuron*.
# These jobs only verify that the images build; nothing is pushed.
name: "Neuron Images CI"
on:
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
    paths:
      - 'test/images/neuron**'
jobs:
  # Built with the repository root as the Docker build context.
  build-image-neuronx:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: docker build --file test/images/neuron/Dockerfile .
  # Built with the image's own directory as the Docker build context.
  build-image-neuron-training:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: docker build --file test/images/neuron-training/Dockerfile test/images/neuron-training
  # Built with the image's own directory as the Docker build context.
  build-image-neuron-inference:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: docker build --file test/images/neuron-inference/Dockerfile test/images/neuron-inference


================================================
FILE: .github/workflows/build-nvidia-ci.yaml
================================================
# Builds the NVIDIA test images on pull requests that touch test/images/nvidia*.
# These jobs only verify that the images build; nothing is pushed.
name: "Nvidia Images CI"
on:
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
    paths:
      - 'test/images/nvidia**'
jobs:
  # Built with the repository root as the Docker build context.
  build-image-nvidia:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: docker build --file test/images/nvidia/Dockerfile .
  # PYTORCH_BUILD_ENV is passed through as a build arg; MAX_JOBS is derived from
  # the runner's CPU count minus two.
  # NOTE(review): the USE_*=0 switches presumably trim the PyTorch build so it
  # fits on a hosted runner; confirm against the Dockerfile.
  build-image-nvidia-training:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: |
        docker build --file test/images/nvidia-training/Dockerfile test/images/nvidia-training \
          --build-arg PYTORCH_BUILD_ENV="MAX_JOBS=$(($(nproc) - 2)) USE_MKLDNN=0 USE_DISTRIBUTED=0 USE_CUDA=0 USE_ROCM=0 USE_CAFFE2=0 USE_QNNPACK=0 USE_NNPACK=0 USE_XNNPACK=0 USE_MPS=0 BUILD_SHARED_LIBS=OFF USE_FLASH_ATTENTION=0 USE_MEM_EFF_ATTENTION=0 BUILD_TEST=0"
  # Same build-arg set as the training image above.
  build-image-nvidia-inference:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: |
        docker build --file test/images/nvidia-inference/Dockerfile test/images/nvidia-inference \
          --build-arg PYTORCH_BUILD_ENV="MAX_JOBS=$(($(nproc) - 2)) USE_MKLDNN=0 USE_DISTRIBUTED=0 USE_CUDA=0 USE_ROCM=0 USE_CAFFE2=0 USE_QNNPACK=0 USE_NNPACK=0 USE_XNNPACK=0 USE_MPS=0 BUILD_SHARED_LIBS=OFF USE_FLASH_ATTENTION=0 USE_MEM_EFF_ATTENTION=0 BUILD_TEST=0"


================================================
FILE: .github/workflows/ci.yaml
================================================
# Main pull-request CI: builds and unit-tests the Go code, compiles the e2e test
# binaries, and verifies that the tester and EFA images build.
name: "CI"
on:
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
    # The Neuron and NVIDIA image directories are excluded here; they have
    # dedicated workflows with matching `paths` filters.
    paths-ignore:
      - 'test/images/nvidia**'
      - 'test/images/neuron**'
jobs:
  # Compile every package and run the unit tests.
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: go build ./...
    - run: go test ./...
  # Compile (but do not run) the e2e test binaries, which sit behind the `e2e` build tag.
  build-test:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: go test -c -tags=e2e ./test/...
  # Build the top-level tester image.
  build-image:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: docker build --build-arg=KUBERNETES_MINOR_VERSION=latest --file Dockerfile .
  # Build the EFA test image with the repository root as the build context.
  build-image-efa:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/free-disk-space.sh
    - run: docker build --file test/images/efa/Dockerfile .


================================================
FILE: .github/workflows/update-go-dependencies.yaml
================================================
# Runs ./hack/update-go-dependencies.sh on a weekly schedule (or on demand) and
# proposes the resulting changes as a pull request against main.
name: "[CI] update-go-dependencies"
on:
  workflow_dispatch:
  schedule:
    # once a week (Sundays at 00:00 UTC)
    - cron: "0 0 * * 0"
permissions:
  id-token: write
  contents: write
  pull-requests: write
jobs:
  update-dependencies:
    runs-on: ubuntu-latest
    # Only run in the upstream repository, not in forks.
    if: github.repository == 'aws/aws-k8s-tester'
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
      - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # 5.5.0
      - run: |
          ./hack/update-go-dependencies.sh
      # Stage everything in the working tree (add-paths is ".") on the
      # update-go-dependencies branch and open a pull request against main.
      - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # 7.0.8
        with:
          branch: update-go-dependencies
          base: main
          add-paths: |
            .
          commit-message: "chore: update go dependencies"
          committer: "GitHub <noreply@github.com>"
          author: "GitHub <noreply@github.com>"
          title: "chore: update go dependencies"
          body: |
            Generated by:
              ```
              ./hack/update-go-dependencies.sh
              ```


================================================
FILE: .github/workflows/update-image-tags.yaml
================================================
# Runs ./hack/update-image-tags.sh on a weekly schedule (or on demand) and
# proposes the resulting changes under test/images/ as a pull request against main.
name: "[CI] update-image-tags"
on:
  workflow_dispatch:
  schedule:
    # once a week (Sundays at 00:00 UTC)
    - cron: "0 0 * * 0"
permissions:
  id-token: write
  contents: write
  pull-requests: write
jobs:
  update-dependencies:
    runs-on: ubuntu-latest
    # Only run in the upstream repository, not in forks.
    if: github.repository == 'aws/aws-k8s-tester'
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/update-image-tags.sh
    # Only changes under test/images/ are staged for the pull request.
    - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # 7.0.8
      with:
        branch: update-image-tags
        base: main
        add-paths: |
          test/images/
        commit-message: "chore: update image tags"
        committer: "GitHub <noreply@github.com>"
        author: "GitHub <noreply@github.com>"
        title: "chore: update image tags"
        body: |
          Generated by:
          ```
          ./hack/update-image-tags.sh
          ```


================================================
FILE: .github/workflows/update-neuron-dependencies.yaml
================================================
# Runs ./hack/update-neuron-dependencies.sh on a weekly schedule (or on demand)
# and proposes the resulting changes under test/images/ as a pull request against main.
name: "[CI] update-neuron-dependencies"
on:
  workflow_dispatch:
  schedule:
    # once a week (Sundays at 00:00 UTC)
    - cron: "0 0 * * 0"
permissions:
  id-token: write
  contents: write
  pull-requests: write
jobs:
  update-dependencies:
    runs-on: ubuntu-latest
    # Only run in the upstream repository, not in forks.
    if: github.repository == 'aws/aws-k8s-tester'
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: |
        ./hack/update-neuron-dependencies.sh
    # Only changes under test/images/ are staged for the pull request.
    - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # 7.0.8
      with:
        branch: update-neuron-dependencies
        base: main
        add-paths: |
          test/images/
        commit-message: "chore: update neuron dependencies"
        committer: "GitHub <noreply@github.com>"
        author: "GitHub <noreply@github.com>"
        title: "chore: update neuron dependencies"
        body: |
          Generated by:
          ```
          ./hack/update-neuron-dependencies.sh
          ```

          See the following URL for artifacts in the latest Neuron SDK release: https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/releasecontent.html#latest-neuron-release-artifacts


================================================
FILE: .github/workflows/update-nvidia-dependencies.yaml
================================================
# Runs ./hack/update-nvidia-dependencies.sh on a weekly schedule (or on demand)
# and proposes the resulting changes under test/images/ as a pull request against main.
name: "[CI] update-nvidia-dependencies"
on:
  workflow_dispatch:
  schedule:
    # once a week (Sundays at 00:00 UTC)
    - cron: "0 0 * * 0"
permissions:
  id-token: write
  contents: write
  pull-requests: write
jobs:
  update-dependencies:
    runs-on: ubuntu-latest
    # Only run in the upstream repository, not in forks.
    if: github.repository == 'aws/aws-k8s-tester'
    steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
    - run: ./hack/update-nvidia-dependencies.sh
    # Only changes under test/images/ are staged for the pull request.
    - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # 7.0.8
      with:
        branch: update-nvidia-dependencies
        base: main
        add-paths: |
          test/images/
        commit-message: "chore: update nvidia test dependencies"
        committer: "GitHub <noreply@github.com>"
        author: "GitHub <noreply@github.com>"
        title: "chore: update nvidia test dependencies"
        body: |
          Generated by:
          ```
          ./hack/update-nvidia-dependencies.sh
          ```


================================================
FILE: .gitignore
================================================
/.DS_Store
/bin
/_tmp
.idea
*.swp
/aws-k8s-tester
*/*/.DS_Store
*/.DS_Store
/_artifacts
/_rundir


================================================
FILE: .vscode/settings.json
================================================
{
    "git.ignoreLimitWarning": true
}

================================================
FILE: CODE_OF_CONDUCT.md
================================================
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 
opensource-codeofconduct@amazon.com with any additional questions or comments.


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check [existing open](https://github.com/aws/aws-k8s-tester/issues), or [recently closed](https://github.com/aws/aws-k8s-tester/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws/aws-k8s-tester/labels/help%20wanted) issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.


## Licensing

See the [LICENSE](https://github.com/aws/aws-k8s-tester/blob/main/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.

We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.


================================================
FILE: Config
================================================
# This file is for Amazon internal build processes

# Copyright 2025 Amazon.com, Inc. or its affiliates.
# SPDX-License-Identifier: Apache-2.0

package.Aws-k8s-tester-mirror = {
    interfaces = (1.0);

    build-system = bgo-wrap-make;
    build-tools = {
        1.0 = {
            BrazilMakeGo = 3.0;
            GoLang = 1.x;
        };
    };
};


================================================
FILE: Dockerfile
================================================
# Stage 1 (builder): install a Go toolchain and build this repository's binaries,
# the e2e test binaries, and the kubetest2 tooling.
FROM public.ecr.aws/amazonlinux/amazonlinux:2023 AS builder
ARG TARGETOS
ARG TARGETARCH
RUN dnf install -y git tar gzip make unzip gcc rsync wget jq
ARG GO_MINOR_VERSION=1.25
# Take the first release listed by go.dev whose version starts with the requested minor.
RUN curl https://go.dev/dl/?mode=json | jq -r .[].version | grep "^go${GO_MINOR_VERSION}" | head -n1 > go-version.txt
RUN  wget -O go.tar.gz https://go.dev/dl/$(cat go-version.txt).${TARGETOS}-${TARGETARCH}.tar.gz && \
    rm -rf /usr/local/go && \
    tar -C /usr/local -xzf go.tar.gz
# GOPATH points at the Go installation directory, so `go install` output lands in
# /usr/local/go/bin next to the toolchain; the final stage copies that directory.
ENV GOPATH=/usr/local/go
ENV PATH=$PATH:$GOPATH/bin
ENV GOPROXY=direct

WORKDIR $GOPATH/src/github.com/aws/aws-k8s-tester
COPY . .
RUN go install ./...
# Compile the e2e test packages (behind the `e2e` build tag) into test binaries.
RUN go test -c -tags=e2e ./test/... -o $GOPATH/bin/

RUN go install sigs.k8s.io/kubetest2 && \
    go install sigs.k8s.io/kubetest2/kubetest2-tester-exec && \
    go install sigs.k8s.io/kubetest2/kubetest2-tester-ginkgo && \
    go install sigs.k8s.io/hydrophone@latest

# Stage 2 (runtime): CLI tooling plus the binaries built above.
FROM public.ecr.aws/amazonlinux/amazonlinux:2023
ARG TARGETOS
ARG TARGETARCH
WORKDIR /workdir
RUN dnf install -y tar gzip unzip wget openssh diffutils
# The AWS CLI installers are published per machine architecture (x86_64 / aarch64),
# which is what `uname -m` reports, so the download matches the image being built
# instead of always fetching the x86_64 build.
RUN wget -O awscli.zip "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" && \
    unzip awscli.zip && \
    ./aws/install
# we need gsutil from the gcloud CLI for kubetest2-tester-ginkgo
RUN dnf install -y python3.13
ARG GCLOUD_SDK_URL=https://dl.google.com/dl/cloudsdk/channels/rapid/google-cloud-sdk.tar.gz
RUN wget -O google-cloud-sdk.tar.gz -q $GCLOUD_SDK_URL && \
    tar xzf google-cloud-sdk.tar.gz -C / && \
    rm google-cloud-sdk.tar.gz && \
    /google-cloud-sdk/install.sh \
        --disable-installation-options \
        --bash-completion=false \
        --path-update=false \
        --usage-reporting=false
ENV PATH=$PATH:/google-cloud-sdk/bin
ARG EKSCTL_VERSION=latest
RUN wget -O eksctl.tar.gz "https://github.com/eksctl-io/eksctl/releases/${EKSCTL_VERSION}/download/eksctl_Linux_${TARGETARCH}.tar.gz" && \
    tar xzf eksctl.tar.gz -C /bin/ && \
    rm eksctl.tar.gz
ARG HELM_VERSION=v4.1.4
RUN wget -O helm.tar.gz "https://get.helm.sh/helm-${HELM_VERSION}-${TARGETOS}-${TARGETARCH}.tar.gz" && \
    tar xzf helm.tar.gz --strip-components=1 -C /bin/ "${TARGETOS}-${TARGETARCH}/helm" && \
    rm helm.tar.gz
# KUBERNETES_MINOR_VERSION has no default; it is supplied with --build-arg.
ARG KUBERNETES_MINOR_VERSION
COPY hack/download-kubernetes-binaries.sh .
RUN ./download-kubernetes-binaries.sh "${KUBERNETES_MINOR_VERSION}" "${TARGETOS}" "${TARGETARCH}"
# Keep the Kubernetes version file under /info before the work directory is removed.
RUN mkdir /info
ENV PATH=$PATH:/info
RUN cp kubernetes-version.txt /info/
RUN mv kubernetes/*/bin/* /bin/
RUN rm -rf /workdir
# Copies everything in the builder's /usr/local/go/bin: the Go toolchain binaries
# and everything `go install` / `go test -c` wrote there.
COPY --from=builder /usr/local/go/bin/* /bin/


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: Makefile
================================================
include ${BGO_MAKEFILE}

# pre-release compiles (without running) the tests under ./test/... with the
# `e2e` build tag, passing $(GOBIN) as the output location, and then installs
# the latest kubetest2 binaries.
# NOTE(review): this is a double-colon rule; presumably the included
# ${BGO_MAKEFILE} contributes further `pre-release::` recipes — confirm.
pre-release::
	go test -c -tags=e2e ./test/... -o $(GOBIN)
	go install sigs.k8s.io/kubetest2/...@latest

# update-deps runs every script matching ./hack/update-*.sh, one after another.
# NOTE(review): unless the recipe shell runs with `-e`, a failing script does
# not stop the loop and only the last script's exit status reaches make —
# confirm whether this best-effort behavior is intended.
update-deps:
	for SCRIPT in ./hack/update-*.sh; do \
    	"$$SCRIPT" ; \
	done

# test-integration runs `go test -v` over all packages (./...) with the
# `integration` build tag enabled.
.PHONY: test-integration
test-integration: ## Run unit and integration tests
	go test -v -tags=integration ./...


================================================
FILE: NOTICE
================================================
Awstester
Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 


================================================
FILE: README.md
================================================
# Tools for testing Kubernetes on AWS

## Installation

This project will use rolling releases going forward; we recommend fetching the latest commit:
```
go install github.com/aws/aws-k8s-tester/...@HEAD
```

You'll need the standard `kubetest2` tools as well:
```
go install sigs.k8s.io/kubetest2/...@latest
```

## `kubetest2` deployers and testers for EKS


### Usage

**Auto-detect cluster version**

The deployers will search for a file called `kubernetes-version.txt` on your `PATH`.
This file should contain a valid tag for a Kubernetes release.
The `--kubernetes-version` flag can be omitted if this file exists.

---

### `eksctl` deployer

This deployer is a thin wrapper around `eksctl`.

The simplest usage is:
```
kubetest2 \
  eksctl \
  --kubernetes-version=X.XX \
  --up \
  --down \
  --test=exec \
  -- echo "Hello world"
```

**Additional flags**

- `--instance-types` - Comma-separated list of instance types to use for nodes
- `--ami` - AMI ID for nodes
- `--nodes` - Number of nodes
- `--region` - AWS region
- `--config-file` - Path to eksctl config file (**if provided, other flags are ignored**)
- `--availability-zones` - Node availability zones
- `--ami-family` - AMI family to use: `AmazonLinux2023` | `Bottlerocket`
- `--efa-enabled` - Enable Elastic Fabric Adapter for the nodegroup
- `--volume-size` - Size of the node root volume in GB
- `--private-networking` - Use private networking for nodes
- `--with-oidc` - Enable OIDC provider for IAM roles for service accounts
- `--deploy-target` - The target to deploy: `cluster` | `nodegroup` (defaults to `cluster`)
- `--cluster-name` - Name of the EKS cluster (defaults to RunID if not specified)
- `--unmanaged-nodegroup` - Use unmanaged nodegroup instead of managed nodegroup
- `--nodegroup-name` - Name of the nodegroup (defaults to `ng-1`)

---

### `eksapi` deployer

This deployer calls the EKS API directly, instead of using CloudFormation for EKS resources.

The simplest usage is:
```
kubetest2 \
  eksapi \
  --kubernetes-version=X.XX \
  --up \
  --down \
  --test=exec \
  -- echo "Hello world"
```

**Additional flags**

- `--instance-types` - Comma-separated list of instance types to use for nodes
- `--ami` - AMI ID for nodes
- `--nodes` - Number of nodes
- `--region` - AWS region
- `--endpoint-url` - Override the EKS endpoint URL
- `--cluster-role-service-principal` - Additional service principal that can assume the cluster IAM role

---

### `multi` tester

This tester wraps multiple executions of other testers.

Tester argument groups are separated by `--`, with the first group being passed to the `multi` tester itself.

The first positional argument of each subsequent group should be the name of a tester.

```
kubetest2 \
  noop \
  --test=multi \
  -- \
  --fail-fast=true \
  -- \
  ginkgo \
  --focus-regex='\[Conformance\]' \
  --parallel=4 \
  -- \
  exec \
  go test ./my/test/package
```


================================================
FILE: bmg.json
================================================
{
  "binary_artifacts_only": true
}


================================================
FILE: cmd/kubetest2-eksapi/main.go
================================================
package main

import (
	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi"
	"sigs.k8s.io/kubetest2/pkg/app"
)

// main is the entry point of the kubetest2-eksapi binary. It hands the eksapi
// deployer's name (eksapi.DeployerName) and its eksapi.NewDeployer function to
// kubetest2's app.Main.
func main() {
	app.Main(eksapi.DeployerName, eksapi.NewDeployer)
}


================================================
FILE: cmd/kubetest2-eksapi-janitor/main.go
================================================
package main

import (
	"context"
	"flag"
	"log/slog"
	"os"
	"time"

	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi"
)

// main is the entry point of the kubetest2-eksapi-janitor binary. It parses
// the command-line flags, builds a janitor from them with eksapi.NewJanitor,
// and runs a single Sweep. If the sweep returns an error, the error is logged
// and the process exits with status 1.
//
// Flags:
//
//	-max-resource-age  maximum resource age (default 3h)
//	-workers           number of workers to process resources in parallel (default 1)
//	-stack-status      only process stacks with a specific status (default "")
//	-emit-metrics      send metrics to CloudWatch (default false)
func main() {
	var (
		maxResourceAge time.Duration
		workers        int
		stackStatus    string
		emitMetrics    bool
	)
	flag.DurationVar(&maxResourceAge, "max-resource-age", time.Hour*3, "Maximum resource age")
	flag.IntVar(&workers, "workers", 1, "number of workers to process resources in parallel")
	flag.StringVar(&stackStatus, "stack-status", "", "only process stacks with a specific status")
	flag.BoolVar(&emitMetrics, "emit-metrics", false, "Send metrics to CloudWatch")
	flag.Parse()

	j := eksapi.NewJanitor(maxResourceAge, emitMetrics, workers, stackStatus)
	if err := j.Sweep(context.Background()); err != nil {
		slog.Error("failed to sweep resources", "error", err)
		// Exit non-zero so that callers (e.g. CI jobs) can detect the failure.
		os.Exit(1)
	}
}


================================================
FILE: cmd/kubetest2-eksctl/main.go
================================================
package main

import (
	"github.com/aws/aws-k8s-tester/internal/deployers/eksctl"
	"sigs.k8s.io/kubetest2/pkg/app"
)

// main is the entry point of the kubetest2-eksctl binary. It hands the eksctl
// deployer's name (eksctl.DeployerName) and its eksctl.NewDeployer function to
// kubetest2's app.Main.
func main() {
	app.Main(eksctl.DeployerName, eksctl.NewDeployer)
}


================================================
FILE: cmd/kubetest2-tester-ginkgo-v1/main.go
================================================
package main

import (
	"github.com/aws/aws-k8s-tester/internal/testers/ginkgov1"
)

// main is the entry point of the kubetest2-tester-ginkgo-v1 binary; it
// delegates all work to ginkgov1.Main.
func main() {
	ginkgov1.Main()
}


================================================
FILE: cmd/kubetest2-tester-multi/main.go
================================================
package main

import "github.com/aws/aws-k8s-tester/internal/testers/multi"

// main is the entry point of the kubetest2-tester-multi binary; it delegates
// all work to multi.Main.
func main() {
	multi.Main()
}


================================================
FILE: external/tools.go
================================================
//go:build tools
// +build tools

package external

// This file lets us declare direct dependencies on the external tools we require.
// It is not expected to compile; the `tools` build tag keeps it out of normal builds.

import (
	_ "sigs.k8s.io/kubetest2"
	_ "sigs.k8s.io/kubetest2/kubetest2-tester-exec"
	_ "sigs.k8s.io/kubetest2/kubetest2-tester-ginkgo"
)


================================================
FILE: go.mod
================================================
module github.com/aws/aws-k8s-tester

go 1.25.5

require (
	github.com/aws/aws-sdk-go v1.55.8
	github.com/aws/aws-sdk-go-v2 v1.41.1
	github.com/aws/aws-sdk-go-v2/config v1.32.7
	github.com/aws/aws-sdk-go-v2/service/autoscaling v1.62.5
	github.com/aws/aws-sdk-go-v2/service/cloudformation v1.71.5
	github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.53.1
	github.com/aws/aws-sdk-go-v2/service/ec2 v1.279.1
	github.com/aws/aws-sdk-go-v2/service/eks v1.76.4
	github.com/aws/aws-sdk-go-v2/service/iam v1.53.2
	github.com/aws/aws-sdk-go-v2/service/s3 v1.95.1
	github.com/aws/smithy-go v1.24.0
	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
	github.com/spf13/pflag v1.0.10
	github.com/stretchr/testify v1.11.1
	golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93
	k8s.io/api v0.35.0
	k8s.io/apimachinery v0.35.0
	k8s.io/client-go v0.35.0
	k8s.io/klog v1.0.0
	k8s.io/klog/v2 v2.130.1
	sigs.k8s.io/controller-runtime v0.22.4
	sigs.k8s.io/karpenter v1.8.0
	sigs.k8s.io/kubetest2 v0.0.0-20260108084739-2f9a9397f033
)

require (
	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect
	github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 // indirect
	github.com/awslabs/operatorpkg v0.0.0-20250909182303-e8e550b6f339 // indirect
	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
	github.com/go-logr/zapr v1.3.0 // indirect
	github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect
	github.com/pkg/errors v0.9.1
	github.com/robfig/cron/v3 v3.0.1 // indirect
	github.com/samber/lo v1.51.0 // indirect
	github.com/x448/float16 v0.8.4 // indirect
	golang.org/x/crypto v0.46.0 // indirect
	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
	k8s.io/apiextensions-apiserver v0.34.1 // indirect
)

require (
	cloud.google.com/go v0.121.2 // indirect
	cloud.google.com/go/compute/metadata v0.9.0 // indirect
	cloud.google.com/go/iam v1.5.2 // indirect
	cloud.google.com/go/storage v1.53.0 // indirect
	cuelabs.dev/go/oci/ociregistry v0.0.0-20240404174027-a39bec0462d2 // indirect
	cuelang.org/go v0.9.2 // indirect
	dario.cat/mergo v1.0.2 // indirect
	filippo.io/edwards25519 v1.1.0 // indirect
	github.com/Azure/azure-sdk-for-go v68.0.0+incompatible // indirect
	github.com/Azure/go-autorest v14.2.0+incompatible // indirect
	github.com/Azure/go-autorest/autorest v0.11.29 // indirect
	github.com/Azure/go-autorest/autorest/adal v0.9.23 // indirect
	github.com/Azure/go-autorest/autorest/azure/auth v0.5.12 // indirect
	github.com/Azure/go-autorest/autorest/azure/cli v0.4.6 // indirect
	github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
	github.com/Azure/go-autorest/logger v0.2.1 // indirect
	github.com/Azure/go-autorest/tracing v0.6.0 // indirect
	github.com/MakeNowJust/heredoc/v2 v2.0.1 // indirect
	github.com/Microsoft/go-winio v0.6.2 // indirect
	github.com/ProtonMail/go-crypto v1.1.6 // indirect
	github.com/ThalesIgnite/crypto11 v1.2.5 // indirect
	github.com/agnivade/levenshtein v1.2.1 // indirect
	github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4 // indirect
	github.com/alibabacloud-go/cr-20160607 v1.0.1 // indirect
	github.com/alibabacloud-go/cr-20181201 v1.0.10 // indirect
	github.com/alibabacloud-go/darabonba-openapi v0.2.1 // indirect
	github.com/alibabacloud-go/debug v1.0.0 // indirect
	github.com/alibabacloud-go/endpoint-util v1.1.1 // indirect
	github.com/alibabacloud-go/openapi-util v0.1.0 // indirect
	github.com/alibabacloud-go/tea v1.2.2 // indirect
	github.com/alibabacloud-go/tea-utils v1.4.5 // indirect
	github.com/alibabacloud-go/tea-xml v1.1.3 // indirect
	github.com/aliyun/credentials-go v1.3.2 // indirect
	github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
	github.com/aws/aws-sdk-go-v2/credentials v1.19.7 // indirect
	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect
	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect
	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect
	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
	github.com/aws/aws-sdk-go-v2/service/ecr v1.36.2 // indirect
	github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.27.2 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect
	github.com/aws/aws-sdk-go-v2/service/ssm v1.67.8
	github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 // indirect
	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect
	github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect
	github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20240318154307-a1a918375412 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/blang/semver v3.5.1+incompatible // indirect
	github.com/blang/semver/v4 v4.0.0 // indirect
	github.com/buildkite/agent/v3 v3.81.0 // indirect
	github.com/buildkite/go-pipeline v0.13.1 // indirect
	github.com/buildkite/interpolate v0.1.3 // indirect
	github.com/buildkite/roko v1.2.0 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589 // indirect
	github.com/clbanning/mxj/v2 v2.7.0 // indirect
	github.com/cloudflare/circl v1.6.3 // indirect
	github.com/cockroachdb/apd/v3 v3.2.1 // indirect
	github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be // indirect
	github.com/containerd/stargz-snapshotter/estargz v0.18.1 // indirect
	github.com/coreos/go-oidc/v3 v3.17.0 // indirect
	github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f // indirect
	github.com/cyphar/filepath-securejoin v0.4.1 // indirect
	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
	github.com/digitorus/pkcs7 v0.0.0-20230818184609-3a137a874352 // indirect
	github.com/digitorus/timestamp v0.0.0-20231217203849-220c5c2851b7 // indirect
	github.com/dimchansky/utfbom v1.1.1 // indirect
	github.com/docker/cli v29.0.3+incompatible // indirect
	github.com/docker/distribution v2.8.3+incompatible // indirect
	github.com/docker/docker-credential-helpers v0.9.3 // indirect
	github.com/dustin/go-humanize v1.0.1 // indirect
	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
	github.com/emicklei/proto v1.13.2 // indirect
	github.com/emirpasic/gods v1.18.1 // indirect
	github.com/felixge/httpsnoop v1.0.4 // indirect
	github.com/fsnotify/fsnotify v1.9.0 // indirect
	github.com/glebarez/go-sqlite v1.22.0 // indirect
	github.com/go-chi/chi v4.1.2+incompatible // indirect
	github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
	github.com/go-git/go-billy/v5 v5.8.0 // indirect
	github.com/go-git/go-git/v5 v5.17.1 // indirect
	github.com/go-ini/ini v1.67.0 // indirect
	github.com/go-jose/go-jose/v3 v3.0.4 // indirect
	github.com/go-logr/logr v1.4.3 // indirect
	github.com/go-logr/stdr v1.2.2 // indirect
	github.com/go-ole/go-ole v1.3.0 // indirect
	github.com/go-openapi/analysis v0.23.0 // indirect
	github.com/go-openapi/errors v0.22.1 // indirect
	github.com/go-openapi/jsonpointer v0.21.0 // indirect
	github.com/go-openapi/jsonreference v0.21.0 // indirect
	github.com/go-openapi/loads v0.22.0 // indirect
	github.com/go-openapi/runtime v0.28.0 // indirect
	github.com/go-openapi/spec v0.21.0 // indirect
	github.com/go-openapi/strfmt v0.23.0 // indirect
	github.com/go-openapi/swag v0.23.0 // indirect
	github.com/go-openapi/validate v0.24.0 // indirect
	github.com/go-piv/piv-go v1.11.0 // indirect
	github.com/gobwas/glob v0.2.3 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
	github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
	github.com/golang/snappy v0.0.4 // indirect
	github.com/google/certificate-transparency-go v1.3.2-0.20250507091337-0eddb39e94f8 // indirect
	github.com/google/gnostic-models v0.7.0 // indirect
	github.com/google/go-cmp v0.7.0 // indirect
	github.com/google/go-containerregistry v0.20.7 // indirect
	github.com/google/go-github/v55 v55.0.0 // indirect
	github.com/google/go-querystring v1.1.0 // indirect
	github.com/google/licenseclassifier/v2 v2.0.0 // indirect
	github.com/google/s2a-go v0.1.9 // indirect
	github.com/google/uuid v1.6.0 // indirect
	github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
	github.com/googleapis/gax-go/v2 v2.15.0 // indirect
	github.com/gorilla/mux v1.8.1 // indirect
	github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
	github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
	github.com/in-toto/in-toto-golang v0.9.0 // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
	github.com/jedisct1/go-minisign v0.0.0-20230811132847-661be99b8267 // indirect
	github.com/jellydator/ttlcache/v3 v3.3.0 // indirect
	github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 // indirect
	github.com/josharian/intern v1.0.0 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/kevinburke/ssh_config v1.2.0 // indirect
	github.com/klauspost/compress v1.18.1 // indirect
	github.com/knqyf263/go-rpmdb v0.1.0 // indirect
	github.com/mailru/easyjson v0.9.0 // indirect
	github.com/mattn/go-isatty v0.0.20 // indirect
	github.com/miekg/pkcs11 v1.1.1 // indirect
	github.com/mitchellh/go-homedir v1.1.0 // indirect
	github.com/mitchellh/go-wordwrap v1.0.1 // indirect
	github.com/mitchellh/mapstructure v1.5.0 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
	github.com/mozillazg/docker-credential-acr-helper v0.4.0 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/ncruces/go-strftime v0.1.9 // indirect
	github.com/nozzle/throttler v0.0.0-20180817012639-2ea982251481 // indirect
	github.com/oklog/ulid v1.3.1 // indirect
	github.com/oleiade/reflections v1.1.0 // indirect
	github.com/open-policy-agent/opa v1.4.0 // indirect
	github.com/opencontainers/go-digest v1.0.0 // indirect
	github.com/opencontainers/image-spec v1.1.1 // indirect
	github.com/opentracing/opentracing-go v1.2.0 // indirect
	github.com/package-url/packageurl-go v0.1.2 // indirect
	github.com/pborman/uuid v1.2.1 // indirect
	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
	github.com/pjbgf/sha1cd v0.3.2 // indirect
	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
	github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
	github.com/prometheus/client_golang v1.23.2 // indirect
	github.com/prometheus/client_model v0.6.2 // indirect
	github.com/prometheus/common v0.66.1 // indirect
	github.com/prometheus/procfs v0.17.0 // indirect
	github.com/protocolbuffers/txtpbfmt v0.0.0-20240116145035-ef3ab179eed6 // indirect
	github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect
	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
	github.com/rogpeppe/go-internal v1.14.1 // indirect
	github.com/sagikazarmark/locafero v0.9.0 // indirect
	github.com/sassoftware/relic v7.2.1+incompatible // indirect
	github.com/secure-systems-lab/go-securesystemslib v0.10.0 // indirect
	github.com/sergi/go-diff v1.4.0 // indirect
	github.com/shibumi/go-pathspec v1.3.0 // indirect
	github.com/shirou/gopsutil/v3 v3.24.5 // indirect
	github.com/sigstore/cosign/v2 v2.4.1 // indirect
	github.com/sigstore/fulcio v1.6.3 // indirect
	github.com/sigstore/rekor v1.3.9 // indirect
	github.com/sigstore/sigstore v1.10.3 // indirect
	github.com/sigstore/timestamp-authority v1.2.2 // indirect
	github.com/sirupsen/logrus v1.9.3 // indirect
	github.com/skeema/knownhosts v1.3.1 // indirect
	github.com/sourcegraph/conc v0.3.0 // indirect
	github.com/spf13/afero v1.15.0 // indirect
	github.com/spf13/cast v1.9.2 // indirect
	github.com/spf13/cobra v1.10.2 // indirect
	github.com/spf13/viper v1.20.1 // indirect
	github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect
	github.com/subosito/gotenv v1.6.0 // indirect
	github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d // indirect
	github.com/tchap/go-patricia/v2 v2.3.2 // indirect
	github.com/thales-e-security/pool v0.0.2 // indirect
	github.com/theupdateframework/go-tuf v0.7.0 // indirect
	github.com/tjfoc/gmsm v1.4.1 // indirect
	github.com/transparency-dev/merkle v0.0.2 // indirect
	github.com/vbatts/tar-split v0.12.2 // indirect
	github.com/xanzy/go-gitlab v0.109.0 // indirect
	github.com/xanzy/ssh-agent v0.3.3 // indirect
	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
	github.com/yashtewari/glob-intersection v0.2.0 // indirect
	github.com/yusufpapurcu/wmi v1.2.4 // indirect
	gitlab.alpinelinux.org/alpine/go v0.10.0 // indirect
	go.mongodb.org/mongo-driver v1.17.2 // indirect
	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect
	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
	go.opentelemetry.io/otel v1.39.0 // indirect
	go.opentelemetry.io/otel/metric v1.39.0 // indirect
	go.opentelemetry.io/otel/sdk v1.39.0 // indirect
	go.opentelemetry.io/otel/trace v1.39.0 // indirect
	go.step.sm/crypto v0.57.0 // indirect
	go.uber.org/multierr v1.11.0 // indirect
	go.uber.org/zap v1.27.0 // indirect
	golang.org/x/mod v0.31.0 // indirect
	golang.org/x/net v0.48.0 // indirect
	golang.org/x/oauth2 v0.34.0 // indirect
	golang.org/x/sync v0.19.0 // indirect
	golang.org/x/sys v0.39.0 // indirect
	golang.org/x/term v0.38.0 // indirect
	golang.org/x/text v0.32.0 // indirect
	golang.org/x/time v0.13.0 // indirect
	golang.org/x/tools/go/vcs v0.1.0-deprecated // indirect
	golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect
	google.golang.org/api v0.242.0 // indirect
	google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect
	google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect
	google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
	google.golang.org/grpc v1.79.3 // indirect
	google.golang.org/protobuf v1.36.10 // indirect
	gopkg.in/inf.v0 v0.9.1 // indirect
	gopkg.in/ini.v1 v1.67.0 // indirect
	gopkg.in/warnings.v0 v0.1.2 // indirect
	gopkg.in/yaml.v2 v2.4.0 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
	k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
	k8s.io/release v0.18.0 // indirect
	k8s.io/utils v0.0.0-20260108192941-914a6e750570
	modernc.org/libc v1.45.2 // indirect
	modernc.org/mathutil v1.6.0 // indirect
	modernc.org/memory v1.7.2 // indirect
	modernc.org/sqlite v1.29.5 // indirect
	sigs.k8s.io/bom v0.6.0 // indirect
	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
	sigs.k8s.io/promo-tools/v3 v3.6.0 // indirect
	sigs.k8s.io/release-sdk v0.12.2 // indirect
	sigs.k8s.io/release-utils v0.12.0 // indirect
	sigs.k8s.io/yaml v1.6.0
)

require (
	github.com/urfave/sflags v0.4.1
	github.com/weaveworks/eksctl v0.221.0
	k8s.io/cli-runtime v0.35.0
	k8s.io/cloud-provider-aws v1.35.0
	sigs.k8s.io/e2e-framework v0.6.1-0.20250909060333-8677714ff9a6 // bump version once https://github.com/kubernetes-sigs/e2e-framework/pull/517 gets released
)

require (
	cel.dev/expr v0.25.1 // indirect
	cloud.google.com/go/auth v0.16.5 // indirect
	cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
	cloud.google.com/go/monitoring v1.24.2 // indirect
	github.com/AliyunContainerService/ack-ram-tool/pkg/credentials/provider v0.14.0 // indirect
	github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect
	github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect
	github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect
	github.com/avast/retry-go/v4 v4.6.1 // indirect
	github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.55.1 // indirect
	github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.61.1 // indirect
	github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing v1.33.15 // indirect
	github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.54.2 // indirect
	github.com/aws/aws-sdk-go-v2/service/kms v1.47.1 // indirect
	github.com/aws/aws-sdk-go-v2/service/outposts v1.57.8 // indirect
	github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect
	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
	github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect
	github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect
	github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect
	github.com/fatih/color v1.18.0 // indirect
	github.com/go-errors/errors v1.5.1 // indirect
	github.com/go-jose/go-jose/v4 v4.1.3 // indirect
	github.com/go-resty/resty/v2 v2.16.5 // indirect
	github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
	github.com/google/btree v1.1.3 // indirect
	github.com/google/go-github/v60 v60.0.0 // indirect
	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
	github.com/hashicorp/go-version v1.7.0 // indirect
	github.com/in-toto/attestation v1.1.0 // indirect
	github.com/kris-nova/logger v0.2.2 // indirect
	github.com/mattn/go-colorable v0.1.14 // indirect
	github.com/mattn/go-runewidth v0.0.16 // indirect
	github.com/miekg/dns v1.1.61 // indirect
	github.com/moby/spdystream v0.5.0 // indirect
	github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
	github.com/octago/sflags v0.3.1 // indirect
	github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6 // indirect
	github.com/olekukonko/ll v0.0.8 // indirect
	github.com/olekukonko/tablewriter v1.0.8 // indirect
	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
	github.com/rivo/uniseg v0.4.7 // indirect
	github.com/sigstore/protobuf-specs v0.5.0 // indirect
	github.com/sigstore/sigstore-go v0.6.1 // indirect
	github.com/stretchr/objx v0.5.2 // indirect
	github.com/theupdateframework/go-tuf/v2 v2.3.1 // indirect
	github.com/vladimirvivien/gexe v0.5.0 // indirect
	github.com/xlab/treeprint v1.2.0 // indirect
	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
	go.opentelemetry.io/contrib/detectors/gcp v1.39.0 // indirect
	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
	go.opentelemetry.io/otel/sdk/metric v1.39.0 // indirect
	go.yaml.in/yaml/v2 v2.4.3 // indirect
	go.yaml.in/yaml/v3 v3.0.4 // indirect
	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
	gopkg.in/gcfg.v1 v1.2.3 // indirect
	k8s.io/cloud-provider v0.35.0 // indirect
	k8s.io/component-base v0.35.0 // indirect
	k8s.io/kubelet v0.35.0 // indirect
	sigs.k8s.io/kustomize/api v0.20.1 // indirect
	sigs.k8s.io/kustomize/kyaml v0.20.1 // indirect
	sigs.k8s.io/randfill v1.0.0 // indirect
	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
)


================================================
FILE: go.sum
================================================
cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4=
cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.121.2 h1:v2qQpN6Dx9x2NmwrqlesOt3Ys4ol5/lFZ6Mg1B7OJCg=
cloud.google.com/go v0.121.2/go.mod h1:nRFlrHq39MNVWu+zESP2PosMWA0ryJw8KUBZ2iZpxbw=
cloud.google.com/go/auth v0.16.5 h1:mFWNQ2FEVWAliEQWpAdH80omXFokmrnbDhUS9cBywsI=
cloud.google.com/go/auth v0.16.5/go.mod h1:utzRfHMP+Vv0mpOkTRQoWD2q3BatTOoWbA7gCc2dUhQ=
cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc=
cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c=
cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
cloud.google.com/go/iam v1.5.2 h1:qgFRAGEmd8z6dJ/qyEchAuL9jpswyODjA2lS+w234g8=
cloud.google.com/go/iam v1.5.2/go.mod h1:SE1vg0N81zQqLzQEwxL2WI6yhetBdbNQuTvIKCSkUHE=
cloud.google.com/go/kms v1.22.0 h1:dBRIj7+GDeeEvatJeTB19oYZNV0aj6wEqSIT/7gLqtk=
cloud.google.com/go/kms v1.22.0/go.mod h1:U7mf8Sva5jpOb4bxYZdtw/9zsbIjrklYwPcvMk34AL8=
cloud.google.com/go/logging v1.13.0 h1:7j0HgAp0B94o1YRDqiqm26w4q1rDMH7XNRU34lJXHYc=
cloud.google.com/go/logging v1.13.0/go.mod h1:36CoKh6KA/M0PbhPKMq6/qety2DCAErbhXT62TuXALA=
cloud.google.com/go/longrunning v0.6.7 h1:IGtfDWHhQCgCjwQjV9iiLnUta9LBCo8R9QmAFsS/PrE=
cloud.google.com/go/longrunning v0.6.7/go.mod h1:EAFV3IZAKmM56TyiE6VAP3VoTzhZzySwI/YI1s/nRsY=
cloud.google.com/go/monitoring v1.24.2 h1:5OTsoJ1dXYIiMiuL+sYscLc9BumrL3CarVLL7dd7lHM=
cloud.google.com/go/monitoring v1.24.2/go.mod h1:x7yzPWcgDRnPEv3sI+jJGBkwl5qINf+6qY4eq0I9B4U=
cloud.google.com/go/storage v1.53.0 h1:gg0ERZwL17pJ+Cz3cD2qS60w1WMDnwcm5YPAIQBHUAw=
cloud.google.com/go/storage v1.53.0/go.mod h1:7/eO2a/srr9ImZW9k5uufcNahT2+fPb8w5it1i5boaA=
cloud.google.com/go/trace v1.11.6 h1:2O2zjPzqPYAHrn3OKl029qlqG6W8ZdYaOWRyr8NgMT4=
cloud.google.com/go/trace v1.11.6/go.mod h1:GA855OeDEBiBMzcckLPE2kDunIpC72N+Pq8WFieFjnI=
cuelabs.dev/go/oci/ociregistry v0.0.0-20240404174027-a39bec0462d2 h1:BnG6pr9TTr6CYlrJznYUDj6V7xldD1W+1iXPum0wT/w=
cuelabs.dev/go/oci/ociregistry v0.0.0-20240404174027-a39bec0462d2/go.mod h1:pK23AUVXuNzzTpfMCA06sxZGeVQ/75FdVtW249de9Uo=
cuelang.org/go v0.9.2 h1:pfNiry2PdRBr02G/aKm5k2vhzmqbAOoaB4WurmEbWvs=
cuelang.org/go v0.9.2/go.mod h1:qpAYsLOf7gTM1YdEg6cxh553uZ4q9ZDWlPbtZr9q1Wk=
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA=
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/AdamKorcz/go-fuzz-headers-1 v0.0.0-20230919221257-8b5d3ce2d11d h1:zjqpY4C7H15HjRPEenkS4SAn3Jy2eRRjkjZbGR30TOg=
github.com/AdamKorcz/go-fuzz-headers-1 v0.0.0-20230919221257-8b5d3ce2d11d/go.mod h1:XNqJ7hv2kY++g8XEHREpi+JqZo3+0l+CH2egBVN4yqM=
github.com/AliyunContainerService/ack-ram-tool/pkg/credentials/provider v0.14.0 h1:kcnfY4vljxXliXDBrA9K9lwF8IoEZ4Up6Eg9kWTIm28=
github.com/AliyunContainerService/ack-ram-tool/pkg/credentials/provider v0.14.0/go.mod h1:tlqp9mUGbsP+0z3Q+c0Q5MgSdq/OMwQhm5bffR3Q3ss=
github.com/Azure/azure-sdk-for-go v68.0.0+incompatible h1:fcYLmCpyNYRnvJbPerq7U0hS+6+I79yEDJBqVNcqUzU=
github.com/Azure/azure-sdk-for-go v68.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 h1:Gt0j3wceWMwPmiazCa8MzMA0MfhmPIz0Qp0FJ6qcM0U=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0/go.mod h1:Ot/6aikWnKWi4l9QB7qVSwa8iMphQNqkWALMoNT3rzM=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1 h1:B+blDbyVIG3WaikNxPnhPiJ1MThR03b3vKGtER95TP4=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1/go.mod h1:JdM5psgjfBf5fo2uWOZhflPWyDBZ/O/CNAH9CtsuZE4=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1 h1:FPKJS1T+clwv+OLGt13a8UjqeRuh0O4SJ3lUriThc+4=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1/go.mod h1:j2chePtV91HrC22tGoRX3sGY42uF13WzmmV80/OdVAA=
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azkeys v1.3.0 h1:7rKG7UmnrxX4N53TFhkYqjc+kVUZuw0fL8I3Fh+Ld9E=
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azkeys v1.3.0/go.mod h1:Wjo+24QJVhhl/L7jy6w9yzFF2yDOf3cKECAa8ecf9vE=
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.0 h1:eXnN9kaS8TiDwXjoie3hMRLuwdUBUMW9KRgOqB3mCaw=
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.0/go.mod h1:XIpam8wumeZ5rVMuhdDQLMfIPDf1WO3IzrCRO3e3e3o=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest/autorest v0.11.24/go.mod h1:G6kyRlFnTuSbEYkQGawPfsCswgme4iYf6rfSKUDzbCc=
github.com/Azure/go-autorest/autorest v0.11.29 h1:I4+HL/JDvErx2LjyzaVxllw2lRDB5/BT2Bm4g20iqYw=
github.com/Azure/go-autorest/autorest v0.11.29/go.mod h1:ZtEzC4Jy2JDrZLxvWs8LrBWEBycl1hbT1eknI8MtfAs=
github.com/Azure/go-autorest/autorest/adal v0.9.18/go.mod h1:XVVeme+LZwABT8K5Lc3hA4nAe8LDBVle26gTrguhhPQ=
github.com/Azure/go-autorest/autorest/adal v0.9.22/go.mod h1:XuAbAEUv2Tta//+voMI038TrJBqjKam0me7qR+L8Cmk=
github.com/Azure/go-autorest/autorest/adal v0.9.23 h1:Yepx8CvFxwNKpH6ja7RZ+sKX+DWYNldbLiALMC3BTz8=
github.com/Azure/go-autorest/autorest/adal v0.9.23/go.mod h1:5pcMqFkdPhviJdlEy3kC/v1ZLnQl0MH6XA5YCcMhy4c=
github.com/Azure/go-autorest/autorest/azure/auth v0.5.12 h1:wkAZRgT/pn8HhFyzfe9UnqOjJYqlembgCTi72Bm/xKk=
github.com/Azure/go-autorest/autorest/azure/auth v0.5.12/go.mod h1:84w/uV8E37feW2NCJ08uT9VBfjfUHpgLVnG2InYD6cg=
github.com/Azure/go-autorest/autorest/azure/cli v0.4.5/go.mod h1:ADQAXrkgm7acgWVUNamOgh8YNrv4p27l3Wc55oVfpzg=
github.com/Azure/go-autorest/autorest/azure/cli v0.4.6 h1:w77/uPk80ZET2F+AfQExZyEWtn+0Rk/uw17m9fv5Ajc=
github.com/Azure/go-autorest/autorest/azure/cli v0.4.6/go.mod h1:piCfgPho7BiIDdEQ1+g4VmKyD5y+p/XtSNqE6Hc4QD0=
github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw=
github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=
github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
github.com/Azure/go-autorest/autorest/mocks v0.4.2 h1:PGN4EDXnuQbojHbU0UWoNvmu9AGVwYHG9/fkDYhtAfw=
github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU=
github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+ZtXWSmf4Tg=
github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo=
github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 h1:oygO0locgZJe7PpYPXT5A29ZkwJaPqcva7BVeemZOZs=
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 h1:fYE9p3esPxA/C0rQ0AHhP0drtPXDRhaWiwg1DPqO7IU=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0/go.mod h1:BnBReJLvVYx2CS/UHOgVz2BXKXD9wsQPxZug20nZhd0=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0 h1:OqVGm6Ei3x5+yZmSJG1Mh2NwHvpVmZ08CB5qJhT9Nuk=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0/go.mod h1:SZiPHWGOOk3bl8tkevxkoiwPgsIl6CwrWcbwjfHZpdM=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 h1:6/0iUd0xrnX7qt+mLNRwg5c0PGv8wpE8K90ryANQwMI=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0/go.mod h1:otE2jQekW/PqXk1Awf5lmfokJx4uwuqcj1ab5SpGeW0=
github.com/MakeNowJust/heredoc/v2 v2.0.1 h1:rlCHh70XXXv7toz95ajQWOWQnN4WNLt0TdpZYIR/J6A=
github.com/MakeNowJust/heredoc/v2 v2.0.1/go.mod h1:6/2Abh5s+hc3g9nbWLe9ObDIOhaRrqsyY9MWy+4JdRM=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg=
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw=
github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=
github.com/ThalesIgnite/crypto11 v1.2.5 h1:1IiIIEqYmBvUYFeMnHqRft4bwf/O36jryEUpY+9ef8E=
github.com/ThalesIgnite/crypto11 v1.2.5/go.mod h1:ILDKtnCKiQ7zRoNxcp36Y1ZR8LBPmR2E23+wTQe/MlE=
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0=
github.com/alessio/shellescape v1.4.1/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30=
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.2/go.mod h1:sCavSAvdzOjul4cEqeVtvlSaSScfNsTQ+46HwlTL1hc=
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4 h1:iC9YFYKDGEy3n/FtqJnOkZsene9olVspKmkX5A2YBEo=
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4/go.mod h1:sCavSAvdzOjul4cEqeVtvlSaSScfNsTQ+46HwlTL1hc=
github.com/alibabacloud-go/cr-20160607 v1.0.1 h1:WEnP1iPFKJU74ryUKh/YDPHoxMZawqlPajOymyNAkts=
github.com/alibabacloud-go/cr-20160607 v1.0.1/go.mod h1:QHeKZtZ3F3FOE+/uIXCBAp8POwnUYekpLwr1dtQa5r0=
github.com/alibabacloud-go/cr-20181201 v1.0.10 h1:B60f6S1imsgn2fgC6X6FrVNrONDrbCT0NwYhsJ0C9/c=
github.com/alibabacloud-go/cr-20181201 v1.0.10/go.mod h1:VN9orB/w5G20FjytoSpZROqu9ZqxwycASmGqYUJSoDc=
github.com/alibabacloud-go/darabonba-openapi v0.1.12/go.mod h1:sTAjsFJmVsmcVeklL9d9uDBlFsgl43wZ6jhI6BHqHqU=
github.com/alibabacloud-go/darabonba-openapi v0.1.14/go.mod h1:w4CosR7O/kapCtEEMBm3JsQqWBU/CnZ2o0pHorsTWDI=
github.com/alibabacloud-go/darabonba-openapi v0.2.1 h1:WyzxxKvhdVDlwpAMOHgAiCJ+NXa6g5ZWPFEzaK/ewwY=
github.com/alibabacloud-go/darabonba-openapi v0.2.1/go.mod h1:zXOqLbpIqq543oioL9IuuZYOQgHQ5B8/n5OPrnko8aY=
github.com/alibabacloud-go/darabonba-string v1.0.0/go.mod h1:93cTfV3vuPhhEwGGpKKqhVW4jLe7tDpo3LUM0i0g6mA=
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68/go.mod h1:6pb/Qy8c+lqua8cFpEy7g39NRRqOWc3rOwAy8m5Y2BY=
github.com/alibabacloud-go/debug v1.0.0 h1:3eIEQWfay1fB24PQIEzXAswlVJtdQok8f3EVN5VrBnA=
github.com/alibabacloud-go/debug v1.0.0/go.mod h1:8gfgZCCAC3+SCzjWtY053FrOcd4/qlH6IHTI4QyICOc=
github.com/alibabacloud-go/endpoint-util v1.1.0/go.mod h1:O5FuCALmCKs2Ff7JFJMudHs0I5EBgecXXxZRyswlEjE=
github.com/alibabacloud-go/endpoint-util v1.1.1 h1:ZkBv2/jnghxtU0p+upSU0GGzW1VL9GQdZO3mcSUTUy8=
github.com/alibabacloud-go/endpoint-util v1.1.1/go.mod h1:O5FuCALmCKs2Ff7JFJMudHs0I5EBgecXXxZRyswlEjE=
github.com/alibabacloud-go/openapi-util v0.0.9/go.mod h1:sQuElr4ywwFRlCCberQwKRFhRzIyG4QTP/P4y1CJ6Ws=
github.com/alibabacloud-go/openapi-util v0.0.10/go.mod h1:sQuElr4ywwFRlCCberQwKRFhRzIyG4QTP/P4y1CJ6Ws=
github.com/alibabacloud-go/openapi-util v0.0.11/go.mod h1:sQuElr4ywwFRlCCberQwKRFhRzIyG4QTP/P4y1CJ6Ws=
github.com/alibabacloud-go/openapi-util v0.1.0 h1:0z75cIULkDrdEhkLWgi9tnLe+KhAFE/r5Pb3312/eAY=
github.com/alibabacloud-go/openapi-util v0.1.0/go.mod h1:sQuElr4ywwFRlCCberQwKRFhRzIyG4QTP/P4y1CJ6Ws=
github.com/alibabacloud-go/tea v1.1.0/go.mod h1:IkGyUSX4Ba1V+k4pCtJUc6jDpZLFph9QMy2VUPTwukg=
github.com/alibabacloud-go/tea v1.1.7/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4=
github.com/alibabacloud-go/tea v1.1.8/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4=
github.com/alibabacloud-go/tea v1.1.11/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4=
github.com/alibabacloud-go/tea v1.1.17/go.mod h1:nXxjm6CIFkBhwW4FQkNrolwbfon8Svy6cujmKFUq98A=
github.com/alibabacloud-go/tea v1.1.19/go.mod h1:nXxjm6CIFkBhwW4FQkNrolwbfon8Svy6cujmKFUq98A=
github.com/alibabacloud-go/tea v1.2.2 h1:aTsR6Rl3ANWPfqeQugPglfurloyBJY85eFy7Gc1+8oU=
github.com/alibabacloud-go/tea v1.2.2/go.mod h1:CF3vOzEMAG+bR4WOql8gc2G9H3EkH3ZLAQdpmpXMgwk=
github.com/alibabacloud-go/tea-utils v1.3.1/go.mod h1:EI/o33aBfj3hETm4RLiAxF/ThQdSngxrpF8rKUDJjPE=
github.com/alibabacloud-go/tea-utils v1.3.9/go.mod h1:EI/o33aBfj3hETm4RLiAxF/ThQdSngxrpF8rKUDJjPE=
github.com/alibabacloud-go/tea-utils v1.4.3/go.mod h1:KNcT0oXlZZxOXINnZBs6YvgOd5aYp9U67G+E3R8fcQw=
github.com/alibabacloud-go/tea-utils v1.4.5 h1:h0/6Xd2f3bPE4XHTvkpjwxowIwRCJAJOqY6Eq8f3zfA=
github.com/alibabacloud-go/tea-utils v1.4.5/go.mod h1:KNcT0oXlZZxOXINnZBs6YvgOd5aYp9U67G+E3R8fcQw=
github.com/alibabacloud-go/tea-xml v1.1.2/go.mod h1:Rq08vgCcCAjHyRi/M7xlHKUykZCEtyBy9+DPF6GgEu8=
github.com/alibabacloud-go/tea-xml v1.1.3 h1:7LYnm+JbOq2B+T/B0fHC4Ies4/FofC4zHzYtqw7dgt0=
github.com/alibabacloud-go/tea-xml v1.1.3/go.mod h1:Rq08vgCcCAjHyRi/M7xlHKUykZCEtyBy9+DPF6GgEu8=
github.com/aliyun/credentials-go v1.1.2/go.mod h1:ozcZaMR5kLM7pwtCMEpVmQ242suV6qTJya2bDq4X1Tw=
github.com/aliyun/credentials-go v1.3.2 h1:L4WppI9rctC8PdlMgyTkF8bBsy9pyKQEzBD1bHMRl+g=
github.com/aliyun/credentials-go v1.3.2/go.mod h1:tlpz4uys4Rn7Ik4/piGRrTbXy2uLKvePgQJJduE+Y5c=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/avast/retry-go/v4 v4.6.1 h1:VkOLRubHdisGrHnTu89g08aQEWEgRU7LVEop3GbIcMk=
github.com/avast/retry-go/v4 v4.6.1/go.mod h1:V6oF8njAwxJ5gRo1Q7Cxab24xs5NCWZBeaHHBklR8mA=
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.2 h1:F8GBspJo+RmR4rYyw75XywEEQHQxBbF7QYKaMMnYREc=
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.2/go.mod h1:wdlMRtz9G4IO6H1yZPsqfGBxR8E6B/bdxHlGkls4kGQ=
github.com/aws/aws-sdk-go v1.55.8 h1:JRmEUbU52aJQZ2AjX4q4Wu7t4uZjOu71uyNmaWlUkJQ=
github.com/aws/aws-sdk-go v1.55.8/go.mod h1:ZkViS9AqA6otK+JBBNH2++sx1sgxrPKcSzPPvQkUtXk=
github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU=
github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4=
github.com/aws/aws-sdk-go-v2/config v1.32.7 h1:vxUyWGUwmkQ2g19n7JY/9YL8MfAIl7bTesIUykECXmY=
github.com/aws/aws-sdk-go-v2/config v1.32.7/go.mod h1:2/Qm5vKUU/r7Y+zUk/Ptt2MDAEKAfUtKc1+3U1Mo3oY=
github.com/aws/aws-sdk-go-v2/credentials v1.19.7 h1:tHK47VqqtJxOymRrNtUXN5SP/zUTvZKeLx4tH6PGQc8=
github.com/aws/aws-sdk-go-v2/credentials v1.19.7/go.mod h1:qOZk8sPDrxhf+4Wf4oT2urYJrYt3RejHSzgAquYeppw=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17/go.mod h1:5M5CI3D12dNOtH3/mk6minaRwI2/37ifCURZISxA/IQ=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v3ISRNiv+3KdQoZ6JWyfcsyQik=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17 h1:JqcdRG//czea7Ppjb+g/n4o8i/R50aTBHkA7vu0lK+k=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17/go.mod h1:CO+WeGmIdj/MlPel2KwID9Gt7CNq4M65HUfBW97liM0=
github.com/aws/aws-sdk-go-v2/service/autoscaling v1.62.5 h1:3maqUQlVW7C6zAdSknv6V/LInH/RJaDW0kTFcy7dkOw=
github.com/aws/aws-sdk-go-v2/service/autoscaling v1.62.5/go.mod h1:8O5Pj92iNpfw/Fa7WdHbn6YiEjDoVdutz+9PGRNoP3Y=
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.71.5 h1:UNllAzfiRvz9il9s0yHJkySMJbxWqEVDfyLdDblnuT4=
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.71.5/go.mod h1:d6XSvIZM3pSKyXNbezwYT3nAcJeUzsJIXtZMNuQ9K2k=
github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.55.1 h1:fRFvc/mgSPujB9JrKuPt+HGnJE9I+nDwXMhEAwHI/GM=
github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.55.1/go.mod h1:XSNDmicqamWtX6yg5lisFAiFaf56PErQo/cMQvUQWX0=
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.53.1 h1:ElB5x0nrBHgQs+XcpQ1XJpSJzMFCq6fDTpT6WQCWOtQ=
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.53.1/go.mod h1:Cj+LUEvAU073qB2jInKV6Y0nvHX0k7bL7KAga9zZ3jw=
github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.61.1 h1:1Ci283hJE+S3XC4n5b2peV/wlcAo5rTVDb6j6JJ1aTo=
github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.61.1/go.mod h1:WXcA3mYRgWVIzjD+kxzap0axltmt4zBVDZaRX0S86gk=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.279.1 h1:hnNVFVOYrzJjkqI+mxc1M4ztgcVw986n0t0TCPlnDPY=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.279.1/go.mod h1:Uy+C+Sc58jozdoL1McQr8bDsEvNFx+/nBY+vpO1HVUY=
github.com/aws/aws-sdk-go-v2/service/ecr v1.36.2 h1:VDQaVwGOokbd3VUbHF+wupiffdrbAZPdQnr5XZMJqrs=
github.com/aws/aws-sdk-go-v2/service/ecr v1.36.2/go.mod h1:lvUlMghKYmSxSfv0vU7pdU/8jSY+s0zpG8xXhaGKCw0=
github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.27.2 h1:Zru9Iy2JPM5+uRnFnoqeOZzi8JIVIHJ0ua6JdeDHcyg=
github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.27.2/go.mod h1:PtQC3XjutCYFCn1+i8+wtpDaXvEK+vXF2gyLIKAmh4A=
github.com/aws/aws-sdk-go-v2/service/eks v1.76.4 h1:5f9jIMcEd0wvRpEoo925Ltfw/2Yalcf+amFm3e1tRd8=
github.com/aws/aws-sdk-go-v2/service/eks v1.76.4/go.mod h1:Qg678m+87sCuJhcsZojenz8mblYG+Tq86V4m3hjVz0s=
github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing v1.33.15 h1:dJtNm4/eMx8nczyN3P4iAARXMj2rAvOJnj608zCqCmw=
github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing v1.33.15/go.mod h1:QEbuU4eh8HGdv4uvld0Jth+KW8L0lOSYlyPcW6+JJo8=
github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.54.2 h1:xJkfrBzq4b4JxnxwNNzjUKmbQj1hPa4uUikSeXQFBYk=
github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.54.2/go.mod h1:DpGMmFhQwV/HH9zugLT5Ovf9HMKdQ+6ejfJybqEC9i4=
github.com/aws/aws-sdk-go-v2/service/iam v1.53.2 h1:62G6btFUwAa5uR5iPlnlNVAM0zJSLbWgDfKOfUC7oW4=
github.com/aws/aws-sdk-go-v2/service/iam v1.53.2/go.mod h1:av9clChrbZbJ5E21msSsiT2oghl2BJHfQGhCkXmhyu8=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8 h1:Z5EiPIzXKewUQK0QTMkutjiaPVeVYXX7KIqhXu/0fXs=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8/go.mod h1:FsTpJtvC4U1fyDXk7c71XoDv3HlRm8V3NiYLeYLh5YE=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 h1:bGeHBsGZx0Dvu/eJC0Lh9adJa3M1xREcndxLNZlve2U=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17/go.mod h1:dcW24lbU0CzHusTE8LLHhRLI42ejmINN8Lcr22bwh/g=
github.com/aws/aws-sdk-go-v2/service/kms v1.47.1 h1:6+C0RoGF4HJQALrsecOXN7cm/l5rgNHCw2xbcvFgpH4=
github.com/aws/aws-sdk-go-v2/service/kms v1.47.1/go.mod h1:VJcNH6BLr+3VJwinRKdotLOMglHO8mIKlD3ea5c7hbw=
github.com/aws/aws-sdk-go-v2/service/outposts v1.57.8 h1:zB9Q/dG0NkURC5E1g4qL/lsUp7aOqilfb7Ru9EOigDU=
github.com/aws/aws-sdk-go-v2/service/outposts v1.57.8/go.mod h1:3osURGv9q/2wxP1qYnB15GWYgr6w2AbQkSxYtE6vTaY=
github.com/aws/aws-sdk-go-v2/service/pricing v1.34.3 h1:vAv0hi3SWcc8cotkWRP4mPkmRbp/XqWKFyPW4Nwpzv0=
github.com/aws/aws-sdk-go-v2/service/pricing v1.34.3/go.mod h1:giTP9ufzBQJRB6bc7P30PO8s35hCp6au5uM70zkohU4=
github.com/aws/aws-sdk-go-v2/service/s3 v1.95.1 h1:C2dUPSnEpy4voWFIq3JNd8gN0Y5vYGDo44eUE58a/p8=
github.com/aws/aws-sdk-go-v2/service/s3 v1.95.1/go.mod h1:5jggDlZ2CLQhwJBiZJb4vfk4f0GxWdEDruWKEJ1xOdo=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M=
github.com/aws/aws-sdk-go-v2/service/ssm v1.67.8 h1:31Llf5VfrZ78YvYs7sWcS7L2m3waikzRc6q1nYenVS4=
github.com/aws/aws-sdk-go-v2/service/ssm v1.67.8/go.mod h1:/jgaDlU1UImoxTxhRNxXHvBAPqPZQ8oCjcPbbkR6kac=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 h1:v6EiMvhEYBoHABfbGB4alOYmCIrcgyPPiBE1wZAEbqk=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.9/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 h1:gd84Omyu9JLriJVCbGApcLzVR3XtmC4ZDPcAI6Ftvds=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ=
github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk=
github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20240318154307-a1a918375412 h1:tfbmGNeOidVXzO1I7zo/WsT5QX7Aa0BGTbnEAE4FG3E=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20240318154307-a1a918375412/go.mod h1:kcUkjB9HwuV7PSck2b60kJtgDy+eTHWuAP0kb93FXsk=
github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20251001043626-89ce6578d960 h1:F/q1AN14KuY3I6HyEJxEUuQmEo5cDRpbXptP7UlB8GQ=
github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20251001043626-89ce6578d960/go.mod h1:cOBzmLe5lF+1C3h0SNnbl2LvMi+Gm8EXGlPxdXoucio=
github.com/awslabs/operatorpkg v0.0.0-20250909182303-e8e550b6f339 h1:p4oSlQ9IaT7/DHfgcrs9zdNhdIp37VIMujZLuxSgECk=
github.com/awslabs/operatorpkg v0.0.0-20250909182303-e8e550b6f339/go.mod h1:tNmCf0qIjaGbODGbm3DM8GIKBUvvxM7iW3KHbpSnVgw=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/buildkite/agent/v3 v3.81.0 h1:JVfkng2XnsXesFXwiFwLJFkuzVu4zvoJCvedfoIXD6E=
github.com/buildkite/agent/v3 v3.81.0/go.mod h1:edJeyycODRxaFvpT22rDGwaQ5oa4eB8GjtbjgX5VpFw=
github.com/buildkite/go-pipeline v0.13.1 h1:Y9p8pQIwPtauVwNrcmTDH6+XK7jE1nLuvWVaK8oymA8=
github.com/buildkite/go-pipeline v0.13.1/go.mod h1:2HHqlSFTYgHFhzedJu0LhLs9n5c9XkYnHiQFVN5HE4U=
github.com/buildkite/interpolate v0.1.3 h1:OFEhqji1rNTRg0u9DsSodg63sjJQEb1uWbENq9fUOBM=
github.com/buildkite/interpolate v0.1.3/go.mod h1:UNVe6A+UfiBNKbhAySrBbZFZFxQ+DXr9nWen6WVt/A8=
github.com/buildkite/roko v1.2.0 h1:hbNURz//dQqNl6Eo9awjQOVOZwSDJ8VEbBDxSfT9rGQ=
github.com/buildkite/roko v1.2.0/go.mod h1:23R9e6nHxgedznkwwfmqZ6+0VJZJZ2Sg/uVcp2cP46I=
github.com/bytecodealliance/wasmtime-go/v3 v3.0.2 h1:3uZCA/BLTIu+DqCfguByNMJa2HVHpXvjfy0Dy7g6fuA=
github.com/bytecodealliance/wasmtime-go/v3 v3.0.2/go.mod h1:RnUjnIXxEJcL6BgCvNyzCCRzZcxCgsZCi+RNlvYor5Q=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/bubbles v0.20.0 h1:jSZu6qD8cRQ6k9OMfR1WlM+ruM8fkPWkHvQWD9LIutE=
github.com/charmbracelet/bubbles v0.20.0/go.mod h1:39slydyswPy+uVOHZ5x/GjwVAFkCsV8IIVy+4MhzwwU=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs=
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.10.1 h1:rL3Koar5XvX0pHGfovN03f5cxLbCF2YvLeyz7D2jVDQ=
github.com/charmbracelet/x/ansi v0.10.1/go.mod h1:3RQDQ6lDnROptfpWuUVIUG64bD2g2BgntdxH0Ya5TeE=
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8=
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589 h1:krfRl01rzPzxSxyLyrChD+U+MzsBXbm0OwYYB67uF+4=
github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589/go.mod h1:OuDyvmLnMCwa2ep4Jkm6nyA0ocJuZlGyk2gGseVzERM=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/clbanning/mxj/v2 v2.5.5/go.mod h1:hNiWqW14h+kc+MdF9C6/YoRfjEJoR3ou6tn/Qo+ve2s=
github.com/clbanning/mxj/v2 v2.7.0 h1:WA/La7UGCanFe5NpHF0Q3DNtnCsVoxbPKuyBNHWRyME=
github.com/clbanning/mxj/v2 v2.7.0/go.mod h1:hNiWqW14h+kc+MdF9C6/YoRfjEJoR3ou6tn/Qo+ve2s=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudflare/cfssl v1.6.5 h1:46zpNkm6dlNkMZH/wMW22ejih6gIaJbzL2du6vD7ZeI=
github.com/cloudflare/cfssl v1.6.5/go.mod h1:Bk1si7sq8h2+yVEDrFJiz3d7Aw+pfjjJSZVaD+Taky4=
github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8=
github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w=
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI=
github.com/cockroachdb/apd/v3 v3.2.1 h1:U+8j7t0axsIgvQUqthuNm82HIrYXodOV2iWLWtEaIwg=
github.com/cockroachdb/apd/v3 v3.2.1/go.mod h1:klXJcjp+FffLTHlhIG69tezTDvdP065naDsHzKhYSqc=
github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb h1:EDmT6Q9Zs+SbUoc7Ik9EfrFqcylYqgPZ9ANSbTAntnE=
github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4=
github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be h1:J5BL2kskAlV9ckgEsNQXscjIaLiOYiZ75d4e94E6dcQ=
github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be/go.mod h1:mk5IQ+Y0ZeO87b858TlA645sVcEcbiX6YqP98kt+7+w=
github.com/containerd/stargz-snapshotter/estargz v0.18.1 h1:cy2/lpgBXDA3cDKSyEfNOFMA/c10O1axL69EU7iirO8=
github.com/containerd/stargz-snapshotter/estargz v0.18.1/go.mod h1:ALIEqa7B6oVDsrF37GkGN20SuvG/pIMm7FwP7ZmRb0Q=
github.com/coreos/go-oidc/v3 v3.17.0 h1:hWBGaQfbi0iVviX4ibC7bk8OKT5qNr4klBaCHVNvehc=
github.com/coreos/go-oidc/v3 v3.17.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f h1:eHnXnuK47UlSTOQexbzxAZfekVz6i+LKRdj1CU5DPaM=
github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f/go.mod h1:uzvlm1mxhHkdfqitSA92i7Se+S9ksOn3a3qmv/kyOCw=
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
github.com/danieljoos/wincred v1.2.2 h1:774zMFJrqaeYCK2W57BgAem/MLi6mtSE47MB6BOJ0i0=
github.com/danieljoos/wincred v1.2.2/go.mod h1:w7w4Utbrz8lqeMbDAK0lkNJUv5sAOkFi7nd/ogr0Uh8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/depcheck-test/depcheck-test v0.0.0-20220607135614-199033aaa936 h1:foGzavPWwtoyBvjWyKJYDYsyzy+23iBV7NKTwdk+LRY=
github.com/depcheck-test/depcheck-test v0.0.0-20220607135614-199033aaa936/go.mod h1:ttKPnOepYt4LLzD+loXQ1rT6EmpyIYHro7TAJuIIlHo=
github.com/dgraph-io/badger/v4 v4.7.0 h1:Q+J8HApYAY7UMpL8d9owqiB+odzEc0zn/aqOD9jhc6Y=
github.com/dgraph-io/badger/v4 v4.7.0/go.mod h1:He7TzG3YBy3j4f5baj5B7Zl2XyfNe5bl4Udl0aPemVA=
github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM=
github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI=
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo=
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/digitorus/pkcs7 v0.0.0-20230713084857-e76b763bdc49/go.mod h1:SKVExuS+vpu2l9IoOc0RwqE7NYnb0JlcFHFnEJkVDzc=
github.com/digitorus/pkcs7 v0.0.0-20230818184609-3a137a874352 h1:ge14PCmCvPjpMQMIAH7uKg0lrtNSOdpYsRXlwk3QbaE=
github.com/digitorus/pkcs7 v0.0.0-20230818184609-3a137a874352/go.mod h1:SKVExuS+vpu2l9IoOc0RwqE7NYnb0JlcFHFnEJkVDzc=
github.com/digitorus/timestamp v0.0.0-20231217203849-220c5c2851b7 h1:lxmTCgmHE1GUYL7P0MlNa00M67axePTq+9nBSGddR8I=
github.com/digitorus/timestamp v0.0.0-20231217203849-220c5c2851b7/go.mod h1:GvWntX9qiTlOud0WkQ6ewFm0LPy5JUR1Xo0Ngbd1w6Y=
github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U=
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/docker/cli v29.0.3+incompatible h1:8J+PZIcF2xLd6h5sHPsp5pvvJA+Sr2wGQxHkRl53a1E=
github.com/docker/cli v29.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8=
github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=
github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE=
github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/emicklei/proto v1.13.2 h1:z/etSFO3uyXeuEsVPzfl56WNgzcvIr42aQazXaQmFZY=
github.com/emicklei/proto v1.13.2/go.mod h1:rn1FgRS/FANiZdD2djyH7TMA9jdRDcYQ9IEN9yvjX0A=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA=
github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU=
github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g=
github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98=
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI=
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4=
github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/evanphx/json-patch v5.9.11+incompatible h1:ixHHqfcGvxhWkniF1tWxBHA0yb4Z+d1UQi45df52xW8=
github.com/evanphx/json-patch v5.9.11+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
github.com/evertras/bubble-table v0.17.1 h1:HJwq3iQrZulXDE93ZcqJNiUVQCBbN4IJ2CkB/IxO3kk=
github.com/evertras/bubble-table v0.17.1/go.mod h1:ifHujS1YxwnYSOgcR2+m3GnJ84f7CVU/4kUOxUCjEbQ=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/foxcpp/go-mockdns v1.1.0 h1:jI0rD8M0wuYAxL7r/ynTrCQQq0BVqfB99Vgk7DlmewI=
github.com/foxcpp/go-mockdns v1.1.0/go.mod h1:IhLeSFGed3mJIAXPH2aiRQB+kqz7oqu8ld2qVbOu7Wk=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ=
github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc=
github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=
github.com/go-chi/chi v4.1.2+incompatible h1:fGFk2Gmi/YKXk0OmGfBh0WgmN3XB8lVnEyNz34tQRec=
github.com/go-chi/chi v4.1.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk=
github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic=
github.com/go-git/go-billy/v5 v5.8.0 h1:I8hjc3LbBlXTtVuFNJuwYuMiHvQJDq1AT6u4DwDzZG0=
github.com/go-git/go-billy/v5 v5.8.0/go.mod h1:RpvI/rw4Vr5QA+Z60c6d6LXH0rYJo0uD5SqfmrrheCY=
github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4=
github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII=
github.com/go-git/go-git/v5 v5.17.1 h1:WnljyxIzSj9BRRUlnmAU35ohDsjRK0EKmL0evDqi5Jk=
github.com/go-git/go-git/v5 v5.17.1/go.mod h1:pW/VmeqkanRFqR6AljLcs7EA7FbZaN5MQqO7oZADXpo=
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
github.com/go-openapi/analysis v0.23.0 h1:aGday7OWupfMs+LbmLZG4k0MYXIANxcuBTYUC03zFCU=
github.com/go-openapi/analysis v0.23.0/go.mod h1:9mz9ZWaSlV8TvjQHLl2mUW2PbZtemkE8yA5v22ohupo=
github.com/go-openapi/errors v0.22.1 h1:kslMRRnK7NCb/CvR1q1VWuEQCEIsBGn5GgKD9e+HYhU=
github.com/go-openapi/errors v0.22.1/go.mod h1:+n/5UdIqdVnLIJ6Q9Se8HNGUXYaY6CN8ImWzfi/Gzp0=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
github.com/go-openapi/loads v0.22.0 h1:ECPGd4jX1U6NApCGG1We+uEozOAvXvJSF4nnwHZ8Aco=
github.com/go-openapi/loads v0.22.0/go.mod h1:yLsaTCS92mnSAZX5WWoxszLj0u+Ojl+Zs5Stn1oF+rs=
github.com/go-openapi/runtime v0.28.0 h1:gpPPmWSNGo214l6n8hzdXYhPuJcGtziTOgUpvsFWGIQ=
github.com/go-openapi/runtime v0.28.0/go.mod h1:QN7OzcS+XuYmkQLw05akXk0jRH/eZ3kb18+1KwW9gyc=
github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY=
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/strfmt v0.23.0 h1:nlUS6BCqcnAk0pyhi9Y+kdDVZdZMHfEKQiS4HaMgO/c=
github.com/go-openapi/strfmt v0.23.0/go.mod h1:NrtIpfKtWIygRkKVsxh7XQMDQW5HKQl6S5ik2elW+K4=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-openapi/validate v0.24.0 h1:LdfDKwNbpB6Vn40xhTdNZAnfLECL81w+VX3BumrGD58=
github.com/go-openapi/validate v0.24.0/go.mod h1:iyeX1sEufmv3nPbBdX3ieNviWnOZaJ1+zquzJEf2BAQ=
github.com/go-piv/piv-go v1.11.0 h1:5vAaCdRTFSIW4PeqMbnsDlUZ7odMYWnHBDGdmtU/Zhg=
github.com/go-piv/piv-go v1.11.0/go.mod h1:NZ2zmjVkfFaL/CF8cVQ/pXdXtuj110zEKGdJM6fJZZM=
github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI=
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
github.com/go-resty/resty/v2 v2.16.5 h1:hBKqmWrr7uRc3euHVqmh1HTHcKn99Smr7o5spptdhTM=
github.com/go-resty/resty/v2 v2.16.5/go.mod h1:hkJtXbA2iKHzJheXYvQ8snQES5ZLGKMwQ07xAwp/fiA=
github.com/go-rod/rod v0.116.2 h1:A5t2Ky2A+5eD/ZJQr1EfsQSe5rms5Xof/qj296e+ZqA=
github.com/go-rod/rod v0.116.2/go.mod h1:H+CMO9SCNc2TJ2WfrG+pKhITz57uGNYU43qYHh438Mg=
github.com/go-sql-driver/mysql v1.9.1 h1:FrjNGn/BsJQjVRuSa8CBrM5BWA9BWoXXat3KrtSb/iI=
github.com/go-sql-driver/mysql v1.9.1/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 h1:p104kn46Q8WdvHunIJ9dAyjPVtrBPhSr3KT2yUst43I=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw=
github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI=
github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/certificate-transparency-go v1.3.2-0.20250507091337-0eddb39e94f8 h1:1RSWsOSxq2gk4pD/63bhsPwoOXgz2yXVadxXPbwZ0ec=
github.com/google/certificate-transparency-go v1.3.2-0.20250507091337-0eddb39e94f8/go.mod h1:6Rm5w0Mlv87LyBNOCgfKYjdIBBpF42XpXGsbQvQGomQ=
github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q=
github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-containerregistry v0.20.7 h1:24VGNpS0IwrOZ2ms2P1QE3Xa5X9p4phx0aUgzYzHW6I=
github.com/google/go-containerregistry v0.20.7/go.mod h1:Lx5LCZQjLH1QBaMPeGwsME9biPeo1lPx6lbGj/UmzgM=
github.com/google/go-github/v55 v55.0.0 h1:4pp/1tNMB9X/LuAhs5i0KQAE40NmiR/y6prLNb9x9cg=
github.com/google/go-github/v55 v55.0.0/go.mod h1:JLahOTA1DnXzhxEymmFF5PP2tSS9JVNj68mSZNDwskA=
github.com/google/go-github/v60 v60.0.0 h1:oLG98PsLauFvvu4D/YPxq374jhSxFYdzQGNCyONLfn8=
github.com/google/go-github/v60 v60.0.0/go.mod h1:ByhX2dP9XT9o/ll2yXAu2VD8l5eNVg8hD4Cr0S/LmQk=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.1-0.20210504230335-f78f29fc09ea h1:VcIYpAGBae3Z6BVncE0OnTE/ZjlDXqtYhOZky88neLM=
github.com/google/gofuzz v1.2.1-0.20210504230335-f78f29fc09ea/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/licenseclassifier/v2 v2.0.0 h1:1Y57HHILNf4m0ABuMVb6xk4vAJYEUO0gDxNpog0pyeA=
github.com/google/licenseclassifier/v2 v2.0.0/go.mod h1:cOjbdH0kyC9R22sdQbYsFkto4NGCAc+ZSwbeThazEtM=
github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc=
github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 h1:EEHtgt9IwisQ2AZ4pIsMjahcegHh6rmhqxzIRQIyepY=
github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/tink/go v1.7.0 h1:6Eox8zONGebBFcCBqkVmt60LaWZa6xg1cl/DwAh/J1w=
github.com/google/tink/go v1.7.0/go.mod h1:GAUOd+QE3pgj9q8VKIGTCP33c/B7eb4NhxLcgTJZStM=
github.com/google/trillian v1.7.1 h1:+zX8jLM3524bAMPS+VxaDIDgsMv3/ty6DuLWerHXcek=
github.com/google/trillian v1.7.1/go.mod h1:E1UMAHqpZCA8AQdrKdWmHmtUfSeiD0sDWD1cv00Xa+c=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU9uHLo7OnF5tL52HFAgMmyrf4=
github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo=
github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/go-secure-stdlib/parseutil v0.1.7 h1:UpiO20jno/eV1eVZcxqWnUohyKRe1g8FPV/xH1s/2qs=
github.com/hashicorp/go-secure-stdlib/parseutil v0.1.7/go.mod h1:QmrqtbKuxxSWTN3ETMPuB+VtEiBJ/A9XhoYGv8E1uD8=
github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 h1:kes8mmyCpxJsI7FTwtzRqEy9CdjCtrXrXGuOpxEA7Ts=
github.com/hashicorp/go-secure-stdlib/strutil v0.1.2/go.mod h1:Gou2R9+il93BqX25LAKCLuM+y9U2T4hlwvT1yprcna4=
github.com/hashicorp/go-sockaddr v1.0.5 h1:dvk7TIXCZpmfOlM+9mlcrWmWjw/wlKT+VDq2wMvfPJU=
github.com/hashicorp/go-sockaddr v1.0.5/go.mod h1:uoUUmtwU7n9Dv3O4SNLeFvg0SxQ3lyjsj6+CCykpaxI=
github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY=
github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/hcl v1.0.1-vault-5 h1:kI3hhbbyzr4dldA8UdTb7ZlVVlI2DACdCfz31RPDgJM=
github.com/hashicorp/hcl v1.0.1-vault-5/go.mod h1:XYhtn6ijBSAj6n4YqAaf7RBPS4I06AItNorpy+MoQNM=
github.com/hashicorp/vault/api v1.15.0 h1:O24FYQCWwhwKnF7CuSqP30S51rTV7vz1iACXE/pj5DA=
github.com/hashicorp/vault/api v1.15.0/go.mod h1:+5YTO09JGn0u+b6ySD/LLVf8WkJCPLAL2Vkmrn2+CM8=
github.com/howeyc/gopass v0.0.0-20210920133722-c8aef6fb66ef h1:A9HsByNhogrvm9cWb28sjiS3i7tcKCkflWFEkHfuAgM=
github.com/howeyc/gopass v0.0.0-20210920133722-c8aef6fb66ef/go.mod h1:lADxMC39cJJqL93Duh1xhAs4I2Zs8mKS89XWXFGp9cs=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/in-toto/attestation v1.1.0 h1:oRWzfmZPDSctChD0VaQV7MJrywKOzyNrtpENQFq//2Q=
github.com/in-toto/attestation v1.1.0/go.mod h1:DB59ytd3z7cIHgXxwpSX2SABrU6WJUKg/grpdgHVgVs=
github.com/in-toto/in-toto-golang v0.9.0 h1:tHny7ac4KgtsfrG6ybU8gVOZux2H8jN05AXJ9EBM1XU=
github.com/in-toto/in-toto-golang v0.9.0/go.mod h1:xsBVrVsHNsB61++S6Dy2vWosKhuA3lUTQd+eF9HdeMo=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.7.4 h1:9wKznZrhWa2QiHL+NjTSPP6yjl3451BX3imWDnokYlg=
github.com/jackc/pgx/v5 v5.7.4/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
github.com/jedisct1/go-minisign v0.0.0-20230811132847-661be99b8267 h1:TMtDYDHKYY15rFihtRfck/bfFqNfvcabqvXAFQfAUpY=
github.com/jedisct1/go-minisign v0.0.0-20230811132847-661be99b8267/go.mod h1:h1nSAbGFqGVzn6Jyl1R/iCcBUHN4g+gW1u9CoBTrb9E=
github.com/jellydator/ttlcache/v3 v3.3.0 h1:BdoC9cE81qXfrxeb9eoJi9dWrdhSuwXMAnHTbnBm4Wc=
github.com/jellydator/ttlcache/v3 v3.3.0/go.mod h1:bj2/e0l4jRnQdrnSTaGTsh4GSXvMjQcy41i7th0GVGw=
github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 h1:liMMTbpW34dhU4az1GN0pTPADwNmvoRSeoZ6PItiqnY=
github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
github.com/knqyf263/go-rpmdb v0.1.0 h1:pOgjtOGtW0B+ibY905hP3ETrYFmLZsHiReKsplcs+to=
github.com/knqyf263/go-rpmdb v0.1.0/go.mod h1:9LQcoMCMQ9vrF7HcDtXfvqGO4+ddxFQ8+YF/0CVGDww=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kris-nova/logger v0.2.2 h1:qdWg2fNr4Bni4obkgehwOSbCoxaX+wDGGrzQ1T2mA20=
github.com/kris-nova/logger v0.2.2/go.mod h1:uOTzfb9ssx0XYb3UpeAjKsys8KByjD12OMN4szmym4w=
github.com/kris-nova/lolgopher v0.0.0-20210112022122-73f0047e8b65/go.mod h1:V0HF/ZBlN86HqewcDC/cVxMmYDiRukWjSrgKLUAn9Js=
github.com/kubicorn/kubicorn v0.0.0-20191114212505-a2c64ce430b9 h1:HgzA4yC4kPQfNIya55o4yA1WiKCXXA5wXvwoBKgIwXI=
github.com/kubicorn/kubicorn v0.0.0-20191114212505-a2c64ce430b9/go.mod h1:Z/PU7XQicaZV6QFTAvm8EaWyfNbAb4a76kmR4Am4KA8=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
github.com/miekg/pkcs11 v1.0.3-0.20190429190417-a667d056470f/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
github.com/miekg/pkcs11 v1.1.1 h1:Ugu9pdy6vAYku5DEpVWVFPYnzV+bxB+iRdbuFSu7TvU=
github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0=
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
github.com/mozillazg/docker-credential-acr-helper v0.4.0 h1:Uoh3Z9CcpEDnLiozDx+D7oDgRq7X+R296vAqAumnOcw=
github.com/mozillazg/docker-credential-acr-helper v0.4.0/go.mod h1:2kiicb3OlPytmlNC9XGkLvVC+f0qTiJw3f/mhmeeQBg=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nozzle/throttler v0.0.0-20180817012639-2ea982251481 h1:Up6+btDp321ZG5/zdSLo48H9Iaq0UQGthrhWC6pCxzE=
github.com/nozzle/throttler v0.0.0-20180817012639-2ea982251481/go.mod h1:yKZQO8QE2bHlgozqWDiRVqTFlLQSj30K/6SAK8EeYFw=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
github.com/nxadm/tail v1.4.11/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc=
github.com/octago/sflags v0.3.1 h1:LW65z20iAQKteEyjsnnc+/lyoCUnIoRuAocggr6RB6A=
github.com/octago/sflags v0.3.1/go.mod h1:hVUkbnYwMU9kZiZJyOAIVN56YiVMMPxgJ46kRZ19jh0=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/oleiade/reflections v1.1.0 h1:D+I/UsXQB4esMathlt0kkZRJZdUDmhv5zGi/HOwYTWo=
github.com/oleiade/reflections v1.1.0/go.mod h1:mCxx0QseeVCHs5Um5HhJeCKVC7AwS8kO67tky4rdisA=
github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6 h1:r3FaAI0NZK3hSmtTDrBVREhKULp8oUeqLT5Eyl2mSPo=
github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y=
github.com/olekukonko/ll v0.0.8 h1:sbGZ1Fx4QxJXEqL/6IG8GEFnYojUSQ45dJVwN2FH2fc=
github.com/olekukonko/ll v0.0.8/go.mod h1:En+sEW0JNETl26+K8eZ6/W4UQ7CYSrrgg/EdIYT2H8g=
github.com/olekukonko/tablewriter v1.0.8 h1:f6wJzHg4QUtJdvrVPKco4QTrAylgaU0+b9br/lJxEiQ=
github.com/olekukonko/tablewriter v1.0.8/go.mod h1:H428M+HzoUXC6JU2Abj9IT9ooRmdq9CxuDmKMtrOCMs=
github.com/oliveagle/jsonpath v0.0.0-20180606110733-2e52cf6e6852 h1:Yl0tPBa8QPjGmesFh1D0rDy+q1Twx6FyU7VWHi8wZbI=
github.com/oliveagle/jsonpath v0.0.0-20180606110733-2e52cf6e6852/go.mod h1:eqOVx5Vwu4gd2mmMZvVZsgIqNSaW3xxRThUJ0k/TPk4=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c=
github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro=
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/open-policy-agent/opa v1.4.0 h1:IGO3xt5HhQKQq2axfa9memIFx5lCyaBlG+fXcgHpd3A=
github.com/open-policy-agent/opa v1.4.0/go.mod h1:DNzZPKqKh4U0n0ANxcCVlw8lCSv2c+h5G/3QvSYdWZ8=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/package-url/packageurl-go v0.1.2 h1:0H2DQt6DHd/NeRlVwW4EZ4oEI6Bn40XlNPRqegcxuo4=
github.com/package-url/packageurl-go v0.1.2/go.mod h1:uQd4a7Rh3ZsVg5j0lNyAfyxIeGde9yrlhjF78GzeW0c=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pborman/uuid v1.2.1 h1:+ZZIw58t/ozdjRaXh/3awHfmWRbzYxJoAdNJxe/3pvw=
github.com/pborman/uuid v1.2.1/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4=
github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/protocolbuffers/txtpbfmt v0.0.0-20240116145035-ef3ab179eed6 h1:MAzmm+JtFxQwTPb1cVMLkemw2OxLy5AB/d/rxtAwGQQ=
github.com/protocolbuffers/txtpbfmt v0.0.0-20240116145035-ef3ab179eed6/go.mod h1:jgxiZysxFPM+iWKwQwPR+y+Jvo54ARd4EisXxKYpB5c=
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM=
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk=
github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc=
github.com/sagikazarmark/locafero v0.9.0 h1:GbgQGNtTrEmddYDSAH9QLRyfAHY12md+8YFTqyMTC9k=
github.com/sagikazarmark/locafero v0.9.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk=
github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA=
github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/samber/lo v1.51.0 h1:kysRYLbHy/MB7kQZf5DSN50JHmMsNEdeY24VzJFu7wI=
github.com/samber/lo v1.51.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
github.com/sanathkr/go-yaml v0.0.0-20170819195128-ed9d249f429b h1:jUK33OXuZP/l6babJtnLo1qsGvq6G9so9KMflGAm4YA=
github.com/sanathkr/go-yaml v0.0.0-20170819195128-ed9d249f429b/go.mod h1:8458kAagoME2+LN5//WxE71ysZ3B7r22fdgb7qVmXSY=
github.com/sanathkr/yaml v0.0.0-20170819201035-0056894fa522 h1:fOCp11H0yuyAt2wqlbJtbyPzSgaxHTv8uN1pMpkG1t8=
github.com/sanathkr/yaml v0.0.0-20170819201035-0056894fa522/go.mod h1:tQTYKOQgxoH3v6dEmdHiz4JG+nbxWwM5fgPQUpSZqVQ=
github.com/sassoftware/relic v7.2.1+incompatible h1:Pwyh1F3I0r4clFJXkSI8bOyJINGqpgjJU3DYAZeI05A=
github.com/sassoftware/relic v7.2.1+incompatible/go.mod h1:CWfAxv73/iLZ17rbyhIEq3K9hs5w6FpNMdUT//qR+zk=
github.com/sassoftware/relic/v7 v7.6.2 h1:rS44Lbv9G9eXsukknS4mSjIAuuX+lMq/FnStgmZlUv4=
github.com/sassoftware/relic/v7 v7.6.2/go.mod h1:kjmP0IBVkJZ6gXeAu35/KCEfca//+PKM6vTAsyDPY+k=
github.com/secure-systems-lab/go-securesystemslib v0.10.0 h1:l+H5ErcW0PAehBNrBxoGv1jjNpGYdZ9RcheFkB2WI14=
github.com/secure-systems-lab/go-securesystemslib v0.10.0/go.mod h1:MRKONWmRoFzPNQ9USRF9i1mc7MvAVvF1LlW8X5VWDvk=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw=
github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/shibumi/go-pathspec v1.3.0 h1:QUyMZhFo0Md5B8zV8x2tesohbb5kfbpTi9rBnKh5dkI=
github.com/shibumi/go-pathspec v1.3.0/go.mod h1:Xutfslp817l2I1cZvgcfeMQJG5QnU2lh5tVaaMCl3jE=
github.com/shirou/gopsutil/v3 v3.24.5 h1:i0t8kL+kQTvpAYToeuiVk3TgDeKOFioZO3Ztz/iZ9pI=
github.com/shirou/gopsutil/v3 v3.24.5/go.mod h1:bsoOS1aStSs9ErQ1WWfxllSeS1K5D+U30r2NfcubMVk=
github.com/sigstore/cosign/v2 v2.4.1 h1:b8UXEfJFks3hmTwyxrRNrn6racpmccUycBHxDMkEPvU=
github.com/sigstore/cosign/v2 v2.4.1/go.mod h1:GvzjBeUKigI+XYnsoVQDmMAsMMc6engxztRSuxE+x9I=
github.com/sigstore/fulcio v1.6.3 h1:Mvm/bP6ELHgazqZehL8TANS1maAkRoM23CRAdkM4xQI=
github.com/sigstore/fulcio v1.6.3/go.mod h1:5SDgLn7BOUVLKe1DwOEX3wkWFu5qEmhUlWm+SFf0GH8=
github.com/sigstore/protobuf-specs v0.5.0 h1:F8YTI65xOHw70NrvPwJ5PhAzsvTnuJMGLkA4FIkofAY=
github.com/sigstore/protobuf-specs v0.5.0/go.mod h1:+gXR+38nIa2oEupqDdzg4qSBT0Os+sP7oYv6alWewWc=
github.com/sigstore/rekor v1.3.9 h1:sUjRpKVh/hhgqGMs0t+TubgYsksArZ6poLEC3MsGAzU=
github.com/sigstore/rekor v1.3.9/go.mod h1:xThNUhm6eNEmkJ/SiU/FVU7pLY2f380fSDZFsdDWlcM=
github.com/sigstore/sigstore v1.10.3 h1:s7fBYYOzW/2Vd0nND2ZdpWySb5vRF2u9eix/NZMHJm0=
github.com/sigstore/sigstore v1.10.3/go.mod h1:T26vXIkpnGEg391v3TaZ8EERcXbnjtZb/1erh5jbIQk=
github.com/sigstore/sigstore-go v0.6.1 h1:tGkkv1oDIER+QYU5MrjqlttQOVDWfSkmYwMqkJhB/cg=
github.com/sigstore/sigstore-go v0.6.1/go.mod h1:Xe5GHmUeACRFbomUWzVkf/xYCn8xVifb9DgqJrV2dIw=
github.com/sigstore/sigstore/pkg/signature/kms/aws v1.8.12 h1:EC3UmIaa7nV9sCgSpVevmvgvTYTkMqyrRbj5ojPp7tE=
github.com/sigstore/sigstore/pkg/signature/kms/aws v1.8.12/go.mod h1:aw60vs3crnQdM/DYH+yF2P0MVKtItwAX34nuaMrY7Lk=
github.com/sigstore/sigstore/pkg/signature/kms/azure v1.8.12 h1:FPpliDTywSy0woLHMAdmTSZ5IS/lVBZ0dY0I+2HmnSY=
github.com/sigstore/sigstore/pkg/signature/kms/azure v1.8.12/go.mod h1:NkPiz4XA0JcBSXzJUrjMj7Xi7oSTew1Ip3Zmt56mHlw=
github.com/sigstore/sigstore/pkg/signature/kms/gcp v1.8.12 h1:kweBChR6M9FEvmxN3BMEcl7SNnwxTwKF7THYFKLOE5U=
github.com/sigstore/sigstore/pkg/signature/kms/gcp v1.8.12/go.mod h1:6+d+A6oYt1W5OgtzgEVb21V7tAZ/C2Ihtzc5MNJbayY=
github.com/sigstore/sigstore/pkg/signature/kms/hashivault v1.8.12 h1:jvY1B9bjP+tKzdKDyuq5K7O19CG2IKzGJNTy5tuL2Gs=
github.com/sigstore/sigstore/pkg/signature/kms/hashivault v1.8.12/go.mod h1:2uEeOb8xE2RC6OvzxKux1wkS39Zv8gA27z92m49xUTc=
github.com/sigstore/timestamp-authority v1.2.2 h1:X4qyutnCQqJ0apMewFyx+3t7Tws00JQ/JonBiu3QvLE=
github.com/sigstore/timestamp-authority v1.2.2/go.mod h1:nEah4Eq4wpliDjlY342rXclGSO7Kb9hoRrl9tqLW13A=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
github.com/smallstep/assert v0.0.0-20200723003110-82e2b9b3b262 h1:unQFBIznI+VYD1/1fApl1A+9VcBk+9dcqGfnePY87LY=
github.com/smallstep/assert v0.0.0-20200723003110-82e2b9b3b262/go.mod h1:MyOHs9Po2fbM1LHej6sBUT8ozbxmMOFG+E+rx/GSGuc=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I=
github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg=
github.com/spf13/cast v1.9.2 h1:SsGfm7M8QOFtEzumm7UZrZdLLquNdzFYfIbEXntcFbE=
github.com/spf13/cast v1.9.2/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4=
github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4=
github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo=
github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d h1:vfofYNRScrDdvS342BElfbETmL1Aiz3i2t0zfRj16Hs=
github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d/go.mod h1:RRCYJbIwD5jmqPI9XoAFR0OcDxqUctll6zUj/+B4S48=
github.com/tchap/go-patricia/v2 v2.3.2 h1:xTHFutuitO2zqKAQ5rCROYgUb7Or/+IC3fts9/Yc7nM=
github.com/tchap/go-patricia/v2 v2.3.2/go.mod h1:VZRHKAb53DLaG+nA9EaYYiaEx6YztwDlLElMsnSHD4k=
github.com/thales-e-security/pool v0.0.2 h1:RAPs4q2EbWsTit6tpzuvTFlgFRJ3S8Evf5gtvVDbmPg=
github.com/thales-e-security/pool v0.0.2/go.mod h1:qtpMm2+thHtqhLzTwgDBj/OuNnMpupY8mv0Phz0gjhU=
github.com/theupdateframework/go-tuf v0.7.0 h1:CqbQFrWo1ae3/I0UCblSbczevCCbS31Qvs5LdxRWqRI=
github.com/theupdateframework/go-tuf v0.7.0/go.mod h1:uEB7WSY+7ZIugK6R1hiBMBjQftaFzn7ZCDJcp1tCUug=
github.com/theupdateframework/go-tuf/v2 v2.3.1 h1:fReZUTLvPdqIL8Rd9xEKPmaxig8GIXe0kS4RSEaRfaM=
github.com/theupdateframework/go-tuf/v2 v2.3.1/go.mod h1:9S0Srkf3c13FelsOyt5OyG3ZZDq9OJDA4IILavrt72Y=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/tink-crypto/tink-go-awskms/v2 v2.1.0 h1:N9UxlsOzu5mttdjhxkDLbzwtEecuXmlxZVo/ds7JKJI=
github.com/tink-crypto/tink-go-awskms/v2 v2.1.0/go.mod h1:PxSp9GlOkKL9rlybW804uspnHuO9nbD98V/fDX4uSis=
github.com/tink-crypto/tink-go-gcpkms/v2 v2.2.0 h1:3B9i6XBXNTRspfkTC0asN5W0K6GhOSgcujNiECNRNb0=
github.com/tink-crypto/tink-go-gcpkms/v2 v2.2.0/go.mod h1:jY5YN2BqD/KSCHM9SqZPIpJNG/u3zwfLXHgws4x2IRw=
github.com/tink-crypto/tink-go/v2 v2.5.0 h1:B8KLF6AofxdBIE4UJIaFbmoj5/1ehEtt7/MmzfI4Zpw=
github.com/tink-crypto/tink-go/v2 v2.5.0/go.mod h1:2WbBA6pfNsAfBwDCggboaHeB2X29wkU8XHtGwh2YIk8=
github.com/tjfoc/gmsm v1.3.2/go.mod h1:HaUcFuY0auTiaHB9MHFGCPx5IaLhTUd2atbCFBQXn9w=
github.com/tjfoc/gmsm v1.4.1 h1:aMe1GlZb+0bLjn+cKTPEvvn9oUEBlJitaZiiBwsbgho=
github.com/tjfoc/gmsm v1.4.1/go.mod h1:j4INPkHWMrhJb38G+J6W4Tw0AbuN8Thu3PbdVYhVcTE=
github.com/transparency-dev/merkle v0.0.2 h1:Q9nBoQcZcgPamMkGn7ghV8XiTZ/kRxn1yCG81+twTK4=
github.com/transparency-dev/merkle v0.0.2/go.mod h1:pqSy+OXefQ1EDUVmAJ8MUhHB9TXGuzVAT58PqBoHz1A=
github.com/urfave/sflags v0.4.1 h1:9BKteZiMaLlgfMm8eYbFge3eRAUsrJXs4HsCemdDl+A=
github.com/urfave/sflags v0.4.1/go.mod h1:NCIz2mBC+woyrkl88PeiKAuQUKJdEre2Y4at5SreAeU=
github.com/vbatts/tar-split v0.12.2 h1:w/Y6tjxpeiFMR47yzZPlPj/FcPLpXbTUi/9H7d3CPa4=
github.com/vbatts/tar-split v0.12.2/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
github.com/vladimirvivien/gexe v0.5.0 h1:AWBVaYnrTsGYBktXvcO0DfWPeSiZxn6mnQ5nvL+A1/A=
github.com/vladimirvivien/gexe v0.5.0/go.mod h1:3gjgTqE2c0VyHnU5UOIwk7gyNzZDGulPb/DJPgcw64E=
github.com/weaveworks/eksctl v0.221.0 h1:sJEuVRU+8dia8rj/4VmB8DwKArmGhG7uwaqdUYJhqv0=
github.com/weaveworks/eksctl v0.221.0/go.mod h1:fkWnFg8h/h24bl5DmyRgJIERB/7g5zqIeNgSklfeH5Q=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xanzy/go-gitlab v0.109.0 h1:RcRme5w8VpLXTSTTMZdVoQWY37qTJWg+gwdQl4aAttE=
github.com/xanzy/go-gitlab v0.109.0/go.mod h1:wKNKh3GkYDMOsGmnfuX+ITCmDuSDWFO0G+C4AygL9RY=
github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/yashtewari/glob-intersection v0.2.0 h1:8iuHdN88yYuCzCdjt0gDe+6bAhUwBeEWqThExu54RFg=
github.com/yashtewari/glob-intersection v0.2.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok=
github.com/ysmood/fetchup v0.2.3 h1:ulX+SonA0Vma5zUFXtv52Kzip/xe7aj4vqT5AJwQ+ZQ=
github.com/ysmood/fetchup v0.2.3/go.mod h1:xhibcRKziSvol0H1/pj33dnKrYyI2ebIvz5cOOkYGns=
github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ=
github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18=
github.com/ysmood/got v0.40.0 h1:ZQk1B55zIvS7zflRrkGfPDrPG3d7+JOza1ZkNxcc74Q=
github.com/ysmood/got v0.40.0/go.mod h1:W7DdpuX6skL3NszLmAsC5hT7JAhuLZhByVzHTq874Qg=
github.com/ysmood/gson v0.7.3 h1:QFkWbTH8MxyUTKPkVWAENJhxqdBa4lYTQWqZCiLG6kE=
github.com/ysmood/gson v0.7.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
github.com/ysmood/leakless v0.9.0 h1:qxCG5VirSBvmi3uynXFkcnLMzkphdh3xx5FtrORwDCU=
github.com/ysmood/leakless v0.9.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.30/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zalando/go-keyring v0.2.3 h1:v9CUu9phlABObO4LPWycf+zwMG7nlbb3t/B5wa97yms=
github.com/zalando/go-keyring v0.2.3/go.mod h1:HL4k+OXQfJUWaMnqyuSOc0drfGPX2b51Du6K+MRgZMk=
gitlab.alpinelinux.org/alpine/go v0.10.0 h1:/ekBiNqDSXZpK+AfZx4lrtVwKTDrWz3N3ck0S+fCxwU=
gitlab.alpinelinux.org/alpine/go v0.10.0/go.mod h1:LKzOqYjGTZNLwcHl+c2I5VNioQio7agzRFvlGB9Owk4=
go.mongodb.org/mongo-driver v1.17.2 h1:gvZyk8352qSfzyZ2UMWcpDpMSGEr1eqE4T793SqyhzM=
go.mongodb.org/mongo-driver v1.17.2/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE=
go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg=
go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48=
go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0 h1:xJ2qHD0C1BeYVTLLR9sX12+Qb95kfeD/byKj6Ky1pXg=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0/go.mod h1:u5BF1xyjstDowA1R5QAO9JHzqK+ublenEW/dyqTjBVk=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0 h1:PB3Zrjs1sG1GBX51SXyTSoOTqcDglmsk7nT6tkKPb/k=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0/go.mod h1:U2R3XyVPzn0WX7wOIypPuptulsMcPDPs/oiSVOMVnHY=
go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0=
go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs=
go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18=
go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE=
go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8=
go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew=
go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI=
go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
go.step.sm/crypto v0.57.0 h1:YjoRQDaJYAxHLVwjst0Bl0xcnoKzVwuHCJtEo2VSHYU=
go.step.sm/crypto v0.57.0/go.mod h1:+Lwp5gOVPaTa3H/Ul/TzGbxQPXZZcKIUGMS0lG6n9Go=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191219195013-becbf705a915/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 h1:fQsdNF2N+/YewlRZiricy4P1iimyPKZ/xwniHj8Q2a0=
golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93/go.mod h1:EPRbTFwzwjXj9NpYyyrvenVh9Y+GFeEvMNh7Xuz7xgU=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220607020251-c690dde0001d/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200509044756-6aff5f38e54f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200509030707-2212a7e161a5/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
golang.org/x/tools/go/vcs v0.1.0-deprecated h1:cOIJqWBl99H1dH5LWizPa+0ImeeJq3t3cJjaeOWUAL4=
golang.org/x/tools/go/vcs v0.1.0-deprecated/go.mod h1:zUrvATBAvEI9535oC0yWYsLsHIV4Z7g63sNPVMtuBy8=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk=
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/api v0.242.0 h1:7Lnb1nfnpvbkCiZek6IXKdJ0MFuAZNAJKQfA1ws62xg=
google.golang.org/api v0.242.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4=
google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s=
google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls=
google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto=
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww=
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo=
gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs=
gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/ini.v1 v1.56.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY=
k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA=
k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI=
k8s.io/apiextensions-apiserver v0.34.1/go.mod h1:hP9Rld3zF5Ay2Of3BeEpLAToP+l4s5UlxiHfqRaRcMc=
k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8=
k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns=
k8s.io/cli-runtime v0.35.0 h1:PEJtYS/Zr4p20PfZSLCbY6YvaoLrfByd6THQzPworUE=
k8s.io/cli-runtime v0.35.0/go.mod h1:VBRvHzosVAoVdP3XwUQn1Oqkvaa8facnokNkD7jOTMY=
k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE=
k8s.io/client-go v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o=
k8s.io/cloud-provider v0.35.0 h1:syiBCQbKh2gho/S1BkIl006Dc44pV8eAtGZmv5NMe7M=
k8s.io/cloud-provider v0.35.0/go.mod h1:7grN+/Nt5Hf7tnSGPT3aErt4K7aQpygyCrGpbrQbzNc=
k8s.io/cloud-provider-aws v1.35.0 h1:jlMZmc4JjJ6lkYj41xeKqZ8nw1ais00xQi8Nnz2lqkI=
k8s.io/cloud-provider-aws v1.35.0/go.mod h1:6R9TIgQ/ecysPukSmEUs4kZIwqvju80+FjMAhtJ22Q0=
k8s.io/component-base v0.35.0 h1:+yBrOhzri2S1BVqyVSvcM3PtPyx5GUxCK2tinZz1G94=
k8s.io/component-base v0.35.0/go.mod h1:85SCX4UCa6SCFt6p3IKAPej7jSnF3L8EbfSyMZayJR0=
k8s.io/component-helpers v0.35.0 h1:wcXv7HJRksgVjM4VlXJ1CNFBpyDHruRI99RrBtrJceA=
k8s.io/component-helpers v0.35.0/go.mod h1:ahX0m/LTYmu7fL3W8zYiIwnQ/5gT28Ex4o2pymF63Co=
k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8=
k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kops v1.33.1 h1:MFrj3r6f+F9rL2DQQdfAXEyFJDdq0GAyu96woF6TOaQ=
k8s.io/kops v1.33.1/go.mod h1:epTyN30uGaeRBmN1jmT993Kc4Wd/tti9snQDd5aivXc=
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE=
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ=
k8s.io/kubelet v0.35.0 h1:8cgJHCBCKLYuuQ7/Pxb/qWbJfX1LXIw7790ce9xHq7c=
k8s.io/kubelet v0.35.0/go.mod h1:ciRzAXn7C4z5iB7FhG1L2CGPPXLTVCABDlbXt/Zz8YA=
k8s.io/release v0.18.0 h1:xn+ZU/8bDmtAcSZMh0K2HMa2+dYrD3Qqq+yqv3Uuk9k=
k8s.io/release v0.18.0/go.mod h1:PJ4HhnTcmTKSakE475b4e3xJEVw+EVB5ycZM9vWFcTU=
k8s.io/utils v0.0.0-20260108192941-914a6e750570 h1:JT4W8lsdrGENg9W+YwwdLJxklIuKWdRm+BC+xt33FOY=
k8s.io/utils v0.0.0-20260108192941-914a6e750570/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk=
modernc.org/cc/v4 v4.19.3 h1:vE9kmJqUcyvNOf8F2Hn8od14SOMq34BiqcZ2tMzLk5c=
modernc.org/cc/v4 v4.19.3/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ=
modernc.org/ccgo/v4 v4.11.0 h1:2uc2kRvZLC/oHylsrirRW6f1I4wljQST2BBbm+aKiXM=
modernc.org/ccgo/v4 v4.11.0/go.mod h1:GwrfAtnU6PdZkCWD4XI8wB1T5Xj3fSw9lO/40H1ldys=
modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE=
modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ=
modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw=
modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU=
modernc.org/libc v1.45.2 h1:oRlBu8xlBen2awVAWuLOkvYNBPaIKFxFOj9wA/jaXHM=
modernc.org/libc v1.45.2/go.mod h1:YkRHLoN4L70OdO1cVmM83KZhRbRvsc3XogfVzbTXBwE=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.7.2 h1:Klh90S215mmH8c9gO98QxQFsY+W451E8AnzjoE2ee1E=
modernc.org/memory v1.7.2/go.mod h1:NO4NVCQy0N7ln+T9ngWqOQfi7ley4vpwvARR+Hjw95E=
modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4=
modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc=
modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss=
modernc.org/sqlite v1.29.5 h1:8l/SQKAjDtZFo9lkJLdk8g9JEOeYRG4/ghStDCCTiTE=
modernc.org/sqlite v1.29.5/go.mod h1:S02dvcmm7TnTRvGhv8IGYyLnIt7AS2KPaB1F/71p75U=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
sigs.k8s.io/bom v0.6.0 h1:IPMPHx6XdmMeW2oEeF66DgNyP5d4RxfuXwiC1qn+n9o=
sigs.k8s.io/bom v0.6.0/go.mod h1:MV0D3vdGlkaPgi5EwpwMBeQ8n8QS8Q2u1lJ5LyE7RLM=
sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A=
sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8=
sigs.k8s.io/e2e-framework v0.6.1-0.20250909060333-8677714ff9a6 h1:5saOTCrwclRdFJLj5zDMJITisRmR0HuG8SU6ts9z5IY=
sigs.k8s.io/e2e-framework v0.6.1-0.20250909060333-8677714ff9a6/go.mod h1:MUvWdQO9AGg4/yP9Y0kOcmX+KIOXI0UR6Xw6xz11ULw=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/karpenter v1.8.0 h1:AmTHUPtnuL8IX9mbcD3NOohyk62idrBCBtM+8Wn6Jvk=
sigs.k8s.io/karpenter v1.8.0/go.mod h1:nDDVB5873dVVuyTam3oJrllSv0sAgp6as6/5HRTcV4o=
sigs.k8s.io/kubetest2 v0.0.0-20260108084739-2f9a9397f033 h1:+HmjjgPGGqvYRBErxVSbguBnp7hILyuwHHDKUXRCDA4=
sigs.k8s.io/kubetest2 v0.0.0-20260108084739-2f9a9397f033/go.mod h1:pBd0cFaT0hDqmwQg+TIhyLgPMYaH66QMLcKd09XnKTI=
sigs.k8s.io/kustomize/api v0.20.1 h1:iWP1Ydh3/lmldBnH/S5RXgT98vWYMaTUL1ADcr+Sv7I=
sigs.k8s.io/kustomize/api v0.20.1/go.mod h1:t6hUFxO+Ph0VxIk1sKp1WS0dOjbPCtLJ4p8aADLwqjM=
sigs.k8s.io/kustomize/kyaml v0.20.1 h1:PCMnA2mrVbRP3NIB6v9kYCAc38uvFLVs8j/CD567A78=
sigs.k8s.io/kustomize/kyaml v0.20.1/go.mod h1:0EmkQHRUsJxY8Ug9Niig1pUMSCGHxQ5RklbpV/Ri6po=
sigs.k8s.io/promo-tools/v3 v3.6.0 h1:C2L08ezrWm1aZI8Emd3iZPZQserLPRgzuqQVxvI0PUI=
sigs.k8s.io/promo-tools/v3 v3.6.0/go.mod h1:XJ3jy0hJYs+hWKt8XsLHFzGQV8PUtvllvbxjN/E5RXI=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/release-sdk v0.12.2 h1:ncuHwUu8VWcZVVrNkjoUR8xGo6ibHg+AM6uMMD+IwuQ=
sigs.k8s.io/release-sdk v0.12.2/go.mod h1:tlJgWPJLeRbWOvcyq1XrCZmLe8Yfn3H5U/LNtmBa0Nc=
sigs.k8s.io/release-utils v0.12.0 h1:+Z8cEUAaxItrMcTOJ0jtUg3Fm1uNgPNol+VIL6XtQqQ=
sigs.k8s.io/release-utils v0.12.0/go.mod h1:TveYRPK4Mq6qXA0PJiUMEOlWvvIQG0Mh5APQmHD5JpA=
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
software.sslmate.com/src/go-pkcs12 v0.4.0 h1:H2g08FrTvSFKUj+D309j1DPfk5APnIdAQAB8aEykJ5k=
software.sslmate.com/src/go-pkcs12 v0.4.0/go.mod h1:Qiz0EyvDRJjjxGyUQa2cCNZn/wMyzrRJ/qcDXOQazLI=


================================================
FILE: hack/download-kubernetes-binaries.sh
================================================
#!/usr/bin/env bash

# Abort on the first failing command and on any reference to an unset variable.
set -o errexit -o nounset

# Names of the release tarballs handled by download_binaries.
BUNDLES=("kubernetes-client" "kubernetes-test")

# Exactly three positional arguments are required.
if [[ $# -ne 3 ]]; then
  echo >&2 "usage: $0 (KUBERNETES_MINOR_VERSION|latest) OS ARCH"
  exit 1
fi

# Pick the marker file whose contents name the Kubernetes version to fetch:
# "latest.txt" for the literal argument "latest", "latest-<arg>.txt" otherwise.
case "$1" in
  latest) RELEASE_MARKER="latest.txt" ;;
  *) RELEASE_MARKER="latest-$1.txt" ;;
esac

echo >&2 "Release marker: ${RELEASE_MARKER}"

# Second and third arguments select the platform suffix of each tarball name.
OS="$2"
ARCH="$3"

# download_binaries BASE_URL
#
# Resolves the Kubernetes version named by ${RELEASE_MARKER} under BASE_URL,
# records it in kubernetes-version.txt, then downloads and unpacks every bundle
# listed in BUNDLES for ${OS}/${ARCH} into the current directory.
#
# Returns non-zero as soon as the marker lookup, a download, or an extraction
# fails, so the caller can fall back to another bucket.
function download_binaries() {
  local basePath=$1

  # Declare and assign separately: `local VAR=$(cmd)` reports the status of
  # `local` and hides a failing curl. --fail makes HTTP errors (e.g. a missing
  # marker) exit non-zero instead of yielding an error page as the "version".
  local KUBERNETES_VERSION
  if ! KUBERNETES_VERSION=$(curl --silent --fail "${basePath}/${RELEASE_MARKER}"); then
    echo >&2 "failed to fetch release marker: ${basePath}/${RELEASE_MARKER}"
    return 1
  fi
  if [ -z "${KUBERNETES_VERSION}" ]; then
    echo >&2 "release marker is empty: ${basePath}/${RELEASE_MARKER}"
    return 1
  fi

  echo "Kubernetes version: ${KUBERNETES_VERSION}"
  echo "${KUBERNETES_VERSION}" > kubernetes-version.txt

  local BUNDLE TARBALL
  for BUNDLE in "${BUNDLES[@]}"; do
    echo >&2 "Downloading bundle: ${BUNDLE}"
    TARBALL="${BUNDLE}.tar.gz"
    if ! wget --quiet --output-document="${TARBALL}" "${basePath}/${KUBERNETES_VERSION}/${BUNDLE}-${OS}-${ARCH}.tar.gz"; then
      # do not leave a partial or empty tarball behind
      rm -f "${TARBALL}"
      return 1
    fi
    # errexit is suppressed while this function runs as an `if !` condition,
    # so extraction failures must be propagated explicitly.
    tar xzf "${TARBALL}" || return 1
    rm "${TARBALL}"
  done
}

# Try the official release bucket first; if any step of the download fails,
# retry the whole download from the CI dev-release bucket.
if ! download_binaries https://storage.googleapis.com/kubernetes-release/release; then
  echo >&2 "binary download failed from release bucket, falling back to ci dev release"
  download_binaries https://storage.googleapis.com/k8s-release-dev/ci
fi


================================================
FILE: hack/free-disk-space.sh
================================================
#!/usr/bin/env bash

set -o nounset
set -o errexit
set -o pipefail

# hack to free up disk space for build
# ref: https://github.com/easimon/maximize-build-space/blob/master/action.yml

# storage before
sudo df -h

# remove large preinstalled toolchains that the build does not use
sudo rm -rf \
  /usr/share/dotnet \
  /usr/local/lib/android \
  /opt/ghc \
  /opt/hostedtoolcache/CodeQL

# --force skips the interactive confirmation prompt; without it the prune
# cannot be confirmed when the script runs unattended.
docker image prune --all --force
docker builder prune --all --force

# storage after
sudo df -h


================================================
FILE: hack/update-go-dependencies.sh
================================================
#!/usr/bin/env bash

set -o nounset
set -o errexit
set -o pipefail

echo "Updating go modules..."
# Collect the direct (non-main, non-indirect) module paths first so that a
# failing `go list` aborts the script instead of being hidden inside an
# argument substitution.
MODULES=$(go list -f '{{if not (or .Main .Indirect)}}{{.Path}}{{end}}' -mod=mod -m all)
# Run the two steps as separate commands: in `a && b`, errexit ignores a
# failure of `a`, so a failed `go get` used to be silently skipped.
# Word splitting of ${MODULES} is intentional (one argument per module path).
go get ${MODULES}
go mod tidy

echo "Updating kubetest2 image go version..."
# major.minor of the main module's go directive, e.g. 1.23.4 -> 1.23
MODULE_GO_VERSION=$(go list -m -f "{{if .Main}}{{.GoVersion}}{{end}}" | cut -d'.' -f1-2)
find . -type f -name Dockerfile -exec sed -i "s/\(GO_MINOR_VERSION\)=.*/\1=${MODULE_GO_VERSION}/g" {} +


================================================
FILE: hack/update-image-tags.sh
================================================
#!/usr/bin/env bash

set -o nounset
set -o errexit
set -o pipefail

# Registry hosts queried for image tags. get_ecr_image_tags uses a public
# (Bearer) token for ECR_PUBLIC_REGISTRY and a private (Basic) token otherwise.
ECR_PUBLIC_REGISTRY="public.ecr.aws"
# NOTE(review): presumably the EKS-owned private registry for us-west-2 — confirm.
EKS_CONTAINER_REGISTRY="602401143452.dkr.ecr.us-west-2.amazonaws.com"

# get_ecr_image_tags <REGISTRY> <REPOSITORY>
# e.g. get_ecr_image_tags $ECR_PUBLIC_REGISTRY amazonlinux/amazonlinux
#
# Prints the JSON array of tags of REPOSITORY in REGISTRY to stdout.
get_ecr_image_tags() {
    set -e
    local REGISTRY=$1
    local REPOSITORY=$2
    local TOKEN
    local AUTHORIZATION_TYPE

    # Every ECR HTTP request needs an authorization token: private registries
    # take a Basic token, the public registry takes a Bearer token.
    if [ "$REGISTRY" != "$ECR_PUBLIC_REGISTRY" ]; then
        TOKEN=$(aws ecr get-authorization-token --output text --query 'authorizationData[].authorizationToken')
        AUTHORIZATION_TYPE="Basic"
    else
        TOKEN=$(aws ecr-public get-authorization-token --region us-east-1 --output=text --query 'authorizationData.authorizationToken')
        AUTHORIZATION_TYPE="Bearer"
    fi

    curl -s \
        -H "Authorization: ${AUTHORIZATION_TYPE} ${TOKEN}" \
        "https://${REGISTRY}/v2/${REPOSITORY}/tags/list" \
        | jq '.tags'
}

# update_image_uris REPOSITORY IMAGE_TAG
#
# Rewrites every "image: REPOSITORY:<tag>" reference in the files under
# ./test/manifests so that it points at IMAGE_TAG.
update_image_uris() {
    local REPOSITORY=$1
    local NEW_TAG=$2
    # keep PREFIX local so the helper does not clobber a global of the same name
    local PREFIX="image: ${REPOSITORY}"
    # '#' is the sed delimiter because repository names contain '/'
    find ./test/manifests -type f -exec sed -i "s#${PREFIX}:.*#${PREFIX}:${NEW_TAG}#g" {} +
}

# update the nvidia k8s device plugin
echo "Updating Nvidia device plugin image"
# Pick the most recently updated tag that is a plain vMAJOR.MINOR.PATCH version.
# The dots are escaped so they only match a literal '.'.
NVIDIA_DEVICE_PLUGIN_TAG=$(curl -s 'https://catalog.ngc.nvidia.com/api/containers/images?orgName=nvidia&name=k8s-device-plugin&isPublic=true' | jq -r '.images | sort_by(.updatedDate) | reverse | map(select(.tag | test("^v[0-9]+\\.[0-9]+\\.[0-9]+$"))) | first | .tag')
# jq prints "null" when no tag matched; never write that into the manifests.
if [ -z "${NVIDIA_DEVICE_PLUGIN_TAG}" ] || [ "${NVIDIA_DEVICE_PLUGIN_TAG}" = "null" ]; then
    echo >&2 "failed to resolve the latest Nvidia device plugin tag"
    exit 1
fi
update_image_uris nvcr.io/nvidia/k8s-device-plugin "${NVIDIA_DEVICE_PLUGIN_TAG}"

# below updates require authentication and should not exit early with a failure.
# TODO: remove this once the aws credentials are setup and the paths are expected to succeed.
set +e

# update the neuron k8s device plugin
echo "Updating Neuron device plugin image"
NEURON_DEVICE_PLUGIN_REPOSITORY_NAME="neuron/neuron-device-plugin"
NEURON_DEVICE_PLUGIN_TAGS=$(get_ecr_image_tags $ECR_PUBLIC_REGISTRY $NEURON_DEVICE_PLUGIN_REPOSITORY_NAME)
# $? is the status of the tag lookup above; the manifests are only touched when it succeeded
if [ $? -eq 0 ]; then
    # the highest tag when each tag is compared as a list of dot-separated numbers
    LATEST_NEURON_DEVICE_PLUGIN_TAG=$(echo $NEURON_DEVICE_PLUGIN_TAGS | jq -r 'max_by(split(".") | map(tonumber))')
    update_image_uris "${ECR_PUBLIC_REGISTRY}/${NEURON_DEVICE_PLUGIN_REPOSITORY_NAME}" $LATEST_NEURON_DEVICE_PLUGIN_TAG
fi

# update the efa k8s device plugin
echo "Updating EFA device plugin image"
EFA_DEVICE_PLUGIN_REPOSITORY_NAME="eks/aws-efa-k8s-device-plugin"
EFA_DEVICE_PLUGIN_TAGS=$(get_ecr_image_tags $EKS_CONTAINER_REGISTRY $EFA_DEVICE_PLUGIN_REPOSITORY_NAME)
# $? is the status of the tag lookup above; the manifests are only touched when it succeeded
if [ $? -eq 0 ]; then
    # keep only the part of each tag before the first '-', then take the highest
    # one after stripping a leading "v" and comparing the dot-separated numbers
    LATEST_EFA_DEVICE_PLUGIN_TAG=$(echo $EFA_DEVICE_PLUGIN_TAGS | jq -r 'map(split("-") | .[0]) | max_by(sub("^v"; "") | split(".") | map(tonumber))')
    update_image_uris "${EKS_CONTAINER_REGISTRY}/${EFA_DEVICE_PLUGIN_REPOSITORY_NAME}" $LATEST_EFA_DEVICE_PLUGIN_TAG
fi

================================================
FILE: hack/update-neuron-dependencies.sh
================================================
#!/usr/bin/env bash

set -o nounset
set -o errexit
set -o pipefail

# pip_versionsearch PACKAGE
#
# Prints the highest version of PACKAGE listed in the neuron pip repo.
# The index page is looked up by the dashed form of the name, while the
# versioned entries on that page are matched by the underscored form.
pip_versionsearch() {
    local PACKAGE_INDEX_NAME=$(echo "$1" | tr -s '_' '-')
    local PACKAGE_VERSION_NAME=$(echo "${PACKAGE_INDEX_NAME}" | tr -s '-' '_')
    curl -s "https://pip.repos.neuron.amazonaws.com/${PACKAGE_INDEX_NAME}" \
        | grep -o -G "${PACKAGE_VERSION_NAME}-[0-9\.]*+[a-f0-9]*" \
        | sed "s/${PACKAGE_VERSION_NAME}-//" \
        | sort -V \
        | tail -n 1
}

# versionsearch PACKAGE
#
# Prints the highest version of PACKAGE found in the Packages index of the
# neuron amd64 apt repo.
versionsearch() {
    local PACKAGE_NAME=$1
    curl -s https://apt.repos.neuron.amazonaws.com/dists/focal/main/binary-amd64/Packages \
        | grep -o "${PACKAGE_NAME}_[0-9\.]*-*[a-f0-9]*" \
        | sed "s/${PACKAGE_NAME}_//" \
        | sort -V \
        | tail -n 1
}

# update_arg ARG NEW_VALUE
#
# Replaces everything after "ARG=" with NEW_VALUE in every file named
# Dockerfile below the current directory.
update_arg() {
    local ARG=$1
    local NEW_VALUE=$2
    echo "setting $ARG to $NEW_VALUE"
    find . -type f -name Dockerfile \
        -exec sed -i "s/${ARG}=.*/${ARG}=${NEW_VALUE}/g" {} +
}

# Versions resolved from the neuron apt repository.
update_arg NEURONX_RUNTIME_LIB_VERSION $(versionsearch aws-neuronx-runtime-lib)
update_arg NEURONX_COLLECTIVES_LIB_VERSION $(versionsearch aws-neuronx-collectives)
update_arg NEURONX_TOOLS_VERSION $(versionsearch aws-neuronx-tools)
# Versions resolved from the neuron pip repository.
update_arg NEURONX_FRAMEWORK_VERSION $(pip_versionsearch torch-neuronx)
update_arg NEURONX_CC_VERSION $(pip_versionsearch neuronx-cc)
update_arg NEURONX_DISTRIBUTED_VERSION $(pip_versionsearch neuronx_distributed)

================================================
FILE: hack/update-nvidia-dependencies.sh
================================================
#!/usr/bin/env bash

# Updates the version ARGs in every Dockerfile:
# 1. AWS_OFI_NCCL_VERSION: the latest release tag of aws-ofi-nccl
# 2. LIBNCCL_VERSION: the first "[NCCL <version>]" named in that release's notes
# 3. NVBANDWIDTH_VERSION: the latest release tag of nvbandwidth

set -o nounset
set -o errexit
set -o pipefail

echo "Updating aws-ofi-nccl"
# Fetch the release once so the tag and the release notes used below always
# describe the same release (and only one API request is spent on it).
AWS_OFI_NCCL_RELEASE=$(curl -s https://api.github.com/repos/aws/aws-ofi-nccl/releases/latest)
# jq -e exits non-zero on a null result (e.g. an API error response), which
# aborts the script instead of writing "null" into the Dockerfiles.
AWS_OFI_NCCL_TAG=$(printf '%s\n' "${AWS_OFI_NCCL_RELEASE}" | jq -er .tag_name | sed 's/^v//')
find . -type f -name Dockerfile -exec sed -i "s/AWS_OFI_NCCL_VERSION=.*/AWS_OFI_NCCL_VERSION=$AWS_OFI_NCCL_TAG/g" {} +

echo "Updating nccl"
LIB_NCCL_TAG=$(printf '%s\n' "${AWS_OFI_NCCL_RELEASE}" | jq -er .body | grep -oP '\[NCCL \K(\S*)(?=\])' | head -n 1 | sed 's/^v//')
find . -type f -name Dockerfile -exec sed -i "s/LIBNCCL_VERSION=.*/LIBNCCL_VERSION=$LIB_NCCL_TAG/g" {} +

echo "Updating nvbandwidth"
NVBANDWIDTH_TAG=$(curl -s https://api.github.com/repos/NVIDIA/nvbandwidth/releases/latest | jq -er .tag_name)
find . -type f -name Dockerfile -exec sed -i "s/NVBANDWIDTH_VERSION=.*/NVBANDWIDTH_VERSION=$NVBANDWIDTH_TAG/g" {} +


================================================
FILE: internal/awssdk/config.go
================================================
package awssdk

import (
	"context"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
)

// NewConfig loads the default AWS SDK configuration.
// It logs the failure and panics if the config cannot be created.
func NewConfig() aws.Config {
	cfg, loadErr := config.LoadDefaultConfig(context.TODO())
	if loadErr == nil {
		return cfg
	}
	slog.Error("failed to create AWS SDK config", "error", loadErr)
	panic(loadErr)
}


================================================
FILE: internal/deployers/eksapi/addons.go
================================================
package eksapi

import (
	"context"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/eks"
)

const (
	addonCreationTimeout = 5 * time.Minute
)

// AddonManager creates EKS managed addons using the shared AWS clients.
type AddonManager struct {
	clients *awsClients
}

// NewAddonManager returns an AddonManager that issues its requests through clients.
func NewAddonManager(clients *awsClients) *AddonManager {
	manager := new(AddonManager)
	manager.clients = clients
	return manager
}

// createAddons creates every managed addon requested in opts.Addons on the
// given cluster and waits for each one to become active.
//
// Each entry of opts.Addons must have the form "name:version". The version is
// first resolved with resolveAddonVersion. When a name is listed more than
// once, the last entry decides the version. Addons are created one at a time,
// in the order their names first appear, so runs are reproducible.
//
// It returns an error for a malformed entry, an unresolvable version, a failed
// CreateAddon call, or an addon that is not active within addonCreationTimeout.
func (m *AddonManager) createAddons(infra *Infrastructure, cluster *Cluster, opts *deployerOptions) error {
	ctx := context.TODO()

	resolved := make(map[string]string, len(opts.Addons))
	// names keeps the first-seen order; ranging over the map would be random.
	names := make([]string, 0, len(opts.Addons))
	for _, addon := range opts.Addons {
		addonParts := strings.Split(addon, ":")
		if len(addonParts) != 2 {
			return fmt.Errorf("invalid addon format: %s", addon)
		}
		name, version := addonParts[0], addonParts[1]
		slog.Info("resolving addon version", "addon", name, "version", version)
		resolvedVersion, err := m.resolveAddonVersion(name, version, opts.KubernetesVersion)
		if err != nil {
			return err
		}
		if _, seen := resolved[name]; !seen {
			names = append(names, name)
		}
		// dedupe addons with the same name. last provided entry wins.
		resolved[name] = resolvedVersion
	}

	for _, addonName := range names {
		addonVersion := resolved[addonName]
		slog.Info("creating addon", "addon", addonName, "version", addonVersion)
		input := eks.CreateAddonInput{
			AddonName:    aws.String(addonName),
			AddonVersion: aws.String(addonVersion),
			ClusterName:  aws.String(cluster.name),
		}
		if _, err := m.clients.EKS().CreateAddon(ctx, &input); err != nil {
			return fmt.Errorf("failed to create addon: %w", err)
		}
		slog.Info("waiting for addon to be active", "addon", addonName)
		err := eks.NewAddonActiveWaiter(m.clients.EKS()).
			Wait(ctx, &eks.DescribeAddonInput{
				AddonName:   aws.String(addonName),
				ClusterName: aws.String(cluster.name),
			}, addonCreationTimeout)
		if err != nil {
			return fmt.Errorf("failed to wait for addon to be active: %w", err)
		}
	}

	return nil
}

// resolveAddonVersion maps versionMarker to a concrete version of the addon
// name that is available for kubernetesVersion.
//
// versionMarker is one of:
//   - "latest":  the first version returned by DescribeAddonVersions
//   - "default": the first version flagged as the default in its compatibilities
//   - anything else: that exact version, if the service lists it
//
// All result pages are consulted, so a version that is not on the first page
// can still be resolved. It returns an error if the lookup fails or if no
// version matches the marker.
func (m *AddonManager) resolveAddonVersion(name string, versionMarker string, kubernetesVersion string) (string, error) {
	ctx := context.TODO()
	paginator := eks.NewDescribeAddonVersionsPaginator(m.clients.EKS(), &eks.DescribeAddonVersionsInput{
		AddonName:         aws.String(name),
		KubernetesVersion: aws.String(kubernetesVersion),
	})
	for paginator.HasMorePages() {
		page, err := paginator.NextPage(ctx)
		if err != nil {
			return "", fmt.Errorf("failed to describe addon versions: %s: %w", name, err)
		}
		for _, addon := range page.Addons {
			for _, versionInfo := range addon.AddonVersions {
				// skip entries without a version instead of dereferencing nil
				addonVersion := aws.ToString(versionInfo.AddonVersion)
				if addonVersion == "" {
					continue
				}
				switch versionMarker {
				case "latest":
					return addonVersion, nil
				case "default":
					for _, compatibility := range versionInfo.Compatibilities {
						if compatibility.DefaultVersion {
							return addonVersion, nil
						}
					}
				default:
					if addonVersion == versionMarker {
						return addonVersion, nil
					}
				}
			}
		}
	}
	return "", fmt.Errorf("failed to resolve addon version: %s=%s", name, versionMarker)
}


================================================
FILE: internal/deployers/eksapi/ami_resolver.go
================================================
package eksapi

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
	"github.com/aws/aws-sdk-go-v2/service/ssm"
)

// NewAMIResolver returns an amiResolver that queries AWS through awsClients.
func NewAMIResolver(awsClients *awsClients) *amiResolver {
	resolver := new(amiResolver)
	resolver.clients = awsClients
	return resolver
}

// amiResolver looks up AMI IDs for the node operating systems handled by Resolve.
type amiResolver struct {
	clients *awsClients
}

// Resolve returns the AMI ID matching opts.UserDataFormat: nodeadm selects
// AL2023 and Bottlerocket selects Bottlerocket. The bootstrap.sh format and
// any other value yield an error.
func (r *amiResolver) Resolve(ctx context.Context, opts *deployerOptions) (string, error) {
	format := opts.UserDataFormat
	if format == UserDataNodeadm {
		return r.ResolveAL2023(ctx, opts)
	}
	if format == UserDataBottlerocket {
		return r.ResolveBottlerocket(ctx, opts)
	}
	if format == UserDataBootstrapSh {
		// TODO: AL2 is not a high priority, skipping for now.
		return "", fmt.Errorf("%s is not handled", format)
	}
	return "", fmt.Errorf("unhandled userdata format: %s", format)
}

// ResolveAL2023 returns the recommended EKS-optimized Amazon Linux 2023 AMI ID
// for opts.KubernetesVersion and the first instance type in opts.InstanceTypes.
//
// The instance type determines the architecture and the AMI variant:
// "neuron" when Neuron devices are reported, "nvidia" when an NVIDIA GPU is
// reported, otherwise "standard". The AMI ID is read from the matching public
// SSM parameter.
//
// It returns an error if EC2 or SSM fail, if EC2 returns no information for
// the instance type, or if the architecture is not handled.
func (r *amiResolver) ResolveAL2023(ctx context.Context, opts *deployerOptions) (string, error) {
	instanceType := r.getInstance(opts)
	describeInstanceTypesResponse, err := r.clients.EC2().DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{
		InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(instanceType)},
	})
	if err != nil {
		return "", err
	}
	// guard the index below: an empty response must be an error, not a panic
	if len(describeInstanceTypesResponse.InstanceTypes) == 0 {
		return "", fmt.Errorf("no instance type info returned for: %s", instanceType)
	}
	instanceTypeInfo := describeInstanceTypesResponse.InstanceTypes[0]

	arch, err := r.resolveArch(instanceTypeInfo)
	if err != nil {
		return "", err
	}

	variant := "standard"
	if instanceTypeInfo.NeuronInfo != nil {
		if len(instanceTypeInfo.NeuronInfo.NeuronDevices) > 0 {
			variant = "neuron"
		}
	} else if instanceTypeInfo.GpuInfo != nil {
		for _, gpu := range instanceTypeInfo.GpuInfo.Gpus {
			if aws.ToString(gpu.Manufacturer) == "NVIDIA" {
				variant = "nvidia"
				break
			}
		}
	}

	getParameterResponse, err := r.clients.SSM().GetParameter(ctx, &ssm.GetParameterInput{
		Name: aws.String(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/%s/%s/recommended/image_id", opts.KubernetesVersion, arch, variant)),
	})
	if err != nil {
		return "", err
	}

	return aws.ToString(getParameterResponse.Parameter.Value), nil
}

// ResolveBottlerocket returns the latest Bottlerocket AMI ID for
// opts.KubernetesVersion and the first instance type in opts.InstanceTypes.
//
// The instance type determines the architecture, and the "-nvidia" flavor is
// selected when an NVIDIA GPU is reported. The AMI ID is read from the
// matching public SSM parameter.
//
// It returns an error if EC2 or SSM fail, if EC2 returns no information for
// the instance type, or if the architecture is not handled.
func (r *amiResolver) ResolveBottlerocket(ctx context.Context, opts *deployerOptions) (string, error) {
	instanceType := r.getInstance(opts)
	describeInstanceTypesResponse, err := r.clients.EC2().DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{
		InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(instanceType)},
	})
	if err != nil {
		return "", err
	}
	// guard the index below: an empty response must be an error, not a panic
	if len(describeInstanceTypesResponse.InstanceTypes) == 0 {
		return "", fmt.Errorf("no instance type info returned for: %s", instanceType)
	}
	instanceTypeInfo := describeInstanceTypesResponse.InstanceTypes[0]

	arch, err := r.resolveArch(instanceTypeInfo)
	if err != nil {
		return "", err
	}

	// TODO: enable fips
	flavorSuffix := ""
	if instanceTypeInfo.GpuInfo != nil {
		for _, gpu := range instanceTypeInfo.GpuInfo.Gpus {
			if aws.ToString(gpu.Manufacturer) == "NVIDIA" {
				flavorSuffix = "-nvidia"
				break
			}
		}
	}

	getParameterResponse, err := r.clients.SSM().GetParameter(ctx, &ssm.GetParameterInput{
		Name: aws.String(fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s%s/%s/latest/image_id", opts.KubernetesVersion, flavorSuffix, arch)),
	})
	if err != nil {
		return "", err
	}

	return aws.ToString(getParameterResponse.Parameter.Value), nil
}

// getInstance returns the first entry of opts.InstanceTypes, the only one used
// for AMI resolution, and warns when further instance types were supplied.
func (r *amiResolver) getInstance(opts *deployerOptions) string {
	first := opts.InstanceTypes[0]
	if extra := len(opts.InstanceTypes) - 1; extra > 0 {
		slog.Warn("only resolving AMI based on first instance type", "instanceType", first)
	}
	return first
}

// resolveArch returns the architecture to use for the given instance type:
// the first of its supported architectures that is arm64 or x86_64.
//
// Scanning the whole list, rather than only its first entry, handles older
// instance types that report i386 ahead of x86_64. It returns an error when no
// architectures are reported or when none of them is handled.
func (r *amiResolver) resolveArch(instanceTypeInfo ec2types.InstanceTypeInfo) (string, error) {
	if instanceTypeInfo.ProcessorInfo == nil || len(instanceTypeInfo.ProcessorInfo.SupportedArchitectures) == 0 {
		return "", fmt.Errorf("no supported architectures reported for instance type: %s", instanceTypeInfo.InstanceType)
	}
	supported := instanceTypeInfo.ProcessorInfo.SupportedArchitectures
	for _, arch := range supported {
		switch arch {
		case ec2types.ArchitectureTypeArm64, ec2types.ArchitectureTypeX8664:
			return string(arch), nil
		}
	}
	return "", fmt.Errorf("unhandled arch: %s", supported[0])
}


================================================
FILE: internal/deployers/eksapi/ami_resolver_test.go
================================================
//go:build integration

package eksapi

import (
	"context"
	"testing"

	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/stretchr/testify/assert"
)

// TestAMIResolver resolves AMIs for AL2023 and Bottlerocket, each with an
// NVIDIA and a standard instance type, and checks that every result looks
// like an AMI ID.
func TestAMIResolver(t *testing.T) {
	ctx := context.Background()
	awsCfg, err := config.LoadDefaultConfig(ctx)
	assert.NoError(t, err)

	amiResolver := NewAMIResolver(newAWSClients(awsCfg, ""))

	suites := []struct {
		name string
		base deployerOptions
	}{
		{
			name: "AL2023-nvidia",
			base: deployerOptions{
				UserDataFormat:    UserDataNodeadm,
				KubernetesVersion: "1.33",
			},
		},
		{
			name: "Bottlerocket",
			base: deployerOptions{
				UserDataFormat:    UserDataBottlerocket,
				KubernetesVersion: "1.33",
			},
		},
	}
	variants := []struct {
		name         string
		instanceType string
	}{
		{name: "nvidia", instanceType: "g5.xlarge"},
		{name: "standard", instanceType: "m5.xlarge"},
	}

	for _, suite := range suites {
		t.Run(suite.name, func(t *testing.T) {
			for _, variant := range variants {
				t.Run(variant.name, func(t *testing.T) {
					// copy the shared options so each case sets its own instance type
					opts := suite.base
					opts.InstanceTypes = []string{variant.instanceType}

					ami, err := amiResolver.Resolve(ctx, &opts)
					assert.NoError(t, err)
					assert.Regexp(t, "ami-.*", ami)
				})
			}
		})
	}
}


================================================
FILE: internal/deployers/eksapi/auth_map_role.go
================================================
package eksapi

import (
	"bytes"

	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
)

// generateAuthMapRole renders the AuthMapRole template with the given node
// name strategy and role ARN and returns the rendered text.
func generateAuthMapRole(nodeNameStrategy string, rolearn string) (string, error) {
	data := templates.AuthMapRoleTemplateData{
		NodeNameStrategy: nodeNameStrategy,
		Rolearn:          rolearn,
	}
	var rendered bytes.Buffer
	if err := templates.AuthMapRole.Execute(&rendered, data); err != nil {
		return "", err
	}
	return rendered.String(), nil
}


================================================
FILE: internal/deployers/eksapi/auth_map_role_test.go
================================================
package eksapi

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// rolearn is the placeholder role ARN passed to generateAuthMapRole in the tests.
const rolearn = "mock-role-arn"

// sessionNamedAuthMapRole is the expected output for the "SessionName" strategy.
// NOTE(review): the space after the username placeholder is part of the
// expected text; presumably it mirrors the template — confirm.
const sessionNamedAuthMapRole = `
- username: system:node:{{SessionName}} 
  groups:
    - system:bootstrappers
    - system:nodes
  rolearn: mock-role-arn`

// privateDNSNamedAuthMapRole is the expected output for the "EC2PrivateDNSName" strategy.
// NOTE(review): the space after the username placeholder is part of the
// expected text; presumably it mirrors the template — confirm.
const privateDNSNamedAuthMapRole = `
- username: system:node:{{EC2PrivateDNSName}} 
  groups:
    - system:bootstrappers
    - system:nodes
  rolearn: mock-role-arn`

// Test_generateAuthRoleMap checks the text rendered by generateAuthMapRole for
// each supported node name strategy.
func Test_generateAuthRoleMap(t *testing.T) {
	cases := []struct {
		nodeNameStrategy string
		expected         string
	}{
		{
			nodeNameStrategy: "SessionName",
			expected:         sessionNamedAuthMapRole,
		},
		{
			nodeNameStrategy: "EC2PrivateDNSName",
			expected:         privateDNSNamedAuthMapRole,
		},
	}
	for _, c := range cases {
		t.Run(c.nodeNameStrategy, func(t *testing.T) {
			actual, err := generateAuthMapRole(c.nodeNameStrategy, rolearn)
			if err != nil {
				// report once and stop: comparing the output of a failed render is noise
				t.Fatalf("generateAuthMapRole(%q, %q) returned an error: %v", c.nodeNameStrategy, rolearn, err)
			}
			assert.Equal(t, c.expected, actual)
		})
	}
}


================================================
FILE: internal/deployers/eksapi/aws.go
================================================
package eksapi

import (
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
	"github.com/aws/aws-sdk-go-v2/service/cloudformation"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	"github.com/aws/aws-sdk-go-v2/service/iam"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/ssm"
)

// awsClients bundles the AWS service clients used by the deployer.
// The fields are populated once by newAWSClients and read through the
// accessor methods of the same name.
type awsClients struct {
	_eks       *eks.Client
	_cfn       *cloudformation.Client
	_ec2       *ec2.Client
	_asg       *autoscaling.Client
	_ssm       *ssm.Client
	_iam       *iam.Client
	_s3        *s3.Client
	_s3Presign *s3.PresignClient
}

// newAWSClients builds every service client from config. When eksEndpointURL
// is non-empty, the EKS client sends its requests to that base endpoint
// instead of the default one.
func newAWSClients(config aws.Config, eksEndpointURL string) *awsClients {
	var eksOptFns []func(*eks.Options)
	if eksEndpointURL != "" {
		eksOptFns = append(eksOptFns, func(o *eks.Options) {
			o.BaseEndpoint = aws.String(eksEndpointURL)
		})
	}

	s3Client := s3.NewFromConfig(config)
	return &awsClients{
		_eks:       eks.NewFromConfig(config, eksOptFns...),
		_cfn:       cloudformation.NewFromConfig(config),
		_ec2:       ec2.NewFromConfig(config),
		_asg:       autoscaling.NewFromConfig(config),
		_ssm:       ssm.NewFromConfig(config),
		_iam:       iam.NewFromConfig(config),
		_s3:        s3Client,
		_s3Presign: s3.NewPresignClient(s3Client),
	}
}

// EKS returns the EKS client.
func (c *awsClients) EKS() *eks.Client {
	return c._eks
}

// CFN returns the CloudFormation client.
func (c *awsClients) CFN() *cloudformation.Client {
	return c._cfn
}

// EC2 returns the EC2 client.
func (c *awsClients) EC2() *ec2.Client {
	return c._ec2
}

// ASG returns the Auto Scaling client.
func (c *awsClients) ASG() *autoscaling.Client {
	return c._asg
}

// SSM returns the SSM client.
func (c *awsClients) SSM() *ssm.Client {
	return c._ssm
}

// IAM returns the IAM client.
func (c *awsClients) IAM() *iam.Client {
	return c._iam
}

// S3 returns the S3 client.
func (c *awsClients) S3() *s3.Client {
	return c._s3
}

// S3Presign returns the S3 presign client.
func (c *awsClients) S3Presign() *s3.PresignClient {
	return c._s3Presign
}


================================================
FILE: internal/deployers/eksapi/cluster.go
================================================
package eksapi

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"time"

	"github.com/aws/aws-k8s-tester/internal/util"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
	"github.com/aws/smithy-go/ptr"
)

// ClusterManager creates, inspects and deletes the EKS cluster of a test run.
// resourceID is used as the name of the cluster it creates and deletes.
type ClusterManager struct {
	clients    *awsClients
	resourceID string
}

// NewClusterManager returns a ClusterManager bound to clients and resourceID.
func NewClusterManager(clients *awsClients, resourceID string) *ClusterManager {
	manager := new(ClusterManager)
	manager.clients = clients
	manager.resourceID = resourceID
	return manager
}

// Cluster holds the attributes of an active EKS cluster, as filled in by
// waitForClusterActive from the DescribeCluster output.
type Cluster struct {
	endpoint                 string
	certificateAuthorityData string
	securityGroupId          string
	arn                      string
	name                     string
	// cidr is the service IPv4 or IPv6 CIDR, depending on the cluster's IP family.
	cidr string
}

// getOrCreateCluster returns the cluster to run against once it is active.
//
// When opts.StaticClusterName is empty, a new cluster named after the
// manager's resourceID is created from infra and opts; otherwise the named
// cluster is reused as is. In both cases the call blocks until the cluster is
// active or opts.ClusterCreationTimeout elapses.
func (m *ClusterManager) getOrCreateCluster(infra *Infrastructure, opts *deployerOptions) (*Cluster, error) {
	targetClusterName := opts.StaticClusterName
	if targetClusterName == "" {
		slog.Info("creating cluster...")
		input := eks.CreateClusterInput{
			Name: aws.String(m.resourceID),
			ResourcesVpcConfig: &ekstypes.VpcConfigRequest{
				EndpointPrivateAccess: aws.Bool(true),
				EndpointPublicAccess:  aws.Bool(true),
				SubnetIds:             infra.subnets(),
			},
			RoleArn: aws.String(infra.clusterRoleARN),
			KubernetesNetworkConfig: &ekstypes.KubernetesNetworkConfigRequest{
				IpFamily: ekstypes.IpFamily(opts.IPFamily),
			},
			Version: aws.String(opts.KubernetesVersion),
		}
		if opts.AutoMode {
			input.ComputeConfig = &ekstypes.ComputeConfigRequest{
				// the intent is to create our own node pools rather than rely on the default ones
				Enabled:     aws.Bool(true),
				NodeRoleArn: aws.String(infra.nodeRoleARN),
				// TODO: we can't currently enable managed compute without a default NodePool
				// the system NodePool is tainted for critical addons only, so will be ignored for our test workloads
				NodePools: []string{"system"},
			}
			input.StorageConfig = &ekstypes.StorageConfigRequest{
				BlockStorage: &ekstypes.BlockStorage{
					Enabled: aws.Bool(true),
				},
			}
			input.KubernetesNetworkConfig.ElasticLoadBalancing = &ekstypes.ElasticLoadBalancing{
				Enabled: aws.Bool(true),
			}
			input.AccessConfig = &ekstypes.CreateAccessConfigRequest{
				AuthenticationMode: ekstypes.AuthenticationModeApi,
			}
			input.BootstrapSelfManagedAddons = aws.Bool(false)
		}
		if opts.EnableClusterLogging {
			input.Logging = &ekstypes.Logging{
				ClusterLogging: []ekstypes.LogSetup{
					{
						Enabled: ptr.Bool(true),
						// Values() lists every log type, so all of them are enabled
						Types: ekstypes.LogTypeApi.Values(),
					},
				},
			}
		}
		// the headers from opts.UpClusterHeaders are attached to the CreateCluster request only
		apiOpts, err := util.NewHTTPHeaderAPIOptions(opts.UpClusterHeaders)
		if err != nil {
			return nil, fmt.Errorf("failed to create API options: %v", err)
		}
		createOutput, err := m.clients.EKS().CreateCluster(context.TODO(), &input,
			func(o *eks.Options) {
				o.APIOptions = apiOpts
			})
		if err != nil {
			return nil, fmt.Errorf("failed to create cluster: %v", err)
		}
		targetClusterName = aws.ToString(createOutput.Cluster.Name)
	} else {
		slog.Info("reusing existing static cluster", "clusterName", opts.StaticClusterName)
	}
	// both paths wait here: a reused cluster must be active as well
	cluster, waitErr := m.waitForClusterActive(targetClusterName, opts.ClusterCreationTimeout)
	if waitErr != nil {
		return nil, fmt.Errorf("failed to wait for cluster to become active: %v", waitErr)
	}
	return cluster, nil
}

// waitForClusterActive blocks until the named cluster is active or timeout
// elapses, then returns the cluster's attributes. The service CIDR is taken
// from the field matching the cluster's IP family; an unknown family is an error.
func (m *ClusterManager) waitForClusterActive(clusterName string, timeout time.Duration) (*Cluster, error) {
	slog.Info("waiting for cluster to be active", "clusterName", clusterName)
	waiter := eks.NewClusterActiveWaiter(m.clients.EKS())
	describeInput := &eks.DescribeClusterInput{
		Name: aws.String(clusterName),
	}
	out, err := waiter.WaitForOutput(context.TODO(), describeInput, timeout)
	// log when possible, whether there was an error or not
	if out != nil {
		slog.Info("cluster details", "cluster", out.Cluster)
	}
	if err != nil {
		return nil, fmt.Errorf("failed waiting for cluster be active: %v", err)
	}

	active := out.Cluster
	slog.Info("cluster is active", "arn", *active.Arn)

	networkConfig := active.KubernetesNetworkConfig
	var cidr string
	switch family := networkConfig.IpFamily; family {
	case ekstypes.IpFamilyIpv4:
		cidr = *networkConfig.ServiceIpv4Cidr
	case ekstypes.IpFamilyIpv6:
		cidr = *networkConfig.ServiceIpv6Cidr
	default:
		return nil, fmt.Errorf("unknown cluster IP family: '%v'", family)
	}

	cluster := &Cluster{
		arn:                      *active.Arn,
		certificateAuthorityData: *active.CertificateAuthority.Data,
		cidr:                     cidr,
		endpoint:                 *active.Endpoint,
		name:                     *active.Name,
		securityGroupId:          *active.ResourcesVpcConfig.ClusterSecurityGroupId,
	}
	return cluster, nil
}

// isClusterActive reports whether the manager's cluster is active. A cluster
// that is still being created yields (false, nil); any other status, or a
// failed describe call, yields an error.
func (m *ClusterManager) isClusterActive() (bool, error) {
	describeInput := &eks.DescribeClusterInput{
		Name: aws.String(m.resourceID),
	}
	result, err := m.clients.EKS().DescribeCluster(context.TODO(), describeInput)
	if err != nil {
		return false, err
	}

	status := result.Cluster.Status
	if status == ekstypes.ClusterStatusActive {
		return true, nil
	}
	if status == ekstypes.ClusterStatusCreating {
		return false, nil
	}
	return false, fmt.Errorf("cluster status is: %v", status)
}

// deleteCluster deletes the manager's cluster and waits up to 15 minutes for
// the deletion to finish. A failed delete request is retried every two
// minutes, up to five attempts in total. A cluster that does not exist counts
// as success.
func (m *ClusterManager) deleteCluster() error {
	const (
		maxAttempts   = 5
		retryInterval = 2 * time.Minute
	)

	ctx := context.TODO()
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		slog.Info("deleting cluster...", "attempt", attempt)
		out, err := m.clients.EKS().DeleteCluster(ctx, &eks.DeleteClusterInput{
			Name: aws.String(m.resourceID),
		})
		if err == nil {
			// the request was accepted: wait for the deletion, without further retries
			slog.Info("waiting for cluster to be deleted", "arn", *out.Cluster.Arn)
			waiter := eks.NewClusterDeletedWaiter(m.clients.EKS())
			waitErr := waiter.Wait(ctx, &eks.DescribeClusterInput{
				Name: aws.String(m.resourceID),
			}, 15*time.Minute)
			if waitErr != nil {
				return fmt.Errorf("failed to wait for cluster to be deleted: %v", waitErr)
			}
			return nil
		}

		var notFound *ekstypes.ResourceNotFoundException
		if errors.As(err, &notFound) {
			slog.Info("cluster does not exist", "resourceID", m.resourceID)
			return nil
		}
		if attempt == maxAttempts {
			return fmt.Errorf("failed to delete cluster after %d attempts: %v", maxAttempts, err)
		}
		slog.Info("deletion failed, retrying...", "error", err, "retryInterval", retryInterval)
		time.Sleep(retryInterval)
	}

	return fmt.Errorf("failed to delete cluster after %d attempts", maxAttempts)
}


================================================
FILE: internal/deployers/eksapi/common.go
================================================
package eksapi

import (
	"os"
	"slices"
	"strings"
)

const AvailabilityZonePriorityEnv = "EKSAPI_AZ_PRIORITY"

func availabilityZoneHintedOrder(availabilityZones []string) []string {
	var priorityAZs []string
	if priorityAZsString, ok := os.LookupEnv(AvailabilityZonePriorityEnv); ok {
		priorityAZs = strings.Split(priorityAZsString, ",")
	}
	if len(priorityAZs) == 0 {
		return availabilityZones
	}
	return slices.SortedStableFunc(slices.Values(availabilityZones), func(az1, az2 string) int {
		if slices.Contains(priorityAZs, az1) {
			if slices.Contains(priorityAZs, az2) {
				return 0
			}
			return -1
		}
		return 0
	})
}


================================================
FILE: internal/deployers/eksapi/common_test.go
================================================
package eksapi

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// Test_AZ_PRIORITY checks that the zone named in the priority environment
// variable is moved ahead of the others.
func Test_AZ_PRIORITY(t *testing.T) {
	t.Setenv(AvailabilityZonePriorityEnv, "us-west-2d")

	input := []string{"us-west-2b", "us-west-2c", "us-west-2d"}
	expected := []string{"us-west-2d", "us-west-2b", "us-west-2c"}

	assert.Equal(t, expected, availabilityZoneHintedOrder(input))
}


================================================
FILE: internal/deployers/eksapi/deployer.go
================================================
package eksapi

import (
	"context"
	"flag"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/aws/aws-k8s-tester/internal"
	"github.com/aws/aws-k8s-tester/internal/awssdk"
	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/internal/metrics"
	"github.com/aws/aws-k8s-tester/internal/util"

	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
	"github.com/spf13/pflag"
	"github.com/urfave/sflags/gen/gpflag"
	"golang.org/x/exp/slices"
	"sigs.k8s.io/kubetest2/pkg/types"
)

// DeployerName is the name of the deployer
const DeployerName = "eksapi"

// ResourcePrefix is "kubetest2-" followed by the deployer name.
const ResourcePrefix = "kubetest2-" + DeployerName

// SupportedNodeNameStrategy lists the accepted node name strategy values.
var SupportedNodeNameStrategy = []string{"SessionName", "EC2PrivateDNSName"}

// assert that deployer implements optional interfaces
var _ types.DeployerWithKubeconfig = &deployer{}
var _ types.DeployerWithInit = &deployer{}
var _ types.DeployerWithFinish = &deployer{}

// deployer is the eksapi kubetest2 deployer. It embeds its own options next
// to the common kubetest2 options and holds the managers, AWS clients and
// resolved resources it works with.
type deployer struct {
	commonOptions types.Options
	deployerOptions

	// managers, one per group of resources
	metrics              metrics.MetricRegistry
	infraManager         *InfrastructureManager
	clusterManager       *ClusterManager
	addonManager         *AddonManager
	nodeManager          *nodeManager
	logManager           *logManager
	staticClusterManager *StaticClusterManager

	awsClients *awsClients

	// NOTE(review): presumably the infrastructure and cluster resolved during Up — confirm
	infra   *Infrastructure
	cluster *Cluster

	k8sClient *k8sClient

	// NOTE(review): presumably set when the deployer is initialized — confirm
	initTime time.Time
}

type deployerOptions struct {
	Addons                      []string      `flag:"addons" desc:"Managed addons (name:version pairs) to create in the cluster. Use 'latest' for the most recent version, or 'default' for the default version."`
	AMI                         string        `flag:"ami" desc:"AMI for unmanaged nodes"`
	AMIType                     string        `flag:"ami-type" desc:"AMI type for managed nodes"`
	AutoMode                    bool          `flag:"auto-mode" desc:"Enable EKS Auto Mode"`
	CapacityReservation         bool          `flag:"capacity-reservation" desc:"Use capacity reservation for the unmanaged nodegroup"`
	TargetCapacityReservationId string        `flag:"target-capacity-reservation-id" desc:"CapacityReservation ID to use for targeted launches. Implies --capacity-reservation."`
	ClusterCreationTimeout      time.Duration `flag:"cluster-creation-timeout" desc:"Time to wait for cluster to be created and become active."`
	ClusterRoleServicePrincipal string        `flag:"cluster-role-service-principal" desc:"Additional service principal that can assume the cluster role"`
	DeployCloudwatchInfra       bool          `flag:"deploy-cloudwatch-infra" desc:"Deploy required infrastructure for emitting metrics to CloudWatch"`
	EFA                         bool          `flag:"efa" desc:"Create EFA interfaces on the node of an unmanaged nodegroup. One instance type must be passed if set. Requires --unmanaged-nodes and --instance-types."`
	EKSEndpointURL              string        `flag:"endpoint-url" desc:"Endpoint URL for the EKS API"`
	EmitMetrics                 bool          `flag:"emit-metrics" desc:"Record and emit metrics to CloudWatch"`
	EnableClusterLogging        bool          `flag:"enable-cluster-logging" desc:"Enable sending EKS control plane logs to an /aws/eks/<cluster_name/cluster log group. https://docs.aws.amazon.com/eks/latest/userguide/control-plane-logs.html"`
	ExpectedAMI                 string        `flag:"expected-ami" desc:"Expected AMI of nodes. Up will fail if the actual nodes are not utilizing the expected AMI. Defaults to --ami if defined."`
	// TODO: remove this once it's no longer used in downstream jobs
	GenerateSSHKey          bool          `flag:"generate-ssh-key" desc:"Generate an SSH key to use for tests. The generated key should not be used in production, as it will not have a passphrase."`
	InstanceTypes           []string      `flag:"instance-types" desc:"Node instance types. Cannot be used with --instance-type-archs"`
	InstanceTypeArchs       []string      `flag:"instance-type-archs" desc:"Use default node instance types for specific architectures. Cannot be used with --instance-types"`
	IPFamily                string        `flag:"ip-family" desc:"IP family for the cluster (ipv4 or ipv6)"`
	KubeconfigPath          string        `flag:"kubeconfig" desc:"Path to kubeconfig"`
	KubernetesVersion       string        `flag:"kubernetes-version" desc:"cluster Kubernetes version"`
	LogBucket               string        `flag:"log-bucket" desc:"S3 bucket for storing logs for each run. If empty, logs will not be stored."`
	NodeadmFeatureGates     []string      `flag:"nodeadm-feature-gates" desc:"Feature gates to enable for nodeadm (key=value pairs)"`
	NodeCreationTimeout     time.Duration `flag:"node-creation-timeout" desc:"Time to wait for nodes to be created/launched. This should consider instance availability."`
	NodeReadyTimeout        time.Duration `flag:"node-ready-timeout" desc:"Time to wait for all nodes to become ready"`
	Nodes                   int           `flag:"nodes" desc:"number of nodes to launch in cluster"`
	NodeNameStrategy        string        `flag:"node-name-strategy" desc:"Specifies the naming strategy for node. Allowed values: ['SessionName', 'EC2PrivateDNSName'], default to EC2PrivateDNSName"`
	Region                  string        `flag:"region" desc:"AWS region for EKS cluster"`
	SkipNodeReadinessChecks bool          `flag:"skip-node-readiness-checks" desc:"Skip performing readiness checks on created nodes"`
	StaticClusterName       string        `flag:"static-cluster-name" desc:"Optional when re-use existing cluster and node group by querying the kubeconfig and run test"`
	SetClusterDNSIP         bool          `flag:"set-cluster-dns-ip" desc:"Explicitly set cluster-dns-ip in node userdata instead of letting the node derive it"`
	TuneVPCCNI              bool          `flag:"tune-vpc-cni" desc:"Apply tuning parameters to the VPC CNI DaemonSet"`
	UnmanagedNodes          bool          `flag:"unmanaged-nodes" desc:"Use an AutoScalingGroup instead of an EKS-managed nodegroup. Requires --ami"`
	UpClusterHeaders        []string      `flag:"up-cluster-header" desc:"Additional header to add to eks:CreateCluster requests. Specified in the same format as curl's -H flag."`
	UserDataFormat          string        `flag:"user-data-format" desc:"Format of the node instance user data"`
	ZoneType                string        `flag:"zone-type" desc:"Type of zone to use for infrastructure (availability-zone, local-zone, etc). Defaults to availability-zone"`
}

// NewDeployer implements deployer.New for EKS using the EKS (and other AWS) API(s) directly (no cloudformation).
// It returns the deployer together with the flag set bound to its options.
func NewDeployer(opts types.Options) (types.Deployer, *pflag.FlagSet) {
	// only the common options are set here; everything else is flag controlled
	var d deployer
	d.commonOptions = opts

	flags := bindFlags(&d)
	return &d, flags
}

// bindFlags is a helper used to create & bind a flagset to the deployer.
// The flags are generated from the deployer's struct tags; the process-wide
// Go flag set is added to the result as well. If the flags cannot be
// generated, the error is logged and the process exits with status 1.
func bindFlags(d *deployer) *pflag.FlagSet {
	flags, err := gpflag.Parse(d)
	if err != nil {
		// include the cause, otherwise the failure is impossible to diagnose
		slog.Error("unable to bind flags for deployer", "error", err)
		os.Exit(1)
	}
	flags.AddGoFlagSet(flag.CommandLine)
	return flags
}

// Version returns internal.Version.
func (d *deployer) Version() string {
	version := internal.Version
	return version
}

// Init records the start time and constructs the AWS clients, the metric
// registry and the resource managers used by the other deployer phases.
// It always returns nil.
func (d *deployer) Init() error {
	d.initTime = time.Now()

	awsConfig := awssdk.NewConfig()
	d.awsClients = newAWSClients(awsConfig, d.EKSEndpointURL)

	// every manager shares the same run-scoped resource identifier
	resourceID := ResourcePrefix + "-" + d.commonOptions.RunID()

	if !d.deployerOptions.EmitMetrics {
		d.metrics = metrics.NewNoopMetricRegistry()
	} else {
		d.metrics = metrics.NewCloudWatchRegistry(cloudwatch.NewFromConfig(awsConfig))
	}

	d.infraManager = NewInfrastructureManager(d.awsClients, resourceID, d.metrics)
	d.clusterManager = NewClusterManager(d.awsClients, resourceID)
	d.addonManager = NewAddonManager(d.awsClients)
	d.nodeManager = NewNodeManager(d.awsClients, resourceID)
	d.logManager = NewLogManager(d.awsClients, resourceID)

	// the static cluster manager is only needed when re-using a cluster
	if name := d.deployerOptions.StaticClusterName; name != "" {
		d.staticClusterManager = NewStaticClusterManager(&d.deployerOptions)
	}
	return nil
}

// Finish records the total runtime since Init, in seconds, and then emits the
// collected metrics, returning any error from the emit.
func (d *deployer) Finish() error {
	elapsed := time.Since(d.initTime)
	d.metrics.Record(totalRuntimeSeconds, elapsed.Seconds(), nil)
	return d.metrics.Emit()
}

// Build does nothing for this deployer and always returns nil.
func (d *deployer) Build() error {
	return nil
}

// DumpClusterLogs does nothing for this deployer and always returns nil.
// Node logs are gathered separately by Up and Down through the log manager.
func (d *deployer) DumpClusterLogs() error {
	return nil
}

// Kubeconfig returns the path of the kubeconfig for the cluster. When no path
// has been set yet, a kubeconfig is written to "kubeconfig" inside the run
// directory and that path is remembered for later calls.
func (d *deployer) Kubeconfig() (string, error) {
	if d.KubeconfigPath != "" {
		return d.KubeconfigPath, nil
	}

	generatedPath := filepath.Join(d.commonOptions.RunDir(), "kubeconfig")
	if err := writeKubeconfig(d.cluster, generatedPath); err != nil {
		slog.Warn("failed to write kubeconfig", "error", err)
		return "", err
	}
	d.KubeconfigPath = generatedPath
	return d.KubeconfigPath, nil
}

// Up brings the test environment up.
//
// After validating the flags it creates the infrastructure stack (skipped for
// static clusters), gets or creates the cluster, writes the kubeconfig and
// builds the Kubernetes client. For a static cluster it then only ensures the
// nodes exist and returns. Otherwise it creates the aws-auth ConfigMap (for
// unmanaged nodes), the addons and the nodes, optionally waits for node
// readiness, and optionally deploys the CloudWatch infrastructure.
//
// The first failing step aborts Up and its error is returned; a failure to
// gather node logs is only logged.
func (d *deployer) Up() error {
	if err := d.verifyUpFlags(); err != nil {
		// wrap with %w so callers can still inspect the underlying cause
		return fmt.Errorf("up flags are invalid: %w", err)
	}
	if d.deployerOptions.StaticClusterName == "" {
		infra, err := d.infraManager.createInfrastructureStack(&d.deployerOptions)
		if err != nil {
			return err
		}
		d.infra = infra
	}
	cluster, err := d.clusterManager.getOrCreateCluster(d.infra, &d.deployerOptions)
	if err != nil {
		return err
	}
	d.cluster = cluster
	kubeconfig, err := d.Kubeconfig()
	if err != nil {
		return err
	}
	d.k8sClient, err = newK8sClient(kubeconfig)
	if err != nil {
		return err
	}
	if d.deployerOptions.StaticClusterName != "" {
		slog.Info("inited k8s client, skip the rest resource creation for static cluster")
		d.staticClusterManager.SetK8sClient(kubeconfig)
		if err := d.staticClusterManager.EnsureNodeForStaticCluster(); err != nil {
			slog.Error("failed to launch nodes", "error", err)
			return err
		}
		slog.Info("nodes launched for static cluster")
		return nil
	}
	if d.UnmanagedNodes {
		if err := d.k8sClient.createAWSAuthConfigMap(d.NodeNameStrategy, d.infra.nodeRoleARN); err != nil {
			return err
		}
	}
	// --expected-ami defaults to --ami when only the latter is known
	if d.AMI != "" && d.ExpectedAMI == "" {
		d.ExpectedAMI = d.AMI
	}

	if err := d.addonManager.createAddons(d.infra, d.cluster, &d.deployerOptions); err != nil {
		return err
	}
	if d.deployerOptions.TuneVPCCNI {
		if err := d.k8sClient.tuneVPCCNI(); err != nil {
			return err
		}
	}
	if err := d.nodeManager.createNodes(d.infra, d.cluster, &d.deployerOptions, d.k8sClient); err != nil {
		return err
	}
	if !d.SkipNodeReadinessChecks {
		if err := d.k8sClient.waitForReadyNodes(d.Nodes, d.NodeReadyTimeout); err != nil {
			return err
		}
		if d.EmitMetrics {
			if err := d.k8sClient.emitNodeMetrics(d.metrics, d.awsClients.EC2()); err != nil {
				return err
			}
		}
		if err := d.logManager.gatherLogsFromNodes(d.k8sClient, &d.deployerOptions, deployerPhaseUp); err != nil {
			slog.Warn("failed to gather logs from nodes", "error", err)
			// don't return err, this isn't critical
		}
	}

	if d.DeployCloudwatchInfra {
		slog.Info("setting up CloudWatch infrastructure...")
		roleArn, err := d.infraManager.createCloudWatchInfrastructureStack(d.cluster.name)
		if err != nil {
			slog.Error("CloudWatch infrastructure stack creation failed", "error", err)
			return err
		}
		d.infra.cloudwatchRoleArn = roleArn
		if err := d.infraManager.createCloudWatchPodIdentityAssociation(d.cluster.name, roleArn); err != nil {
			slog.Error("CloudWatch PodIdentityAssociation creation failed", "error", err)
			return err
		}
		slog.Info("CloudWatch infrastructure setup completed")
		// Apply CloudWatch infrastructure manifest
		manifest := templates.CloudWatchAgentRbac
		if err := fwext.ApplyManifests(d.k8sClient.config, manifest); err != nil {
			slog.Error("CloudWatch infrastructure manifest failed", "error", err)
			return err
		}
		slog.Info("CloudWatch infrastructure manifest applied successfully")
	}
	return nil
}

// verifyUpFlags validates the options used by Up and fills in defaults for
// the ones left unset (Kubernetes version, node count, IP family, zone type,
// timeouts, node name strategy, user data format, AMI / AMI type, cluster role
// service principal). For a static cluster only the common defaults are
// applied and the remaining checks are skipped.
//
// It returns an error describing the first invalid or unresolvable option.
func (d *deployer) verifyUpFlags() error {
	if d.KubernetesVersion == "" {
		slog.Info("--kubernetes-version is empty, attempting to detect it...")
		detectedVersion, err := detectKubernetesVersion()
		if err != nil {
			// keep the cause so the user can see why detection failed
			return fmt.Errorf("unable to detect --kubernetes-version, flag cannot be empty: %w", err)
		}
		slog.Info("detected kubernetes version", "version", detectedVersion)
		d.KubernetesVersion = detectedVersion
	}
	// zero is allowed and means "use the default", only negatives are invalid
	if d.Nodes < 0 {
		return fmt.Errorf("number of nodes must not be negative")
	}
	if d.Nodes == 0 {
		d.Nodes = 3
		slog.Info("using default number of nodes", "nodes", d.Nodes)
	}
	if d.IPFamily == "" {
		d.IPFamily = string(ekstypes.IpFamilyIpv4)
		slog.Info("using default IP family", "ipFamily", d.IPFamily)
	}
	if d.ZoneType == "" {
		d.ZoneType = "availability-zone"
		slog.Info("using default zone type", "zoneType", d.ZoneType)
	}
	if d.ClusterCreationTimeout == 0 {
		d.ClusterCreationTimeout = time.Minute * 15
	}
	if d.NodeCreationTimeout == 0 {
		d.NodeCreationTimeout = time.Minute * 20
	}
	if d.NodeReadyTimeout == 0 {
		d.NodeReadyTimeout = time.Minute * 5
	}
	if d.StaticClusterName != "" {
		slog.Info("skip configuration for static cluster")
		return nil
	}
	if len(d.InstanceTypes) > 0 && len(d.InstanceTypeArchs) > 0 {
		return fmt.Errorf("--instance-types and --instance-type-archs are mutually exclusive")
	}
	// a targeted reservation implies --capacity-reservation
	if d.TargetCapacityReservationId != "" {
		d.CapacityReservation = true
	}
	if d.UnmanagedNodes {
		if d.AMIType != "" {
			return fmt.Errorf("--ami-type should not be provided with --unmanaged-nodes")
		}
		if d.NodeNameStrategy == "" {
			d.NodeNameStrategy = "EC2PrivateDNSName"
			slog.Info("using default node name strategy", "strategy", "EC2PrivateDNSName")
		} else if !slices.Contains(SupportedNodeNameStrategy, d.NodeNameStrategy) {
			return fmt.Errorf("--node-name-strategy must be one of the following values: ['SessionName', 'EC2PrivateDNSName']")
		}
		if d.UserDataFormat == "" {
			d.UserDataFormat = UserDataBootstrapSh
			slog.Info("using default user data format", "format", d.UserDataFormat)
		}
		// AMI ID check must come after user-data format resolution because we
		// can try to infer the AMI type for unmanaged nodes.
		if d.AMI == "" {
			ami, err := NewAMIResolver(d.awsClients).Resolve(context.TODO(), &d.deployerOptions)
			if err != nil {
				return fmt.Errorf("failed to automatically resolve ami for unmanaged nodegroup (provide --ami to short circuit this): %w", err)
			}
			d.AMI = ami
		}

		if d.EFA && len(d.InstanceTypes) != 1 {
			return fmt.Errorf("--efa requires a single instance type")
		}
	} else {
		if d.AMI != "" {
			return fmt.Errorf("--ami should not be provided without --unmanaged-nodes")
		}
		if d.AMIType == "" {
			d.AMIType = "AL2023_x86_64_STANDARD"
			slog.Info("using default AMI type", "amiType", d.AMIType)
		}
	}
	// with a custom endpoint and no explicit principal, derive the principal
	// from the endpoint URL: "gamma" if the URL mentions it, "beta" otherwise
	if d.EKSEndpointURL != "" && d.ClusterRoleServicePrincipal == "" {
		spType := "beta"
		if strings.Contains(d.EKSEndpointURL, "gamma") {
			spType = "gamma"
		}
		d.ClusterRoleServicePrincipal = fmt.Sprintf("eks-%s.aws.internal", spType)
	}
	if d.DeployCloudwatchInfra {
		slog.Info("prepending pod identity agent to addons for cloudwatch infrastructure")
		// this must be prepended to the list in order to respect user overrides.
		d.deployerOptions.Addons = slices.Insert(d.deployerOptions.Addons, 0, "eks-pod-identity-agent:default")
	}
	return nil
}

// detectKubernetesVersion obtains a version from util.DetectKubernetesVersion
// and returns the result of passing it through util.ParseMinorVersion. An
// error from either step is returned unchanged with an empty version.
func detectKubernetesVersion() (string, error) {
	fullVersion, err := util.DetectKubernetesVersion()
	if err != nil {
		return "", err
	}

	minor, parseErr := util.ParseMinorVersion(fullVersion)
	if parseErr != nil {
		return "", parseErr
	}
	return minor, nil
}

// IsUp reports whether the cluster is active, as determined by the cluster manager.
func (d *deployer) IsUp() (up bool, err error) {
	up, err = d.clusterManager.isClusterActive()
	return up, err
}

// Down tears the test environment down. Node logs are gathered first on a
// best-effort basis. For a static cluster only its nodes are torn down;
// otherwise all resources created by the deployer are deleted.
func (d *deployer) Down() error {
	logErr := d.logManager.gatherLogsFromNodes(d.k8sClient, &d.deployerOptions, deployerPhaseDown)
	if logErr != nil {
		// not critical, keep going with the teardown
		slog.Warn("failed to gather logs from nodes", "error", logErr)
	}

	if d.deployerOptions.StaticClusterName == "" {
		return deleteResources(d.infraManager, d.clusterManager, d.nodeManager, d.k8sClient, &d.deployerOptions)
	}
	return d.staticClusterManager.TearDownNodeForStaticCluster()
}

// deleteResources deletes, in order, the CloudWatch infrastructure stack, the
// nodes, leaked ENIs, the cluster and finally the infrastructure stack. It
// stops at the first step that fails and returns that step's error.
func deleteResources(im *InfrastructureManager, cm *ClusterManager, nm *nodeManager, k8sClient *k8sClient /* nillable */, opts *deployerOptions /* nillable */) error {
	steps := []func() error{
		im.deleteCloudWatchInfrastructureStack,
		func() error { return nm.deleteNodes(k8sClient, opts) },
		// the EKS-managed cluster security group may be associated with a leaked ENI
		// so we need to make sure we've deleted leaked ENIs before we delete the cluster
		// otherwise, the cluster security group will be left behind and will block deletion of our VPC
		im.deleteLeakedENIs,
		cm.deleteCluster,
		im.deleteInfrastructureStack,
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return err
		}
	}
	return nil
}


================================================
FILE: internal/deployers/eksapi/infra.go
================================================
package eksapi

import (
	"context"
	_ "embed"
	"errors"
	"fmt"
	"log/slog"
	"path"
	"slices"
	"sort"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/cloudformation"
	cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
	cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	"github.com/aws/aws-sdk-go-v2/service/iam"
	iamtypes "github.com/aws/aws-sdk-go-v2/service/iam/types"
	"github.com/aws/aws-sdk-go/aws/arn"

	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
	"github.com/aws/aws-k8s-tester/internal/metrics"
	"github.com/aws/aws-k8s-tester/internal/util"
)

// Maximum wait durations passed to the CloudFormation and EC2 waiters in this file.
const (
	// infraStackCreationTimeout bounds the wait for infrastructure stack creation.
	infraStackCreationTimeout         = time.Minute * 15
	// infraStackDeletionTimeout bounds the wait for infrastructure stack deletion.
	infraStackDeletionTimeout         = time.Minute * 30
	// networkInterfaceDetachmentTimeout bounds the wait for leaked ENIs to become available.
	networkInterfaceDetachmentTimeout = time.Minute * 10
)

// Tag keys used to find ENIs that may have been leaked in the VPC.
const (
	// the VPC CNI will always add this tag to ENI's that it creates
	vpcCNIENITagKey         = "node.k8s.amazonaws.com/createdAt"
	// the IPAM controller will add this tag to the ENI's that it creates
	ipamControllerENITagKey = "eks:kubernetes-cni-node-name"
)

// eksEndpointURLTag is the key for an optional tag on the infrastructure CloudFormation stack,
// which indicates which EKS environment is associated with the stack's resources.
// The tag is only added when --endpoint-url is passed to the deployer.
const eksEndpointURLTag = "eks-endpoint-url"

var (
	// infraMetricNamespace is the namespace of the infrastructure metrics below.
	infraMetricNamespace     = path.Join(DeployerMetricNamespace, "infrastructure")
	// infraStackDeletionFailed is recorded when waiting for the infrastructure
	// stack to be deleted fails.
	infraStackDeletionFailed = &metrics.MetricSpec{
		Namespace: infraMetricNamespace,
		Metric:    "StackDeletionFailed",
		Unit:      cloudwatchtypes.StandardUnitCount,
	}
	// infraLeakedENIs is recorded with the number of leaked ENIs that were deleted.
	infraLeakedENIs = &metrics.MetricSpec{
		Namespace: infraMetricNamespace,
		Metric:    "LeakedENIs",
		Unit:      cloudwatchtypes.StandardUnitCount,
	}
)

// InfrastructureManager creates and deletes the CloudFormation-backed
// infrastructure (and related leaked resources) for a single resource ID.
type InfrastructureManager struct {
	// clients provides the AWS service clients (CFN, EC2, IAM, ...).
	clients    *awsClients
	// resourceID is used as the name of the infrastructure stack.
	resourceID string
	// metrics receives the infrastructure metrics recorded by the manager.
	metrics    metrics.MetricRegistry
}

// NewInfrastructureManager returns an InfrastructureManager that uses the
// given clients, resource ID and metric registry.
func NewInfrastructureManager(clients *awsClients, resourceID string, metrics metrics.MetricRegistry) *InfrastructureManager {
	m := new(InfrastructureManager)
	m.clients = clients
	m.resourceID = resourceID
	m.metrics = metrics
	return m
}

// Infrastructure describes the resources of a created infrastructure stack.
// Most fields are read from the stack outputs by getInfrastructureStackResources.
type Infrastructure struct {
	// availabilityZones are the AZs chosen for the subnets; set by createInfrastructureStack.
	availabilityZones []string
	vpc               string
	subnetsPublic     []string
	subnetsPrivate    []string
	clusterRoleARN    string
	nodeRoleARN       string
	// nodeRoleName is the last path segment of the node role ARN's resource.
	nodeRoleName      string
	// cloudwatchRoleArn is set by the deployer after the CloudWatch stack is created.
	cloudwatchRoleArn string
}

// subnets returns the public subnets followed by the private subnets.
//
// The result is always a freshly allocated slice: appending directly to
// subnetsPublic could write into its spare capacity, making slices returned
// by different calls share (and overwrite) the same backing array.
func (i *Infrastructure) subnets() []string {
	all := make([]string, 0, len(i.subnetsPublic)+len(i.subnetsPrivate))
	all = append(all, i.subnetsPublic...)
	return append(all, i.subnetsPrivate...)
}

// createInfrastructureStack creates the infrastructure CloudFormation stack,
// waits for it to be created and returns the resources it produced.
//
// Candidate availability zones come from capacity reservations (when
// --capacity-reservation is in effect) or from the zones offering the
// requested instance types; normalizeAZs then reduces or pads the list to the
// exact number of AZs the stack template needs.
//
// An error is returned when AZ selection, stack creation, the creation wait
// or reading the stack resources fails.
func (m *InfrastructureManager) createInfrastructureStack(opts *deployerOptions) (*Infrastructure, error) {
	var subnetAzs []string
	if opts.CapacityReservation {
		azs, err := m.getAZsWithCapacity(opts)
		if err != nil {
			return nil, err
		}
		subnetAzs = azs
	} else if len(opts.InstanceTypes) > 0 {
		azs, err := m.getRankedAZsForInstanceTypes(opts)
		if err != nil {
			return nil, err
		}
		if len(azs) == 0 {
			return nil, fmt.Errorf("no AZs support any of the provided instance types (%v)", opts.InstanceTypes)
		}
		subnetAzs = azs
	}

	// this value is not currently configurable, the infra stack is hardcoded to create 2
	// TODO: create a subnet in every AZ. today we need exactly 2 AZs for the subnets.
	const numInfraAZs = 2

	// on success normalizeAZs returns exactly numInfraAZs entries, which makes
	// the subnetAzs[0] / subnetAzs[1] accesses below safe
	subnetAzs, err := m.normalizeAZs(opts, subnetAzs, numInfraAZs)
	if err != nil {
		return nil, err
	}

	slog.Info("creating infrastructure stack", "availabilityZones", subnetAzs)
	input := cloudformation.CreateStackInput{
		StackName:    aws.String(m.resourceID),
		TemplateBody: aws.String(templates.Infrastructure),
		Capabilities: []cloudformationtypes.Capability{cloudformationtypes.CapabilityCapabilityIam},
		Parameters: []cloudformationtypes.Parameter{
			{
				ParameterKey:   aws.String("ResourceId"),
				ParameterValue: aws.String(m.resourceID),
			},
			{
				ParameterKey:   aws.String("Subnet01AZ"),
				ParameterValue: aws.String(subnetAzs[0]),
			},
			{
				ParameterKey:   aws.String("Subnet02AZ"),
				ParameterValue: aws.String(subnetAzs[1]),
			},
			{
				ParameterKey:   aws.String("AutoMode"),
				ParameterValue: aws.String(fmt.Sprintf("%t", opts.AutoMode)),
			},
		},
	}
	if opts.ClusterRoleServicePrincipal != "" {
		input.Parameters = append(input.Parameters, cloudformationtypes.Parameter{
			ParameterKey:   aws.String("AdditionalClusterRoleServicePrincipal"),
			ParameterValue: aws.String(opts.ClusterRoleServicePrincipal),
		})
	}
	if opts.EKSEndpointURL != "" {
		input.Tags = []cloudformationtypes.Tag{
			{
				Key:   aws.String(eksEndpointURLTag),
				Value: aws.String(opts.EKSEndpointURL),
			},
		}
	}
	slog.Info("creating infrastructure stack...")
	out, err := m.clients.CFN().CreateStack(context.TODO(), &input)
	if err != nil {
		return nil, err
	}
	stackID := aws.ToString(out.StackId)
	slog.Info("waiting for infrastructure stack to be created", "stackId", stackID)
	err = cloudformation.NewStackCreateCompleteWaiter(m.clients.CFN()).
		Wait(context.TODO(),
			&cloudformation.DescribeStacksInput{
				StackName: out.StackId,
			},
			infraStackCreationTimeout)
	if err != nil {
		return nil, fmt.Errorf("failed to wait for infrastructure stack creation: %w", err)
	}
	slog.Info("getting infrastructure stack resources", "stackId", stackID)
	infra, err := m.getInfrastructureStackResources()
	if err != nil {
		// infra is nil on error, so it must not be touched before this check
		return nil, fmt.Errorf("failed to get infrastructure stack resources: %w", err)
	}
	infra.availabilityZones = subnetAzs
	slog.Info("created infrastructure", "infra", infra)

	return infra, nil
}

// getInfrastructureStackResources describes the infrastructure stack and
// translates its outputs (VPC, SubnetsPublic, SubnetsPrivate, ClusterRole,
// NodeRole) into an Infrastructure value. Unknown outputs are ignored.
//
// The DescribeStacks error is returned as-is so that callers can detect a
// missing stack; an error is also returned when the response contains no
// stack or when a role output is not a valid ARN.
func (m *InfrastructureManager) getInfrastructureStackResources() (*Infrastructure, error) {
	stack, err := m.clients.CFN().DescribeStacks(context.TODO(), &cloudformation.DescribeStacksInput{
		StackName: aws.String(m.resourceID),
	})
	if err != nil {
		return nil, err
	}
	// guard the index below instead of panicking on an empty response
	if len(stack.Stacks) == 0 {
		return nil, fmt.Errorf("infrastructure stack '%s' was not returned by DescribeStacks", m.resourceID)
	}
	infra := Infrastructure{}
	for _, output := range stack.Stacks[0].Outputs {
		// aws.ToString tolerates nil keys/values, which then match no case
		value := aws.ToString(output.OutputValue)
		switch aws.ToString(output.OutputKey) {
		case "VPC":
			infra.vpc = value
		case "SubnetsPublic":
			infra.subnetsPublic = strings.Split(value, ",")
		case "SubnetsPrivate":
			infra.subnetsPrivate = strings.Split(value, ",")
		case "ClusterRole":
			clusterRole, err := arn.Parse(value)
			if err != nil {
				return nil, fmt.Errorf("infrastructure stack ClusterRole output is not a valid ARN: '%s': %w", value, err)
			}
			infra.clusterRoleARN = clusterRole.String()
		case "NodeRole":
			nodeRole, err := arn.Parse(value)
			if err != nil {
				return nil, fmt.Errorf("infrastructure stack NodeRole output is not a valid ARN: '%s': %w", value, err)
			}
			infra.nodeRoleARN = nodeRole.String()
			// Resource looks like 'role/MyRole' (possibly with a path in
			// between); the role name is the last segment
			resourceParts := strings.Split(nodeRole.Resource, "/")
			infra.nodeRoleName = resourceParts[len(resourceParts)-1]
		}
	}
	return &infra, nil
}

// deleteInfrastructureStack deletes the infrastructure stack and waits for
// the deletion to finish. Instance profiles still attached to the node role
// are removed first. A stack that does not exist is not an error, and a
// failed deletion wait is only logged and recorded as a metric.
func (m *InfrastructureManager) deleteInfrastructureStack() error {
	infra, err := m.getInfrastructureStackResources()
	if err != nil {
		var notFound *cloudformationtypes.StackNotFoundException
		if !errors.As(err, &notFound) {
			return err
		}
		slog.Info("infrastructure stack does not exist", "resourceID", m.resourceID)
		return nil
	}

	if err := m.deleteLeakedInstanceProfiles(infra); err != nil {
		return err
	}

	slog.Info("deleting infrastructure stack", "resourceID", m.resourceID)
	_, err = m.clients.CFN().DeleteStack(context.TODO(), &cloudformation.DeleteStackInput{
		StackName: aws.String(m.resourceID),
	})
	if err != nil {
		var notFound *cloudformationtypes.StackNotFoundException
		if !errors.As(err, &notFound) {
			return fmt.Errorf("failed to delete infrastructure stack: %w", err)
		}
		slog.Info("infrastructure stack does not exist", "resourceID", m.resourceID)
		return nil
	}

	slog.Info("waiting for infrastructure stack to be deleted", "resourceID", m.resourceID)
	waiter := cloudformation.NewStackDeleteCompleteWaiter(m.clients.CFN())
	describeInput := &cloudformation.DescribeStacksInput{
		StackName: aws.String(m.resourceID),
	}
	if waitErr := waiter.Wait(context.TODO(), describeInput, infraStackDeletionTimeout); waitErr != nil {
		// don't fail the overall test, the janitor can clean this up
		slog.Warn("failed to wait for infrastructure stack deletion", "error", waitErr)
		m.metrics.Record(infraStackDeletionFailed, 1, nil)
		return nil
	}
	slog.Info("deleted infrastructure stack", "resourceID", m.resourceID)
	return nil
}

// deleteLeakedInstanceProfiles deletes any instance profiles to which the node role is attached,
// because this will block node role deletion (and deletion of the infrastructure stack).
// For example, when --auto-mode is used, an instance profile will be created for us and won't be deleted automatically with the cluster.
//
// Roles and instance profiles that no longer exist are skipped. Any other IAM
// failure is returned, wrapped so that the underlying error stays inspectable.
func (m *InfrastructureManager) deleteLeakedInstanceProfiles(infra *Infrastructure) error {
	if infra.nodeRoleName == "" {
		// if the infra stack failed to create, it could end up in a weird state with no node role
		// we know there aren't any instance profiles in that case, so all good
		return nil
	}
	out, err := m.clients.IAM().ListInstanceProfilesForRole(context.TODO(), &iam.ListInstanceProfilesForRoleInput{
		RoleName: aws.String(infra.nodeRoleName),
	})
	if err != nil {
		var notFound *iamtypes.NoSuchEntityException
		if errors.As(err, &notFound) {
			return nil
		}
		return fmt.Errorf("failed to list instance profiles for role name: '%s': %w", infra.nodeRoleName, err)
	}
	if len(out.InstanceProfiles) == 0 {
		return nil
	}
	var deletedInstanceProfiles []string
	for _, instanceProfile := range out.InstanceProfiles {
		profileName := aws.ToString(instanceProfile.InstanceProfileName)
		// the role has to be detached before the profile can be deleted
		_, err := m.clients.IAM().RemoveRoleFromInstanceProfile(context.TODO(), &iam.RemoveRoleFromInstanceProfileInput{
			RoleName:            aws.String(infra.nodeRoleName),
			InstanceProfileName: instanceProfile.InstanceProfileName,
		})
		if err != nil {
			var notFound *iamtypes.NoSuchEntityException
			if errors.As(err, &notFound) {
				slog.Info("instance profile does not exist", "name", profileName)
				continue
			}
			return fmt.Errorf("failed to remove node role %s from instance profile: %s: %w", infra.nodeRoleName, profileName, err)
		}
		_, err = m.clients.IAM().DeleteInstanceProfile(context.TODO(), &iam.DeleteInstanceProfileInput{
			InstanceProfileName: instanceProfile.InstanceProfileName,
		})
		if err != nil {
			var notFound *iamtypes.NoSuchEntityException
			if errors.As(err, &notFound) {
				slog.Info("instance profile does not exist", "name", profileName)
				continue
			}
			return fmt.Errorf("failed to delete instance profile: %s: %w", profileName, err)
		}
		deletedInstanceProfiles = append(deletedInstanceProfiles, profileName)
	}
	slog.Info("deleted leaked instance profiles", "count", len(deletedInstanceProfiles), "profiles", deletedInstanceProfiles)
	return nil
}

// deleteLeakedENIs deletes Elastic Network Interfaces that may have been allocated (and left behind) by the VPC CNI.
// These leaked ENIs will prevent deletion of their associated subnets and security groups.
//
// Nothing is done when the infrastructure stack does not exist or no tagged
// ENIs are found. Otherwise the ENIs are waited on until available and then
// deleted one by one; the number deleted is recorded as a metric.
func (m *InfrastructureManager) deleteLeakedENIs() error {
	infra, err := m.getInfrastructureStackResources()
	if err != nil {
		var notFound *cloudformationtypes.StackNotFoundException
		if errors.As(err, &notFound) {
			return nil
		}
		return fmt.Errorf("failed to get infrastructure stack resources: %w", err)
	}

	enis, err := m.getVPCCNINetworkInterfaceIds(infra.vpc)
	if err != nil {
		return err
	}
	if len(enis) == 0 {
		return nil
	}

	slog.Info("waiting for leaked ENIs to become available", "count", len(enis), "enis", enis)
	waiter := ec2.NewNetworkInterfaceAvailableWaiter(m.clients.EC2())
	waitErr := waiter.Wait(context.TODO(), &ec2.DescribeNetworkInterfacesInput{
		NetworkInterfaceIds: enis,
	}, networkInterfaceDetachmentTimeout)
	if waitErr != nil {
		// the wait may have failed because the ENIs disappeared in the meantime
		remaining, listErr := m.getVPCCNINetworkInterfaceIds(infra.vpc)
		switch {
		case listErr != nil:
			return fmt.Errorf("waiter failed, and re-checking ENIs also failed: %w", listErr)
		case len(remaining) == 0:
			slog.Info("ENIs were deleted during waiter timeout, skipping delete")
			return nil
		default:
			return fmt.Errorf("failed to wait for ENI(s) to become available: %v", waitErr)
		}
	}

	for _, id := range enis {
		slog.Info("deleting leaked ENI", "eni", id)
		deleteInput := &ec2.DeleteNetworkInterfaceInput{
			NetworkInterfaceId: aws.String(id),
		}
		if _, delErr := m.clients.EC2().DeleteNetworkInterface(context.TODO(), deleteInput); delErr != nil {
			return fmt.Errorf("failed to delete leaked ENI: %w", delErr)
		}
	}
	slog.Info("deleted leaked ENIs", "count", len(enis))
	m.metrics.Record(infraLeakedENIs, float64(len(enis)), nil)
	return nil
}

// getVPCCNINetworkInterfaceIds returns the IDs of ENIs in the specified VPC that were created by the VPC CNI.
// It lists, across all result pages, the interfaces of type "interface" in the
// VPC that carry the VPC CNI or IPAM controller tag key.
func (m *InfrastructureManager) getVPCCNINetworkInterfaceIds(vpcId string) ([]string, error) {
	filters := []ec2types.Filter{
		{
			Name:   aws.String("vpc-id"),
			Values: []string{vpcId},
		},
		{
			Name:   aws.String("interface-type"),
			Values: []string{"interface"},
		},
		{
			Name:   aws.String("tag-key"),
			Values: []string{vpcCNIENITagKey, ipamControllerENITagKey},
		},
	}
	pages := ec2.NewDescribeNetworkInterfacesPaginator(m.clients.EC2(), &ec2.DescribeNetworkInterfacesInput{
		Filters: filters,
	})

	var ids []string
	for pages.HasMorePages() {
		page, err := pages.NextPage(context.TODO())
		if err != nil {
			return nil, fmt.Errorf("failed to describe ENIs: %w", err)
		}
		for idx := range page.NetworkInterfaces {
			ids = append(ids, *page.NetworkInterfaces[idx].NetworkInterfaceId)
		}
	}
	return ids, nil
}

// normalizeAZs removes availability zones that don't meet launch requirements
// for instances and ensures that the resulting list contains enough AZs to
// satisfy the deployment.
//
// Candidates are kept only if they are zones of the requested zone type, are
// reordered by availabilityZoneHintedOrder and cut down to expectedCount. If
// fewer remain, further zones of that type are appended. An error is returned
// when the final list does not have exactly expectedCount entries.
func (m *InfrastructureManager) normalizeAZs(opts *deployerOptions, subnetAZs []string, expectedCount int) ([]string, error) {
	described, err := m.clients.EC2().DescribeAvailabilityZones(context.TODO(), &ec2.DescribeAvailabilityZonesInput{
		Filters: []ec2types.Filter{
			{
				Name:   aws.String("zone-type"),
				Values: []string{opts.ZoneType},
			},
		},
	})
	if err != nil {
		return nil, err
	}

	var supportedAZs []string
	for _, zone := range described.AvailabilityZones {
		supportedAZs = append(supportedAZs, aws.ToString(zone.ZoneName))
	}

	var filteredAZs []string
	for _, candidate := range subnetAZs {
		if !slices.Contains(supportedAZs, candidate) {
			continue
		}
		filteredAZs = append(filteredAZs, candidate)
	}

	// enforce users' preferred ordering over AZs
	filteredAZs = availabilityZoneHintedOrder(filteredAZs)
	// truncate the list if we went over the max
	if len(filteredAZs) > expectedCount {
		filteredAZs = filteredAZs[:expectedCount]
	}

	// pad the availability zones with supported entries if we end up not having
	// enough after filtering.
	for _, zone := range supportedAZs {
		if len(filteredAZs) >= expectedCount {
			break
		}
		if slices.Contains(filteredAZs, zone) {
			continue
		}
		slog.Info("padding infra stack with AZ", "az", zone)
		filteredAZs = append(filteredAZs, zone)
	}

	if len(filteredAZs) == expectedCount {
		return filteredAZs, nil
	}
	return nil, fmt.Errorf("failed to provide AZs with expected count %d: %v", expectedCount, filteredAZs)
}

// getRankedAZsForInstanceTypes returns the availability zones ordered decreasingly by the number of
// requested instance types they support. Zones offering none of the requested
// instance types are not included.
//
// The relative order of zones with the same count is unspecified: the zones
// are collected by iterating a map and sorted with sort.Slice, which is not
// stable.
func (m *InfrastructureManager) getRankedAZsForInstanceTypes(opts *deployerOptions) ([]string, error) {
	offerings, err := m.clients.EC2().DescribeInstanceTypeOfferings(context.TODO(), &ec2.DescribeInstanceTypeOfferingsInput{
		LocationType: ec2types.LocationTypeAvailabilityZone,
		Filters: []ec2types.Filter{
			{
				Name:   aws.String("instance-type"),
				Values: opts.InstanceTypes,
			},
		},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to describe instance type offerings: %w", err)
	}
	// one offering is returned per (instance type, zone) pair, so counting
	// offerings per location gives the number of supported types per zone
	counts := make(map[string]int)
	for _, offering := range offerings.InstanceTypeOfferings {
		counts[aws.ToString(offering.Location)]++
	}
	var azs []string
	for az := range counts {
		azs = append(azs, az)
	}
	sort.Slice(azs, func(i, j int) bool {
		return counts[azs[i]] > counts[azs[j]]
	})
	return azs, nil
}

// getAZsWithCapacity returns the availability zone of the first active capacity
// reservation for opts.InstanceTypes that has at least opts.Nodes instances
// available. When opts.TargetCapacityReservationId is set, only that reservation
// is considered. The returned slice is empty when no reservation qualifies.
func (m *InfrastructureManager) getAZsWithCapacity(opts *deployerOptions) ([]string, error) {
	// TODO: consolidate this with the CapacityReservation logic in node.go
	var subnetAzs []string
	describeReservationsInput := ec2.DescribeCapacityReservationsInput{
		Filters: []ec2types.Filter{
			{
				Name:   aws.String("instance-type"),
				Values: opts.InstanceTypes,
			},
			{
				Name:   aws.String("state"),
				Values: []string{"active"},
			},
		},
	}
	if opts.TargetCapacityReservationId != "" {
		describeReservationsInput.CapacityReservationIds = []string{opts.TargetCapacityReservationId}
	}
	capacityReservations, err := m.clients.EC2().DescribeCapacityReservations(context.TODO(), &describeReservationsInput)
	if err != nil {
		return nil, fmt.Errorf("failed to describe capacity reservations: %w", err)
	}
	for _, cr := range capacityReservations.CapacityReservations {
		// SDK response fields are pointers; skip incomplete entries instead of panicking
		if cr.AvailableInstanceCount == nil || cr.AvailabilityZone == nil {
			continue
		}
		if *cr.AvailableInstanceCount >= int32(opts.Nodes) {
			subnetAzs = append(subnetAzs, *cr.AvailabilityZone)
			break
		}
	}
	return subnetAzs, nil
}

// getCloudWatchStackName derives two values from resourceID: the name of the
// CloudWatch infrastructure stack (resourceID with a "-cw" suffix) and the
// cluster UUID (resourceID with the leading ResourcePrefix+"-" removed).
func getCloudWatchStackName(resourceID string) (string, string) {
	stackName := resourceID + "-cw"
	uuid := strings.TrimPrefix(resourceID, ResourcePrefix+"-")
	return stackName, uuid
}

// createCloudWatchInfrastructureStack creates the CloudWatch infrastructure
// CloudFormation stack for clusterName, waits for creation to complete, and
// returns the value of the stack's "CloudWatchRoleArn" output.
//
// An error is returned if the stack cannot be created, does not reach the
// create-complete state within infraStackCreationTimeout, cannot be described,
// or does not expose the expected output.
func (m *InfrastructureManager) createCloudWatchInfrastructureStack(clusterName string) (string, error) {
	stackName, clusterUUID := getCloudWatchStackName(clusterName)
	slog.Info("creating CloudWatch infrastructure stack", "stackName", stackName)
	out, err := m.clients.CFN().CreateStack(context.TODO(), &cloudformation.CreateStackInput{
		StackName:    aws.String(stackName),
		TemplateBody: aws.String(templates.CloudWatchInfra),
		Capabilities: []cloudformationtypes.Capability{cloudformationtypes.CapabilityCapabilityNamedIam},
		Parameters: []cloudformationtypes.Parameter{
			{
				ParameterKey:   aws.String("ClusterUUID"),
				ParameterValue: aws.String(clusterUUID),
			},
		},
	})
	if err != nil {
		return "", fmt.Errorf("failed to create CloudWatch infrastructure stack: %w", err)
	}

	slog.Info("waiting for CloudWatch infrastructure stack to be created", "stackId", aws.ToString(out.StackId))
	if err := cloudformation.NewStackCreateCompleteWaiter(m.clients.CFN()).
		Wait(context.TODO(),
			&cloudformation.DescribeStacksInput{
				StackName: out.StackId,
			},
			infraStackCreationTimeout); err != nil {
		return "", util.WrapCFNStackFailure(context.TODO(), m.clients.CFN(), fmt.Errorf("failed to wait for CloudWatch infrastructure stack creation: %w", err), stackName)
	}

	stack, err := m.clients.CFN().DescribeStacks(context.TODO(), &cloudformation.DescribeStacksInput{
		StackName: out.StackId,
	})
	if err != nil {
		return "", fmt.Errorf("failed to describe CloudWatch infrastructure stack: %w", err)
	}
	// guard the index below: an empty response would otherwise panic
	if len(stack.Stacks) == 0 {
		return "", fmt.Errorf("CloudWatch infrastructure stack not found: %s", stackName)
	}

	// Get the CloudWatch role ARN from stack outputs
	var roleArn string
	for _, output := range stack.Stacks[0].Outputs {
		if aws.ToString(output.OutputKey) == "CloudWatchRoleArn" {
			roleArn = aws.ToString(output.OutputValue)
			break
		}
	}
	if roleArn == "" {
		return "", fmt.Errorf("CloudWatch role ARN not found in stack outputs")
	}

	slog.Info("CloudWatch infrastructure stack created successfully", "roleArn", roleArn)
	return roleArn, nil
}

// createCloudWatchPodIdentityAssociation associates roleArn with the "cwagent"
// service account in the "amazon-cloudwatch" namespace of the given cluster.
// The association is created through the EKS API directly rather than through
// CloudFormation, to ensure the correct EKS endpoint is used when a custom
// endpoint URL is configured. The association is automatically cleaned up when
// the cluster is deleted.
func (m *InfrastructureManager) createCloudWatchPodIdentityAssociation(clusterName string, roleArn string) error {
	slog.Info("creating PodIdentityAssociation", "clusterName", clusterName)
	input := &eks.CreatePodIdentityAssociationInput{
		ClusterName:    aws.String(clusterName),
		Namespace:      aws.String("amazon-cloudwatch"),
		ServiceAccount: aws.String("cwagent"),
		RoleArn:        aws.String(roleArn),
	}
	if _, err := m.clients.EKS().CreatePodIdentityAssociation(context.TODO(), input); err != nil {
		return fmt.Errorf("failed to create PodIdentityAssociation: %w", err)
	}
	slog.Info("PodIdentityAssociation created successfully", "clusterName", clusterName)
	return nil
}

// deleteCloudWatchInfrastructureStack requests deletion of the CloudWatch
// infrastructure stack derived from m.resourceID. It does not wait for the
// deletion to finish. A StackNotFoundException is treated as success.
func (m *InfrastructureManager) deleteCloudWatchInfrastructureStack() error {
	// The PodIdentityAssociation created via the EKS API is automatically
	// cleaned up when the cluster is deleted, so no explicit deletion is needed.
	stackName, _ := getCloudWatchStackName(m.resourceID)
	slog.Info("deleting CloudWatch infrastructure stack", "stackName", stackName)

	input := &cloudformation.DeleteStackInput{
		StackName: aws.String(stackName),
	}
	_, err := m.clients.CFN().DeleteStack(context.TODO(), input)
	if err == nil {
		slog.Info("initiated deletion of CloudWatch infrastructure stack", "stackName", stackName)
		return nil
	}

	var notFound *cloudformationtypes.StackNotFoundException
	if !errors.As(err, &notFound) {
		return fmt.Errorf("failed to delete CloudWatch infrastructure stack: %w", err)
	}
	slog.Info("CloudWatch infrastructure stack does not exist", "stackName", stackName)
	return nil
}


================================================
FILE: internal/deployers/eksapi/janitor.go
================================================
package eksapi

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"strings"
	"sync"
	"time"

	"github.com/aws/aws-k8s-tester/internal/awssdk"
	"github.com/aws/aws-k8s-tester/internal/metrics"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/cloudformation"
	cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
)

// NewJanitor builds a janitor that deletes resources older than maxResourceAge.
// When emitMetrics is true, metrics go to a CloudWatch-backed registry,
// otherwise to a no-op registry. A non-positive workers value is replaced by 1.
// stackStatus is stored as-is for use when filtering stacks.
func NewJanitor(maxResourceAge time.Duration, emitMetrics bool, workers int, stackStatus string) *janitor {
	cfg := awssdk.NewConfig()

	var registry metrics.MetricRegistry
	switch {
	case emitMetrics:
		registry = metrics.NewCloudWatchRegistry(cloudwatch.NewFromConfig(cfg))
	default:
		registry = metrics.NewNoopMetricRegistry()
	}

	// always run with at least one worker
	workers = max(workers, 1)

	j := &janitor{
		maxResourceAge: maxResourceAge,
		workers:        workers,
		stackStatus:    stackStatus,
		awsConfig:      cfg,
	}
	j.cfnClient = cloudformation.NewFromConfig(cfg)
	j.metrics = registry
	return j
}

// janitor deletes leftover deployer resources, discovered through their
// CloudFormation stacks. Instances are built by NewJanitor.
type janitor struct {
	// maxResourceAge is the minimum stack age for deletion; younger stacks are skipped.
	maxResourceAge time.Duration
	// workers is the number of sweepWorker goroutines started by Sweep.
	workers        int
	// stackStatus, when non-empty, restricts the sweep to stacks with exactly this status.
	stackStatus    string

	// awsConfig is the AWS SDK configuration used to build per-stack clients.
	awsConfig aws.Config
	// cfnClient is a CloudFormation client built from awsConfig in NewJanitor.
	cfnClient *cloudformation.Client
	// metrics is the registry handed to each stack's infrastructure manager.
	metrics   metrics.MetricRegistry
}

// Sweep lists all CloudFormation stacks and hands them to j.workers concurrent
// sweepWorker goroutines, which delete the resources of every eligible stack.
// It blocks until all workers have finished and returns the joined deletion
// errors, or nil if none occurred.
func (j *janitor) Sweep(ctx context.Context) error {
	// reuse the client configured by NewJanitor; only build one if the janitor
	// was constructed without it
	cfnClient := j.cfnClient
	if cfnClient == nil {
		cfnClient = cloudformation.NewFromConfig(awssdk.NewConfig())
	}
	stacks, err := j.getStacks(ctx, cfnClient)
	if err != nil {
		return fmt.Errorf("failed to get stacks: %w", err)
	}
	var wg sync.WaitGroup
	// both channels are sized to hold every stack, so neither the producer
	// below nor the workers can block on a send
	stackQueue := make(chan cloudformationtypes.Stack, len(stacks))
	errChan := make(chan error, len(stacks))
	for i := 1; i <= j.workers; i++ {
		wg.Add(1)
		go j.sweepWorker(&wg, stackQueue, errChan)
	}

	for _, stack := range stacks {
		stackQueue <- stack
	}
	close(stackQueue)

	wg.Wait()
	close(errChan)
	var errs []error
	for err := range errChan {
		errs = append(errs, err)
	}
	return errors.Join(errs...)
}

// getStacks pages through DescribeStacks with cfnClient and returns every stack
// from every page. The first page error aborts the listing.
func (j *janitor) getStacks(ctx context.Context, cfnClient *cloudformation.Client) ([]cloudformationtypes.Stack, error) {
	paginator := cloudformation.NewDescribeStacksPaginator(cfnClient, &cloudformation.DescribeStacksInput{})
	var all []cloudformationtypes.Stack
	for {
		if !paginator.HasMorePages() {
			return all, nil
		}
		output, err := paginator.NextPage(ctx)
		if err != nil {
			return nil, err
		}
		all = append(all, output.Stacks...)
	}
}

// sweepWorker consumes stacks from stackQueue until the channel is closed and
// deletes the resources belonging to each eligible stack. A stack is skipped if
// its name does not start with ResourcePrefix, if it is already deleted, if
// j.stackStatus is set and does not match the stack's status, or if it is
// younger than j.maxResourceAge. Deletion failures are sent to errChan.
// wg.Done is called when the worker exits.
func (j *janitor) sweepWorker(wg *sync.WaitGroup, stackQueue <-chan cloudformationtypes.Stack, errChan chan<- error) {
	defer wg.Done()
	for stack := range stackQueue {
		// the stack name doubles as the resource ID of the deployment
		resourceID := aws.ToString(stack.StackName)
		if !strings.HasPrefix(resourceID, ResourcePrefix) {
			continue
		}
		if stack.StackStatus == cloudformationtypes.StackStatusDeleteComplete {
			continue
		}
		if j.stackStatus != "" && j.stackStatus != string(stack.StackStatus) {
			slog.Info("skipping resources", "status", stack.StackStatus, "resourceID", resourceID)
			continue
		}
		// without a creation time the age cannot be established; skip rather
		// than panic on a nil dereference inside a worker goroutine
		if stack.CreationTime == nil {
			slog.Warn("skipping resources with unknown creation time", "resourceID", resourceID)
			continue
		}
		resourceAge := time.Since(*stack.CreationTime)
		if resourceAge < j.maxResourceAge {
			slog.Info("skipping resources", "age", resourceAge, "resourceID", resourceID)
			continue
		}
		clients := j.awsClientsForStack(stack)
		infraManager := NewInfrastructureManager(clients, resourceID, j.metrics)
		clusterManager := NewClusterManager(clients, resourceID)
		nodeManager := NewNodeManager(clients, resourceID)
		slog.Info("deleting resources", "age", resourceAge, "resourceID", resourceID)
		if err := deleteResources(infraManager, clusterManager, nodeManager, nil /* k8sClient */, nil /* deployerOptions */); err != nil {
			errChan <- fmt.Errorf("failed to delete resources: %s: %w", resourceID, err)
		}
	}
}

// awsClientsForStack builds AWS clients for operating on the resources of
// stack. If the stack carries a tag whose key is eksEndpointURLTag, the tag's
// value is passed on as the EKS endpoint URL; otherwise an empty URL is passed.
func (j *janitor) awsClientsForStack(stack cloudformationtypes.Stack) *awsClients {
	var eksEndpointURL string
	for _, tag := range stack.Tags {
		// aws.ToString tolerates nil pointers in the SDK response
		if aws.ToString(tag.Key) == eksEndpointURLTag {
			eksEndpointURL = aws.ToString(tag.Value)
		}
	}
	return newAWSClients(j.awsConfig, eksEndpointURL)
}


================================================
FILE: internal/deployers/eksapi/k8s.go
================================================
package eksapi

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"net/url"
	"strings"
	"time"

	"github.com/aws/aws-k8s-tester/internal/metrics"
	"github.com/aws/aws-k8s-tester/internal/util"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
	"sigs.k8s.io/controller-runtime/pkg/client"
	crlog "sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"

	corev1 "k8s.io/api/core/v1"
)

func init() {
	// controller-runtime will complain loudly if this isn't set, even though we don't use this logger
	crlog.SetLogger(zap.New())
}

// Polling parameters for retried Kubernetes API requests
// (used by createAWSAuthConfigMap).
const (
	// requestRetryInterval is the delay between attempts.
	requestRetryInterval = 10 * time.Second
	// requestRetryTimeout is the total time allowed before giving up.
	requestRetryTimeout  = 10 * time.Minute
)

// k8sClient bundles the Kubernetes clients built from a single REST config by
// newK8sClient.
type k8sClient struct {
	// config is the REST configuration loaded from the kubeconfig.
	config    *rest.Config
	// clientset is the typed client-go client.
	clientset kubernetes.Interface
	// client is the controller-runtime client.
	client    client.Client
	// dclient is the dynamic client.
	dclient   *dynamic.DynamicClient
}

// newK8sClient loads the kubeconfig at kubeconfigPath and builds the typed,
// controller-runtime, and dynamic clients from it. An error is returned only
// if the kubeconfig cannot be loaded; the client constructors are invoked via
// NewForConfigOrDie / util.Must rather than returning their errors.
func newK8sClient(kubeconfigPath string) (*k8sClient, error) {
	restConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
	if err != nil {
		return nil, err
	}
	k := &k8sClient{config: restConfig}
	k.clientset = kubernetes.NewForConfigOrDie(restConfig)
	k.client = util.Must(client.New(restConfig, client.Options{}))
	k.dclient = util.Must(dynamic.NewForConfig(restConfig))
	return k, nil
}

// waitForReadyNodes blocks until at least nodeCount distinct nodes have been
// observed in the Ready state, or until timeout elapses.
//
// The set of ready nodes is seeded from a list call and then grown from watch
// events. Nodes are never removed from the set once they have been seen ready.
// The function returns immediately if the seeded set already satisfies
// nodeCount. An error is returned on timeout, if the watch channel closes, or
// if the watcher delivers an error event.
func (k *k8sClient) waitForReadyNodes(nodeCount int, timeout time.Duration) error {
	slog.Info("waiting for nodes to be ready", "nodeCount", nodeCount, "timeout", timeout)
	// start the watch before listing so no transition between the two is missed
	watcher, err := k.clientset.CoreV1().Nodes().Watch(context.TODO(), metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("failed to create node watcher: %v", err)
	}
	defer watcher.Stop()
	initialReadyNodes, err := k.getReadyNodes()
	if err != nil {
		return fmt.Errorf("failed to get ready nodes: %v", err)
	}
	// track names rather than a bare counter so that a node seen both in the
	// initial list and in a later event is only counted once
	readyNodes := sets.NewString()
	for i := range initialReadyNodes {
		readyNodes.Insert(initialReadyNodes[i].Name)
	}
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	// check the condition before blocking so an already-satisfied request
	// does not wait for an event that may never arrive
	for readyNodes.Len() < nodeCount {
		select {
		case event, ok := <-watcher.ResultChan():
			if !ok {
				return fmt.Errorf("the watcher channel for the nodes was closed by Kubernetes due to an unknown error")
			}
			if event.Type == watch.Error {
				msg := "unexpected error event type from node watcher"
				if statusErr, ok := event.Object.(*metav1.Status); ok {
					return fmt.Errorf("%s: %s", msg, statusErr.String())
				}
				return fmt.Errorf("%s: %+v", msg, event.Object)
			}
			if event.Object != nil && event.Type != watch.Deleted {
				if node, ok := event.Object.(*corev1.Node); ok && isNodeReady(node) {
					readyNodes.Insert(node.Name)
				}
			}
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for %d nodes to be ready: %w", nodeCount, ctx.Err())
		}
	}
	slog.Info("nodes are ready", "count", readyNodes.Len(), "nodes", readyNodes)
	return nil
}

// waitForNodeDeletion blocks until no nodes remain in the cluster, or until
// timeout elapses.
//
// The set of known nodes is seeded from a list call and then maintained from
// watch events (Added inserts, Deleted removes). The function returns
// immediately if the cluster has no nodes to begin with. An error is returned
// on timeout, if the watch channel closes, if the watcher delivers an error
// event, or if it delivers an object that is not a Node.
func (k *k8sClient) waitForNodeDeletion(timeout time.Duration) error {
	slog.Info("waiting for nodes to be deleted", "timeout", timeout)
	nodes := sets.NewString()
	// start the watch before listing so no deletion between the two is missed
	watcher, err := k.clientset.CoreV1().Nodes().Watch(context.TODO(), metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("failed to create node watcher: %v", err)
	}
	defer watcher.Stop()
	initialNodes, err := k.clientset.CoreV1().Nodes().List(context.TODO(), v1.ListOptions{})
	if err != nil {
		return fmt.Errorf("failed to list nodes: %v", err)
	}
	for _, node := range initialNodes.Items {
		nodes.Insert(node.Name)
	}
	ctx, cancelFunc := context.WithTimeout(context.Background(), timeout)
	defer cancelFunc()
	// check the condition before blocking: with no nodes there may never be
	// another watch event, and waiting for one would run into the timeout
	for nodes.Len() > 0 {
		select {
		case event, ok := <-watcher.ResultChan():
			if !ok {
				return fmt.Errorf("the watcher channel for the nodes was closed by Kubernetes due to an unknown error")
			}
			if event.Type == watch.Error {
				msg := "unexpected error event type from node watcher"
				if statusErr, ok := event.Object.(*metav1.Status); ok {
					return fmt.Errorf("%s: %s", msg, statusErr.String())
				}
				return fmt.Errorf("%s: %+v", msg, event.Object)
			}
			if event.Object != nil {
				node, ok := event.Object.(*corev1.Node)
				if !ok {
					return fmt.Errorf("node watcher received an object that isn't a Node: %+v", event.Object)
				}
				switch event.Type {
				case watch.Added:
					nodes.Insert(node.Name)
				case watch.Deleted:
					nodes.Delete(node.Name)
				}
			}
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for nodes to be deleted: %w", ctx.Err())
		}
	}
	slog.Info("all nodes deleted!")
	return nil
}

// getReadyNodes lists all nodes in the cluster and returns copies of those for
// which isNodeReady reports true.
func (k *k8sClient) getReadyNodes() ([]corev1.Node, error) {
	list, err := k.clientset.CoreV1().Nodes().List(context.TODO(), v1.ListOptions{})
	if err != nil {
		return nil, err
	}
	var ready []corev1.Node
	for i := range list.Items {
		if !isNodeReady(&list.Items[i]) {
			continue
		}
		ready = append(ready, list.Items[i])
	}
	return ready, nil
}

// isNodeReady reports whether node has a NodeReady condition whose status is
// ConditionTrue. A node without a NodeReady condition is not ready.
func isNodeReady(node *corev1.Node) bool {
	if cond := getNodeReadyCondition(node); cond != nil {
		return cond.Status == corev1.ConditionTrue
	}
	return false
}

// getNodeReadyCondition returns a pointer to a copy of the first condition of
// type NodeReady in node's status, or nil if there is none.
func getNodeReadyCondition(node *corev1.Node) *corev1.NodeCondition {
	conditions := node.Status.Conditions
	for i := range conditions {
		if conditions[i].Type != corev1.NodeReady {
			continue
		}
		// hand back a copy, not a pointer into the node's own slice
		cond := conditions[i]
		return &cond
	}
	return nil
}

// createAWSAuthConfigMap creates the kube-system/aws-auth ConfigMap with a
// "mapRoles" entry generated from nodeNameStrategy and nodeRoleARN. The create
// call is retried every requestRetryInterval for up to requestRetryTimeout as
// long as it fails with a DNS error; any other error aborts immediately.
func (k *k8sClient) createAWSAuthConfigMap(nodeNameStrategy string, nodeRoleARN string) error {
	mapRoles, err := generateAuthMapRole(nodeNameStrategy, nodeRoleARN)
	if err != nil {
		return err
	}
	slog.Info("generated AuthMapRole", "mapRoles", mapRoles)

	attempt := func(ctx context.Context) (bool, error) {
		configMap := &corev1.ConfigMap{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "aws-auth",
				Namespace: "kube-system",
			},
			Data: map[string]string{
				"mapRoles": mapRoles,
			},
		}
		_, createErr := k.clientset.CoreV1().ConfigMaps("kube-system").Create(ctx, configMap, metav1.CreateOptions{})
		if createErr == nil {
			return true, nil
		}
		var dnsErr *net.DNSError
		if !errors.As(createErr, &dnsErr) {
			return false, createErr
		}
		// DNS failures are tolerated and retried until the poll times out
		slog.Warn("failed to create aws-auth configmap due to DNS error, retrying", "error", createErr)
		return false, nil
	}

	if err := wait.PollUntilContextTimeout(context.TODO(), requestRetryInterval, requestRetryTimeout, true, attempt); err != nil {
		return fmt.Errorf("retry loop failed: %w", err)
	}
	return nil
}

// getNodeInstanceIDs extracts the EC2 instance ID from the provider ID of each
// node. If any provider ID fails to parse, no IDs are returned and the parse
// errors are joined into a single error.
func getNodeInstanceIDs(nodes []corev1.Node) ([]string, error) {
	var (
		ids      []string
		failures []error
	)
	for i := range nodes {
		parsed, err := parseKubernetesProviderID(nodes[i].Spec.ProviderID)
		if err != nil {
			failures = append(failures, err)
			continue
		}
		ids = append(ids, parsed.InstanceID)
	}
	if len(failures) != 0 {
		return nil, errors.Join(failures...)
	}
	return ids, nil
}

// emitNodeMetrics records, for every ready node, the time from EC2 instance
// launch to node registration and from instance launch to the node's Ready
// transition. Each measurement is recorded once per dimension set.
//
// Nodes whose provider ID cannot be parsed, whose instance cannot be described,
// or whose instance or node data is incomplete are skipped. The errors for all
// skipped nodes are joined and returned after the remaining nodes are processed.
func (k *k8sClient) emitNodeMetrics(metricRegistry metrics.MetricRegistry, ec2Client *ec2.Client) error {
	nodes, err := k.getReadyNodes()
	if err != nil {
		return err
	}
	var errs []error
	for _, node := range nodes {
		providerId, err := parseKubernetesProviderID(node.Spec.ProviderID)
		if err != nil {
			errs = append(errs, err)
			continue
		}
		instanceInfo, err := ec2Client.DescribeInstances(context.TODO(), &ec2.DescribeInstancesInput{
			InstanceIds: []string{providerId.InstanceID},
		})
		if err != nil {
			errs = append(errs, err)
			continue
		}
		// guard the indexing below: an empty response would otherwise panic
		if len(instanceInfo.Reservations) == 0 || len(instanceInfo.Reservations[0].Instances) == 0 {
			errs = append(errs, fmt.Errorf("instance not found: %s", providerId.InstanceID))
			continue
		}
		instance := instanceInfo.Reservations[0].Instances[0]
		if instance.LaunchTime == nil {
			errs = append(errs, fmt.Errorf("instance has no launch time: %s", providerId.InstanceID))
			continue
		}
		readyCondition := getNodeReadyCondition(&node)
		if readyCondition == nil {
			errs = append(errs, fmt.Errorf("node has no ready condition: %s", node.Name))
			continue
		}
		launchTime := *instance.LaunchTime
		timeToRegistration := node.ObjectMeta.CreationTimestamp.Time.Sub(launchTime)
		timeToReady := readyCondition.LastTransitionTime.Time.Sub(launchTime)

		nodeDimensions := map[string]string{
			"instanceType": string(instance.InstanceType),
			"os":           node.Status.NodeInfo.OperatingSystem,
			"osImage":      node.Status.NodeInfo.OSImage,
			"arch":         node.Status.NodeInfo.Architecture,
		}

		// we'll emit the metrics with different subset(s) of dimensions, to make aggregation simpler
		var nodeDimensionSets []map[string]string
		nodeDimensionSets = append(nodeDimensionSets, nodeDimensions)

		var osDistro string
		if strings.HasPrefix(node.Status.NodeInfo.OSImage, "Amazon Linux") {
			// on al2: "Amazon Linux 2"
			// on al2023: "Amazon Linux 2023.6.20241010"
			parts := strings.Split(node.Status.NodeInfo.OSImage, ".")
			amazonLinuxMajorVersion := parts[0]
			osDistro = amazonLinuxMajorVersion
		}

		if osDistro != "" {
			// nodeDimensions is the same map already stored in nodeDimensionSets,
			// so the full dimension set gains "osDistro" as well
			nodeDimensions["osDistro"] = osDistro

			// if we have an osDistro, add a pared-down dimension set that includes it
			nodeDimensionSets = append(nodeDimensionSets, map[string]string{
				"osDistro":     nodeDimensions["osDistro"],
				"instanceType": nodeDimensions["instanceType"],
				"arch":         nodeDimensions["arch"],
			})
		}

		for _, nodeDimensionSet := range nodeDimensionSets {
			metricRegistry.Record(nodeTimeToRegistrationSeconds, timeToRegistration.Seconds(), nodeDimensionSet)
			metricRegistry.Record(nodeTimeToReadySeconds, timeToReady.Seconds(), nodeDimensionSet)
		}
	}
	return errors.Join(errs...)
}

// KubernetesProviderID holds the components of an AWS node provider ID.
type KubernetesProviderID struct {
	// AvailabilityZone is the first path segment of the provider ID.
	AvailabilityZone string
	// InstanceID is the second path segment of the provider ID.
	InstanceID       string
}

// parseKubernetesProviderID parses a provider ID of the form
// "aws:///<availability-zone>/<instance-id>".
//
// An error is returned if rawProviderId is not a valid URL, if its scheme is
// not "aws", or if its path does not consist of exactly two segments.
func parseKubernetesProviderID(rawProviderId string) (*KubernetesProviderID, error) {
	parsed, err := url.Parse(rawProviderId)
	if err != nil {
		return nil, fmt.Errorf("malformed provider ID: %s: %w", rawProviderId, err)
	}
	if parsed.Scheme != "aws" {
		return nil, fmt.Errorf("unsupported provider ID scheme: %s", parsed.Scheme)
	}
	if parsed.Path == "" {
		return nil, fmt.Errorf("provider ID path is empty: %s", rawProviderId)
	}
	// example: /us-west-2a/i-12345abcdefg
	// the leading slash yields an empty first element, hence 3 parts
	parts := strings.Split(parsed.Path, "/")
	if len(parts) != 3 {
		return nil, fmt.Errorf("provider ID path does not have 3 parts: %s", parsed.Path)
	}
	return &KubernetesProviderID{
		AvailabilityZone: parts[1],
		InstanceID:       parts[2],
	}, nil
}


================================================
FILE: internal/deployers/eksapi/kubeconfig.go
================================================
package eksapi

import (
	"bytes"
	"fmt"
	"log/slog"
	"os"
	"text/template"
)

// kubeconfigPerm is the file mode passed to os.WriteFile when writing the
// kubeconfig (read/write for owner, group, and others, before umask).
const kubeconfigPerm = 0666

// kubeconfigTemplate is the text/template source for the generated kubeconfig.
// It is executed with a kubeconfigTemplateParameters value. The cluster ARN is
// used as the cluster, context, and user name, and credentials are obtained by
// executing `aws eks get-token --cluster-name <name>`.
var kubeconfigTemplate = `---
apiVersion: v1
kind: Config
clusters:
- cluster:
    certificate-authority-data: {{ .ClusterCertificateAuthority }}
    server: {{ .ClusterEndpoint }}
  name: {{ .ClusterARN }}
contexts:
- context:
    cluster: {{ .ClusterARN }}
    user: {{ .ClusterARN }}
  name: {{ .ClusterARN }}
current-context: {{ .ClusterARN }}
preferences: {}
users:
- name: {{ .ClusterARN }}
  user:
    exec:
      apiVersion: client.authentication.k8s.io/v1beta1
      command: aws
      args:
      - eks
      - get-token
      - --cluster-name
      - {{ .ClusterName }}
`

// kubeconfigTemplateParameters carries the values substituted into
// kubeconfigTemplate.
type kubeconfigTemplateParameters struct {
	// ClusterCertificateAuthority fills certificate-authority-data.
	ClusterCertificateAuthority string
	// ClusterARN names the cluster, context, and user entries.
	ClusterARN                  string
	// ClusterEndpoint fills the cluster's server field.
	ClusterEndpoint             string
	// ClusterName is passed to `aws eks get-token --cluster-name`.
	ClusterName                 string
}

// writeKubeconfig renders kubeconfigTemplate for cluster and writes the result
// to kubeconfigPath with mode kubeconfigPerm.
//
// An error is returned if cluster is nil, if the template cannot be parsed or
// executed, or if the file cannot be written.
func writeKubeconfig(cluster *Cluster, kubeconfigPath string) error {
	if cluster == nil {
		return fmt.Errorf("cluster is nil, you might need to set --static-cluster-name or set --up to initialize cluster resources")
	}
	slog.Info("writing kubeconfig", "path", kubeconfigPath, "clusterArn", cluster.arn)
	templateParams := kubeconfigTemplateParameters{
		ClusterCertificateAuthority: cluster.certificateAuthorityData,
		ClusterARN:                  cluster.arn,
		ClusterEndpoint:             cluster.endpoint,
		ClusterName:                 cluster.name,
	}

	var kubeconfig bytes.Buffer

	t, err := template.New("kubeconfig").Parse(kubeconfigTemplate)
	if err != nil {
		return fmt.Errorf("failed to parse kubeconfig template: %w", err)
	}
	if err := t.Execute(&kubeconfig, templateParams); err != nil {
		return fmt.Errorf("failed to render kubeconfig template: %w", err)
	}

	if err := os.WriteFile(kubeconfigPath, kubeconfig.Bytes(), kubeconfigPerm); err != nil {
		return fmt.Errorf("failed to write kubeconfig: %w", err)
	}

	slog.Info("wrote kubeconfig", "path", kubeconfigPath, "content", kubeconfig.String())
	return nil
}


================================================
FILE: internal/deployers/eksapi/logs.go
================================================
package eksapi

import (
	"context"
	_ "embed"
	"errors"
	"fmt"
	"log/slog"
	"slices"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/ssm"
	ssmtypes "github.com/aws/aws-sdk-go-v2/service/ssm/types"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/util/wait"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// logManager gathers logs from cluster nodes and uploads them to S3.
type logManager struct {
	// clients provides the AWS service clients used for log collection.
	clients    *awsClients
	// resourceID is used in S3 key prefixes, in the SSM document name, and as
	// the name of the unmanaged nodegroup's autoscaling group.
	resourceID string
}

// deployerPhase names the deployer phase during which logs are gathered; it
// becomes a path segment of the S3 location the logs are written to.
type deployerPhase string

// Known phase values. These are untyped string constants, so they are usable
// both as deployerPhase and as plain string.
const (
	deployerPhaseUp   = "up"
	deployerPhaseDown = "down"
)

// NewLogManager returns a logManager bound to the given AWS clients and
// resource ID.
func NewLogManager(clients *awsClients, resourceID string) *logManager {
	manager := new(logManager)
	manager.clients = clients
	manager.resourceID = resourceID
	return manager
}

// gatherLogsFromNodes selects a log collection strategy for the cluster and
// runs it. Nothing is gathered (and nil is returned) when no log bucket is
// configured, when no Kubernetes client is available, or when the user data
// format is not one that is supported. Auto Mode clusters use NodeDiagnostic
// resources; all other supported configurations use the collector script.
func (m *logManager) gatherLogsFromNodes(k8sClient *k8sClient, opts *deployerOptions, phase deployerPhase) error {
	switch {
	case opts.LogBucket == "":
		slog.Info("--log-bucket is empty, no logs will be gathered!")
		return nil
	case k8sClient == nil:
		slog.Info("no k8s client available, no logs will be gathered!")
		return nil
	case opts.AutoMode:
		return m.gatherLogsUsingNodeDiagnostic(k8sClient, opts, phase)
	}

	format := opts.UserDataFormat
	// if no --user-data-format was passed, we must be using managed nodes, which default to AL-based AMIs
	if format == "bootstrap.sh" || format == "nodeadm" || format == "" {
		return m.gatherLogsUsingScript(k8sClient, opts, phase)
	}
	slog.Warn("unable to gather logs for userDataFormat", "format", opts.UserDataFormat)
	return nil
}

// logCollectorScriptSsmDocumentContent is the content of the SSM command
// document used by gatherLogsUsingScript, embedded from logs_ssm_doc.json.
//
//go:embed logs_ssm_doc.json
var logCollectorScriptSsmDocumentContent string

// logCollectorSsmDocumentTimeout is the maximum time to wait, per instance, for
// the log collection command to finish executing.
const logCollectorSsmDocumentTimeout = 5 * time.Minute

// gatherLogsUsingScript collects logs by running an SSM command document on the
// instances backing the cluster's nodes, with the output uploaded to
// s3://<LogBucket>/node-logs/<resourceID>/<phase>/.
//
// Instance IDs are taken from the cluster's nodes. If the cluster has no nodes
// and unmanaged nodes are in use, the instances of the autoscaling group named
// after the resource ID are used instead. If no instances are found, nothing is
// done and nil is returned.
//
// The SSM document is created for the duration of the call and deleted
// afterwards on a best-effort basis. Errors from waiting on individual
// instances are joined and returned.
func (m *logManager) gatherLogsUsingScript(k8sClient *k8sClient, opts *deployerOptions, phase deployerPhase) error {
	slog.Info("gathering logs using script...")
	nodes, err := k8sClient.clientset.CoreV1().Nodes().List(context.TODO(), v1.ListOptions{})
	if err != nil {
		return err
	}
	var instanceIds []string
	if len(nodes.Items) > 0 {
		instanceIds, err = getNodeInstanceIDs(nodes.Items)
		if err != nil {
			return err
		}
	} else {
		slog.Warn("no nodes found in cluster!")
		// if we're using unmanaged nodes, we can track down the instances in the ASG even if they didn't join the cluster
		if opts.UnmanagedNodes {
			slog.Info("fetching instances from unmanaged nodegroup...")
			out, err := m.clients.ASG().DescribeAutoScalingGroups(context.TODO(), &autoscaling.DescribeAutoScalingGroupsInput{
				AutoScalingGroupNames: []string{m.resourceID},
			})
			if err != nil {
				// log gathering is best-effort here: warn and carry on without failing
				slog.Warn("failed to describe unmanaged nodegroup ASG", "error", err)
				return nil
			}
			if len(out.AutoScalingGroups) != 1 {
				slog.Warn("autoscaling group not found", "resourceID", m.resourceID)
			} else {
				for _, asg := range out.AutoScalingGroups {
					for _, instance := range asg.Instances {
						instanceIds = append(instanceIds, aws.ToString(instance.InstanceId))
					}
				}
			}
		}
	}
	if len(instanceIds) == 0 {
		slog.Warn("no nodes to gather logs from!")
		return nil
	}
	doc, err := m.clients.SSM().CreateDocument(context.TODO(), &ssm.CreateDocumentInput{
		Content:        aws.String(logCollectorScriptSsmDocumentContent),
		Name:           aws.String(fmt.Sprintf("%s-log-collector", m.resourceID)),
		DocumentType:   ssmtypes.DocumentTypeCommand,
		DocumentFormat: ssmtypes.DocumentFormatJson,
	})
	if err != nil {
		return err
	}
	defer func() {
		// cleanup is best-effort, but a leaked document should at least be visible in the logs
		if _, err := m.clients.SSM().DeleteDocument(context.TODO(), &ssm.DeleteDocumentInput{
			Name: doc.DocumentDescription.Name,
		}); err != nil {
			slog.Warn("failed to delete log collector SSM document", "name", aws.ToString(doc.DocumentDescription.Name), "error", err)
		}
	}()
	command, err := m.clients.SSM().SendCommand(context.TODO(), &ssm.SendCommandInput{
		DocumentName: doc.DocumentDescription.Name,
		InstanceIds:  instanceIds,
		Parameters: map[string][]string{
			"s3Destination": {fmt.Sprintf("s3://%s/node-logs/%s/%s/", opts.LogBucket, m.resourceID, phase)},
		},
	})
	if err != nil {
		return err
	}
	var errs []error
	for _, instanceId := range instanceIds {
		out, err := ssm.NewCommandExecutedWaiter(m.clients.SSM()).WaitForOutput(context.TODO(), &ssm.GetCommandInvocationInput{
			CommandId:  command.Command.CommandId,
			InstanceId: aws.String(instanceId),
		}, logCollectorSsmDocumentTimeout)
		if err != nil {
			errs = append(errs, err)
		} else {
			slog.Info("log collection command completed", "instanceId", instanceId, "status", out.Status)
		}
	}
	if len(errs) > 0 {
		return errors.Join(errs...)
	}
	slog.Info("gathered logs from nodes", "instanceIds", instanceIds)
	return nil
}

// logCollectorNodeDiagnosticTimeout is the maximum time waitForNodeDiagnostics
// polls for all NodeDiagnostic resources to reach a terminal state.
const logCollectorNodeDiagnosticTimeout = 5 * time.Minute

// gatherLogsUsingNodeDiagnostic collects logs by creating one NodeDiagnostic
// resource per node, named after the node's instance ID, whose log capture
// destination is a presigned S3 PUT URL for
// node-logs/<resourceID>/<phase>/<instanceID>.tar.gz in opts.LogBucket.
//
// It waits for the created resources to complete and deletes them afterwards.
// Failures to presign, create, wait, or delete are collected, as are outcome
// reasons other than "Success" and "SuccessWithErrors"; the joined errors are
// returned. If the cluster has no nodes, nothing is done and nil is returned.
func (m *logManager) gatherLogsUsingNodeDiagnostic(k8sClient *k8sClient, opts *deployerOptions, phase deployerPhase) error {
	slog.Info("gathering logs using NodeDiagnostic...")
	nodes, err := k8sClient.clientset.CoreV1().Nodes().List(context.TODO(), v1.ListOptions{})
	if err != nil {
		return err
	}
	if len(nodes.Items) == 0 {
		slog.Warn("no nodes to gather logs from!")
		return nil
	}
	instanceIds, err := getNodeInstanceIDs(nodes.Items)
	if err != nil {
		return err
	}
	var errs []error
	var nodeDiagnostics []unstructured.Unstructured
	for _, instanceId := range instanceIds {
		presignedPut, err := m.clients.S3Presign().PresignPutObject(context.TODO(), &s3.PutObjectInput{
			Bucket: aws.String(opts.LogBucket),
			Key:    aws.String(fmt.Sprintf("node-logs/%s/%s/%s.tar.gz", m.resourceID, phase, instanceId)),
		})
		if err != nil {
			errs = append(errs, fmt.Errorf("failed to create presigned PUT for %s: %v", instanceId, err))
			continue
		}
		nodeDiagnostic := unstructured.Unstructured{
			Object: map[string]interface{}{
				"apiVersion": "eks.amazonaws.com/v1alpha1",
				"kind":       "NodeDiagnostic",
				// unstructured content must consist of JSON-compatible maps and
				// scalars only; a typed struct here breaks the accessors
				"metadata": map[string]interface{}{
					"name": instanceId,
				},
				"spec": map[string]interface{}{
					"logCapture": map[string]interface{}{
						"destination": presignedPut.URL,
					},
				},
			},
		}
		if err := k8sClient.client.Create(context.TODO(), &nodeDiagnostic); err != nil {
			errs = append(errs, err)
		} else {
			nodeDiagnostics = append(nodeDiagnostics, nodeDiagnostic)
		}
	}
	outcomes, err := m.waitForNodeDiagnostics(k8sClient, nodeDiagnostics)
	if err != nil {
		errs = append(errs, fmt.Errorf("failed to wait for node diagnostics: %v", err))
	}
	for instanceId, reasons := range outcomes {
		for _, reason := range reasons {
			// consider SuccessWithErrors a success, this isn't high stakes
			if !slices.Contains([]string{"Success", "SuccessWithErrors"}, reason) {
				errs = append(errs, fmt.Errorf("node diagnostic outcome reason for %s: %s", instanceId, reason))
			}
		}
	}
	// clean up every resource that was created, regardless of its outcome
	for _, nodeDiagnostic := range nodeDiagnostics {
		if err := k8sClient.client.Delete(context.TODO(), &nodeDiagnostic); err != nil {
			errs = append(errs, err)
		}
	}
	if len(errs) > 0 {
		return errors.Join(errs...)
	}
	slog.Info("gathered logs from nodes", "instanceIds", instanceIds)
	return nil
}

// waitForNodeDiagnostics polls the given node diagnostics every 5 seconds until
// each has reached a terminal state or logCollectorNodeDiagnosticTimeout has
// elapsed. On success it returns a map from node diagnostic name to its outcome
// reason(s). If polling fails or times out, a nil map and the error are
// returned.
func (m *logManager) waitForNodeDiagnostics(k8sClient *k8sClient, nodeDiagnostics []unstructured.Unstructured) (map[string][]string, error) {
	results := make(map[string][]string)
	poll := func(ctx context.Context) (done bool, err error) {
		for i := range nodeDiagnostics {
			// work on a copy so the caller's slice elements are left untouched
			current := nodeDiagnostics[i]
			key := client.ObjectKeyFromObject(&current)
			if _, seen := results[key.Name]; seen {
				// we already have an outcome for this node diagnostic
				continue
			}
			if getErr := k8sClient.client.Get(ctx, key, &current); getErr != nil {
				return false, fmt.Errorf("failed to get node diagnostic: %+v: %v", key, getErr)
			}
			if finished, reasons := m.isNodeDiagnosticComplete(&current); finished {
				results[key.Name] = reasons
			}
		}
		// we're done once every node diagnostic has an outcome
		return len(results) == len(nodeDiagnostics), nil
	}
	if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, logCollectorNodeDiagnosticTimeout, false, poll); err != nil {
		return nil, err
	}
	return results, nil
}

// isNodeDiagnosticComplete reports whether every entry of the node diagnostic's
// status.captureStatuses carries a state.completed.reason, and if so returns
// those reasons in order. It reports false with nil reasons when the statuses
// are absent, when any entry lacks a completion reason, or when a field does
// not have the expected type (the latter is also logged).
func (m *logManager) isNodeDiagnosticComplete(nodeDiagnostic *unstructured.Unstructured) (bool, []string) {
	statuses, present, err := unstructured.NestedSlice(nodeDiagnostic.Object, "status", "captureStatuses")
	switch {
	case err != nil:
		slog.Error("NodeDiagnostic captureStatuses does not match expected type", "nodeDiagnostic", nodeDiagnostic)
		return false, nil
	case !present:
		return false, nil
	}

	var reasons []string
	for _, raw := range statuses {
		statusMap, isMap := raw.(map[string]interface{})
		if !isMap {
			slog.Error("NodeDiagnostic captureStatus does not match expected type", "nodeDiagnostic", nodeDiagnostic)
			return false, nil
		}
		reason, hasReason, reasonErr := unstructured.NestedString(statusMap, "state", "completed", "reason")
		switch {
		case reasonErr != nil:
			slog.Error("NodeDiagnostic captureStatus.reason does not match expected type", "nodeDiagnostic", nodeDiagnostic)
			return false, nil
		case !hasReason:
			// this capture has not completed yet
			return false, nil
		}
		reasons = append(reasons, reason)
	}
	return true, reasons
}


================================================
FILE: internal/deployers/eksapi/logs_ssm_doc.json
================================================
{
    "schemaVersion": "2.2",
    "description": "Collect logs from an Amazon Linux EKS node",
    "parameters": {
        "s3Destination": {
            "type": "String"
        }
    },
    "mainSteps": [
        {
            "action": "aws:runShellScript",
            "name": "collectAndUploadLogs",
            "precondition": {
                "StringEquals": [
                    "platformType",
                    "Linux"
                ]
            },
            "inputs": {
                "runCommand": [
                    "bash /etc/eks/log-collector-script/eks-log-collector.sh >/dev/null 2>&1",
                    "aws s3 cp /var/log/eks_i* {{s3Destination}}"
                ]
            }
        }
    ]
}

================================================
FILE: internal/deployers/eksapi/metrics.go
================================================
package eksapi

import (
	"path"

	"github.com/aws/aws-k8s-tester/internal/metrics"
	cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
)

// DeployerMetricNamespace is the metric namespace shared by all of the
// deployer's metrics: "kubetest2/" followed by DeployerName.
var DeployerMetricNamespace = path.Join("kubetest2", DeployerName)

// Metric specs emitted by the deployer. All of them share DeployerMetricNamespace
// and are reported with a unit of seconds.
var (
	// totalRuntimeSeconds is the spec for the "TotalRuntimeSeconds" metric.
	// NOTE(review): presumably the wall-clock duration of a deployer run; confirm against the emitter.
	totalRuntimeSeconds = &metrics.MetricSpec{
		Namespace: DeployerMetricNamespace,
		Metric:    "TotalRuntimeSeconds",
		Unit:      cloudwatchtypes.StandardUnitSeconds,
	}

	// nodeTimeToRegistrationSeconds is the spec for the "NodeTimeToRegistrationSeconds" metric.
	nodeTimeToRegistrationSeconds = &metrics.MetricSpec{
		Namespace: DeployerMetricNamespace,
		Metric:    "NodeTimeToRegistrationSeconds",
		Unit:      cloudwatchtypes.StandardUnitSeconds,
	}

	// nodeTimeToReadySeconds is the spec for the "NodeTimeToReadySeconds" metric.
	nodeTimeToReadySeconds = &metrics.MetricSpec{
		Namespace: DeployerMetricNamespace,
		Metric:    "NodeTimeToReadySeconds",
		Unit:      cloudwatchtypes.StandardUnitSeconds,
	}
)


================================================
FILE: internal/deployers/eksapi/node.go
================================================
package eksapi

import (
	"bytes"
	"context"
	_ "embed"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
	"github.com/aws/aws-sdk-go-v2/service/cloudformation"
	cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
	"github.com/aws/smithy-go"
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"log/slog"
	"k8s.io/utils/pointer"
	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
	"github.com/aws/aws-k8s-tester/internal/util"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
)

const (
	// nodeDeletionTimeout bounds how long node cleanup waits: it is passed to the managed
	// nodegroup deletion waiter and to the k8s client's wait for node deletion.
	nodeDeletionTimeout = time.Minute * 20
)

var (
	// defaultInstanceTypes_x86_64 lists the candidate instance types used for x86_64
	// when the caller does not specify any instance types.
	defaultInstanceTypes_x86_64 = []string{
		"m6i.xlarge",
		"m6i.large",
		"m5.large",
		"t3.large",
	}

	// defaultInstanceTypes_arm64 lists the candidate instance types used for arm64
	// when the caller does not specify any instance types.
	defaultInstanceTypes_arm64 = []string{
		"m7g.xlarge",
		"m7g.large",
		"m6g.xlarge",
		"m6g.large",
		"t4g.xlarge",
		"t4g.large",
	}

	// defaultInstanceTypesByEC2ArchitectureValues maps an EC2 architecture to its default
	// instance types; it is consulted for explicit architecture options and for AMI architectures.
	defaultInstanceTypesByEC2ArchitectureValues = map[ec2types.ArchitectureValues][]string{
		ec2types.ArchitectureValuesX8664: defaultInstanceTypes_x86_64,
		ec2types.ArchitectureValuesArm64: defaultInstanceTypes_arm64,
	}

	// defaultInstanceTypesByEKSAMITypes maps a managed nodegroup AMI type to its default
	// instance types. AMI types missing from this map have no defaults.
	defaultInstanceTypesByEKSAMITypes = map[ekstypes.AMITypes][]string{
		ekstypes.AMITypesAl2X8664:            defaultInstanceTypes_x86_64,
		ekstypes.AMITypesAl2Arm64:            defaultInstanceTypes_arm64,
		ekstypes.AMITypesAl2023X8664Standard: defaultInstanceTypes_x86_64,
		ekstypes.AMITypesAl2023Arm64Standard: defaultInstanceTypes_arm64,
	}
	// nodeClassResource identifies the eks.amazonaws.com/v1 "nodeclasses" resource,
	// which is accessed through the dynamic client.
	nodeClassResource = schema.GroupVersionResource{Group: "eks.amazonaws.com", Version: "v1", Resource: "nodeclasses"}
)

// nodeManager creates and deletes the nodes of a cluster: managed nodegroups,
// unmanaged nodegroup stacks, and the node class / node pool / placeholder
// deployment used when opts.AutoMode is set.
type nodeManager struct {
	// clients provides the AWS service clients (EC2, EKS, CloudFormation, Auto Scaling) used by the manager.
	clients *awsClients
	// resourceID names the resources this manager owns (nodegroup, node class, node pool,
	// placeholder deployment) and prefixes the unmanaged nodegroup stack name.
	resourceID string
}

// NewNodeManager returns a nodeManager that uses the given AWS clients and
// names its resources after resourceID.
func NewNodeManager(clients *awsClients, resourceID string) *nodeManager {
	manager := new(nodeManager)
	manager.clients = clients
	manager.resourceID = resourceID
	return manager
}

// createNodes resolves the instance types to use and then provisions nodes
// according to the deployer options:
//   - AutoMode: a node class, a node pool and a placeholder deployment
//   - UnmanagedNodes: an unmanaged nodegroup stack
//   - otherwise: a managed nodegroup
func (m *nodeManager) createNodes(infra *Infrastructure, cluster *Cluster, opts *deployerOptions, k8sClient *k8sClient) error {
	if err := m.resolveInstanceTypes(opts); err != nil {
		return fmt.Errorf("failed to resolve instance types: %v", err)
	}
	switch {
	case opts.AutoMode:
		if err := m.createNodeClass(opts, k8sClient); err != nil {
			return err
		}
		if err := m.createNodePool(opts, k8sClient); err != nil {
			return err
		}
		_, err := m.createPlaceholderDeployment(opts, k8sClient)
		return err
	case opts.UnmanagedNodes:
		return m.createUnmanagedNodegroup(infra, cluster, opts)
	default:
		return m.createManagedNodegroup(infra, cluster, opts)
	}
}

// resolveInstanceTypes decides which instance types the nodes may use and stores
// the result in opts.InstanceTypes.
//
// When opts.InstanceTypes is empty, defaults are chosen from, in order of precedence:
// the requested architectures (opts.InstanceTypeArchs), the architecture of opts.AMI
// (unmanaged nodes), or the managed nodegroup's AMI type (opts.AMIType).
// The candidates are then filtered down to the types accepted by getValidInstanceTypes.
//
// It returns an error when an architecture or AMI type has no known defaults, when
// the AMI cannot be described or is not returned by EC2, or when no candidate is valid.
func (m *nodeManager) resolveInstanceTypes(opts *deployerOptions) (err error) {
	instanceTypes := opts.InstanceTypes
	if len(instanceTypes) == 0 {
		switch {
		case len(opts.InstanceTypeArchs) > 0:
			slog.Info("choosing instance types based on architectures", "archs", opts.InstanceTypeArchs)
			for _, arch := range opts.InstanceTypeArchs {
				var ec2Arch ec2types.ArchitectureValues
				switch arch {
				case "x86_64", "amd64":
					ec2Arch = ec2types.ArchitectureValuesX8664
				case "aarch64", "arm64":
					ec2Arch = ec2types.ArchitectureValuesArm64
				default:
					return fmt.Errorf("unknown architecture: '%s'", arch)
				}
				instanceTypesForArch, ok := defaultInstanceTypesByEC2ArchitectureValues[ec2Arch]
				if !ok {
					return fmt.Errorf("no default instance types known for architecture: '%s'", arch)
				}
				instanceTypes = append(instanceTypes, instanceTypesForArch...)
			}
		case opts.UnmanagedNodes:
			slog.Info("choosing instance types based on AMI architecture...")
			out, err := m.clients.EC2().DescribeImages(context.TODO(), &ec2.DescribeImagesInput{
				ImageIds: []string{opts.AMI},
			})
			if err != nil {
				return fmt.Errorf("failed to describe AMI: %s: %v", opts.AMI, err)
			}
			// guard against an empty result so we report a clear error instead of panicking on Images[0]
			if len(out.Images) == 0 {
				return fmt.Errorf("failed to describe AMI: %s: no images returned", opts.AMI)
			}
			amiArch := out.Images[0].Architecture
			instanceTypesForAMIArchitecture, ok := defaultInstanceTypesByEC2ArchitectureValues[amiArch]
			if !ok {
				return fmt.Errorf("no default instance types known for AMI architecture: %v", amiArch)
			}
			instanceTypes = instanceTypesForAMIArchitecture
		default:
			// we don't rely on the service's default instance types, because they're a bit too small for the k8s e2e suite
			slog.Info("choosing instance types based on managed nodegroup's AMI type...")
			instanceTypesForAMIType, ok := defaultInstanceTypesByEKSAMITypes[ekstypes.AMITypes(opts.AMIType)]
			if !ok {
				return fmt.Errorf("no default instance types known for AMI type: %v", opts.AMIType)
			}
			instanceTypes = instanceTypesForAMIType
		}
	}
	validInstanceTypes, err := m.getValidInstanceTypes(instanceTypes)
	if err != nil {
		return err
	}
	if len(validInstanceTypes) == 0 {
		return fmt.Errorf("none of the instance types %v were valid", instanceTypes)
	}
	opts.InstanceTypes = validInstanceTypes
	slog.Info("using instance types", "instanceTypes", opts.InstanceTypes)
	return nil
}

// createNodeClass clones the cluster's "default" node class into a new node class
// named after the manager's resource ID.
//
// The copy keeps the default spec, with the ephemeral storage size set to 500Gi
// (when the spec has an ephemeralStorage section) and, when opts.CapacityReservation
// is set, a capacity reservation selector term pointing at the chosen reservation.
func (m *nodeManager) createNodeClass(opts *deployerOptions, k8sClient *k8sClient) error {
	nodeClasses := k8sClient.dclient.Resource(nodeClassResource)
	template, err := nodeClasses.Get(context.Background(), "default", metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("getting default nodeclass, %w", err)
	}
	slog.Info("got existing default nodeclass for template..")

	// start from empty metadata so only the new name is carried over, and drop the status
	template.Object["metadata"] = map[string]any{}
	template.SetName(m.resourceID)
	delete(template.Object, "status")

	spec, hasSpec := template.Object["spec"].(map[string]any)
	if hasSpec {
		// bump the ephemeral storage to 500Gi
		if storage, ok := spec["ephemeralStorage"].(map[string]any); ok {
			storage["size"] = "500Gi"
		}
	}
	if hasSpec && opts.CapacityReservation {
		// pin the node class to the selected capacity reservation
		reservation, err := m.getCapacityReservation(opts)
		if err != nil {
			return fmt.Errorf("failed to get capacity reservation: %w", err)
		}
		spec["capacityReservationSelectorTerms"] = []map[string]any{
			{
				"id": aws.ToString(reservation.CapacityReservationId),
			},
		}
	}

	slog.Info("creating new node class...")
	if _, err = nodeClasses.Create(context.Background(), template, metav1.CreateOptions{}); err != nil {
		return fmt.Errorf("creating new nodeclass, %w", err)
	}
	slog.Info("node class created!")
	return nil
}

// createNodePool creates a Karpenter NodePool named after the manager's resource ID.
//
// The pool references the node class of the same name and restricts nodes to linux,
// to the "reserved" and "on-demand" capacity types, and to opts.InstanceTypes.
func (m *nodeManager) createNodePool(opts *deployerOptions, k8sClient *k8sClient) error {
	// requireIn builds an "In" requirement for a single node label key
	requireIn := func(key string, values []string) karpv1.NodeSelectorRequirementWithMinValues {
		return karpv1.NodeSelectorRequirementWithMinValues{
			NodeSelectorRequirement: corev1.NodeSelectorRequirement{
				Key:      key,
				Operator: corev1.NodeSelectorOpIn,
				Values:   values,
			},
		}
	}
	disruption := karpv1.Disruption{
		Budgets: []karpv1.Budget{
			{
				Nodes: "10%",
			},
		},
		ConsolidateAfter: karpv1.MustParseNillableDuration("Never"),
	}
	claimSpec := karpv1.NodeClaimTemplateSpec{
		ExpireAfter: karpv1.MustParseNillableDuration("24h"),
		NodeClassRef: &karpv1.NodeClassReference{
			Group: "eks.amazonaws.com",
			Kind:  "NodeClass",
			Name:  m.resourceID,
		},
		Requirements: []karpv1.NodeSelectorRequirementWithMinValues{
			requireIn("kubernetes.io/os", []string{"linux"}),
			requireIn("karpenter.sh/capacity-type", []string{"reserved", "on-demand"}),
			requireIn("node.kubernetes.io/instance-type", opts.InstanceTypes),
		},
	}
	nodePool := karpv1.NodePool{
		ObjectMeta: metav1.ObjectMeta{
			Name: m.resourceID,
		},
		Spec: karpv1.NodePoolSpec{
			Weight:     pointer.Int32(100), // max
			Disruption: disruption,
			Template: karpv1.NodeClaimTemplate{
				Spec: claimSpec,
			},
		},
	}
	slog.Info("creating node pool...")
	err := k8sClient.client.Create(context.TODO(), &nodePool)
	if err != nil {
		return fmt.Errorf("failed to create node pool: %v", err)
	}
	slog.Info("created node pool", "nodePool", nodePool)
	return nil
}

// deleteNodeClass deletes the node class named after the manager's resource ID.
// A node class that does not exist is treated as already deleted.
func (m *nodeManager) deleteNodeClass(k8sClient *k8sClient) error {
	slog.Info("deleting node class...")
	err := k8sClient.dclient.Resource(nodeClassResource).Delete(context.Background(), m.resourceID, metav1.DeleteOptions{})
	switch {
	case err == nil:
		slog.Info("deleted node class!")
		return nil
	case apierrors.IsNotFound(err):
		slog.Info("node class does not exist", "resourceID", m.resourceID)
		return nil
	default:
		return fmt.Errorf("failed to delete node class, %w", err)
	}
}

// deleteNodePool deletes the Karpenter NodePool named after the manager's resource ID.
// A node pool that does not exist is treated as already deleted.
func (m *nodeManager) deleteNodePool(k8sClient *k8sClient) error {
	var target karpv1.NodePool
	target.ObjectMeta = metav1.ObjectMeta{
		Name: m.resourceID,
	}
	slog.Info("deleting node pool...")
	err := k8sClient.client.Delete(context.TODO(), &target)
	if err == nil {
		slog.Info("deleted node pool!")
		return nil
	}
	if apierrors.IsNotFound(err) {
		slog.Info("node pool does not exist", "resourceID", m.resourceID)
		return nil
	}
	return fmt.Errorf("failed to delete node pool: %w", err)
}

// createPlaceholderDeployment creates a Deployment in the "default" namespace with
// opts.Nodes replicas, using a required pod anti-affinity on the hostname topology key
// so that every replica must land on a different node.
// This ensures that (at least) the specified number of nodes exist in an EKS Auto cluster.
// When opts.Nodes is zero nothing is created and (nil, nil) is returned.
func (m *nodeManager) createPlaceholderDeployment(opts *deployerOptions, k8sClient *k8sClient) (*appsv1.Deployment, error) {
	if opts.Nodes == 0 {
		slog.Info("not creating placeholder deployment!")
		return nil, nil
	}
	podLabels := map[string]string{
		"app": m.resourceID,
	}
	// replicas repel each other per hostname, forcing one node per replica
	spreadAcrossNodes := &corev1.Affinity{
		PodAntiAffinity: &corev1.PodAntiAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
				{
					LabelSelector: &metav1.LabelSelector{
						MatchLabels: podLabels,
					},
					TopologyKey: "kubernetes.io/hostname",
				},
			},
		},
	}
	sleeper := corev1.Container{
		Name:    "main",
		Image:   "public.ecr.aws/amazonlinux/amazonlinux:2023",
		Command: []string{"sleep", "infinity"},
	}
	desired := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: m.resourceID, Namespace: "default"},
		Spec: appsv1.DeploymentSpec{
			Replicas: pointer.Int32(int32(opts.Nodes)),
			Selector: &metav1.LabelSelector{
				MatchLabels: podLabels,
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: podLabels,
				},
				Spec: corev1.PodSpec{
					Affinity:   spreadAcrossNodes,
					Containers: []corev1.Container{sleeper},
				},
			},
		},
	}
	slog.Info("creating placeholder deployment...")
	created, err := k8sClient.clientset.AppsV1().Deployments("default").Create(context.TODO(), desired, metav1.CreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to create placeholder deployment: %v", err)
	}
	slog.Info("created placeholder deployment", "deployment", created)
	return created, nil
}

// deletePlaceholderDeployment deletes the placeholder Deployment from the "default"
// namespace with a zero grace period. A missing deployment is treated as already deleted.
func (m *nodeManager) deletePlaceholderDeployment(k8sClient *k8sClient) error {
	slog.Info("deleting placeholder deployment...")
	deleteOpts := *metav1.NewDeleteOptions( /* no grace period */ 0)
	err := k8sClient.clientset.AppsV1().Deployments("default").Delete(context.TODO(), m.resourceID, deleteOpts)
	switch {
	case err == nil:
		slog.Info("deleted placeholder deployment!")
		return nil
	case apierrors.IsNotFound(err):
		slog.Info("placeholder deployment does not exist", "resourceID", m.resourceID)
		return nil
	default:
		return fmt.Errorf("failed to delete placeholder deployment: %v", err)
	}
}

// createManagedNodegroup creates an on-demand EKS managed nodegroup named after the
// manager's resource ID, fixed at opts.Nodes nodes, and waits up to
// opts.NodeCreationTimeout for it to become active.
//
// When opts.ExpectedAMI is set, the nodegroup's first Auto Scaling group is checked
// with verifyASGAMI and an error is returned if its instances use a different AMI.
func (m *nodeManager) createManagedNodegroup(infra *Infrastructure, cluster *Cluster, opts *deployerOptions) error {
	slog.Info("creating nodegroup...")
	input := eks.CreateNodegroupInput{
		ClusterName:   aws.String(m.resourceID),
		NodegroupName: aws.String(m.resourceID),
		NodeRole:      aws.String(infra.nodeRoleARN),
		Subnets:       infra.subnets(),
		DiskSize:      aws.Int32(100),
		CapacityType:  ekstypes.CapacityTypesOnDemand,
		ScalingConfig: &ekstypes.NodegroupScalingConfig{
			MinSize:     aws.Int32(int32(opts.Nodes)),
			MaxSize:     aws.Int32(int32(opts.Nodes)),
			DesiredSize: aws.Int32(int32(opts.Nodes)),
		},
		AmiType:       ekstypes.AMITypes(opts.AMIType),
		InstanceTypes: opts.InstanceTypes,
	}
	out, err := m.clients.EKS().CreateNodegroup(context.TODO(), &input)
	if err != nil {
		return err
	}
	// the ARN is only used for logging, so tolerate a response without it
	var nodegroupARN string
	if out.Nodegroup != nil {
		nodegroupARN = aws.ToString(out.Nodegroup.NodegroupArn)
	}
	describeInput := eks.DescribeNodegroupInput{
		ClusterName:   input.ClusterName,
		NodegroupName: input.NodegroupName,
	}
	slog.Info("waiting for nodegroup to be active", "arn", nodegroupARN)
	err = eks.NewNodegroupActiveWaiter(m.clients.EKS()).
		Wait(context.TODO(), &describeInput, opts.NodeCreationTimeout)
	if err != nil {
		return err
	}
	slog.Info("nodegroup is active", "arn", nodegroupARN)
	if opts.ExpectedAMI == "" {
		return nil
	}
	described, err := m.clients.EKS().DescribeNodegroup(context.TODO(), &describeInput)
	if err != nil {
		return err
	}
	// fail with a clear error instead of panicking when the nodegroup reports no ASG
	if described.Nodegroup == nil || described.Nodegroup.Resources == nil || len(described.Nodegroup.Resources.AutoScalingGroups) == 0 {
		return fmt.Errorf("nodegroup %s has no autoscaling group to verify against expected AMI: %s", m.resourceID, opts.ExpectedAMI)
	}
	asgName := aws.ToString(described.Nodegroup.Resources.AutoScalingGroups[0].Name)
	ok, err := m.verifyASGAMI(asgName, opts.ExpectedAMI)
	if err != nil {
		return err
	}
	if !ok {
		return fmt.Errorf("ASG %s is not using expected AMI: %s", asgName, opts.ExpectedAMI)
	}
	return nil
}

// createUnmanagedNodegroup creates the CloudFormation stack that backs an unmanaged
// nodegroup and waits up to opts.NodeCreationTimeout for it to complete.
//
// Subnets are narrowed to the capacity reservation's availability zone when
// opts.CapacityReservation is set, otherwise to the zones returned by
// getValidAvailabilityZonesFilter. When opts.ExpectedAMI is set, the Auto Scaling
// group named after the resource ID is checked with verifyASGAMI.
func (m *nodeManager) createUnmanagedNodegroup(infra *Infrastructure, cluster *Cluster, opts *deployerOptions) error {
	stackName := m.getUnmanagedNodegroupStackName()
	slog.Info("creating unmanaged nodegroup stack", "stackName", stackName)
	userData, userDataIsMimePart, err := generateUserData(cluster, opts)
	if err != nil {
		return err
	}

	var (
		availabilityZoneFilter []string
		capacityReservationId  string
	)
	if opts.CapacityReservation {
		reservation, err := m.getCapacityReservation(opts)
		if err != nil {
			return err
		}
		capacityReservationId = aws.ToString(reservation.CapacityReservationId)
		availabilityZoneFilter = []string{aws.ToString(reservation.AvailabilityZone)}
	} else {
		availabilityZoneFilter, err = m.getValidAvailabilityZonesFilter(opts, infra)
		if err != nil {
			return err
		}
	}

	targetSubnets, err := m.getValidSubnets(opts, infra, availabilityZoneFilter)
	if err != nil {
		return err
	}
	networkInterfaces, err := m.getNetworkInterfaces(opts, []string{cluster.securityGroupId}, targetSubnets)
	if err != nil {
		return err
	}

	// bottlerocket user data is paired with a different volume device than the default
	volumeMountPath := "/dev/xvda"
	if opts.UserDataFormat == "bottlerocket" {
		volumeMountPath = "/dev/xvdb"
	}

	type templateData struct {
		NetworkInterfaces []templates.NetworkInterface
		InstanceTypes     []string
	}
	var rendered bytes.Buffer
	err = templates.UnmanagedNodegroup.Execute(&rendered, templateData{
		NetworkInterfaces: networkInterfaces,
		InstanceTypes:     opts.InstanceTypes,
	})
	if err != nil {
		return err
	}

	// param builds a single stack parameter from a key/value pair
	param := func(key, value string) cloudformationtypes.Parameter {
		return cloudformationtypes.Parameter{
			ParameterKey:   aws.String(key),
			ParameterValue: aws.String(value),
		}
	}
	input := cloudformation.CreateStackInput{
		StackName:    aws.String(stackName),
		TemplateBody: aws.String(rendered.String()),
		Capabilities: []cloudformationtypes.Capability{cloudformationtypes.CapabilityCapabilityIam},
		Parameters: []cloudformationtypes.Parameter{
			param("ResourceId", m.resourceID),
			param("VpcId", infra.vpc),
			param("SubnetIds", strings.Join(targetSubnets, ",")),
			param("UserData", userData),
			param("UserDataIsMIMEPart", strconv.FormatBool(userDataIsMimePart)),
			param("VolumeMountPath", volumeMountPath),
			param("ClusterName", cluster.name),
			param("NodeRoleName", infra.nodeRoleName),
			param("NodeCount", strconv.Itoa(opts.Nodes)),
			param("SecurityGroup", cluster.securityGroupId),
			param("AMIId", opts.AMI),
			param("CapacityReservationId", capacityReservationId),
		},
	}
	out, err := m.clients.CFN().CreateStack(context.TODO(), &input)
	if err != nil {
		return err
	}
	slog.Info("waiting for unmanaged nodegroup stack to be created", "stackId", aws.ToString(out.StackId))
	waiter := cloudformation.NewStackCreateCompleteWaiter(m.clients.CFN())
	err = waiter.Wait(context.TODO(), &cloudformation.DescribeStacksInput{StackName: out.StackId}, opts.NodeCreationTimeout)
	if err != nil {
		return util.WrapCFNStackFailure(context.TODO(), m.clients.CFN(), fmt.Errorf("failed to wait for unmanaged nodegroup stack creation: %w", err), stackName)
	}
	slog.Info("created unmanaged nodegroup stack", "stackId", *out.StackId)
	if opts.ExpectedAMI == "" {
		return nil
	}
	ok, err := m.verifyASGAMI(m.resourceID, opts.ExpectedAMI)
	if err != nil {
		return err
	}
	if !ok {
		return fmt.Errorf("ASG %s is not using expected AMI: %s", m.resourceID, opts.ExpectedAMI)
	}
	return nil
}

// deleteNodes cleans up any nodes in the cluster.
// It may be called outside the context of a deployer run (by the janitor, for example),
// so it attempts to delete nodes of every type: the unmanaged nodegroup stack first,
// then the managed nodegroup, then (when possible) the Auto mode resources.
func (m *nodeManager) deleteNodes(k8sClient *k8sClient, opts *deployerOptions) error {
	for _, deleteNodegroup := range []func() error{m.deleteUnmanagedNodegroup, m.deleteManagedNodegroup} {
		if err := deleteNodegroup(); err != nil {
			return err
		}
	}
	// we only have a k8sClient when this is called by the deployer, not by the janitor
	// TODO implement cleanup of Auto nodes in the janitor
	if k8sClient == nil || opts == nil || !opts.AutoMode {
		return nil
	}
	autoModeCleanup := []func() error{
		func() error { return m.deletePlaceholderDeployment(k8sClient) },
		func() error { return m.deleteNodeClass(k8sClient) },
		func() error { return m.deleteNodePool(k8sClient) },
		func() error { return k8sClient.waitForNodeDeletion(nodeDeletionTimeout) },
	}
	for _, step := range autoModeCleanup {
		if err := step(); err != nil {
			return err
		}
	}
	return nil
}

// deleteManagedNodegroup deletes the managed nodegroup named after the manager's
// resource ID and waits up to nodeDeletionTimeout for the deletion to finish.
// A nodegroup that does not exist is treated as already deleted.
func (m *nodeManager) deleteManagedNodegroup() error {
	input := eks.DeleteNodegroupInput{
		ClusterName:   aws.String(m.resourceID),
		NodegroupName: aws.String(m.resourceID),
	}
	slog.Info("deleting nodegroup...")
	out, err := m.clients.EKS().DeleteNodegroup(context.TODO(), &input)
	if err != nil {
		var notFound *ekstypes.ResourceNotFoundException
		if !errors.As(err, &notFound) {
			return fmt.Errorf("failed to delete nodegroup: %v", err)
		}
		slog.Info("nodegroup does not exist", "resourceID", m.resourceID)
		return nil
	}
	nodegroupARN := *out.Nodegroup.NodegroupArn
	slog.Info("waiting for nodegroup deletion", "arn", nodegroupARN)
	describeInput := &eks.DescribeNodegroupInput{
		ClusterName:   input.ClusterName,
		NodegroupName: input.NodegroupName,
	}
	waiter := eks.NewNodegroupDeletedWaiter(m.clients.EKS())
	if err = waiter.Wait(context.TODO(), describeInput, nodeDeletionTimeout); err != nil {
		return fmt.Errorf("failed to wait for nodegroup deletion: %v", err)
	}
	slog.Info("nodegroup deleted", "arn", nodegroupARN)
	return nil
}

// deleteUnmanagedNodegroup deletes the unmanaged nodegroup's CloudFormation stack and
// waits up to infraStackDeletionTimeout for the deletion to complete.
// A StackNotFoundException from the delete call is treated as already deleted.
func (m *nodeManager) deleteUnmanagedNodegroup() error {
	stackName := m.getUnmanagedNodegroupStackName()
	slog.Info("deleting unmanaged nodegroup stack", "stackName", stackName)
	_, err := m.clients.CFN().DeleteStack(context.TODO(), &cloudformation.DeleteStackInput{
		StackName: aws.String(stackName),
	})
	if err != nil {
		var notFound *cloudformationtypes.StackNotFoundException
		if !errors.As(err, &notFound) {
			return fmt.Errorf("failed to delete unmanaged nodegroup stack: %w", err)
		}
		slog.Info("unmanaged nodegroup stack does not exist", "stackName", stackName)
		return nil
	}
	slog.Info("waiting for unmanaged nodegroup stack to be deleted", "stackName", stackName)
	waiter := cloudformation.NewStackDeleteCompleteWaiter(m.clients.CFN())
	describeInput := &cloudformation.DescribeStacksInput{
		StackName: aws.String(stackName),
	}
	if err = waiter.Wait(context.TODO(), describeInput, infraStackDeletionTimeout); err != nil {
		return fmt.Errorf("failed to wait for unmanaged nodegroup stack deletion: %w", err)
	}
	slog.Info("deleted unmanaged nodegroup stack", "stackName", stackName)
	return nil
}

// getUnmanagedNodegroupStackName returns the CloudFormation stack name of the
// unmanaged nodegroup: the resource ID followed by "-unmanaged-nodegroup".
func (m *nodeManager) getUnmanagedNodegroupStackName() string {
	const suffix = "-unmanaged-nodegroup"
	return m.resourceID + suffix
}

// verifyASGAMI checks that every instance currently in the named Auto Scaling group
// was launched from amiId.
//
// It returns (true, nil) when all instances match (including when the group has no
// instances), (false, err) with one joined error per mismatching instance otherwise,
// and (false, err) when the group or its instances cannot be described.
func (m *nodeManager) verifyASGAMI(asgName string, amiId string) (bool, error) {
	slog.Info("verifying AMI for ASG", "amiId", amiId, "asgName", asgName)
	asgOut, err := m.clients.ASG().DescribeAutoScalingGroups(context.TODO(), &autoscaling.DescribeAutoScalingGroupsInput{
		AutoScalingGroupNames: []string{asgName},
	})
	if err != nil {
		// surface the API failure; returning (false, nil) would be reported as an AMI mismatch
		return false, fmt.Errorf("failed to describe autoscaling group %s: %w", asgName, err)
	}
	if len(asgOut.AutoScalingGroups) != 1 {
		return false, fmt.Errorf("autoscaling group not found: %s", asgName)
	}
	instances := asgOut.AutoScalingGroups[0].Instances
	if len(instances) == 0 {
		// DescribeInstances without instance IDs or filters describes every instance
		// visible to the caller, so never issue the call with an empty ID list
		slog.Warn("ASG has no instances, nothing to verify", "asgName", asgName)
		return true, nil
	}
	instanceIds := make([]string, 0, len(instances))
	for _, instance := range instances {
		instanceIds = append(instanceIds, aws.ToString(instance.InstanceId))
	}
	slog.Info("verifying AMI for instances", "instanceIds", instanceIds)
	ec2Out, err := m.clients.EC2().DescribeInstances(context.TODO(), &ec2.DescribeInstancesInput{
		InstanceIds: instanceIds,
	})
	if err != nil {
		return false, err
	}
	var errs []error
	for _, reservation := range ec2Out.Reservations {
		for _, instance := range reservation.Instances {
			if imageId := aws.ToString(instance.ImageId); imageId != amiId {
				errs = append(errs, fmt.Errorf("instance %s using wrong AMI: %s", aws.ToString(instance.InstanceId), imageId))
			}
		}
	}
	if len(errs) > 0 {
		return false, errors.Join(errs...)
	}
	slog.Info("ASG instances are using expected AMI", "amiId", amiId)
	return true, nil
}

// getCapacityReservation returns the first active capacity reservation for any of
// opts.InstanceTypes that has at least opts.Nodes instances available.
// When opts.TargetCapacityReservationId is set, only that reservation is considered.
// An error is returned when the reservations cannot be described or none qualifies.
func (m *nodeManager) getCapacityReservation(opts *deployerOptions) (*ec2types.CapacityReservation, error) {
	describeReservationsInput := ec2.DescribeCapacityReservationsInput{
		Filters: []ec2types.Filter{
			{
				Name:   aws.String("instance-type"),
				Values: opts.InstanceTypes,
			},
			{
				Name:   aws.String("state"),
				Values: []string{"active"},
			},
		},
	}
	if opts.TargetCapacityReservationId != "" {
		describeReservationsInput.CapacityReservationIds = []string{opts.TargetCapacityReservationId}
	}
	capacityReservations, err := m.clients.EC2().DescribeCapacityReservations(context.TODO(), &describeReservationsInput)
	if err != nil {
		return nil, fmt.Errorf("failed to describe capacity reservation: %v", err)
	}
	for _, cr := range capacityReservations.CapacityReservations {
		if aws.ToInt32(cr.AvailableInstanceCount) < int32(opts.Nodes) {
			continue
		}
		slog.Info("using capacity reservation", "id", aws.ToString(cr.CapacityReservationId))
		return &cr, nil
	}
	// report every instance type that was searched; indexing InstanceTypes[0] here
	// would panic when the list is empty
	return nil, fmt.Errorf("no capacity reservation found for instance types %v with %d nodes count", opts.InstanceTypes, opts.Nodes)
}

// getValidAvailabilityZonesFilter returns the availability zones that nodes must be
// restricted to.
//
// Without EFA no restriction is needed and an empty filter is returned. With EFA the
// result is a single zone, picked via availabilityZoneHintedOrder from the zones of
// infra.availabilityZones that offer opts.InstanceTypes.
func (m *nodeManager) getValidAvailabilityZonesFilter(opts *deployerOptions, infra *Infrastructure) ([]string, error) {
	if !opts.EFA {
		// no filter needed, leaves scheduling to EC2 provisioner
		return []string{}, nil
	}
	describeFilters := []ec2types.Filter{
		{
			Name:   aws.String("instance-type"),
			Values: opts.InstanceTypes,
		},
		{
			Name:   aws.String("location"),
			Values: infra.availabilityZones,
		},
	}
	describeResponse, err := m.clients.EC2().DescribeInstanceTypeOfferings(context.TODO(), &ec2.DescribeInstanceTypeOfferingsInput{
		Filters:      describeFilters,
		LocationType: ec2types.LocationTypeAvailabilityZone,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to describe instance type offerings: %v", err)
	}
	if describeResponse == nil || len(describeResponse.InstanceTypeOfferings) == 0 {
		// formatFilters renders the filter names; %v on the raw filters would print pointer addresses
		return nil, fmt.Errorf("no instance type offerings in current region with filters %s", formatFilters(describeFilters))
	}
	candidateAZs := make([]string, 0, len(describeResponse.InstanceTypeOfferings))
	for _, offering := range describeResponse.InstanceTypeOfferings {
		candidateAZs = append(candidateAZs, aws.ToString(offering.Location))
	}
	// EFA traffic cannot cross an AZ https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-limits
	targetAZ := availabilityZoneHintedOrder(candidateAZs)[0]
	slog.Info("found availability zone with offering", "az", targetAZ, "instanceTypes", opts.InstanceTypes)
	return []string{targetAZ}, nil
}

// formatFilters renders EC2 filters as "[{Name:<name> Values:<values>},...]" with the
// filter name dereferenced, for use in log and error messages.
func formatFilters(filters []ec2types.Filter) string {
	var out strings.Builder
	out.WriteString("[")
	for i, filter := range filters {
		if i > 0 {
			out.WriteString(",")
		}
		fmt.Fprintf(&out, "{Name:%s Values:%v}", aws.ToString(filter.Name), filter.Values)
	}
	out.WriteString("]")
	return out.String()
}

// getValidSubnets returns the IDs of the infrastructure subnets that nodes may use.
// Only private subnets are candidates with EFA; otherwise all subnets are.
// A non-empty availabilityZoneFilter further limits the result to those zones.
// An error is returned when the subnets cannot be described or none matches.
func (m *nodeManager) getValidSubnets(opts *deployerOptions, infra *Infrastructure, availabilityZoneFilter []string) ([]string, error) {
	candidates := infra.subnets
	if opts.EFA {
		// EFA requires private subnets
		candidates = func() []string { return infra.subnetsPrivate }
	}
	targetSubnets := candidates()

	var describeFilters []ec2types.Filter
	if len(availabilityZoneFilter) > 0 {
		describeFilters = append(describeFilters, ec2types.Filter{
			Name:   aws.String("availability-zone"),
			Values: availabilityZoneFilter,
		})
	}
	input := ec2.DescribeSubnetsInput{
		Filters:   describeFilters,
		SubnetIds: targetSubnets,
	}
	describeResponse, err := m.clients.EC2().DescribeSubnets(context.TODO(), &input)
	if err != nil {
		return nil, fmt.Errorf("failed to describe subnets %v: %v", targetSubnets, err)
	}
	if describeResponse == nil || len(describeResponse.Subnets) == 0 {
		return nil, fmt.Errorf("no subnet in %v satisfied filters: %s", targetSubnets, formatFilters(describeFilters))
	}
	subnetIds := make([]string, 0, len(describeResponse.Subnets))
	for i := range describeResponse.Subnets {
		subnetIds = append(subnetIds, *describeResponse.Subnets[i].SubnetId)
	}
	slog.Info("using subnets", "subnetIds", subnetIds)
	return subnetIds, nil
}

// getValidInstanceTypes filters desiredInstanceTypes down to those EC2 can describe.
// Types rejected with the "InvalidInstanceType" error code are dropped; any other
// describe failure aborts and is returned as an error.
func (m *nodeManager) getValidInstanceTypes(desiredInstanceTypes []string) ([]string, error) {
	var accepted []string
	for _, candidate := range desiredInstanceTypes {
		input := ec2.DescribeInstanceTypesInput{
			InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(candidate)},
		}
		_, err := m.clients.EC2().DescribeInstanceTypes(context.TODO(), &input)
		if err == nil {
			accepted = append(accepted, candidate)
			continue
		}
		var apierr smithy.APIError
		if !errors.As(err, &apierr) || apierr.ErrorCode() != "InvalidInstanceType" {
			return nil, fmt.Errorf("failed to describe instance type: %s: %v", candidate, err)
		}
		slog.Info("eliminating instance type as an option", "instanceType", candidate)
	}
	return accepted, nil
}

// getNetworkInterfaces builds the network interfaces for the unmanaged nodegroup's
// launch template.
//
// Without EFA, only the default primary interface is returned. With EFA, the first
// entry of opts.InstanceTypes is described and one EFA interface is generated per
// network card it supports. An error is returned when no instance type is configured,
// when the type cannot be described, or when it does not support EFA.
func (m *nodeManager) getNetworkInterfaces(opts *deployerOptions, securityGroups []string, subnetIDs []string) ([]templates.NetworkInterface, error) {
	if !opts.EFA {
		// create only the default primary network interface if not using EFA
		netiface, err := getNetworkInterface(opts, 0, subnetIDs, securityGroups)
		if err != nil {
			return nil, err
		}
		return []templates.NetworkInterface{netiface}, nil
	}
	// EFA option assumes a single instance type
	if len(opts.InstanceTypes) == 0 {
		return nil, fmt.Errorf("cannot generate efa interfaces because no instance type was provided")
	}
	instanceType := opts.InstanceTypes[0]
	ec2InstanceType := ec2types.InstanceType(instanceType)
	describeInstanceTypeOutput, err := m.clients.EC2().DescribeInstanceTypes(context.TODO(), &ec2.DescribeInstanceTypesInput{
		InstanceTypes: []ec2types.InstanceType{ec2InstanceType},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to describe instance type %s to get network interface support: %v", instanceType, err)
	}
	// guard the response shape so a sparse answer produces an error rather than a panic
	if len(describeInstanceTypeOutput.InstanceTypes) == 0 || describeInstanceTypeOutput.InstanceTypes[0].NetworkInfo == nil {
		return nil, fmt.Errorf("failed to describe instance type %s to get network interface support: no network info returned", instanceType)
	}
	networkInfo := describeInstanceTypeOutput.InstanceTypes[0].NetworkInfo
	if !aws.ToBool(networkInfo.EfaSupported) {
		// fail early for better transparency
		return nil, fmt.Errorf("cannot generate efa interfaces for instance type %s because it does not support efa", instanceType)
	}

	// 1 EFA interface is supported per network card
	// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-limits
	numEfaInterfaces := int(aws.ToInt32(networkInfo.MaximumNetworkCards))
	var networkInterfaces []templates.NetworkInterface
	for cardIndex := range numEfaInterfaces {
		efaInterface, err := getNetworkInterface(opts, cardIndex, subnetIDs, securityGroups)
		if err != nil {
			return nil, err
		}
		networkInterfaces = append(networkInterfaces, efaInterface)
	}
	return networkInterfaces, nil
}

// getNetworkInterface describes one network interface for the given network card.
//
// With EFA the interface has type "efa" and is pinned to the first entry of subnetIds
// (an error is returned when subnetIds is empty). Otherwise it is a standard
// "interface" with no subnet assigned.
func getNetworkInterface(opts *deployerOptions, networkCardIndex int, subnetIds []string, securityGroups []string) (templates.NetworkInterface, error) {
	// simplification that works with currently supported network interfaces based on
	// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html#network-cards
	// and
	// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/create-efa.html#efa-launch
	var deviceIndex int
	if networkCardIndex > 0 {
		deviceIndex = 1
	}
	netiface := templates.NetworkInterface{
		DeviceIndex:         &deviceIndex,
		NetworkCardIndex:    &networkCardIndex,
		Groups:              securityGroups,
		DeleteOnTermination: aws.Bool(true),
	}
	if !opts.EFA {
		// no need to assign a subnet here, more restrictive than it is helpful
		netiface.InterfaceType = aws.String("interface")
		netiface.Description = aws.String("Standard network interface")
		return netiface, nil
	}
	if len(subnetIds) == 0 {
		return templates.NetworkInterface{}, fmt.Errorf("EFA interfaces require a subnet but none were provided")
	}
	netiface.SubnetId = &subnetIds[0]
	netiface.InterfaceType = aws.String("efa")
	netiface.Description = aws.String("EFA-enabled network interface")
	return netiface, nil
}


================================================
FILE: internal/deployers/eksapi/static_cluster.go
================================================
package eksapi

import (
	"bytes"
	"context"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
	v1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"sigs.k8s.io/controller-runtime/pkg/client"
	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
	"sigs.k8s.io/yaml"
)

// StaticClusterManager manages node capacity for a pre-existing ("static") cluster.
// Its clients are unset until SetK8sClient is called.
type StaticClusterManager struct {
	// k8sClient is the typed Kubernetes clientset built from the kubeconfig.
	k8sClient *kubernetes.Clientset
	// karpenterClient is the controller-runtime client used for Karpenter NodePool objects.
	karpenterClient client.Client
	// options holds the deployer options, including the static cluster name and instance types.
	options *deployerOptions
}

// NodeCondition is a predicate evaluated over a list of nodes.
// NOTE(review): presumably used as a wait condition; the callers are outside this excerpt.
type NodeCondition func(nodes []corev1.Node) bool

// NewStaticClusterManager returns a StaticClusterManager holding the given options.
// The Kubernetes and Karpenter clients are left unset.
func NewStaticClusterManager(options *deployerOptions) *StaticClusterManager {
	manager := new(StaticClusterManager)
	manager.options = options
	return manager
}

// SetK8sClient builds the Kubernetes clientset and the Karpenter client from the
// kubeconfig file at the given path and stores them on the manager.
// Any failure is logged and then raised as a panic.
func (s *StaticClusterManager) SetK8sClient(kubeconfig string) {
	// abort logs the failure and panics with the underlying error
	abort := func(msg string, err error) {
		slog.Error(msg, "error", err)
		panic(err)
	}

	cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
	if err != nil {
		abort("failed to build kubeconfig", err)
	}

	if s.k8sClient, err = kubernetes.NewForConfig(cfg); err != nil {
		abort("failed to create Kubernetes client", err)
	}

	if s.karpenterClient, err = client.New(cfg, client.Options{}); err != nil {
		abort("failed to create Karpenter client", err)
	}
}

// EnsureNodeForStaticCluster creates the node pool (when applicable) and then
// deploys busybox and waits for the requested number of nodes to become ready.
// The second step is skipped if the first one fails.
func (s *StaticClusterManager) EnsureNodeForStaticCluster() error {
	err := s.CreateNodePool()
	if err == nil {
		err = s.DeployBusyboxAndWaitForNodes()
	}
	return err
}

// TearDownNodeForStaticCluster removes the busybox deployment, waits for the
// nodes to go away, and then deletes the node pool. The node pool is left in
// place if the first step returns an error.
func (s *StaticClusterManager) TearDownNodeForStaticCluster() error {
	err := s.TearDownBusyboxAndNodes()
	if err == nil {
		err = s.TearDownNodePool()
	}
	return err
}

// CreateNodePool creates the NVIDIA Karpenter NodePool for the static cluster
// if it does not exist yet.
//
// The decision is driven entirely by the static cluster name, matched
// case-insensitively: the pool is only created when the name contains
// "nvidia", and the node architecture is taken from an "x86_64" (amd64) or
// "aarch64" (arm64) substring. A name containing "nvidia" but neither
// architecture marker is an error. An already-existing NodePool of the same
// name is left untouched.
func (s *StaticClusterManager) CreateNodePool() error {
	// Lower-case once so the vendor and architecture checks use the same rules.
	clusterName := strings.ToLower(s.options.StaticClusterName)
	if !strings.Contains(clusterName, "nvidia") {
		slog.Info("NVIDIA not in cluster name, skipping node pool creation")
		return nil
	}

	var arch string
	switch {
	case strings.Contains(clusterName, "x86_64"):
		arch = "amd64"
	case strings.Contains(clusterName, "aarch64"):
		arch = "arm64"
	default:
		return fmt.Errorf("unable to determine architecture from cluster name %q", s.options.StaticClusterName)
	}

	var buf bytes.Buffer
	if err := templates.NvidiaStaticClusterNodepool.Execute(&buf, templates.NvidiaStaticClusterNodepoolTemplateData{
		Arch:          arch,
		InstanceTypes: s.options.InstanceTypes,
	}); err != nil {
		return fmt.Errorf("failed to render nodepool template: %w", err)
	}

	nodePool := &karpv1.NodePool{}
	if err := yaml.Unmarshal(buf.Bytes(), nodePool); err != nil {
		return fmt.Errorf("failed to unmarshal nodepool YAML: %w", err)
	}

	ctx := context.TODO()
	existing := &karpv1.NodePool{}
	err := s.karpenterClient.Get(ctx, client.ObjectKey{Name: nodePool.Name}, existing)
	switch {
	case err == nil:
		// Already present: keep whatever is there rather than overwrite it.
		slog.Info("NodePool already exists, skipping creation", "name", nodePool.Name)
		return nil
	case !errors.IsNotFound(err):
		return fmt.Errorf("failed to look up nodepool %q: %w", nodePool.Name, err)
	}

	if err := s.karpenterClient.Create(ctx, nodePool); err != nil {
		return fmt.Errorf("failed to create nodepool %q: %w", nodePool.Name, err)
	}
	return nil
}

// TearDownNodePool deletes the Karpenter NodePool named "nvidia".
//
// It is a no-op when the static cluster name does not contain "nvidia"
// (case-insensitive), and a missing NodePool is treated as success. Any other
// delete failure is returned wrapped, so callers can still inspect the
// underlying API error.
func (s *StaticClusterManager) TearDownNodePool() error {
	if !strings.Contains(strings.ToLower(s.options.StaticClusterName), "nvidia") {
		slog.Info("NVIDIA not in cluster name, skipping node pool deletion")
		return nil
	}

	// Only the name is needed to address the object for deletion.
	nodePool := &karpv1.NodePool{
		ObjectMeta: metav1.ObjectMeta{Name: "nvidia"},
	}

	err := s.karpenterClient.Delete(context.TODO(), nodePool)
	switch {
	case err == nil:
		slog.Info("NodePool deleted successfully")
		return nil
	case errors.IsNotFound(err):
		slog.Info("NodePool 'nvidia' not found, skipping deletion")
		return nil
	default:
		return fmt.Errorf("failed to delete nodepool: %w", err)
	}
}

// DeployBusyboxAndWaitForNodes renders the busybox Deployment template with the
// requested node count, creates it in the "default" namespace, and then polls
// for up to 15 minutes until at least that many nodes report ready.
//
// A Deployment left behind by an earlier run (AlreadyExists) is reused instead
// of failing, so the operation can be retried. Any other error is returned
// wrapped with context.
func (s *StaticClusterManager) DeployBusyboxAndWaitForNodes() error {
	slog.Info("deploying busybox pods")

	var buf bytes.Buffer
	if err := templates.BusyboxDeployment.Execute(&buf, templates.BusyboxDeploymentTemplateData{
		Nodes: s.options.Nodes,
	}); err != nil {
		return fmt.Errorf("failed to render deployment template: %w", err)
	}

	deployment := &v1.Deployment{}
	if err := yaml.Unmarshal(buf.Bytes(), deployment); err != nil {
		return fmt.Errorf("failed to unmarshal deployment: %w", err)
	}

	result, err := s.k8sClient.AppsV1().Deployments("default").Create(context.TODO(), deployment, metav1.CreateOptions{})
	switch {
	case err == nil:
		slog.Info("created deployment", "name", result.GetObjectMeta().GetName())
	case errors.IsAlreadyExists(err):
		// A previous run did not clean up; keep going and wait on the nodes.
		slog.Info("deployment already exists, reusing it", "name", deployment.GetName())
	default:
		return fmt.Errorf("failed to create deployment: %w", err)
	}

	return waitForNodeCondition(s.k8sClient, func(nodes []corev1.Node) bool {
		readyNodes := 0
		// Index into the slice so isNodeReady sees the element itself, not a copy.
		for i := range nodes {
			if isNodeReady(&nodes[i]) {
				readyNodes++
			}
		}
		slog.Info("waiting for nodes", "readyNodes", readyNodes, "expectedNodes", s.options.Nodes)
		return readyNodes >= s.options.Nodes
	}, 15*time.Minute, "Waiting for nodes to be ready")
}

// TearDownBusyboxAndNodes deletes the "busybox-deployment" Deployment from the
// "default" namespace and then polls for up to 30 minutes until the cluster
// reports zero nodes.
//
// A Deployment that is already gone is not an error: the wait still runs, so a
// teardown following a partial or failed deploy can complete and the caller can
// go on to remove the node pool.
func (s *StaticClusterManager) TearDownBusyboxAndNodes() error {
	slog.Info("cleaning up busybox pods")

	err := s.k8sClient.AppsV1().Deployments("default").Delete(context.TODO(), "busybox-deployment", metav1.DeleteOptions{})
	switch {
	case err == nil:
		slog.Info("busybox deployment deleted successfully")
	case errors.IsNotFound(err):
		slog.Info("busybox deployment not found, skipping deletion")
	default:
		return fmt.Errorf("failed to delete deployment: %w", err)
	}

	return waitForNodeCondition(s.k8sClient, func(nodes []corev1.Node) bool {
		return len(nodes) == 0
	}, 30*time.Minute, "Waiting for nodes to be removed")
}

// waitForNodeCondition lists all nodes every 15 seconds, starting immediately,
// until condition returns true for the listed nodes or timeout elapses.
//
// description is used as the log message emitted after every poll, alongside
// the current node count. A failed List call ends the wait and is returned.
func waitForNodeCondition(clientset *kubernetes.Clientset, condition NodeCondition, timeout time.Duration, description string) error {
	const pollInterval = 15 * time.Second

	deadlineCtx, stop := context.WithTimeout(context.Background(), timeout)
	defer stop()

	poll := func(pollCtx context.Context) (bool, error) {
		nodeList, listErr := clientset.CoreV1().Nodes().List(pollCtx, metav1.ListOptions{})
		if listErr != nil {
			return false, listErr
		}

		satisfied := condition(nodeList.Items)
		slog.Info(description, "nodeCount", len(nodeList.Items))
		return satisfied, nil
	}

	return wait.PollUntilContextTimeout(deadlineCtx, pollInterval, timeout, true, poll)
}


================================================
FILE: internal/deployers/eksapi/templates/auth_map_role.yaml.template
================================================

- username: system:node:{{"{{"}}{{.NodeNameStrategy}}{{"}}"}} 
  groups:
    - system:bootstrappers
    - system:nodes
  rolearn: {{.Rolearn}}

================================================
FILE: internal/deployers/eksapi/templates/busybox_deployment.yaml.template
================================================
# Deployment of idle busybox pods. The replica count is filled in from the
# Nodes field of the template data, and the anti-affinity rule below forces
# every replica onto a different node.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: busybox-deployment
spec:
  replicas: {{.Nodes}}
  selector:
    matchLabels:
      app: busybox
  template:
    metadata:
      labels:
        app: busybox
    spec:
      affinity:
        # Required (not preferred) anti-affinity keyed on the hostname: two
        # pods labelled app=busybox may not be scheduled onto the same node.
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: app
                operator: In
                values:
                - busybox
            topologyKey: "kubernetes.io/hostname"
      containers:
      - name: busybox
        image: busybox
        # Keep the container alive without doing any work.
        command: ["sleep", "infinity"]


================================================
FILE: internal/deployers/eksapi/templates/cloudwatch-infra.yaml.template
================================================
AWSTemplateFormatVersion: '2010-09-09'
Description: kubetest2-eksapi CloudWatch using Pod Identity

Parameters:
  ClusterUUID:
    Description: UUID portion of the cluster name
    Type: String

Resources:
  # IAM role assumed by the CloudWatch agent through EKS Pod Identity.
  CloudWatchRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub "cloudwatch-role-${ClusterUUID}"
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Sid: AllowEksAuthToAssumeRoleForPodIdentity
            Effect: Allow
            Principal:
              Service:
                - pods.eks.amazonaws.com
                - beta.pods.eks.aws.internal
            Action:
              - sts:AssumeRole
              - sts:TagSession
      ManagedPolicyArns:
        # Build the ARN from the stack's partition rather than assuming "aws".
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/CloudWatchAgentServerPolicy"
      Description: Role for CloudWatch Agent in EKS cluster

Outputs:
  CloudWatchRoleArn:
    Description: ARN of the CloudWatch IAM role
    Value: !GetAtt CloudWatchRole.Arn
    Export:
      Name: !Sub "${AWS::StackName}::CloudWatchRoleArn"


================================================
FILE: internal/deployers/eksapi/templates/cloudwatch_agent_infra.yaml
================================================
apiVersion: v1
kind: Namespace
metadata:
  name: amazon-cloudwatch
  labels:
    name: amazon-cloudwatch

---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cwagent
  namespace: amazon-cloudwatch

---
# ClusterRole for cwagent
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cwagent-role
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]

---
# ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cwagent-role-binding
subjects:
  - kind: ServiceAccount
    name: cwagent
    namespace: amazon-cloudwatch
roleRef:
  kind: ClusterRole
  name: cwagent-role
  apiGroup: rbac.authorization.k8s.io

================================================
FILE: internal/deployers/eksapi/templates/infra.yaml
================================================
---
AWSTemplateFormatVersion: "2010-09-09"
Description: "kubetest2-eksapi infrastructure"

Parameters:
  VpcBlock:
    Type: String
    Default: 192.168.0.0/16
    Description: The CIDR range for the VPC. This should be a valid private (RFC 1918) CIDR range.

  PublicSubnet01Block:
    Type: String
    Default: 192.168.0.0/18
    Description: CidrBlock for public subnet 01 within the VPC

  PublicSubnet02Block:
    Type: String
    Default: 192.168.64.0/18
    Description: CidrBlock for public subnet 02 within the VPC

  PrivateSubnet01Block:
    Type: String
    Default: 192.168.128.0/18
    Description: CidrBlock for private subnet 01 within the VPC

  PrivateSubnet02Block:
    Type: String
    Default: 192.168.192.0/18
    Description: CidrBlock for private subnet 02 within the VPC

  AdditionalClusterRoleServicePrincipal:
    Type: String
    Default: ""
    Description: Additional service principal with sts:AssumeRole permissions on the ClusterRole

  ResourceId:
    Type: String

  Subnet01AZ:
    Type: String

  Subnet02AZ:
    Type: String

  AutoMode:
    Type: String
    AllowedValues:
      - "true"
      - "false"
    Default: "false"

Metadata:
  AWS::CloudFormation::Interface:
    ParameterGroups:
      - Label:
          default: "Worker Network Configuration"
        Parameters:
          - VpcBlock
          - PublicSubnet01Block
          - PublicSubnet02Block
          - PrivateSubnet01Block
          - PrivateSubnet02Block

Conditions:
  HasAdditionalClusterRoleServicePrincipal:
    Fn::Not:
      - Fn::Equals:
        - ""
        - !Ref AdditionalClusterRoleServicePrincipal

  IsAutoMode: !Equals [!Ref AutoMode, "true"]

Resources:
  #
  # VPC (IPv4 block plus an Amazon-provided IPv6 block)
  #
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !Ref VpcBlock
      EnableDnsHostnames: true
      EnableDnsSupport: true
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/VPC"
  IPv6CidrBlock:
    Type: AWS::EC2::VPCCidrBlock
    Properties:
      AmazonProvidedIpv6CidrBlock: true
      VpcId:
        Ref: VPC

  #
  # Internet gateways (ipv4, and egress for ipv6)
  #
  InternetGateway:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/InternetGateway"
  VPCGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      InternetGatewayId:
        Ref: InternetGateway
      VpcId:
        Ref: VPC
  EgressOnlyInternetGateway:
    Type: AWS::EC2::EgressOnlyInternetGateway
    Properties:
      VpcId:
        Ref: VPC

  #
  # Nat gateways
  #
  NATGateway01:
    Type: AWS::EC2::NatGateway
    DependsOn:
      - NatGatewayEIP1
      - SubnetPublic01
      - VPCGatewayAttachment
    Properties:
      AllocationId:
        Fn::GetAtt:
          - NatGatewayEIP1
          - AllocationId
      SubnetId:
        Ref: SubnetPublic01
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/NATGateway01"
  NATGateway02:
    Type: AWS::EC2::NatGateway
    DependsOn:
      - NatGatewayEIP2
      - SubnetPublic02
      - VPCGatewayAttachment
    Properties:
      AllocationId:
        Fn::GetAtt:
          - NatGatewayEIP2
          - AllocationId
      SubnetId:
        Ref: SubnetPublic02
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/NATGateway02"
  #
  # Nat Gateway IPs
  #
  NatGatewayEIP1:
    Type: AWS::EC2::EIP
    DependsOn:
      - VPCGatewayAttachment
    Properties:
      Domain: vpc
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/NatGatewayEIP1"
  NatGatewayEIP2:
    Type: AWS::EC2::EIP
    DependsOn:
      - VPCGatewayAttachment
    Properties:
      Domain: vpc
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/NatGatewayEIP2"

  #
  # Routing - public subnets
  #
  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId:
        Ref: VPC
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/PublicRouteTable"
  PublicSubnetDefaultRoute:
    Type: AWS::EC2::Route
    DependsOn:
      - InternetGateway
      - VPCGatewayAttachment
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId:
        Ref: InternetGateway
      RouteTableId:
        Ref: PublicRouteTable
  PublicSubnetDefaultIpv6Route:
    Type: AWS::EC2::Route
    DependsOn:
      - InternetGateway
      - VPCGatewayAttachment
    Properties:
      DestinationIpv6CidrBlock: ::/0
      GatewayId:
        Ref: InternetGateway
      RouteTableId:
        Ref: PublicRouteTable

  #
  # Routing - private subnets
  # Route tables
  #
  PrivateRouteTable01:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId:
        Ref: VPC
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/PrivateRouteTable01"
  PrivateRouteTable02:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId:
        Ref: VPC
      Tags:
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/PrivateRouteTable02"
  #
  # Nat IPv4 Private Routes
  #
  PrivateSubnetDefaultRoute01:
    Type: AWS::EC2::Route
    DependsOn:
      - VPCGatewayAttachment
      - NATGateway01
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId:
        Ref: NATGateway01
      RouteTableId:
        Ref: PrivateRouteTable01
  PrivateSubnetDefaultRoute02:
    Type: AWS::EC2::Route
    DependsOn:
      - VPCGatewayAttachment
      - NATGateway02
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId:
        Ref: NATGateway02
      RouteTableId:
        Ref: PrivateRouteTable02

  #
  # Egress-only internet gateway: IPv6 private routes
  #
  PrivateSubnetDefaultIpv6Route01:
    Type: AWS::EC2::Route
    Properties:
      DestinationIpv6CidrBlock: ::/0
      EgressOnlyInternetGatewayId:
        Ref: EgressOnlyInternetGateway
      RouteTableId:
        Ref: PrivateRouteTable01
  PrivateSubnetDefaultIpv6Route02:
    Type: AWS::EC2::Route
    Properties:
      DestinationIpv6CidrBlock: ::/0
      EgressOnlyInternetGatewayId:
        Ref: EgressOnlyInternetGateway
      RouteTableId:
        Ref: PrivateRouteTable02

  #
  # Public subnets
  #
  SubnetPublic01:
    Type: AWS::EC2::Subnet
    Metadata:
      Comment: Subnet 01
    DependsOn: IPv6CidrBlock
    Properties:
      AvailabilityZone:
        Ref: Subnet01AZ
      CidrBlock:
        Ref: PublicSubnet01Block
      Ipv6CidrBlock:
        !Select [0, !Cidr [!Select [0, !GetAtt VPC.Ipv6CidrBlocks], 8, 64]]
      AssignIpv6AddressOnCreation: true
      MapPublicIpOnLaunch: true
      Tags:
        - Key: kubernetes.io/role/elb
          Value: "1"
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/SubnetPublic01"
      VpcId:
        Ref: VPC
  SubnetPublic02:
    Type: AWS::EC2::Subnet
    DependsOn: IPv6CidrBlock
    Properties:
      AvailabilityZone:
        Ref: Subnet02AZ
      CidrBlock:
        Ref: PublicSubnet02Block
      Ipv6CidrBlock:
        !Select [1, !Cidr [!Select [0, !GetAtt VPC.Ipv6CidrBlocks], 8, 64]]
      AssignIpv6AddressOnCreation: true
      MapPublicIpOnLaunch: true
      Tags:
        - Key: kubernetes.io/role/elb
          Value: "1"
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/SubnetPublic02"
      VpcId:
        Ref: VPC

  #
  # Public route table associations
  #
  RouteTableAssociationPublic01:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId:
        Ref: PublicRouteTable
      SubnetId:
        Ref: SubnetPublic01
  RouteTableAssociationPublic02:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId:
        Ref: PublicRouteTable
      SubnetId:
        Ref: SubnetPublic02

  #
  # Private subnets
  #
  SubnetPrivate01:
    Type: AWS::EC2::Subnet
    DependsOn: IPv6CidrBlock
    Properties:
      AvailabilityZone:
        Ref: Subnet01AZ
      CidrBlock:
        Ref: PrivateSubnet01Block
      Ipv6CidrBlock:
        !Select [2, !Cidr [!Select [0, !GetAtt VPC.Ipv6CidrBlocks], 8, 64]]
      AssignIpv6AddressOnCreation: true
      Tags:
        - Key: kubernetes.io/role/internal-elb
          Value: "1"
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/SubnetPrivate01"
      VpcId:
        Ref: VPC
  SubnetPrivate02:
    Type: AWS::EC2::Subnet
    DependsOn: IPv6CidrBlock
    Properties:
      AvailabilityZone:
        Ref: Subnet02AZ
      CidrBlock:
        Ref: PrivateSubnet02Block
      Ipv6CidrBlock:
        !Select [3, !Cidr [!Select [0, !GetAtt VPC.Ipv6CidrBlocks], 8, 64]]
      AssignIpv6AddressOnCreation: true
      Tags:
        - Key: kubernetes.io/role/internal-elb
          Value: "1"
        - Key: Name
          Value:
            Fn::Sub: "${AWS::StackName}/SubnetPrivate02"
      VpcId:
        Ref: VPC

  #
  # Private route table associations
  #
  RouteTableAssociationPrivate01:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId:
        Ref: PrivateRouteTable01
      SubnetId:
        Ref: SubnetPrivate01
  RouteTableAssociationPrivate02:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId:
        Ref: PrivateRouteTable02
      SubnetId:
        Ref: SubnetPrivate02

  ClusterRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Action:
            - "sts:AssumeRole"
            - "sts:TagSession"
            Effect: Allow
            Principal:
              Service:
                Fn::If:
                  - HasAdditionalClusterRoleServicePrincipal
                  - - "eks.amazonaws.com"
                    - !Ref AdditionalClusterRoleServicePrincipal
                  - - "eks.amazonaws.com"
      ManagedPolicyArns:
        - !Join
          - ""
          - - "arn:"
            - !Ref "AWS::Partition"
            - ":iam::aws:policy/AmazonEKSClusterPolicy"
        - !If
          - IsAutoMode
          - !Join
            - ""
            - - "arn:"
              - !Ref "AWS::Partition"
              - ":iam::aws:policy/AmazonEKSBlockStoragePolicy"
          - !Ref "AWS::NoValue"
        - !If
          - IsAutoMode
          - !Join
            - ""
            - - "arn:"
              - !Ref "AWS::Partition"
              - ":iam::aws:policy/AmazonEKSComputePolicy"
          - !Ref "AWS::NoValue"
        - !If
          - IsAutoMode
          - !Join
            - ""
            - - "arn:"
              - !Ref "AWS::Partition"
              - ":iam::aws:policy/AmazonEKSLoadBalancingPolicy"
          - !Ref "AWS::NoValue"
        - !If
          - IsAutoMode
          - !Join
            - ""
            - - "arn:"
              - !Ref "AWS::Partition"
              - ":iam::aws:policy/AmazonEKSNetworkingPolicy"
          - !Ref "AWS::NoValue"

  NodeRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Action: "sts:AssumeRole"
            Effect: Allow
            Principal:
              Service: ec2.amazonaws.com
      ManagedPolicyArns:
        - !Join
          - ""
          - - "arn:"
            - !Ref "AWS::Partition"
            - ":iam::aws:policy/AmazonEKSWorkerNodePolicy"
        - !Join
          - ""
          - - "arn:"
            - !Ref "AWS::Partition"
            - ":iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
        - !Join
          - ""
          - - "arn:"
            - !Ref "AWS::Partition"
            - ":iam::aws:policy/AmazonEKS_CNI_Policy"
        - !Join
          - ""
          - - "arn:"
            - !Ref "AWS::Partition"
            - ":iam::aws:policy/AmazonSSMManagedInstanceCore"
        - !Join
          - ""
          - - "arn:"
            - !Ref "AWS::Partition"
            - ":iam::aws:policy/AmazonS3FullAccess"

  VPCCNIIPv6Policy:
    Type: AWS::IAM::Policy
    Properties:
      PolicyDocument: |
        {
          "Version": "2012-10-17",
          "Statement": [
              {
                  "Effect": "Allow",
                  "Action": [
                      "ec2:AssignIpv6Addresses",
                      "ec2:DescribeInstances",
                      "ec2:DescribeTags",
                      "ec2:DescribeNetworkInterfaces",
                      "ec2:DescribeInstanceTypes"
                  ],
                  "Resource": "*"
              },
              {
                  "Effect": "Allow",
                  "Action": [
                      "ec2:CreateTags"
                  ],
                  "Resource": [
                      "arn:*:ec2:*:*:network-interface/*"
                  ]
              }
          ]
        }
      PolicyName: AmazonEKS_CNI_IPv6_Policy
      Roles:
        - !Ref NodeRole

Outputs:
  SubnetsPrivate:
    Value:
      Fn::Join:
        - ","
        - - Ref: SubnetPrivate01
          - Ref: SubnetPrivate02
    Export:
      Name:
        Fn::Sub: "${AWS::StackName}::SubnetsPrivate"

  SubnetsPublic:
    Value:
      Fn::Join:
        - ","
        - - Ref: SubnetPublic01
          - Ref: SubnetPublic02
    Export:
      Name:
        Fn::Sub: "${AWS::StackName}::SubnetsPublic"

  VPC:
    Value:
      Ref: VPC
    Export:
      Name:
        Fn::Sub: "${AWS::StackName}::VPC"

  ClusterRole:
    Value:
      Fn::Join:
        - ""
        - - "arn:"
          - !Ref "AWS::Partition"
          - ":iam::"
          - !Ref "AWS::AccountId"
          - ":role/"
          - !Ref ClusterRole
    Export:
      Name:
        Fn::Sub: "${AWS::StackName}::ClusterRole"

  NodeRole:
    Value:
      Fn::Join:
        - ""
        - - "arn:"
          - !Ref "AWS::Partition"
          - ":iam::"
          - !Ref "AWS::AccountId"
          - ":role/"
          - !Ref NodeRole
    Export:
      Name:
        Fn::Sub: "${AWS::StackName}::NodeRole"


================================================
FILE: internal/deployers/eksapi/templates/nvidia_static_cluster_nodepool.yaml.template
================================================
# Karpenter NodePool for NVIDIA static clusters. The architecture and the list
# of allowed instance types are filled in from the template data.
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  labels:
    app.kubernetes.io/managed-by: eks
  # The deployer deletes this NodePool by the literal name "nvidia"; keep the
  # two in sync.
  name: nvidia
spec:
  weight: 50
  template:
    spec:
      requirements:
        - key: kubernetes.io/arch
          operator: In
          values: [{{.Arch}}]
        - key: kubernetes.io/os
          operator: In
          values: ["linux"]
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["on-demand"]
        - key: node.kubernetes.io/instance-type
          operator: In
          values: 
            {{- range .InstanceTypes}}
            - "{{.}}"
            {{- end}}
        # Only instance types that carry a GPU-count label qualify.
        - key: eks.amazonaws.com/instance-gpu-count
          operator: Exists
      nodeClassRef:
        group: eks.amazonaws.com
        kind: NodeClass
        name: default
      # 336h = 14 days.
      expireAfter: 336h 
  disruption:
    budgets:
      - nodes: 10%
    consolidationPolicy: WhenEmpty
    consolidateAfter: 600s


================================================
FILE: internal/deployers/eksapi/templates/templates.go
================================================
package templates

import (
	_ "embed"
	"text/template"
)

// Infrastructure holds the contents of infra.yaml, the CloudFormation template
// describing the VPC, subnets, gateways, routes and IAM roles.
//
//go:embed infra.yaml
var Infrastructure string

// CloudWatchAgentRbac holds the contents of cloudwatch_agent_infra.yaml: the
// amazon-cloudwatch Namespace, the cwagent ServiceAccount, and the ClusterRole
// and ClusterRoleBinding for that service account.
//
//go:embed cloudwatch_agent_infra.yaml
var CloudWatchAgentRbac []byte

// UnmanagedNodegroup is the parsed unmanaged-nodegroup.yaml.template, executed
// with UnmanagedNodegroupTemplateData. Parsing happens at package
// initialisation, and template.Must panics if the embedded text is invalid.
var (
	//go:embed unmanaged-nodegroup.yaml.template
	unmanagedNodegroupTemplate string
	UnmanagedNodegroup         = template.Must(template.New("unmanagedNodegroup").Parse(unmanagedNodegroupTemplate))
)

// CloudWatchInfra holds the raw contents of cloudwatch-infra.yaml.template, the
// CloudFormation template for the CloudWatch IAM role. Unlike the other
// ".template" files in this package it is kept as a plain string and is not
// parsed with text/template here.
//
//go:embed cloudwatch-infra.yaml.template
var CloudWatchInfra string

// NetworkInterface describes one entry of the launch template's
// NetworkInterfaces list in the unmanaged nodegroup template. The scalar
// fields are pointers so that the template can leave out any property whose
// field is nil.
type NetworkInterface struct {
	Description         *string
	NetworkCardIndex    *int
	DeviceIndex         *int
	InterfaceType       *string
	Groups              []string
	SubnetId            *string
	DeleteOnTermination *bool
}

// UnmanagedNodegroupTemplateData is the input for the UnmanagedNodegroup
// template. The template indexes InstanceTypes[0] for the launch template's
// instance type, so InstanceTypes must not be empty; every entry is also
// emitted as an Auto Scaling group override. When NetworkInterfaces is empty
// the NetworkInterfaces section is omitted.
type UnmanagedNodegroupTemplateData struct {
	NetworkInterfaces []NetworkInterface
	KubernetesVersion string
	InstanceTypes     []string
}

// BusyboxDeploymentTemplateData is the input for the BusyboxDeployment
// template; Nodes becomes the Deployment's replica count.
type BusyboxDeploymentTemplateData struct {
	Nodes int
}

// NvidiaStaticClusterNodepoolTemplateData is the input for the
// NvidiaStaticClusterNodepool template: Arch is the single allowed value of the
// kubernetes.io/arch requirement and InstanceTypes lists the allowed values of
// the node.kubernetes.io/instance-type requirement.
type NvidiaStaticClusterNodepoolTemplateData struct {
	Arch          string
	InstanceTypes []string
}

// Parsed templates for node user data (bootstrap.sh, nodeadm, Bottlerocket) and
// for the static-cluster resources (busybox Deployment, NVIDIA NodePool). Each
// exported template is built at package initialisation from the embedded file
// declared directly above it; template.Must panics if that text fails to parse.
var (
	//go:embed userdata_bootstrap.sh.mimepart.template
	userDataBootstrapShTemplate string
	UserDataBootstrapSh         = template.Must(template.New("userDataBootstrapSh").Parse(userDataBootstrapShTemplate))

	//go:embed userdata_nodeadm.yaml.mimepart.template
	userDataNodeadmTemplate string
	UserDataNodeadm         = template.Must(template.New("userDataNodeadm").Parse(userDataNodeadmTemplate))

	//go:embed userdata_bottlerocket.toml.template
	userDataBottlerocketTemplate string
	UserDataBottlerocket         = template.Must(template.New("userDataBottlerocket").Parse(userDataBottlerocketTemplate))

	//go:embed busybox_deployment.yaml.template
	busyboxDeploymentTemplate string
	BusyboxDeployment         = template.Must(template.New("busyboxDeployment").Parse(busyboxDeploymentTemplate))

	//go:embed nvidia_static_cluster_nodepool.yaml.template
	nvidiaStaticClusterNodepoolTemplate string
	NvidiaStaticClusterNodepool         = template.Must(template.New("nvidiaStaticClusterNodepool").Parse(nvidiaStaticClusterNodepoolTemplate))
)

// UserDataTemplateData is the shared input for the three user data templates.
// Not every template reads every field: CIDR and the two feature gate maps are
// referenced only by the nodeadm template, and ClusterDNSIP only by the
// Bottlerocket template. Empty feature gate maps and an empty ClusterDNSIP
// cause the corresponding sections to be left out of the rendered output.
type UserDataTemplateData struct {
	Name                 string
	CertificateAuthority string
	CIDR                 string
	ClusterDNSIP         string
	APIServerEndpoint    string
	KubeletFeatureGates  map[string]bool
	NodeadmFeatureGates  map[string]bool
}

// AuthMapRole is the parsed auth_map_role.yaml.template, executed with
// AuthMapRoleTemplateData. Parsing happens at package initialisation, and
// template.Must panics if the embedded text is invalid.
var (
	//go:embed auth_map_role.yaml.template
	authMapRoleTemplate string
	AuthMapRole         = template.Must(template.New("authMapRole").Parse(authMapRoleTemplate))
)

// AuthMapRoleTemplateData is the input for the AuthMapRole template.
// NodeNameStrategy is rendered wrapped in literal double braces after the
// "system:node:" prefix of the username, and Rolearn is rendered as the
// rolearn value.
type AuthMapRoleTemplateData struct {
	NodeNameStrategy string
	Rolearn          string
}


================================================
FILE: internal/deployers/eksapi/templates/templates_test.go
================================================
package templates

import (
	"bytes"
	"testing"
)

func Test_UnmanagedNodegroup(t *testing.T) {
	buf := bytes.Buffer{}
	err := UnmanagedNodegroup.Execute(&buf, UnmanagedNodegroupTemplateData{
		KubernetesVersion: "1.28",
		InstanceTypes: []string{
			"t2.medium",
			"t2.large",
			"t2.xlarge",
		},
	})
	if err != nil {
		t.Error(err)
	}
}


================================================
FILE: internal/deployers/eksapi/templates/unmanaged-nodegroup.yaml.template
================================================
---
AWSTemplateFormatVersion: '2010-09-09'
Description: 'kubetest2-eksapi unmanaged nodegroup'

Parameters:
  ResourceId:
    Description: Unique identifier for this kubetest2-eksapi execution.
    Type: String

  VpcId:
    Type: AWS::EC2::VPC::Id

  SubnetIds:
    Type: List<AWS::EC2::Subnet::Id>

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup::Id

  AMIId:
    Type: String
    Description: Specify AMI id for the node instances.

  NodeDiskSize:
    Type: Number
    Description: Node disk size in gigabytes.
    Default: 100

  NodeCount:
    Type: Number

  ClusterName:
    Type: String

  NodeRoleName:
    Description: The IAM role name of worker nodes.
    Type: String

  UserData:
    Type: String

  VolumeMountPath:
    Type: String

  CapacityReservationId:
    Type: String
    Description: Capacity reservation id for the unmanaged nodegroup

  UserDataIsMIMEPart:
    Description: "User data should be embedded as a part of a multi-part MIME document"
    Default: true
    Type: String
    AllowedValues: [true, false]

Conditions:
  IsCapacityReservationIdSet: !Not [!Equals [!Ref CapacityReservationId, ""]]
  IsUserDataMIMEPart: !Equals [true, !Ref UserDataIsMIMEPart]

Resources:
  EFASecurityGroupIngress:
    Type: "AWS::EC2::SecurityGroupIngress"
    Properties:
      Description: Allow node to communicate with each other
      FromPort: 0
      ToPort: 65535
      GroupId: !Ref SecurityGroup
      IpProtocol: "-1"
      SourceSecurityGroupId: !Ref SecurityGroup

  # Egress rule whose destination is the same security group (node to node).
  EFASecurityGroupEgress:
    Type: "AWS::EC2::SecurityGroupEgress"
    Properties:
      Description: Allow the efa worker nodes outbound communication
      DestinationSecurityGroupId: !Ref SecurityGroup
      FromPort: 0
      # 65535 is the highest valid port and matches the ingress rule's range.
      ToPort: 65535
      GroupId: !Ref SecurityGroup
      IpProtocol: "-1"
  
  # Egress rule to every IPv4 destination.
  EFASecurityGroupEgressAllIpv4:
    Type: "AWS::EC2::SecurityGroupEgress"
    Properties:
      Description: Allow the efa worker nodes outbound communication
      FromPort: 0
      # 65535 is the highest valid port and matches the ingress rule's range.
      ToPort: 65535
      CidrIp: "0.0.0.0/0"
      GroupId: !Ref SecurityGroup
      IpProtocol: "-1"

  # Egress rule to every IPv6 destination.
  EFASecurityGroupEgressAllIpv6:
    Type: "AWS::EC2::SecurityGroupEgress"
    Properties:
      Description: Allow the efa worker nodes outbound communication
      FromPort: 0
      # 65535 is the highest valid port and matches the ingress rule's range.
      ToPort: 65535
      CidrIpv6: "::/0"
      GroupId: !Ref SecurityGroup
      IpProtocol: "-1"

  NodeInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: "/"
      Roles:
        - !Ref NodeRoleName

  NodeLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Ref ResourceId
      LaunchTemplateData:
        BlockDeviceMappings:
          - DeviceName: !Ref VolumeMountPath
            Ebs:
              DeleteOnTermination: true
              VolumeSize: !Ref NodeDiskSize
              VolumeType: gp2
        CapacityReservationSpecification:
          Fn::If:
            - IsCapacityReservationIdSet
            - CapacityReservationTarget:
                CapacityReservationId: !Ref CapacityReservationId
            - !Ref AWS::NoValue
        IamInstanceProfile:
          Arn: !GetAtt NodeInstanceProfile.Arn
        ImageId: !Ref AMIId
        InstanceType: "{{index .InstanceTypes 0}}"
        MetadataOptions: 
          HttpTokens: required
        {{ if .NetworkInterfaces -}}
        NetworkInterfaces:
        {{- range .NetworkInterfaces}}
          - NetworkCardIndex: {{ .NetworkCardIndex }}{{ if .DeviceIndex }} {{/* network card index cannot be empty */}}
            DeviceIndex: {{ .DeviceIndex }}{{ end }}{{ if .InterfaceType }}
            InterfaceType: {{ .InterfaceType }}{{ end }}{{ if .Groups }}
            Groups: {{ .Groups }}{{ end }}{{ if .SubnetId }}
            SubnetId: {{ .SubnetId }}{{ end }}{{ if .DeleteOnTermination }}
            DeleteOnTermination: {{ .DeleteOnTermination }}{{ end }}{{ if .Description }}
            Description: {{ .Description}}{{ end -}}
        {{- end}}
        {{ end -}}
        UserData:
          Fn::Base64:
            Fn::If:
              - IsUserDataMIMEPart
              - Fn::Sub: |
                  Content-Type: multipart/mixed; boundary="BOUNDARY"
                  MIME-Version: 1.0

                  --BOUNDARY
                  ${UserData}

                  --BOUNDARY
                  Content-Type: text/x-shellscript; charset="us-ascii"
                  MIME-Version: 1.0

                  #!/usr/bin/env bash
                  /opt/aws/bin/cfn-signal \
                    --stack  ${AWS::StackName} \
                    --resource NodeGroup \
                    --region ${AWS::Region}

                  --BOUNDARY--
              - Fn::Sub: |
                  ${UserData}

  NodeGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    UpdatePolicy:
      AutoScalingRollingUpdate:
        WaitOnResourceSignals: true
        PauseTime: PT15M
    Properties:
      AutoScalingGroupName: !Ref ResourceId
      MixedInstancesPolicy:
        LaunchTemplate:
          LaunchTemplateSpecification:
            LaunchTemplateId: !Ref NodeLaunchTemplate
            Version: !GetAtt NodeLaunchTemplate.LatestVersionNumber
          Overrides:
              {{- range .InstanceTypes}}
                - InstanceType: "{{.}}"
              {{- end}}
      DesiredCapacity: !Ref NodeCount
      MinSize: !Ref NodeCount
      MaxSize: !Ref NodeCount
      VPCZoneIdentifier: !Ref SubnetIds
      Tags:
        - Key: Name
          Value: !Sub "${ClusterName}-Node"
          PropagateAtLaunch: true
        # necessary for kubelet's legacy, in-tree cloud provider
        - Key: !Sub "kubernetes.io/cluster/${ClusterName}"
          Value: owned
          PropagateAtLaunch: true

================================================
FILE: internal/deployers/eksapi/templates/userdata_bootstrap.sh.mimepart.template
================================================
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0

#!/usr/bin/env bash
/etc/eks/bootstrap.sh {{.Name}} \
  --b64-cluster-ca {{.CertificateAuthority}} \
  --apiserver-endpoint {{.APIServerEndpoint}}


================================================
FILE: internal/deployers/eksapi/templates/userdata_bottlerocket.toml.template
================================================
[settings.kubernetes]
"cluster-name" = "{{.Name}}"
"api-server" = "{{.APIServerEndpoint}}"
"cluster-certificate" = "{{.CertificateAuthority}}"
{{- if .ClusterDNSIP}}
"cluster-dns-ip" = "{{.ClusterDNSIP}}"
{{- end}}
device-ownership-from-security-context = true

[settings.host-containers.admin]
"enabled" = true


================================================
FILE: internal/deployers/eksapi/templates/userdata_nodeadm.yaml.mimepart.template
================================================
Content-Type: application/node.eks.aws
MIME-Version: 1.0

---
apiVersion: node.eks.aws/v1alpha1
kind: NodeConfig
spec:
{{- if .NodeadmFeatureGates}}
  featureGates:
    {{- range $gate, $value := .NodeadmFeatureGates }}
    {{$gate}}: {{$value}}
    {{- end }}
{{- end }}
  cluster:
    name: {{.Name}}
    apiServerEndpoint: {{.APIServerEndpoint}}
    certificateAuthority: {{.CertificateAuthority}}
    cidr: {{.CIDR}}
{{- if .KubeletFeatureGates}}
  kubelet:
    config:
      featureGates:
        {{- range $gate, $value := .KubeletFeatureGates }}
        {{$gate}}: {{$value}}
        {{- end }}
{{- end }}


================================================
FILE: internal/deployers/eksapi/userdata.go
================================================
package eksapi

import (
	"bytes"
	"fmt"
	"net"
	"strconv"
	"strings"
	"text/template"

	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
)

// Supported values for deployerOptions.UserDataFormat. generateUserData picks
// the user data template that corresponds to whichever of these is set.
const (
	// UserDataBootstrapSh selects templates.UserDataBootstrapSh.
	UserDataBootstrapSh  = "bootstrap.sh"
	// UserDataNodeadm selects templates.UserDataNodeadm.
	UserDataNodeadm      = "nodeadm"
	// UserDataBottlerocket selects templates.UserDataBottlerocket.
	UserDataBottlerocket = "bottlerocket"
)

// generateUserData renders node user data for the format named by
// opts.UserDataFormat.
//
// It returns the rendered text, a flag reporting whether that text is a MIME
// part (true for every format except Bottlerocket), and an error when the
// format is unknown, a nodeadm feature gate or the cluster CIDR cannot be
// parsed, or the template fails to execute.
func generateUserData(cluster *Cluster, opts *deployerOptions) (string, bool, error) {
	var (
		tmpl     *template.Template
		mimePart = true
	)
	switch format := opts.UserDataFormat; format {
	case UserDataBootstrapSh:
		tmpl = templates.UserDataBootstrapSh
	case UserDataNodeadm:
		// TODO: replace the YAML template with proper usage of the nodeadm API go types
		tmpl = templates.UserDataNodeadm
	case UserDataBottlerocket:
		// Bottlerocket is the only format that is not reported as a MIME part.
		tmpl, mimePart = templates.UserDataBottlerocket, false
	default:
		return "", false, fmt.Errorf("unknown user data format: '%s'", format)
	}

	// DRA is in beta for 1.33, and so needs to be explicitly enabled.
	kubeletGates := make(map[string]bool)
	if opts.KubernetesVersion == "1.33" {
		kubeletGates["DynamicResourceAllocation"] = true
	}

	nodeadmGates, err := extractFeatureGates(opts.NodeadmFeatureGates)
	if err != nil {
		return "", false, err
	}

	// The DNS IP stays empty unless explicitly requested.
	clusterDNSIP := ""
	if opts.SetClusterDNSIP {
		if clusterDNSIP, err = deriveClusterDNSIP(cluster.cidr); err != nil {
			return "", false, err
		}
	}

	data := templates.UserDataTemplateData{
		Name:                 cluster.name,
		APIServerEndpoint:    cluster.endpoint,
		CertificateAuthority: cluster.certificateAuthorityData,
		CIDR:                 cluster.cidr,
		ClusterDNSIP:         clusterDNSIP,
		KubeletFeatureGates:  kubeletGates,
		NodeadmFeatureGates:  nodeadmGates,
	}
	var rendered bytes.Buffer
	if err := tmpl.Execute(&rendered, data); err != nil {
		return "", false, err
	}
	return rendered.String(), mimePart, nil
}

// deriveClusterDNSIP returns the address that lies 10 above the base address
// of cidr (for example "10.100.0.0/16" yields "10.100.0.10").
//
// It returns an error when cidr cannot be parsed, or when the network is too
// small for base+10 to still fall inside it.
func deriveClusterDNSIP(cidr string) (string, error) {
	_, ipNet, err := net.ParseCIDR(cidr)
	if err != nil {
		return "", fmt.Errorf("invalid CIDR: %w", err)
	}

	// Work on a copy so that ipNet keeps describing the original network for
	// the containment check below.
	ip := make(net.IP, len(ipNet.IP))
	copy(ip, ipNet.IP)

	// Add 10 to the address, propagating any carry into the higher-order
	// bytes instead of silently wrapping the last byte.
	carry := 10
	for i := len(ip) - 1; i >= 0 && carry > 0; i-- {
		sum := int(ip[i]) + carry
		ip[i] = byte(sum)
		carry = sum >> 8
	}
	if carry > 0 || !ipNet.Contains(ip) {
		return "", fmt.Errorf("CIDR %q is too small to contain a cluster DNS IP", cidr)
	}
	return ip.String(), nil
}

// extractFeatureGates parses "name=bool" pairs into a feature gate map.
//
// Each pair must contain exactly one "=", a non-empty name, and a value
// accepted by strconv.ParseBool. On success the returned map is never nil
// (it is empty for empty input). On the first malformed pair it returns a nil
// map and an error, so callers never see a partially populated result.
func extractFeatureGates(featureGatePairs []string) (map[string]bool, error) {
	featureGateMap := make(map[string]bool, len(featureGatePairs))
	for _, keyValuePair := range featureGatePairs {
		components := strings.Split(keyValuePair, "=")
		if len(components) != 2 {
			return nil, fmt.Errorf("expected key=value pairs but %s has %d components", keyValuePair, len(components))
		}
		name := components[0]
		if name == "" {
			return nil, fmt.Errorf("expected key=value pairs but %s has an empty key", keyValuePair)
		}
		boolValue, err := strconv.ParseBool(components[1])
		if err != nil {
			return nil, fmt.Errorf("expected bool value in %s: %w", keyValuePair, err)
		}
		featureGateMap[name] = boolValue
	}
	return featureGateMap, nil
}


================================================
FILE: internal/deployers/eksapi/userdata_test.go
================================================
package eksapi

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// cluster is the shared Cluster fixture that Test_generateUserData renders
// user data for; the expected-output constants below embed these values.
var cluster = Cluster{
	name:                     "cluster",
	endpoint:                 "https://example.com",
	certificateAuthorityData: "certificateAuthority",
	cidr:                     "10.100.0.0/16",
}

// bootstrapShUserData is the expected output for the "bootstrap.sh" format.
const bootstrapShUserData = `Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0

#!/usr/bin/env bash
/etc/eks/bootstrap.sh cluster \
  --b64-cluster-ca certificateAuthority \
  --apiserver-endpoint https://example.com
`

// nodeadmUserData is the expected output for the "nodeadm" format when no
// feature gates are set.
const nodeadmUserData = `Content-Type: application/node.eks.aws
MIME-Version: 1.0

---
apiVersion: node.eks.aws/v1alpha1
kind: NodeConfig
spec:
  cluster:
    name: cluster
    apiServerEndpoint: https://example.com
    certificateAuthority: certificateAuthority
    cidr: 10.100.0.0/16
`

// nodeadmUserDataKubeletDRA is the expected "nodeadm" output for Kubernetes
// version "1.33", where the DynamicResourceAllocation kubelet gate is enabled.
const nodeadmUserDataKubeletDRA = `Content-Type: application/node.eks.aws
MIME-Version: 1.0

---
apiVersion: node.eks.aws/v1alpha1
kind: NodeConfig
spec:
  cluster:
    name: cluster
    apiServerEndpoint: https://example.com
    certificateAuthority: certificateAuthority
    cidr: 10.100.0.0/16
  kubelet:
    config:
      featureGates:
        DynamicResourceAllocation: true
`

// nodeadmUserDataFeatureGate is the expected "nodeadm" output when the
// nodeadm feature gate "foo=true" is supplied.
const nodeadmUserDataFeatureGate = `Content-Type: application/node.eks.aws
MIME-Version: 1.0

---
apiVersion: node.eks.aws/v1alpha1
kind: NodeConfig
spec:
  featureGates:
    foo: true
  cluster:
    name: cluster
    apiServerEndpoint: https://example.com
    certificateAuthority: certificateAuthority
    cidr: 10.100.0.0/16
`

// bottlerocketUserData is the expected output for the "bottlerocket" format
// when no cluster DNS IP is requested.
const bottlerocketUserData = `[settings.kubernetes]
"cluster-name" = "cluster"
"api-server" = "https://example.com"
"cluster-certificate" = "certificateAuthority"
device-ownership-from-security-context = true

[settings.host-containers.admin]
"enabled" = true
`

// bottlerocketUserDataWithDNS is the expected "bottlerocket" output when
// SetClusterDNSIP is enabled for the fixture's 10.100.0.0/16 CIDR.
const bottlerocketUserDataWithDNS = `[settings.kubernetes]
"cluster-name" = "cluster"
"api-server" = "https://example.com"
"cluster-certificate" = "certificateAuthority"
"cluster-dns-ip" = "10.100.0.10"
device-ownership-from-security-context = true

[settings.host-containers.admin]
"enabled" = true
`

// Test_generateUserData renders user data for every supported format and
// option combination and compares the result with the expected fixtures. A
// case with wantErr set only requires generateUserData to fail.
func Test_generateUserData(t *testing.T) {
	cases := []struct {
		format              string
		expected            string
		expectedIsMimePart  bool
		kubernetesVersion   string
		NodeadmFeatureGates []string
		setClusterDNSIP     bool
		wantErr             bool
	}{
		{
			format:             "bootstrap.sh",
			expected:           bootstrapShUserData,
			expectedIsMimePart: true,
		},
		{
			format:             "nodeadm",
			expected:           nodeadmUserData,
			expectedIsMimePart: true,
		},
		{
			format:             "bottlerocket",
			expected:           bottlerocketUserData,
			expectedIsMimePart: false,
		},
		{
			format:             "bottlerocket",
			expected:           bottlerocketUserDataWithDNS,
			expectedIsMimePart: false,
			setClusterDNSIP:    true,
		},
		{
			format:             "nodeadm",
			expected:           nodeadmUserDataKubeletDRA,
			kubernetesVersion:  "1.33",
			expectedIsMimePart: true,
		},
		{
			format:              "nodeadm",
			expected:            nodeadmUserDataFeatureGate,
			kubernetesVersion:   "1.30",
			NodeadmFeatureGates: []string{"foo=true"},
			expectedIsMimePart:  true,
		},
		{
			// An unrecognised format must be rejected.
			format:  "unknown",
			wantErr: true,
		},
	}
	for _, c := range cases {
		t.Run(c.format, func(t *testing.T) {
			deployerOpts := &deployerOptions{
				KubernetesVersion:   c.kubernetesVersion,
				NodeadmFeatureGates: c.NodeadmFeatureGates,
				SetClusterDNSIP:     c.setClusterDNSIP,
				UserDataFormat:      c.format,
			}
			actual, isMimePart, err := generateUserData(&cluster, deployerOpts)
			if c.wantErr {
				assert.Error(t, err)
				return
			}
			if err != nil {
				// The output is meaningless after a failure, so stop here
				// rather than also reporting mismatched content.
				t.Fatal(err)
			}
			assert.Equal(t, c.expected, actual)
			assert.Equal(t, c.expectedIsMimePart, isMimePart)
		})
	}
}

// Test_extractFeatureGates checks that well-formed "name=bool" pairs are
// parsed into a map and that malformed pairs are rejected.
func Test_extractFeatureGates(t *testing.T) {
	type scenario struct {
		input     []string
		expected  map[string]bool
		expectErr bool
	}
	scenarios := []scenario{
		{
			input:    []string{"foo=true", "bar=false"},
			expected: map[string]bool{"foo": true, "bar": false},
		},
		{input: []string{"foo:true"}, expectErr: true},
		{input: []string{"foo=bar"}, expectErr: true},
	}
	for _, sc := range scenarios {
		gates, err := extractFeatureGates(sc.input)
		if !sc.expectErr {
			assert.NoError(t, err)
			assert.Equal(t, sc.expected, gates)
			continue
		}
		assert.Error(t, err)
	}
}

// Test_deriveClusterDNSIP checks the derived DNS address for IPv4 and IPv6
// CIDRs and that an unparsable CIDR yields an error.
func Test_deriveClusterDNSIP(t *testing.T) {
	type scenario struct {
		cidr      string
		expected  string
		expectErr bool
	}
	scenarios := []scenario{
		{cidr: "192.0.2.0/24", expected: "192.0.2.10"},
		{cidr: "198.51.100.0/24", expected: "198.51.100.10"},
		{cidr: "2001:db8:1234::/108", expected: "2001:db8:1234::a"},
		{cidr: "invalid", expectErr: true},
	}
	for _, sc := range scenarios {
		dnsIP, err := deriveClusterDNSIP(sc.cidr)
		if !sc.expectErr {
			assert.NoError(t, err)
			assert.Equal(t, sc.expected, dnsIP)
			continue
		}
		assert.Error(t, err)
	}
}


================================================
FILE: internal/deployers/eksapi/vpccni.go
================================================
package eksapi

import (
	"bytes"
	"context"
	"encoding/json"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// vpcCNIDaemonSetPatch is the JSON patch body that tuneVPCCNI applies to the
// "aws-node" container: it sets ENABLE_PREFIX_DELEGATION, MINIMUM_IP_TARGET
// and WARM_IP_TARGET environment variables. It is kept indented here for
// readability and compacted before being sent.
const vpcCNIDaemonSetPatch = `{
	"spec": {
		"template": {
			"spec": {
				"containers": [
					{
						"name": "aws-node",
						"env": [
							{
								"name": "ENABLE_PREFIX_DELEGATION",
								"value": "true"
							},
							{
								"name": "MINIMUM_IP_TARGET",
								"value": "80"
							},
							{
								"name": "WARM_IP_TARGET",
								"value": "10"
							}
						]
					}
				]
			}
		}
	}
}`

// tuneVPCCNI patches the "aws-node" DaemonSet in kube-system with
// vpcCNIDaemonSetPatch, a configuration intended to help prevent test
// flakiness. It returns any error from compacting the patch or from the
// strategic-merge patch request.
func (k *k8sClient) tuneVPCCNI() error {
	var compacted bytes.Buffer
	err := json.Compact(&compacted, []byte(vpcCNIDaemonSetPatch))
	if err != nil {
		return err
	}
	daemonSets := k.clientset.AppsV1().DaemonSets("kube-system")
	_, err = daemonSets.Patch(context.TODO(), "aws-node", types.StrategicMergePatchType, compacted.Bytes(), metav1.PatchOptions{})
	return err
}


================================================
FILE: internal/deployers/eksapi/vpccni_test.go
================================================
package eksapi

import (
	"encoding/json"
	"testing"
)

// Test_validVPCCNIDaemonSetPatch guards against syntax errors in the
// hand-written vpcCNIDaemonSetPatch JSON.
func Test_validVPCCNIDaemonSetPatch(t *testing.T) {
	var parsed json.RawMessage
	err := json.Unmarshal([]byte(vpcCNIDaemonSetPatch), &parsed)
	if err != nil {
		t.Error(err)
	}
}


================================================
FILE: internal/deployers/eksctl/build.go
================================================
package eksctl

// Build is a no-op for the eksctl deployer: it performs no work and always
// returns nil.
func (d *deployer) Build() error {
	return nil
}


================================================
FILE: internal/deployers/eksctl/cluster_config.go
================================================
package eksctl

import (
	"fmt"
	"log/slog"

	eksctl_api "github.com/weaveworks/eksctl/pkg/apis/eksctl.io/v1alpha5"
	"sigs.k8s.io/yaml"
)

// CreateClusterConfig constructs an eksctl_api.ClusterConfig object based on UpOptions.
// This function replaces the string-based template rendering.
//
// It resolves the effective cluster name first, then fills in metadata, the
// OIDC setting and exactly one nodegroup: an unmanaged one when
// UseUnmanagedNodegroup is set, otherwise a managed one. The AMI family
// defaults to AmazonLinux2 and the nodegroup name to "ng-1" when not provided.
// As written, the returned error is always nil.
func (d *deployer) CreateClusterConfig() (*eksctl_api.ClusterConfig, error) {
	d.initClusterName()

	cfg := eksctl_api.NewClusterConfig()
	// Metadata
	cfg.Metadata.Name = d.clusterName
	cfg.Metadata.Region = d.Region
	cfg.Metadata.Version = d.KubernetesVersion
	// IAM
	cfg.IAM.WithOIDC = &d.WithOIDC

	amiFamily := d.AMIFamily
	if amiFamily == "" {
		amiFamily = eksctl_api.NodeImageFamilyAmazonLinux2
	}
	nodeGroupName := d.NodegroupName
	if nodeGroupName == "" {
		nodeGroupName = "ng-1"
	}
	// Create node group or managed node group (MNG)
	if d.UseUnmanagedNodegroup {
		// NOTE(review): presumably NewNodeGroup also registers the group on cfg,
		// since it is never appended explicitly here — confirm against eksctl.
		ng := cfg.NewNodeGroup()
		// TODO: update this when we add support for SSH.
		ng.SSH = nil
		ng.AMIFamily = amiFamily
		ng.Name = nodeGroupName
		// Only the first instance type is used for an unmanaged nodegroup.
		if len(d.InstanceTypes) > 0 {
			ng.InstanceType = d.InstanceTypes[0]
		}
		// Min, max and desired size all point at the same deployer field.
		if d.Nodes >= 0 {
			ng.MinSize = &d.Nodes
			ng.MaxSize = &d.Nodes
			ng.DesiredCapacity = &d.Nodes
		}
		if d.VolumeSize >= 0 {
			ng.VolumeSize = &d.VolumeSize
		}
		ng.PrivateNetworking = d.PrivateNetworking
		ng.EFAEnabled = &d.EFAEnabled
		if len(d.AvailabilityZones) > 0 {
			ng.AvailabilityZones = d.AvailabilityZones
		}
		// A custom AMI on AmazonLinux2 gets an explicit bootstrap command.
		if d.AMI != "" && amiFamily == eksctl_api.NodeImageFamilyAmazonLinux2 {
			bootstrapCommand := fmt.Sprintf(`#!/bin/bash
source /var/lib/cloud/scripts/eksctl/bootstrap.helper.sh
/etc/eks/bootstrap.sh %s --kubelet-extra-args "--node-labels=${NODE_LABELS}"`, d.clusterName)
			ng.OverrideBootstrapCommand = &bootstrapCommand
		}
	} else {
		// Create managed node group
		mng := eksctl_api.NewManagedNodeGroup()
		cfg.ManagedNodeGroups = append(cfg.ManagedNodeGroups, mng)
		// TODO: update this when we add support for SSH.
		mng.SSH = nil
		mng.AMIFamily = amiFamily
		mng.Name = nodeGroupName
		mng.InstanceTypes = d.InstanceTypes
		// Min, max and desired size all point at the same deployer field.
		if d.Nodes >= 0 {
			mng.MinSize = &d.Nodes
			mng.MaxSize = &d.Nodes
			mng.DesiredCapacity = &d.Nodes
		}
		if d.VolumeSize >= 0 {
			mng.VolumeSize = &d.VolumeSize
		}
		mng.PrivateNetworking = d.PrivateNetworking
		mng.EFAEnabled = &d.EFAEnabled
		if len(d.AvailabilityZones) > 0 {
			mng.AvailabilityZones = d.AvailabilityZones
		}
		// A custom AMI on AmazonLinux2 gets an explicit bootstrap command; a
		// custom Bottlerocket AMI is set directly on the nodegroup. Note that
		// the Bottlerocket case is only handled in this managed branch.
		if d.AMI != "" && amiFamily == eksctl_api.NodeImageFamilyAmazonLinux2 {
			bootstrapCommand := fmt.Sprintf(`#!/bin/bash
source /var/lib/cloud/scripts/eksctl/bootstrap.helper.sh
/etc/eks/bootstrap.sh %s --kubelet-extra-args "--node-labels=${NODE_LABELS}"`, d.clusterName)
			mng.OverrideBootstrapCommand = &bootstrapCommand
		} else if d.AMI != "" && amiFamily == eksctl_api.NodeImageFamilyBottlerocket {
			mng.AMI = d.AMI
		}
	}
	return cfg, nil
}

// clusterConfigTemplateParams bundles UpOptions with the cluster name and
// region.
// NOTE(review): nothing in this file references this type; it looks like a
// leftover from the earlier template-based rendering — confirm before removing.
type clusterConfigTemplateParams struct {
	UpOptions
	ClusterName string
	Region      string
}

// RenderClusterConfig builds the eksctl ClusterConfig from the deployer's
// options and returns it serialized as YAML.
//
// A failure to build the config is returned to the caller rather than only
// logged, so a nil or partial config is never marshalled.
func (d *deployer) RenderClusterConfig() ([]byte, error) {
	cfg, err := d.CreateClusterConfig()
	if err != nil {
		return nil, fmt.Errorf("failed to create ClusterConfig: %w", err)
	}
	slog.Info("rendering cluster config yaml", "config", cfg)
	return yaml.Marshal(cfg)
}


================================================
FILE: internal/deployers/eksctl/deployer.go
================================================
package eksctl

import (
	"flag"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"

	"github.com/aws/aws-k8s-tester/internal"
	"github.com/aws/aws-k8s-tester/internal/awssdk"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	"github.com/spf13/pflag"
	"github.com/urfave/sflags/gen/gpflag"
	"sigs.k8s.io/kubetest2/pkg/types"
	"sigs.k8s.io/yaml"
)

// DeployerName is the name of this deployer, "eksctl".
const DeployerName = "eksctl"

// deployer is the eksctl-backed deployer. It embeds the flag-controlled
// UpOptions and carries the AWS configuration and EKS client created by
// NewDeployer.
type deployer struct {
	// generic parts
	commonOptions types.Options
	*UpOptions
	awsConfig      aws.Config
	eksClient      *eks.Client
	KubeconfigPath string `flag:"kubeconfig" desc:"Path to kubeconfig"`
	// clusterName is the effective cluster name, resolved by initClusterName
	// (from the config file, the --cluster-name flag, or the RunID)
	clusterName string
}

// NewDeployer implements deployer.New for EKS using eksctl. It returns the
// deployer together with the flag set bound to its options.
func NewDeployer(opts types.Options) (types.Deployer, *pflag.FlagSet) {
	// These fields are not flag controlled, so populate them up front.
	cfg := awssdk.NewConfig()
	d := new(deployer)
	d.commonOptions = opts
	d.awsConfig = cfg
	d.eksClient = eks.NewFromConfig(cfg)

	// Everything else is filled in from flags.
	return d, bindFlags(d)
}

// DumpClusterLogs is a no-op for the eksctl deployer: it collects nothing and
// always returns nil.
func (d *deployer) DumpClusterLogs() error {
	return nil
}

// Kubeconfig returns the kubeconfig path: the explicitly configured
// KubeconfigPath when set, otherwise "kubeconfig" inside the run directory.
// The returned error is always nil.
func (d *deployer) Kubeconfig() (string, error) {
	path := d.KubeconfigPath
	if path == "" {
		path = filepath.Join(d.commonOptions.RunDir(), "kubeconfig")
	}
	return path, nil
}

// Version returns internal.Version.
func (d *deployer) Version() string {
	return internal.Version
}

// bindFlags is a helper used to create & bind a flagset to the deployer.
// The returned flag set also includes the flags registered on
// flag.CommandLine. If the deployer's fields cannot be parsed into flags, the
// cause is logged and the process exits with status 1.
func bindFlags(d *deployer) *pflag.FlagSet {
	flags, err := gpflag.Parse(d)
	if err != nil {
		// Include the cause; without it the failure is undiagnosable.
		slog.Error("unable to bind flags for deployer", "error", err)
		os.Exit(1)
	}
	flags.AddGoFlagSet(flag.CommandLine)
	return flags
}

// initClusterName resolves d.clusterName from the first source that yields a
// name:
//  1. the metadata name in the eksctl config file, when one is given and parses
//  2. the --cluster-name flag
//  3. the RunID of the kubetest run
func (d *deployer) initClusterName() {
	if configFile := d.UpOptions.ConfigFile; configFile != "" {
		name, err := d.parseClusterNameFromConfig(configFile)
		if err != nil {
			// Not fatal: fall through to the flag / RunID sources below.
			slog.Warn("failed to extract cluster name from config file", "error", err)
		} else {
			d.clusterName = name
			slog.Debug("using cluster name from config file", "clusterName", d.clusterName)
			return
		}
	}

	if flagName := d.UpOptions.ClusterName; flagName != "" {
		d.clusterName = flagName
		slog.Debug("using cluster name from flag", "clusterName", d.clusterName)
		return
	}

	d.clusterName = d.commonOptions.RunID()
	slog.Debug("using RunID for cluster name", "clusterName", d.clusterName)
}

// parseClusterNameFromConfig extracts the cluster name (metadata.name) from
// an eksctl config file.
//
// It returns an error when the file cannot be read or parsed as YAML, when
// the metadata section is missing, or when the name is absent, empty or not a
// string. Read and parse failures are wrapped so callers can inspect the
// underlying cause (for example with errors.Is(err, os.ErrNotExist)).
func (d *deployer) parseClusterNameFromConfig(configFilePath string) (string, error) {
	configData, err := os.ReadFile(configFilePath)
	if err != nil {
		return "", fmt.Errorf("failed to read config file: %w", err)
	}

	// Simple YAML parsing to extract the cluster name; only the metadata
	// section is inspected, so the full eksctl schema is not needed here.
	var configMap map[string]any
	if err := yaml.Unmarshal(configData, &configMap); err != nil {
		return "", fmt.Errorf("failed to parse config file YAML: %w", err)
	}

	metadata, ok := configMap["metadata"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("metadata section missing in config file")
	}

	name, ok := metadata["name"].(string)
	if !ok || name == "" {
		return "", fmt.Errorf("cluster name not found in config file metadata")
	}

	return name, nil
}

// Compile-time assertion that *deployer implements types.DeployerWithKubeconfig.
var _ types.DeployerWithKubeconfig = &deployer{}


================================================
FILE: internal/deployers/eksctl/down.go
================================================
package eksctl

import (
	"fmt"
	"log/slog"

	"github.com/aws/aws-k8s-tester/internal/util"
)

// Down deletes the deploy target with eksctl: either the whole cluster or
// only the nodegroup.
//
// An empty DeployTarget is treated as "cluster" and an empty NodegroupName as
// "ng-1", matching the defaults applied when the resources are created, so
// that a standalone teardown works without repeating those flags. Any other
// deploy target is rejected.
func (d *deployer) Down() error {
	d.initClusterName()

	target := d.DeployTarget
	if target == "" {
		target = "cluster"
	}

	switch target {
	case "nodegroup":
		nodegroupName := d.NodegroupName
		if nodegroupName == "" {
			nodegroupName = "ng-1"
		}
		slog.Info("deleting nodegroup", "nodegroupName", nodegroupName, "clusterName", d.clusterName)
		err := util.ExecuteCommand("eksctl", "delete", "nodegroup", "--cluster", d.clusterName, "--name", nodegroupName, "--drain=false", "--wait")
		if err != nil {
			return fmt.Errorf("failed to delete nodegroup: %w", err)
		}
		slog.Info("successfully deleted nodegroup", "nodegroupName", nodegroupName, "clusterName", d.clusterName)
	case "cluster":
		slog.Info("deleting cluster", "clusterName", d.clusterName)
		err := util.ExecuteCommand("eksctl", "delete", "cluster", "--name", d.clusterName, "--wait", "--disable-nodegroup-eviction")
		if err != nil {
			return fmt.Errorf("failed to delete cluster: %w", err)
		}
		slog.Info("successfully deleted cluster", "clusterName", d.clusterName)
	default:
		return fmt.Errorf("Unsupported deploy target: %s, supported options: `cluster`, `nodegroup`.", d.DeployTarget)
	}
	return nil
}


================================================
FILE: internal/deployers/eksctl/up.go
================================================
package eksctl

import (
	"context"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"slices"

	"github.com/aws/aws-k8s-tester/internal/util"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
)

// UpOptions holds the flag-controlled settings for bringing up a cluster or
// nodegroup with eksctl. Each field's `flag` tag is its command-line name and
// its `desc` tag the help text. When ConfigFile is set, verifyUpFlags skips
// validation of the remaining flags.
type UpOptions struct {
	Region                string   `flag:"region" desc:"AWS region for EKS cluster"`
	KubernetesVersion     string   `flag:"kubernetes-version" desc:"cluster Kubernetes version"`
	Nodes                 int      `flag:"nodes" desc:"number of nodes to launch in cluster"`
	AMI                   string   `flag:"ami" desc:"Node AMI"`
	InstanceTypes         []string `flag:"instance-types" desc:"Node instance types"`
	ConfigFile            string   `flag:"config-file" desc:"Path to eksctl config file (if provided, other flags are ignored)"`
	AvailabilityZones     []string `flag:"availability-zones" desc:"Node availability zones"`
	AMIFamily             string   `flag:"ami-family" desc:"AMI family to use (AmazonLinux2023, Bottlerocket)"`
	EFAEnabled            bool     `flag:"efa-enabled" desc:"Enable Elastic Fabric Adapter for the nodegroup"`
	VolumeSize            int      `flag:"volume-size" desc:"Size of the node root volume in GB"`
	PrivateNetworking     bool     `flag:"private-networking" desc:"Use private networking for nodes"`
	WithOIDC              bool     `flag:"with-oidc" desc:"Enable OIDC provider for IAM roles for service accounts"`
	DeployTarget          string   `flag:"deploy-target" desc:"The target to deploy, supported values: cluster | nodegroup (defaults to 'cluster'). It is a thin wrapper to eksctl create subcommand with limited supported values."`
	ClusterName           string   `flag:"cluster-name" desc:"Name of the EKS cluster (defaults to RunID if not specified)"`
	UseUnmanagedNodegroup bool     `flag:"unmanaged-nodegroup" desc:"Use unmanaged nodegroup instead of managed nodegroup"`
	NodegroupName         string   `flag:"nodegroup-name" desc:"Name of the nodegroup (defaults to 'ng-1')"`
}

// verifyUpFlags validates the Up options and fills in defaults.
//
// The deploy target is validated and defaulted to "cluster" first, because it
// is passed to `eksctl create` even when a config file is used. With a config
// file, every other flag is left untouched. Otherwise the Kubernetes version
// is auto-detected when empty, the node count defaults to 4 when zero, and an
// unmanaged nodegroup gets a default instance type when none is given.
func (d *deployer) verifyUpFlags() error {
	supportedDeployTargets := []string{"cluster", "nodegroup"}
	switch {
	case d.DeployTarget == "":
		// If no deploy target specified, use "cluster" as default
		d.DeployTarget = "cluster"
		slog.Info("no deploy target specified, using default", "deployTarget", d.DeployTarget)
	case !slices.Contains(supportedDeployTargets, d.DeployTarget):
		return fmt.Errorf("Unsupported deploy target: %s, supported options: `cluster`, `nodegroup`.", d.DeployTarget)
	}

	// Skip the remaining validation if using a config file
	if d.ConfigFile != "" {
		slog.Info("using config file, skipping command-line flag validation", "configFile", d.ConfigFile)
		return nil
	}

	if d.KubernetesVersion == "" {
		slog.Info("--kubernetes-version is empty, attempting to detect it...")
		detectedVersion, err := detectKubernetesVersion()
		if err != nil {
			return fmt.Errorf("unable to detect --kubernetes-version, flag cannot be empty: %w", err)
		}
		slog.Info("detected kubernetes version", "version", detectedVersion)
		d.KubernetesVersion = detectedVersion
	}
	if d.Nodes < 0 {
		return fmt.Errorf("number of nodes must not be negative, got %d", d.Nodes)
	}
	if d.Nodes == 0 {
		d.Nodes = 4
		slog.Debug("using default number of nodes", "nodes", d.Nodes)
	}

	// Validate instance types for unmanaged nodegroups
	if d.UseUnmanagedNodegroup {
		switch n := len(d.InstanceTypes); {
		case n > 1:
			return fmt.Errorf("unmanaged nodegroups only support a single instance type, got %d: %v", n, d.InstanceTypes)
		case n == 0:
			// If no instance type specified, use a default
			d.InstanceTypes = []string{"m5.xlarge"}
			slog.Info("no instance type specified for unmanaged nodegroup, using default", "instanceType", d.InstanceTypes[0])
		}
	}

	return nil
}

// Up creates the deploy target with `eksctl create` and then writes a
// kubeconfig for the cluster.
//
// When ConfigFile is set it is passed to eksctl as-is. Otherwise a cluster
// config is rendered from the flags into a temporary file, which is closed
// before eksctl reads it and removed when Up returns. On success
// KubeconfigPath is updated to the path that was written.
func (d *deployer) Up() error {
	d.initClusterName()

	if err := d.verifyUpFlags(); err != nil {
		return fmt.Errorf("up flags are invalid: %w", err)
	}

	if d.UseUnmanagedNodegroup {
		slog.Info("using unmanaged nodegroup", "clusterName", d.clusterName)
	} else {
		slog.Info("using managed nodegroup", "clusterName", d.clusterName)
	}

	var args []string

	if d.ConfigFile != "" {
		// If config file is provided, use it
		args = d.renderEksctlArgs(d.ConfigFile)
	} else {
		// Use rendered cluster config
		clusterConfig, err := d.RenderClusterConfig()
		if err != nil {
			return err
		}
		slog.Info("rendered cluster config", "config", string(clusterConfig))

		clusterConfigFile, err := os.CreateTemp("", "kubetest2-eksctl-cluster-config")
		if err != nil {
			return err
		}
		// The file is only needed while eksctl runs; the deferred removal
		// fires when Up returns, i.e. after the command has completed.
		defer os.Remove(clusterConfigFile.Name())

		if _, err := clusterConfigFile.Write(clusterConfig); err != nil {
			clusterConfigFile.Close()
			return fmt.Errorf("failed to write cluster config: %w", err)
		}
		// Close before handing the path to eksctl so a failed flush surfaces
		// here rather than as a confusing eksctl parse error.
		if err := clusterConfigFile.Close(); err != nil {
			return fmt.Errorf("failed to write cluster config: %w", err)
		}

		args = d.renderEksctlArgs(clusterConfigFile.Name())
	}

	err := util.ExecuteCommand("eksctl", args...)
	if err != nil {
		return fmt.Errorf("failed to create %s: %w", d.DeployTarget, err)
	}

	// Write kubeconfig to the rundir
	kubeConfigPath, err := d.Kubeconfig()
	if err != nil {
		return fmt.Errorf("error determining kubeconfig path: %w", err)
	}

	// Create directory if it doesn't exist
	err = os.MkdirAll(filepath.Dir(kubeConfigPath), 0755)
	if err != nil {
		return fmt.Errorf("error creating directory for kubeconfig: %w", err)
	}

	slog.Info("writing kubeconfig", "path", kubeConfigPath)
	writeKubeconfigArgs := []string{
		"utils",
		"write-kubeconfig",
		"--cluster", d.clusterName,
		"--region", d.Region,
		"--kubeconfig", kubeConfigPath,
	}

	err = util.ExecuteCommand("eksctl", writeKubeconfigArgs...)
	if err != nil {
		return fmt.Errorf("failed to write kubeconfig: %w", err)
	}

	slog.Info("successfully wrote kubeconfig", "path", kubeConfigPath)
	d.KubeconfigPath = kubeConfigPath
	return nil
}

// renderEksctlArgs builds the argument list for
// `eksctl create <DeployTarget> --config-file <configFilePath>`.
func (d *deployer) renderEksctlArgs(configFilePath string) []string {
	args := make([]string, 0, 4)
	args = append(args, "create", d.DeployTarget)
	args = append(args, "--config-file", configFilePath)
	return args
}

// IsUp reports whether the cluster is active. It returns true for an ACTIVE
// cluster, false (without error) while the cluster is still CREATING, and an
// error for any other status or when the cluster cannot be described.
func (d *deployer) IsUp() (up bool, err error) {
	d.initClusterName()

	input := &eks.DescribeClusterInput{Name: aws.String(d.clusterName)}
	result, err := d.eksClient.DescribeCluster(context.TODO(), input)
	if err != nil {
		return false, err
	}

	status := result.Cluster.Status
	if status == ekstypes.ClusterStatusActive {
		return true, nil
	}
	if status == ekstypes.ClusterStatusCreating {
		return false, nil
	}
	return false, fmt.Errorf("cluster status is: %v", status)
}

// detectKubernetesVersion detects the Kubernetes version via
// util.DetectKubernetesVersion and reduces it to its minor version with
// util.ParseMinorVersion. It returns an empty string with the error if either
// step fails.
func detectKubernetesVersion() (string, error) {
	fullVersion, detectErr := util.DetectKubernetesVersion()
	if detectErr != nil {
		return "", detectErr
	}
	minor, parseErr := util.ParseMinorVersion(fullVersion)
	if parseErr != nil {
		return "", parseErr
	}
	return minor, nil
}


================================================
FILE: internal/e2e/client.go
================================================
package e2e

import (
	"bytes"
	"context"
	"fmt"
	"html/template"
	"io"
	"os"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/cli-runtime/pkg/resource"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/restmapper"
	"sigs.k8s.io/e2e-framework/klient/decoder"
	"sigs.k8s.io/e2e-framework/klient/k8s"
)

// ApplyFiles creates Kubernetes objects contained in manifest file(s), in a manner similar to `kubectl apply -f`
// Multiple objects may be in each manifest file.
// The manifest files are processed in order, and processing stops at the
// first file that cannot be opened or applied. Each file is closed as soon as
// it has been processed.
func ApplyFiles(restConfig *rest.Config, manifestFiles ...string) error {
	for _, manifestFile := range manifestFiles {
		// Run each file in its own function so the deferred Close fires per
		// iteration instead of accumulating until ApplyFiles returns.
		err := func() error {
			f, err := os.Open(manifestFile)
			if err != nil {
				return err
			}
			defer f.Close()
			return applyManifests(restConfig, f)
		}()
		if err != nil {
			return err
		}
	}
	return nil
}

// ApplyManifests creates the Kubernetes objects found in the given in-memory
// manifests, in a manner similar to `kubectl apply -f`.
// Each manifest may contain multiple objects.
func ApplyManifests(restConfig *rest.Config, manifests ...[]byte) error {
	readers := bytesSlicesToReaders(manifests...)
	return applyManifests(restConfig, readers...)
}

// applyManifests decodes every object from each manifest reader, in order,
// and issues a create request for it. Objects without a namespace are created
// in "default". It stops at the first manifest that fails to decode or process.
func applyManifests(restConfig *rest.Config, manifests ...io.Reader) error {
	create := func(client *resource.Helper, obj k8s.Object) error {
		ns, err := meta.NewAccessor().Namespace(obj)
		if err != nil {
			return err
		}
		if ns == "" {
			ns = "default"
		}
		_, err = client.Create(ns, false, obj)
		return err
	}

	for _, manifest := range manifests {
		objs, err := decoder.DecodeAll(context.TODO(), manifest)
		if err != nil {
			return err
		}
		if err := processObjects(restConfig, objs, create); err != nil {
			return err
		}
	}
	return nil
}

// DeleteFiles deletes Kubernetes objects contained in manifest file(s), in a manner similar to `kubectl delete -f`
// Multiple objects may be in each manifest file.
// The manifest files are processed in order, and processing stops at the
// first file that cannot be opened or processed. Each file is closed as soon
// as it has been processed.
func DeleteFiles(restConfig *rest.Config, manifestFiles ...string) error {
	for _, manifestFile := range manifestFiles {
		// Run each file in its own function so the deferred Close fires per
		// iteration instead of accumulating until DeleteFiles returns.
		err := func() error {
			f, err := os.Open(manifestFile)
			if err != nil {
				return err
			}
			defer f.Close()
			return deleteManifests(restConfig, f)
		}()
		if err != nil {
			return err
		}
	}
	return nil
}

// DeleteManifests deletes the Kubernetes objects found in the given in-memory
// manifests, in a manner similar to `kubectl delete -f`.
// Each manifest may contain multiple objects.
func DeleteManifests(restConfig *rest.Config, manifests ...[]byte) error {
	readers := bytesSlicesToReaders(manifests...)
	return deleteManifests(restConfig, readers...)
}

// deleteManifests decodes every object from each manifest reader, in order,
// and issues a delete request for it using background propagation. Objects
// without a namespace are looked up in "default". It stops at the first
// manifest that fails to decode or process.
func deleteManifests(restConfig *rest.Config, manifests ...io.Reader) error {
	remove := func(client *resource.Helper, obj k8s.Object) error {
		accessor := meta.NewAccessor()
		name, err := accessor.Name(obj)
		if err != nil {
			return err
		}
		ns, err := accessor.Namespace(obj)
		if err != nil {
			return err
		}
		if ns == "" {
			ns = "default"
		}
		propagation := metav1.DeletePropagationBackground
		opts := &metav1.DeleteOptions{PropagationPolicy: &propagation}
		_, err = client.DeleteWithOptions(ns, name, opts)
		return err
	}

	for _, manifest := range manifests {
		objs, err := decoder.DecodeAll(context.TODO(), manifest)
		if err != nil {
			return err
		}
		if err := processObjects(restConfig, objs, remove); err != nil {
			return err
		}
	}
	return nil
}

// RenderManifests renders the manifest template in file with templateData
// and returns the rendered bytes.
//
// If the template fails to parse or to execute, it returns a nil slice and
// the error; partially rendered output is never returned.
//
// NOTE(review): this file imports html/template, which applies contextual
// HTML escaping to substituted values (for example "<" becomes "&lt;"). For
// Kubernetes manifests text/template is probably what is wanted — confirm and
// switch the import if so.
func RenderManifests(file []byte, templateData interface{}) ([]byte, error) {
	tpl, err := template.New("Manifest").Parse(string(file))
	if err != nil {
		return nil, err
	}
	var buf bytes.Buffer
	if err := tpl.Execute(&buf, templateData); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

// GetJobLogs returns the logs of the first pod found for job.
//
// Pods are selected by the "job-name" label: "<name>" for a *batchv1.Job and
// "<name>-launcher" for an unstructured object, which is assumed to be an
// MPIJob. Any other object type is rejected. An error is also returned when
// no pod matches or the log stream cannot be opened or read.
func GetJobLogs(restConfig *rest.Config, job k8s.Object) (string, error) {
	clientset, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		return "", err
	}

	var selector string
	switch job.(type) {
	case *batchv1.Job:
		selector = fmt.Sprintf("job-name=%s", job.GetName())
	case *unstructured.Unstructured: // assume this is an MPIJob
		selector = fmt.Sprintf("job-name=%s-launcher", job.GetName())
	default:
		return "", fmt.Errorf("unsupported job type %T", job)
	}

	ctx := context.Background()
	podClient := clientset.CoreV1().Pods(job.GetNamespace())
	pods, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		return "", err
	}
	if len(pods.Items) == 0 {
		return "", fmt.Errorf("no pods found for job %s", job.GetName())
	}

	// Only the first matching pod is read.
	stream, err := podClient.GetLogs(pods.Items[0].Name, &corev1.PodLogOptions{}).Stream(ctx)
	if err != nil {
		return "", err
	}
	defer stream.Close()

	var logs bytes.Buffer
	if _, err := io.Copy(&logs, stream); err != nil {
		return "", err
	}
	return logs.String(), nil
}

// bytesSlicesToReaders wraps each byte slice in a reader, preserving order.
// It returns nil when called without arguments.
func bytesSlicesToReaders(byteSlices ...[]byte) []io.Reader {
	var result []io.Reader
	for i := range byteSlices {
		result = append(result, bytes.NewReader(byteSlices[i]))
	}
	return result
}

// processObjects applies a processFunc to each object, supplying it a dynamically-typed client appropriate for the object.
//
// Every object is attempted, in order, even if an earlier one fails; the
// first processFunc error is then returned, wrapped with the kind and name of
// the object it occurred on. A failure to build the discovery mapping or the
// client for an object aborts immediately.
func processObjects(restConfig *rest.Config, objs []k8s.Object, processFunc func(client *resource.Helper, obj k8s.Object) error) error {
	clientset, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		return err
	}
	groupResources, err := restmapper.GetAPIGroupResources(clientset.Discovery())
	if err != nil {
		return err
	}
	rm := restmapper.NewDiscoveryRESTMapper(groupResources)

	var firstErr error
	for _, obj := range objs {
		client, err := newResourceHelper(restConfig, rm, obj)
		if err != nil {
			return err
		}
		// Keep going after a failure so the remaining objects are still
		// handled, but remember the first error so it is not silently lost.
		if err := processFunc(client, obj); err != nil && firstErr == nil {
			kind := obj.GetObjectKind().GroupVersionKind().Kind
			firstErr = fmt.Errorf("processing %s %q: %w", kind, obj.GetName(), err)
		}
	}
	return firstErr
}

// newResourceHelper builds a resource.Helper for obj's kind.
//
// The object's group/kind/version is resolved through rm, and a REST client
// is created for the resolved group version ("/api" for the core group,
// "/apis" otherwise). The client settings are applied to a copy of
// restConfig, so the caller's configuration is left unmodified.
func newResourceHelper(restConfig *rest.Config, rm meta.RESTMapper, obj runtime.Object) (*resource.Helper, error) {
	gvk := obj.GetObjectKind().GroupVersionKind()
	gk := schema.GroupKind{Group: gvk.Group, Kind: gvk.Kind}
	mapping, err := rm.RESTMapping(gk, gvk.Version)
	if err != nil {
		return nil, err
	}
	gv := mapping.GroupVersionKind.GroupVersion()

	// Work on a copy: the per-object settings below must not leak into the
	// shared config that callers reuse for other clients.
	cfg := rest.CopyConfig(restConfig)
	cfg.ContentConfig = resource.UnstructuredPlusDefaultContentConfig()
	cfg.GroupVersion = &gv
	if len(gv.Group) == 0 {
		cfg.APIPath = "/api"
	} else {
		cfg.APIPath = "/apis"
	}
	restClient, err := rest.RESTClientFor(cfg)
	if err != nil {
		return nil, err
	}

	return resource.NewHelper(restClient, mapping), nil
}


================================================
FILE: internal/e2e/conditions.go
================================================
package e2e

import (
	"context"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	apimachinerywait "k8s.io/apimachinery/pkg/util/wait"

	"sigs.k8s.io/e2e-framework/klient/k8s"
	"sigs.k8s.io/e2e-framework/klient/k8s/resources"
)

// ConditionExtension provides wait conditions that re-fetch an object through
// the wrapped resources client and inspect its current state.
type ConditionExtension struct {
	// resources is the client used to fetch the latest version of an object.
	resources *resources.Resources
}

// NewConditionExtension returns a ConditionExtension that fetches objects
// through r.
func NewConditionExtension(r *resources.Resources) *ConditionExtension {
	return &ConditionExtension{resources: r}
}

// ResourceMatch returns a condition that re-fetches obj on every evaluation
// and reports whatever matchFetcher returns for the refreshed object. It is
// useful for waiting on fields that may not be present immediately after
// creation. A failed fetch ends the condition with that error.
func (c *ConditionExtension) ResourceMatch(obj k8s.Object, matchFetcher func(object k8s.Object) bool) apimachinerywait.ConditionWithContextFunc {
	return func(ctx context.Context) (bool, error) {
		err := c.resources.Get(ctx, obj.GetName(), obj.GetNamespace(), obj)
		if err != nil {
			return false, err
		}
		return matchFetcher(obj), nil
	}
}

// PodRunning returns a condition that refreshes pod and is satisfied once its phase is Running.
// A Pending pod keeps the condition waiting; any other phase is reported as an error.
// pod must be a *v1.Pod.
func (c *ConditionExtension) PodRunning(pod k8s.Object) apimachinerywait.ConditionWithContextFunc {
	return func(ctx context.Context) (bool, error) {
		if getErr := c.resources.Get(ctx, pod.GetName(), pod.GetNamespace(), pod); getErr != nil {
			return false, getErr
		}
		phase := pod.(*v1.Pod).Status.Phase
		if phase == v1.PodRunning {
			return true, nil
		}
		if phase == v1.PodPending {
			return false, nil
		}
		return false, fmt.Errorf("pod cannot transition to running from current status: %s", phase)
	}
}

// PodSucceeded returns a condition that refreshes pod and is satisfied once its phase is Succeeded.
// A Failed phase is reported as an error; every other phase keeps the condition waiting.
// pod must be a *v1.Pod.
func (c *ConditionExtension) PodSucceeded(pod k8s.Object) apimachinerywait.ConditionWithContextFunc {
	return func(ctx context.Context) (bool, error) {
		if getErr := c.resources.Get(ctx, pod.GetName(), pod.GetNamespace(), pod); getErr != nil {
			return false, getErr
		}
		switch pod.(*v1.Pod).Status.Phase {
		case v1.PodSucceeded:
			return true, nil
		case v1.PodFailed:
			return false, fmt.Errorf("Pod in Failed status")
		default:
			return false, nil
		}
	}
}

// DaemonSetReady returns a condition that refreshes daemonset and is satisfied when the number of
// ready pods equals the desired number scheduled and no pods are reported unavailable.
// daemonset must be an *appsv1.DaemonSet.
func (c *ConditionExtension) DaemonSetReady(daemonset k8s.Object) apimachinerywait.ConditionWithContextFunc {
	return func(ctx context.Context) (bool, error) {
		if getErr := c.resources.Get(ctx, daemonset.GetName(), daemonset.GetNamespace(), daemonset); getErr != nil {
			return false, getErr
		}
		status := daemonset.(*appsv1.DaemonSet).Status
		allReady := status.NumberReady == status.DesiredNumberScheduled
		noneUnavailable := status.NumberUnavailable == 0
		return allReady && noneUnavailable, nil
	}
}

// JobSucceeded returns a condition that refreshes job and is satisfied once the Job has succeeded.
// job must be a *batchv1.Job.
//
// The condition fails with an error if the Job carries a JobFailed condition with status True.
// When .spec.completions is set, the Job is done once .status.succeeded equals it. When
// .spec.completions is nil, the Job is done once it carries a JobComplete condition with status True.
func (c *ConditionExtension) JobSucceeded(job k8s.Object) apimachinerywait.ConditionWithContextFunc {
	return func(ctx context.Context) (done bool, err error) {
		if err := c.resources.Get(ctx, job.GetName(), job.GetNamespace(), job); err != nil {
			return false, err
		}
		batchJob := job.(*batchv1.Job)
		complete := false
		for _, condition := range batchJob.Status.Conditions {
			if condition.Status != v1.ConditionTrue {
				continue
			}
			switch condition.Type {
			case batchv1.JobFailed:
				return false, fmt.Errorf("job failed")
			case batchv1.JobComplete:
				complete = true
			}
		}
		// Completions is a pointer and may be nil; dereferencing it unconditionally would panic.
		if completions := batchJob.Spec.Completions; completions != nil {
			return batchJob.Status.Succeeded == *completions, nil
		}
		return complete, nil
	}
}

// AllNodesHaveNonZeroResourceCapacity returns a condition that lists all nodes and is satisfied
// when every node reports a capacity greater than zero for the resource named resourceLabel.
// A list failure or an empty node list is reported as an error; a node that lacks the resource or
// reports a non-positive capacity keeps the condition waiting.
func (c *ConditionExtension) AllNodesHaveNonZeroResourceCapacity(resourceLabel string) apimachinerywait.ConditionWithContextFunc {
	return func(ctx context.Context) (bool, error) {
		var nodes v1.NodeList
		if listErr := c.resources.List(ctx, &nodes); listErr != nil {
			return false, fmt.Errorf("failed to list nodes: %w", listErr)
		}
		if len(nodes.Items) == 0 {
			return false, fmt.Errorf("no nodes found in the cluster")
		}
		name := v1.ResourceName(resourceLabel)
		for i := range nodes.Items {
			quantity, present := nodes.Items[i].Status.Capacity[name]
			if !present || quantity.Value() <= 0 {
				return false, nil
			}
		}
		return true, nil
	}
}


================================================
FILE: internal/e2e/doc.go
================================================
// Package e2e contains extensions to sigs.k8s.io/e2e-framework
package e2e


================================================
FILE: internal/e2e/ec2.go
================================================
package e2e

import (
	"context"
	"fmt"

	"github.com/aws/aws-k8s-tester/internal/awssdk"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
)

// EC2Client is the interface through which callers look up EC2 instance type information.
type EC2Client interface {
	// DescribeInstanceType returns the InstanceTypeInfo for the named instance type.
	DescribeInstanceType(instanceType string) (ec2types.InstanceTypeInfo, error)
}

// ec2Client wraps an AWS SDK v2 EC2 client.
type ec2Client struct {
	// client is the underlying SDK client used for all EC2 API calls.
	client *ec2.Client
}

// NewEC2Client returns an ec2Client backed by an SDK client built from awssdk.NewConfig().
func NewEC2Client() *ec2Client {
	cfg := awssdk.NewConfig()
	return &ec2Client{client: ec2.NewFromConfig(cfg)}
}

// DescribeInstanceTopology returns the topology entries for the given instance IDs, following
// pagination until every page has been collected. On the first page error it returns an empty
// slice together with that error.
func (c *ec2Client) DescribeInstanceTopology(instanceIDs []string) ([]ec2types.InstanceTopology, error) {
	input := &ec2.DescribeInstanceTopologyInput{InstanceIds: instanceIDs}
	pages := ec2.NewDescribeInstanceTopologyPaginator(c.client, input)

	var topologies []ec2types.InstanceTopology
	for pages.HasMorePages() {
		page, err := pages.NextPage(context.TODO())
		if err != nil {
			return []ec2types.InstanceTopology{}, err
		}
		topologies = append(topologies, page.Instances...)
	}
	return topologies, nil
}

// DescribeInstanceType returns the InstanceTypeInfo for the named instance type.
//
// It returns an error if the DescribeInstanceTypes call fails (the underlying error is wrapped and
// can be inspected with errors.Is/errors.As) or if the response contains no instance types.
func (c *ec2Client) DescribeInstanceType(instanceType string) (ec2types.InstanceTypeInfo, error) {
	describeResponse, err := c.client.DescribeInstanceTypes(context.TODO(), &ec2.DescribeInstanceTypesInput{
		InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(instanceType)},
	})
	if err != nil {
		return ec2types.InstanceTypeInfo{}, fmt.Errorf("failed to describe instance type: %s: %w", instanceType, err)
	}
	// Guard the index below: an empty result would otherwise panic.
	if len(describeResponse.InstanceTypes) == 0 {
		return ec2types.InstanceTypeInfo{}, fmt.Errorf("no instance type info returned for instance type: %s", instanceType)
	}
	return describeResponse.InstanceTypes[0], nil
}


================================================
FILE: internal/e2e/health.go
================================================
package e2e

import (
	"context"
	"fmt"
	"strings"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

// KubeletIsResponsive queries the kubelet /healthz endpoint of nodeName through the API server's
// node proxy and returns true when the response status code is 200. Connection-level failures that
// point at an unstable kubelet connection yield (false, nil); every other request error is propagated.
func KubeletIsResponsive(ctx context.Context, cfg *rest.Config, nodeName string) (bool, error) {
	clientset, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return false, fmt.Errorf("failed to initialize client set: %v", err)
	}

	result := clientset.CoreV1().RESTClient().
		Get().
		Resource("nodes").
		Name(nodeName).
		SubResource("proxy").
		Suffix("/healthz").
		Do(ctx)

	if reqErr := result.Error(); reqErr != nil {
		msg := reqErr.Error()
		// TODO: match errors against types, e.g. syscall.ECONNREFUSED instead, the k8s client doesn't
		// currently properly wrap the underlying error to allow this though
		unstableConnection := []string{
			"connection refused",
			"connection reset by peer",
			"http2: client connection lost",
		}
		for _, fragment := range unstableConnection {
			if strings.Contains(msg, fragment) {
				// these errors indicate reachability to the node in general but an unstable connection to kubelet
				return false, nil
			}
		}

		// propagate other errors, e.g. i/o timeout, that may result from things unrelated to kubelet health,
		// e.g. security group rules on the instance restricting traffic from the CP
		return false, fmt.Errorf("could not reach /healthz endpoint for node %s: %w", nodeName, reqErr)
	}

	var code int
	result.StatusCode(&code)
	return code == 200, nil
}


================================================
FILE: internal/e2e/logs.go
================================================
package e2e

import (
	"context"
	"fmt"
	"io"
	"testing"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

// PrintDaemonSetPodLogs writes, via t.Logf, the phase of every pod matching labelSelector in
// namespace followed by the logs of each of its containers.
// namespace & labelSelector identify the DaemonSet's pods (e.g. "default", "app=containerd-check").
// Failures are logged rather than returned: a clientset or list error ends the function early, and
// a per-container log error is logged before moving on to the next container.
func PrintDaemonSetPodLogs(
	t *testing.T,
	ctx context.Context,
	restConfig *rest.Config,
	namespace string,
	labelSelector string,
) {
	clientset, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		t.Logf("failed to create typed clientset: %v", err)
		return
	}

	listOpts := metav1.ListOptions{LabelSelector: labelSelector}
	pods, err := clientset.CoreV1().Pods(namespace).List(ctx, listOpts)
	if err != nil {
		t.Logf("failed to list pods: %v", err)
		return
	}
	if len(pods.Items) == 0 {
		t.Logf("No pods found for DaemonSet with label %q in namespace %q.", labelSelector, namespace)
		return
	}

	for i := range pods.Items {
		pod := &pods.Items[i]
		t.Logf("Pod %s status: %s", pod.Name, pod.Status.Phase)
		for _, container := range pod.Spec.Containers {
			logs, logErr := ReadPodLogs(ctx, restConfig, pod.Namespace, pod.Name, container.Name)
			if logErr == nil {
				t.Logf("=== Logs from %s/%s ===\n%s", pod.Name, container.Name, logs)
				continue
			}
			t.Logf("Failed reading logs from %s/%s: %v", pod.Name, container.Name, logErr)
		}
	}
}

// ReadPodLogs opens the log stream of containerName in pod namespace/podName, reads it to the end
// and returns the content as a string. The stream is closed before returning. Errors creating the
// clientset, opening the stream or reading from it are wrapped and returned.
func ReadPodLogs(
	ctx context.Context,
	restConfig *rest.Config,
	namespace, podName, containerName string,
) (string, error) {
	clientset, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		return "", fmt.Errorf("failed to create typed clientset: %w", err)
	}

	logOpts := corev1.PodLogOptions{Container: containerName}
	stream, err := clientset.CoreV1().Pods(namespace).GetLogs(podName, &logOpts).Stream(ctx)
	if err != nil {
		return "", fmt.Errorf("failed to open log stream for %s/%s: %w", podName, containerName, err)
	}
	defer stream.Close()

	content, err := io.ReadAll(stream)
	if err != nil {
		return "", fmt.Errorf("error reading logs: %w", err)
	}
	return string(content), nil
}


================================================
FILE: internal/e2e/mpijobs/conditions.go
================================================
package mpijobs

import (
	"fmt"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"sigs.k8s.io/e2e-framework/klient/k8s"
)

// MPIJobSucceeded reports whether obj, which must be an *unstructured.Unstructured, has an entry in
// .status.conditions with type "Succeeded" and status "True".
// It returns false when .status.conditions is absent or holds no decisive "Succeeded" entry, and
// panics when the object does not match the expected schema.
func MPIJobSucceeded(obj k8s.Object) bool {
	content := obj.(*unstructured.Unstructured).Object
	conditions, ok, err := unstructured.NestedSlice(content, "status", "conditions")
	if err != nil {
		panic(fmt.Errorf("MPIJob does not match expected schema: %v", err))
	}
	if !ok {
		return false
	}
	for _, raw := range conditions {
		entry := raw.(map[string]interface{})
		kind, hasKind, err := unstructured.NestedString(entry, "type")
		if err != nil {
			panic(fmt.Errorf("MPIJob does not match expected schema: %v", err))
		}
		if !hasKind || kind != "Succeeded" {
			continue
		}
		state, hasState, err := unstructured.NestedString(entry, "status")
		if err != nil {
			panic(fmt.Errorf("MPIJob does not match expected schema: %v", err))
		}
		// A "Succeeded" entry without a status is skipped; the first one with a status decides.
		if hasState {
			return state == "True"
		}
	}
	return false
}


================================================
FILE: internal/e2e/mpijobs/conditions_test.go
================================================
package mpijobs

import (
	"testing"

	"github.com/stretchr/testify/assert"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)

// Test_MPIJobSucceeded checks that MPIJobSucceeded follows the status of the "Succeeded" condition.
func Test_MPIJobSucceeded(t *testing.T) {
	// withSucceededStatus builds an object whose only condition is "Succeeded" with the given status.
	withSucceededStatus := func(status string) *unstructured.Unstructured {
		condition := map[string]interface{}{
			"type":   "Succeeded",
			"status": status,
		}
		return &unstructured.Unstructured{
			Object: map[string]interface{}{
				"status": map[string]interface{}{
					"conditions": []interface{}{condition},
				},
			},
		}
	}

	assert.True(t, MPIJobSucceeded(withSucceededStatus("True")))
	assert.False(t, MPIJobSucceeded(withSucceededStatus("False")))
}


================================================
FILE: internal/e2e/mpijobs/types.go
================================================
package mpijobs

import (
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// MPIJobGVK is the GroupVersionKind of the MPIJob resource (kubeflow.org/v2beta1).
var MPIJobGVK = schema.GroupVersionKind{
	Group:   "kubeflow.org",
	Version: "v2beta1",
	Kind:    "MPIJob",
}

// NewUnstructured returns an unstructured object typed as MPIJobGVK with the given name and
// namespace set and no other content.
func NewUnstructured(name, namespace string) *unstructured.Unstructured {
	job := new(unstructured.Unstructured)
	job.SetGroupVersionKind(MPIJobGVK)
	job.SetNamespace(namespace)
	job.SetName(name)
	return job
}


================================================
FILE: internal/e2e/resources.go
================================================
package e2e

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// GetNonZeroResourceCapacity returns the node's capacity for resourceName as an int.
// It returns an error when the node does not list the resource or lists it with a capacity of zero.
func GetNonZeroResourceCapacity(node *v1.Node, resourceName string) (int, error) {
	quantity, found := node.Status.Capacity[v1.ResourceName(resourceName)]
	if !found {
		return 0, fmt.Errorf("node %q has no resource %q", node.Name, resourceName)
	}
	value := quantity.Value()
	if value == 0 {
		return 0, fmt.Errorf("node %q has zero capacity for resource %q", node.Name, resourceName)
	}
	return int(value), nil
}


================================================
FILE: internal/metrics/cloudwatch.go
================================================
package metrics

import (
	"context"
	"log/slog"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
	"github.com/aws/aws-sdk-go/aws"
)

// NewCloudWatchRegistry returns a MetricRegistry that buffers recorded values in memory and sends
// them through the given cloudwatch client when Emit is called.
func NewCloudWatchRegistry(cw *cloudwatch.Client) MetricRegistry {
	registry := &cloudwatchRegistry{cw: cw}
	registry.lock = new(sync.Mutex)
	registry.dataByNamespace = map[string][]*cloudwatchMetricDatum{}
	return registry
}

// cloudwatchRegistry is a MetricRegistry that buffers metric values per namespace until Emit
// sends them to CloudWatch.
type cloudwatchRegistry struct {
	// cw is the client used to call PutMetricData.
	cw *cloudwatch.Client
	// lock guards dataByNamespace.
	lock *sync.Mutex
	// dataByNamespace holds the recorded, not-yet-emitted values keyed by metric namespace.
	dataByNamespace map[string][]*cloudwatchMetricDatum
}

// cloudwatchMetricDatum is a single recorded metric value awaiting emission.
type cloudwatchMetricDatum struct {
	// spec identifies the metric the value belongs to.
	spec *MetricSpec
	// value is the recorded measurement.
	value float64
	// dimensions are the name/value pairs attached to the value.
	dimensions map[string]string
	// timestamp is the time at which the value was recorded.
	timestamp time.Time
}

// Record buffers value for the metric described by spec, stamped with the current time, under the
// spec's namespace. Nothing is sent until Emit is called.
func (r *cloudwatchRegistry) Record(spec *MetricSpec, value float64, dimensions map[string]string) {
	r.lock.Lock()
	defer r.lock.Unlock()

	namespace := spec.Namespace
	datum := &cloudwatchMetricDatum{
		spec:       spec,
		value:      value,
		dimensions: dimensions,
		timestamp:  time.Now(),
	}
	r.dataByNamespace[namespace] = append(r.dataByNamespace[namespace], datum)
}

// Emit sends every buffered metric value to CloudWatch, one namespace at a time, and then empties
// the registry.
//
// Values are sent in batches of at most 1000 per PutMetricData call. If a call fails, its error is
// returned immediately and the registry is left as it was, so values from batches that were
// already sent remain buffered.
func (r *cloudwatchRegistry) Emit() error {
	// we can emit up to 1000 values per PutMetricData
	const maxValuesPerPut = 1000

	r.lock.Lock()
	defer r.lock.Unlock()
	for namespace, data := range r.dataByNamespace {
		for start := 0; start < len(data); start += maxValuesPerPut {
			// Clamp the batch to the remaining data so the final, partial batch never reads past the end.
			end := start + maxValuesPerPut
			if end > len(data) {
				end = len(data)
			}
			var metricData []types.MetricDatum
			for _, datum := range data[start:end] {
				var dimensions []types.Dimension
				for key, val := range datum.dimensions {
					dimensions = append(dimensions, types.Dimension{
						Name:  aws.String(key),
						Value: aws.String(val),
					})
				}
				metricData = append(metricData, types.MetricDatum{
					MetricName: aws.String(datum.spec.Metric),
					Value:      aws.Float64(datum.value),
					Dimensions: dimensions,
					Timestamp:  &datum.timestamp,
				})
			}
			_, err := r.cw.PutMetricData(context.TODO(), &cloudwatch.PutMetricDataInput{
				Namespace:  aws.String(namespace),
				MetricData: metricData,
			})
			if err != nil {
				return err
			}
		}
		slog.Info("emitted metrics", "count", len(data), "namespace", namespace)
	}
	r.dataByNamespace = make(map[string][]*cloudwatchMetricDatum)
	return nil
}

// GetRegistered returns the number of buffered metric values across all namespaces.
func (r *cloudwatchRegistry) GetRegistered() int {
	r.lock.Lock()
	defer r.lock.Unlock()

	total := 0
	for namespace := range r.dataByNamespace {
		total += len(r.dataByNamespace[namespace])
	}
	return total
}


================================================
FILE: internal/metrics/noop.go
================================================
package metrics

// NewNoopMetricRegistry returns a MetricRegistry that discards every recorded value.
func NewNoopMetricRegistry() MetricRegistry {
	return new(noopRegistry)
}

// noopRegistry is a MetricRegistry that does nothing.
type noopRegistry struct{}

// Record discards the value.
func (r *noopRegistry) Record(spec *MetricSpec, value float64, dimensions map[string]string) {}

// Emit has nothing to send and always returns nil.
func (r *noopRegistry) Emit() error {
	return nil
}


================================================
FILE: internal/metrics/registry.go
================================================
package metrics

import (
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
)

// MetricRegistry collects metric values and emits them on demand.
type MetricRegistry interface {
	// Record adds a new metric value to the registry
	Record(spec *MetricSpec, value float64, dimensions map[string]string)
	// Emit sends all registered metric values to cloudwatch, emptying the registry
	Emit() error
}

// MetricSpec describes a metric that values can be recorded against.
type MetricSpec struct {
	// Namespace is the metric namespace the values are grouped and emitted under.
	Namespace string
	// Metric is the metric name.
	Metric string
	// Unit is the CloudWatch standard unit associated with the metric.
	Unit types.StandardUnit
}


================================================
FILE: internal/testers/ginkgov1/LICENSE.original
================================================
THIS IS A COPY OF THE ORIGINAL LICENSE FOR `kubetest2` AT COMMIT `d7fcb799ce84ceda66c8b9b1ec8eefcbe226f293`.

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: internal/testers/ginkgov1/README.md
================================================
This tester supports ginkgo 1.x versions, which were used for Kubernetes versions prior to 1.25.

---

This is a fork of the `ginkgo` tester: https://github.com/kubernetes-sigs/kubetest2/tree/master/pkg/testers/ginkgo

The fork originated at commit `d7fcb799ce84ceda66c8b9b1ec8eefcbe226f293`.

A copy of the original license is provided in the file named `LICENSE.original`.


================================================
FILE: internal/testers/ginkgov1/ginkgo.go
================================================
// This file has been modified in the following ways:
// 1. The `ginkgo` package has been renamed to `ginkgov1`.
// 2. The `--timeout` flag has been removed.
// 3. The `--flake-attempts` flag has been implemented for ginkgo 1.x versions.

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ginkgov1

import (
	"flag"
	"fmt"
	"os"
	stdexec "os/exec"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/kballard/go-shellquote"
	"github.com/urfave/sflags/gen/gpflag"
	"log/slog"

	"sigs.k8s.io/kubetest2/pkg/artifacts"
	"sigs.k8s.io/kubetest2/pkg/build"
	"sigs.k8s.io/kubetest2/pkg/exec"
	"sigs.k8s.io/kubetest2/pkg/testers"
)

// GitTag is the version string that Test writes to the run metadata.
// NOTE(review): presumably injected at build time (e.g. via -ldflags) — confirm against the build scripts.
var GitTag string

// Tester runs the Kubernetes e2e test binary through the ginkgo CLI.
// Exported fields carry a `desc` tag describing the option they hold; unexported fields hold
// state used while preparing and running the test.
type Tester struct {
	FlakeAttempts       int      `desc:"Make up to this many attempts to run each spec."`
	GinkgoArgs          string   `desc:"Additional arguments supported by the ginkgo binary."`
	Parallel            int      `desc:"Run this many tests in parallel at once."`
	SkipRegex           string   `desc:"Regular expression of jobs to skip."`
	FocusRegex          string   `desc:"Regular expression of jobs to focus on."`
	TestPackageVersion  string   `desc:"The ginkgo tester uses a test package made during the kubernetes build. The tester downloads this test package from one of the release tars published to the Release bucket. Defaults to latest. visit https://kubernetes.io/releases/ to find release names. Example: v1.20.0-alpha.0"`
	TestPackageBucket   string   `desc:"The bucket which release tars will be downloaded from to acquire the test package. Defaults to the main kubernetes project bucket."`
	TestPackageDir      string   `desc:"The directory in the bucket which represents the type of release. Default to the release directory."`
	TestPackageMarker   string   `desc:"The version marker in the directory containing the package version to download when unspecified. Defaults to latest.txt."`
	TestArgs            string   `desc:"Additional arguments supported by the e2e test framework (https://godoc.org/k8s.io/kubernetes/test/e2e/framework#TestContextType)."`
	UseBuiltBinaries    bool     `desc:"Look for binaries in _rundir/$KUBETEST2_RUN_DIR instead of extracting from tars downloaded from GCS."`
	UseBinariesFromPath bool     `desc:"Look for binaries in the $PATH instead of extracting from tars downloaded from GCS."`
	Env                 []string `desc:"List of env variables to pass to ginkgo libraries"`

	// kubeconfigPath is passed to the e2e test binary as --kubeconfig.
	kubeconfigPath string
	runDir         string

	// These paths are set up by AcquireTestPackage()
	e2eTestPath string
	ginkgoPath  string
	kubectlPath string
}

// Test runs the e2e test suite through the ginkgo binary.
//
// It writes the tester version to the run metadata, performs pre-test setup, assembles the
// arguments for the e2e test binary and for ginkgo, and then executes ginkgo with the configured
// environment, inheriting this process's output streams.
//
// It returns an error if the metadata cannot be written, setup fails, the ginkgo major version is
// unsupported, --test-args or --ginkgo-args cannot be parsed, or the ginkgo command fails.
func (t *Tester) Test() error {
	if err := testers.WriteVersionToMetadata(GitTag, ""); err != nil {
		return err
	}

	if err := t.pretestSetup(); err != nil {
		return err
	}

	e2eTestArgs := []string{
		"--kubeconfig=" + t.kubeconfigPath,
		"--kubectl-path=" + t.kubectlPath,
		"--ginkgo.skip=" + t.SkipRegex,
		"--ginkgo.focus=" + t.FocusRegex,
		"--report-dir=" + artifacts.BaseDir(),
	}

	// some ginkgo flags and behaviors are not backwards compatible
	switch v := t.ginkgoMajorVersion(); v {
	case "1":
		e2eTestArgs = append(e2eTestArgs,
			"--ginkgo.flakeAttempts="+strconv.Itoa(t.FlakeAttempts),
		)
	case "2":
		e2eTestArgs = append(e2eTestArgs,
			"--ginkgo.flake-attempts="+strconv.Itoa(t.FlakeAttempts),
		)
	default:
		return fmt.Errorf("unsupported ginkgo version: %s", v)
	}

	extraE2EArgs, err := shellquote.Split(t.TestArgs)
	if err != nil {
		return fmt.Errorf("error parsing --test-args: %w", err)
	}
	e2eTestArgs = append(e2eTestArgs, extraE2EArgs...)

	extraGinkgoArgs, err := shellquote.Split(t.GinkgoArgs)
	if err != nil {
		return fmt.Errorf("error parsing --ginkgo-args: %w", err)
	}

	// ginkgo's own arguments come first, then the test binary, then "--" followed by the
	// arguments meant for the test binary itself.
	ginkgoArgs := append(extraGinkgoArgs,
		"--nodes="+strconv.Itoa(t.Parallel),
		t.e2eTestPath,
		"--")
	ginkgoArgs = append(ginkgoArgs, e2eTestArgs...)

	slog.Info("running ginkgo test", "path", t.ginkgoPath, "args", ginkgoArgs)
	cmd := exec.Command(t.ginkgoPath, ginkgoArgs...)
	cmd.SetEnv(t.Env...)
	exec.InheritOutput(cmd)
	return cmd.Run()
}

// pretestSetup resolves the kubeconfig path and locates the test binaries.
//
// The kubeconfig comes from $KUBECONFIG when set (made absolute if needed),
// otherwise it defaults to ~/.kube/config. Binaries are then taken from the
// run directory, from $PATH, or downloaded, depending on the tester flags.
func (t *Tester) pretestSetup() error {
	kubeconfig := os.Getenv("KUBECONFIG")
	switch {
	case kubeconfig == "":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			return fmt.Errorf("failed to find home directory: %v", err)
		}
		kubeconfig = filepath.Join(homeDir, ".kube", "config")
	case !filepath.IsAbs(kubeconfig):
		// The ginkgo tester errors out if the kubeconfig provided
		// is not an absolute path, likely because ginkgo changes its
		// working directory while executing. Rewriting the path here
		// works around that.
		absolute, err := filepath.Abs(kubeconfig)
		if err != nil {
			return fmt.Errorf("failed to convert kubeconfig to absolute path: %s", err)
		}
		slog.Info("ginkgo tester received non-absolute KUBECONFIG path, updating", "path", absolute)
		kubeconfig = absolute
	}
	t.kubeconfigPath = kubeconfig
	slog.Info("using kubeconfig", "path", t.kubeconfigPath)

	switch {
	case t.UseBuiltBinaries:
		return t.validateLocalBinaries()
	case t.UseBinariesFromPath:
		return t.validateBinariesFromPath()
	}

	err := t.AcquireTestPackage()
	if err != nil {
		return fmt.Errorf("failed to get ginkgo test package from published releases: %s", err)
	}
	return nil
}

// validateLocalBinaries checks that every binary in build.CommonTestBinaries
// exists in the run directory and then points the tester at the e2e.test,
// ginkgo and kubectl files there. The first missing binary aborts with an
// error that includes the (preferably absolute) path that was checked.
func (t *Tester) validateLocalBinaries() error {
	slog.Debug("checking existing test binaries...")
	for _, name := range build.CommonTestBinaries {
		candidate := filepath.Join(t.runDir, name)
		_, statErr := os.Stat(candidate)
		if statErr == nil {
			slog.Debug("found existing binary", "binary", name, "path", candidate)
			continue
		}

		// Prefer an absolute path in the error; fall back to the joined one.
		reported := candidate
		if abs, absErr := filepath.Abs(candidate); absErr == nil {
			reported = abs
		} else {
			slog.Warn("failed to convert path to absolute path", "path", candidate, "error", absErr)
		}
		return fmt.Errorf("failed to validate pre-built binary %s (checked at %q): %w", name, reported, statErr)
	}

	t.e2eTestPath = filepath.Join(t.runDir, "e2e.test")
	t.ginkgoPath = filepath.Join(t.runDir, "ginkgo")
	t.kubectlPath = filepath.Join(t.runDir, "kubectl")
	return nil
}

// validateBinariesFromPath looks up every binary in build.CommonTestBinaries
// on $PATH and stores the resolved locations of e2e.test, ginkgo and kubectl
// on the tester. The first binary that cannot be found aborts with an error.
func (t *Tester) validateBinariesFromPath() error {
	slog.Debug("checking for test binaries on PATH...")

	// Destination field for each binary whose path the tester records.
	destinations := map[string]*string{
		"e2e.test": &t.e2eTestPath,
		"ginkgo":   &t.ginkgoPath,
		"kubectl":  &t.kubectlPath,
	}

	for _, name := range build.CommonTestBinaries {
		resolved, err := stdexec.LookPath(name)
		if err != nil {
			return fmt.Errorf("failed to validate binary %s from PATH: %w", name, err)
		}
		slog.Debug("found existing binary", "binary", name, "path", resolved)
		if field, tracked := destinations[name]; tracked {
			*field = resolved
		}
	}
	return nil
}

// ginkgoMajorVersion runs "<ginkgo> version" and returns the major component
// of the reported version. It returns the empty string when the command
// fails or its output is not a single line of the expected form.
func (t *Tester) ginkgoMajorVersion() string {
	slog.Debug("checking ginkgo version...")
	output, err := exec.OutputLines(exec.Command(t.ginkgoPath, "version"))
	if err != nil || len(output) != 1 {
		return ""
	}

	// the output is in the format
	// Ginkgo Version 1.14.0
	// Ginkgo Version 2.1.4
	if words := strings.Split(output[0], " "); len(words) == 3 {
		if semver := strings.Split(words[2], "."); len(semver) == 3 {
			return semver[0]
		}
	}
	return ""
}

// Execute parses the command line into the tester's fields, handles -h/--help,
// initializes the run directory information and then runs the tests.
func (t *Tester) Execute() error {
	flagSet, err := gpflag.Parse(t)
	if err != nil {
		return fmt.Errorf("failed to initialize tester: %v", err)
	}
	flagSet.AddGoFlagSet(flag.CommandLine)
	showHelp := flagSet.BoolP("help", "h", false, "")

	if err = flagSet.Parse(os.Args); err != nil {
		return fmt.Errorf("failed to parse flags: %v", err)
	}

	if *showHelp {
		flagSet.SetOutput(os.Stdout)
		flagSet.PrintDefaults()
		return nil
	}

	if err = t.initKubetest2Info(); err != nil {
		return err
	}
	return t.Test()
}

// initKubetest2Info validates the binary-source flags and determines the run
// directory: $KUBETEST2_RUN_DIR when set, otherwise the artifacts run dir when
// built binaries are requested, otherwise the current working directory.
func (t *Tester) initKubetest2Info() error {
	if t.UseBuiltBinaries && t.UseBinariesFromPath {
		return fmt.Errorf("--use-built-binaries and --use-binaries-from-path are mutually exclusive")
	}

	envDir, fromEnv := os.LookupEnv("KUBETEST2_RUN_DIR")
	switch {
	case fromEnv:
		t.runDir = envDir
	case t.UseBuiltBinaries:
		// ginkgo/e2e.test/kubectl can be found in rundir when they are built
		t.runDir = artifacts.RunDir()
	default:
		// fall back to the current working directory if the env is not set
		cwd, err := os.Getwd()
		if err != nil {
			return fmt.Errorf("failed to set run dir: %v", err)
		}
		t.runDir = cwd
	}
	return nil
}

// SetRunDir overrides the directory in which the tester looks for pre-built
// test binaries.
func (t *Tester) SetRunDir(dir string) {
	t.runDir = dir
}

// NewDefaultTester returns a Tester pre-populated with the default values:
// one flake attempt, no parallelism, and the "kubernetes-release" bucket,
// "release" directory and "latest.txt" marker for the test package.
func NewDefaultTester() *Tester {
	defaults := new(Tester)
	defaults.FlakeAttempts = 1
	defaults.Parallel = 1
	defaults.TestPackageBucket = "kubernetes-release"
	defaults.TestPackageDir = "release"
	defaults.TestPackageMarker = "latest.txt"
	defaults.Env = nil
	return defaults
}

// Main is the entry point for the ginkgo tester: it runs a default Tester and
// exits with status 1 if execution fails.
func Main() {
	err := NewDefaultTester().Execute()
	if err == nil {
		return
	}
	slog.Error("failed to run ginkgo tester", "error", err)
	os.Exit(1)
}


================================================
FILE: internal/testers/ginkgov1/kubectl/kubectl.go
================================================
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubectl

import (
	"fmt"
	"os"

	"sigs.k8s.io/kubetest2/pkg/exec"
)

const (
	kubectl = "kubectl"
)

// APIServerURL obtains the URL of the k8s master from kubectl.
//
// It issues three "kubectl config view" queries: the current context, the
// cluster that context refers to, and that cluster's server URL.
// NOTE(review): the first two jsonpath templates are wrapped in literal double
// quotes, presumably so the (quoted) result can be spliced straight into the
// next filter expression — confirm against kubectl's jsonpath output.
func APIServerURL() (string, error) {
	currentContext, err := execAndResult(kubectl, "config", "view", "-o", "jsonpath=\"{.current-context}\"")
	if err != nil {
		return "", fmt.Errorf("Could not get kube context: %v", err)
	}

	clusterQuery := fmt.Sprintf("jsonpath=\"{.contexts[?(@.name == %s)].context.cluster}\"", currentContext)
	clusterName, err := execAndResult(kubectl, "config", "view", "-o", clusterQuery)
	if err != nil {
		return "", fmt.Errorf("Could not get cluster name: %v", err)
	}

	serverQuery := fmt.Sprintf("jsonpath={.clusters[?(@.name == %s)].cluster.server}", clusterName)
	serverURL, err := execAndResult(kubectl, "config", "view", "-o", serverQuery)
	if err != nil {
		// Drop any partial output on failure.
		return "", err
	}
	return serverURL, nil
}

// execAndResult runs command with args, forwarding the child's stderr to this
// process's stderr, and returns the captured output as a string together with
// any execution error.
func execAndResult(command string, args ...string) (string, error) {
	child := exec.Command(command, args...)
	child.SetStderr(os.Stderr)
	output, runErr := exec.Output(child)
	return string(output), runErr
}


================================================
FILE: internal/testers/ginkgov1/package.go
================================================
// This file has been modified in the following ways:
// 1. The `ginkgo` package has been renamed to `ginkgov1`.

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ginkgov1

import (
	"archive/tar"
	"compress/gzip"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"strings"

	"log/slog"
	"sigs.k8s.io/kubetest2/pkg/artifacts"
	"sigs.k8s.io/kubetest2/pkg/exec"
)

// AcquireTestPackage obtains three test binaries and places them in $KUBETEST2_RUN_DIR.
// The first is "ginkgo", the actual ginkgo executable.
// The second is "e2e.test", which contains kubernetes e2e test cases.
// The third is "kubectl".
//
// When no TestPackageVersion is configured, the version is read from the
// marker file in the release bucket. The release tarball is cached in the
// user cache directory and reused when its hash still matches.
func (t *Tester) AcquireTestPackage() error {
	if t.TestPackageVersion == "" {
		// Resolve the name of the latest release (e.g. v1.20.0-alpha.0).
		markerURL := fmt.Sprintf("gs://%s/%s/%s", t.TestPackageBucket, t.TestPackageDir, t.TestPackageMarker)
		lines, err := exec.OutputLines(exec.Command("gsutil", "cat", markerURL))
		switch {
		case err != nil:
			return fmt.Errorf("failed to get latest release name: %s", err)
		case len(lines) == 0:
			return fmt.Errorf("getting latest release name had no output")
		}
		t.TestPackageVersion = lines[0]

		slog.Info("test package version not specified, using default", "marker", t.TestPackageMarker, "version", t.TestPackageVersion)
	}

	tarName := fmt.Sprintf("kubernetes-test-%s-%s.tar.gz", runtime.GOOS, runtime.GOARCH)

	cacheDir, err := os.UserCacheDir()
	if err != nil {
		return fmt.Errorf("failed to get user cache directory: %v", err)
	}
	tarPath := filepath.Join(cacheDir, tarName)

	if err = t.ensureReleaseTar(tarPath, tarName); err != nil {
		return err
	}
	if err = t.extractBinaries(tarPath); err != nil {
		return err
	}

	t.kubectlPath = filepath.Join(artifacts.RunDir(), "kubectl")
	return t.ensureKubectl(t.kubectlPath)
}

// extractBinaries unpacks the e2e.test and ginkgo binaries from the gzipped
// release tarball at downloadPath into the artifacts run directory and
// records their locations on the tester.
//
// It returns an error if the directories cannot be created, the archive
// cannot be read, a binary cannot be written, or either binary is missing
// from the archive.
func (t *Tester) extractBinaries(downloadPath string) error {
	// ensure the artifacts dir
	if err := os.MkdirAll(artifacts.BaseDir(), os.ModePerm); err != nil {
		return err
	}
	// ensure the rundir
	if err := os.MkdirAll(artifacts.RunDir(), os.ModePerm); err != nil {
		return err
	}

	// Extract files from the test package
	f, err := os.Open(downloadPath)
	if err != nil {
		return fmt.Errorf("failed to open downloaded tar at %s: %s", downloadPath, err)
	}
	defer f.Close()
	gzf, err := gzip.NewReader(f)
	if err != nil {
		return fmt.Errorf("failed to create gzip reader: %s", err)
	}
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)

	// Map of paths in archive to destination paths
	t.e2eTestPath = filepath.Join(artifacts.RunDir(), "e2e.test")
	t.ginkgoPath = filepath.Join(artifacts.RunDir(), "ginkgo")
	extract := map[string]string{
		"kubernetes/test/bin/e2e.test": t.e2eTestPath,
		"kubernetes/test/bin/ginkgo":   t.ginkgoPath,
	}
	extracted := map[string]bool{}

	// Stop reading as soon as every wanted entry has been written.
	for len(extracted) < len(extract) {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("error during tar read: %s", err)
		}

		dest := extract[header.Name]
		if dest == "" {
			continue
		}
		if err := writeExtractedBinary(dest, header.Name, tarReader); err != nil {
			return err
		}
		extracted[header.Name] = true
	}
	for k := range extract {
		if !extracted[k] {
			return fmt.Errorf("failed to find %s in %s", k, downloadPath)
		}
	}
	return nil
}

// writeExtractedBinary copies one tar entry from src into an executable file
// at dest. The file is closed before returning (rather than being deferred to
// the end of the whole extraction) and a failure to close is reported, since
// it can mean the binary was not fully written.
func writeExtractedBinary(dest, entryName string, src io.Reader) (err error) {
	outFile, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("error creating file at %s: %s", dest, err)
	}
	defer func() {
		if closeErr := outFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing file at %s: %s", dest, closeErr)
		}
	}()

	if err := outFile.Chmod(0700); err != nil {
		return fmt.Errorf("failed to make %s executable: %s", dest, err)
	}

	if _, err := io.Copy(outFile, src); err != nil {
		return fmt.Errorf("error reading data from tar with header name %s: %s", entryName, err)
	}
	return nil
}

// ensureKubectl makes sure an executable kubectl for the configured release
// exists at downloadPath. An existing file is kept when its sha256 matches
// the one published next to the binary in GCS; otherwise kubectl is
// (re)downloaded with gsutil and marked executable.
func (t *Tester) ensureKubectl(downloadPath string) error {
	remoteKubectl := fmt.Sprintf(
		"gs://%s/%s/%s/bin/%s/%s/kubectl",
		t.TestPackageBucket,
		t.TestPackageDir,
		t.TestPackageVersion,
		runtime.GOOS,
		runtime.GOARCH,
	)

	if _, statErr := os.Stat(downloadPath); statErr == nil {
		slog.Info("found existing kubectl", "path", downloadPath)
		shaErr := t.compareSHA(downloadPath, remoteKubectl)
		if shaErr == nil {
			slog.Info("validated hash for existing kubectl", "path", downloadPath)
			return nil
		}
		// Fall through to a fresh download when validation fails.
		slog.Warn("hash validation failed", "error", shaErr)
	}

	download := exec.Command("gsutil", "cp", remoteKubectl, downloadPath)
	exec.InheritOutput(download)
	if runErr := download.Run(); runErr != nil {
		return fmt.Errorf("failed to download kubectl for release %s: %s", t.TestPackageVersion, runErr)
	}

	if chmodErr := os.Chmod(downloadPath, 0700); chmodErr != nil {
		return fmt.Errorf("failed to make %s executable: %s", downloadPath, chmodErr)
	}
	return nil
}

// ensureReleaseTar makes sure the kubernetes test tarball for the configured
// release exists at downloadPath. An existing file is kept when its sha256
// matches the one published in GCS; otherwise the tarball is (re)downloaded
// with gsutil.
func (t *Tester) ensureReleaseTar(downloadPath, releaseTar string) error {
	remoteTar := fmt.Sprintf(
		"gs://%s/%s/%s/%s",
		t.TestPackageBucket,
		t.TestPackageDir,
		t.TestPackageVersion,
		releaseTar,
	)

	if _, statErr := os.Stat(downloadPath); statErr == nil {
		slog.Info("found existing tar", "path", downloadPath)
		shaErr := t.compareSHA(downloadPath, remoteTar)
		if shaErr == nil {
			slog.Info("validated hash for existing tar", "path", downloadPath)
			return nil
		}
		// Fall through to a fresh download when validation fails.
		slog.Warn("hash validation failed", "error", shaErr)
	}

	download := exec.Command("gsutil", "cp", remoteTar, downloadPath)
	exec.InheritOutput(download)
	if runErr := download.Run(); runErr != nil {
		return fmt.Errorf("failed to download release tar %s for release %s: %s", releaseTar, t.TestPackageVersion, runErr)
	}
	return nil
}

// compareSHA fetches "<gcsFilePath>.sha256" with gsutil and compares it (minus
// one trailing newline) with the sha256 of the local file at downloadPath.
// It returns nil only when the two digests are equal.
func (t *Tester) compareSHA(downloadPath string, gcsFilePath string) error {
	shaURL := fmt.Sprintf("%s.sha256", gcsFilePath)
	published, err := exec.Output(exec.Command("gsutil", "cat", shaURL))
	if err != nil {
		return fmt.Errorf("failed to get sha256 for file %s for release %s: %s", gcsFilePath, t.TestPackageVersion, err)
	}

	local, err := sha256sum(downloadPath)
	if err != nil {
		return fmt.Errorf("failed to compute sha256 for %q: %v", downloadPath, err)
	}

	if want := strings.TrimSuffix(string(published), "\n"); local != want {
		return fmt.Errorf("sha256 does not match")
	}
	return nil
}

// sha256sum streams the file at path through SHA-256 and returns the digest
// as a lowercase hex string. It returns an empty string and the error if the
// file cannot be opened or read.
func sha256sum(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}


================================================
FILE: internal/testers/multi/cmd.go
================================================
package multi

import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strings"

	"github.com/aws/aws-k8s-tester/internal"
	"github.com/urfave/sflags/gen/gpflag"
	"sigs.k8s.io/kubetest2/pkg/app/shim"
	"sigs.k8s.io/kubetest2/pkg/artifacts"
	"sigs.k8s.io/kubetest2/pkg/process"
	"sigs.k8s.io/kubetest2/pkg/testers"
)

const TesterName = "multi"

const usage = `kubetest2 --test=multi -- [MultiTesterDriverArgs] -- [TesterName] [TesterArgs] -- ...

  MultiTesterDriverArgs: arguments passed to the multi-tester driver

  TesterName: the name of the tester to run
  TesterArgs: arguments passed to tester

  Each tester clause is separated by "--".
`

// Main is the entry point for the multi tester: it runs execute and exits
// with status 1 if that fails.
func Main() {
	err := execute()
	if err == nil {
		return
	}
	slog.Error("failed to run multi tester", "error", err)
	os.Exit(1)
}

// multiTesterDriver holds the arguments addressed to the multi-tester driver
// itself, i.e. everything on the command line before the first "--".
type multiTesterDriver struct {
	argv []string
}

// tester describes one tester invocation parsed from a command-line clause.
type tester struct {
	name string   // tester name, the first word of the clause
	path string   // location of the tester binary as resolved by shim.FindTester
	args []string // remaining clause words after environment expansion
}

// execute parses the command line into driver arguments and tester clauses,
// handles usage/help, records the driver version in the run metadata and then
// runs every requested tester in order.
func execute() error {
	driverArgs, testerClauses := splitArguments(os.Args)
	driver := multiTesterDriver{argv: driverArgs}

	flagSet, err := gpflag.Parse(&driver)
	if err != nil {
		return fmt.Errorf("failed to initialize tester: %v", err)
	}
	flagSet.Usage = func() {
		fmt.Print(usage)
	}

	// Nothing to run without at least one tester clause.
	if len(testerClauses) == 0 {
		flagSet.Usage()
		return nil
	}

	// gracefully handle -h or --help if it is the only argument
	showHelp := flagSet.BoolP("help", "h", false, "")
	failFast := flagSet.Bool("fail-fast", false, "Exit immediately if any tester fails")

	// A parse failure prints the usage text and is returned to the caller.
	if err = flagSet.Parse(driver.argv); err != nil {
		flagSet.Usage()
		return err
	}

	if *showHelp {
		flagSet.Usage()
		return nil
	}

	if err = testers.WriteVersionToMetadata(internal.Version, ""); err != nil {
		return err
	}

	prepared, err := prepareTesters(testerClauses)
	if err != nil {
		return err
	}
	return test(prepared, *failFast)
}

// test runs each tester in order and returns the joined errors of every
// tester that failed (nil when all succeed).
//
// The driver's metadata.json is moved aside for the duration of the run and
// restored afterwards, and the file is deleted after each tester, because
// testers will try to set the tester-version key and cause conflicts.
//
// With failFast set, the first tester failure stops the run. A tester that
// never wrote metadata.json is tolerated; any other failure to delete the
// file stops the run, but the driver metadata is still restored and the
// errors collected so far are still reported.
func test(testers []tester, failFast bool) error {
	metadataPath := filepath.Join(artifacts.BaseDir(), "metadata.json")
	backupMetadataPath := metadataPath + ".bak"
	if err := os.Rename(metadataPath, backupMetadataPath); err != nil {
		slog.Error("failed to backup driver metadata", "error", err)
	}

	var runErrs []error
	for _, current := range testers {
		if err := current.run(); err != nil {
			slog.Error("tester failed", "tester", current, "error", err)
			runErrs = append(runErrs, fmt.Errorf("%+v: %v", current, err))
			if failFast {
				break
			}
		}
		// reset the metadata.json file
		// testers will try to set the tester-version key and cause conflicts
		if err := os.Remove(metadataPath); err != nil && !errors.Is(err, os.ErrNotExist) {
			runErrs = append(runErrs, fmt.Errorf("failed to delete tester metadata: %v", err))
			break
		}
	}

	if err := os.Rename(backupMetadataPath, metadataPath); err != nil {
		runErrs = append(runErrs, fmt.Errorf("failed to restore driver metadata: %v", err))
	}
	// errors.Join returns nil when there is nothing to report.
	return errors.Join(runErrs...)
}

// splitArguments partitions argv on every "--" separator. The first group is
// returned as the driver arguments and the remaining groups, in order, as the
// tester clauses. A separator with nothing after it yields an empty clause.
func splitArguments(argv []string) ([]string, [][]string) {
	var groups [][]string
	start := 0
	for idx := range argv {
		if argv[idx] != "--" {
			continue
		}
		groups = append(groups, argv[start:idx])
		start = idx + 1
	}
	// Whatever follows the last separator (or all of argv) is the final group.
	groups = append(groups, argv[start:])
	return groups[0], groups[1:]
}

// prepareTesters turns each tester clause into a runnable tester: the first
// word is the tester name, which is resolved to a binary with
// shim.FindTester, and the remaining words become its arguments after
// environment expansion.
//
// It returns an error if a clause is empty (for example from a trailing or
// doubled "--"), if a clause names the multi tester itself, or if the tester
// binary cannot be found.
func prepareTesters(testerClauses [][]string) ([]tester, error) {
	var prepared []tester
	for i, clause := range testerClauses {
		// Guard the clause[0] access below: an empty clause has no tester name.
		if len(clause) == 0 {
			return nil, fmt.Errorf("tester clause %d is empty: expected a tester name after %q", i+1, "--")
		}
		testerName := clause[0]
		if testerName == TesterName {
			return nil, fmt.Errorf("nesting isn't possible with the %s tester", TesterName)
		}
		path, err := shim.FindTester(testerName)
		if err != nil {
			return nil, err
		}
		prepared = append(prepared, tester{
			name: testerName,
			path: path,
			args: expandEnv(clause[1:]),
		})
	}
	return prepared, nil
}

// expandEnv returns a copy of args with environment variables expanded.
// An argument containing an escaped dollar (`\$`) is not expanded at all;
// instead every `\$` in it is replaced by a literal `$`.
func expandEnv(args []string) []string {
	result := make([]string, 0, len(args))
	for _, raw := range args {
		if !strings.Contains(raw, `\$`) {
			result = append(result, os.ExpandEnv(raw))
			continue
		}
		// best effort handle literal dollar for backward compatibility
		// this is not an all-purpose shell special character handler
		result = append(result, strings.ReplaceAll(raw, `\$`, `$`))
	}
	return result
}

// run logs the tester and executes its binary with its arguments and the
// current process environment through process.ExecJUnit, returning that
// call's error.
func (t *tester) run() error {
	slog.Info("running tester", "tester", t)
	return process.ExecJUnit(t.path, t.args, os.Environ())
}


================================================
FILE: internal/util/cloudformation.go
================================================
package util

import (
	"context"
	"fmt"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/cloudformation"
	types "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
)

// WrapCFNStackFailure enriches a stack creation error with the reasons
// reported by the stack's CREATE_FAILED events.
//
// It returns nil when createStackErr is nil. Otherwise it pages through the
// events of stackName, groups the logical resource IDs by failure reason and
// returns an error that wraps createStackErr and lists
// "<resources>: <reason>" entries separated by "--". If the events cannot be
// fetched, or none of them is a creation failure, createStackErr is returned
// unchanged.
//
// TODO: implement AWS client wrappers, and incorporate this into the cfn:CreateStack call
func WrapCFNStackFailure(ctx context.Context, cfnClient *cloudformation.Client, createStackErr error, stackName string) error {
	if createStackErr == nil {
		return nil
	}

	resourcesByReason := make(map[string][]string)
	eventsPaginator := cloudformation.NewDescribeStackEventsPaginator(cfnClient, &cloudformation.DescribeStackEventsInput{
		StackName: &stackName,
	})
	for eventsPaginator.HasMorePages() {
		page, err := eventsPaginator.NextPage(ctx)
		if err != nil {
			// Enrichment is best effort; fall back to the original error.
			return createStackErr
		}
		for _, event := range page.StackEvents {
			if event.ResourceStatus != types.ResourceStatusCreateFailed {
				continue
			}
			reason := aws.ToString(event.ResourceStatusReason)
			// append handles the first occurrence of a reason (nil slice) as well.
			resourcesByReason[reason] = append(resourcesByReason[reason], aws.ToString(event.LogicalResourceId))
		}
	}

	// Nothing to add: avoid returning the error with a dangling ": " suffix.
	if len(resourcesByReason) == 0 {
		return createStackErr
	}

	nonCancellationFailure := len(resourcesByReason) > 1
	var enhancedDetails []string
	for reason, resources := range resourcesByReason {
		if nonCancellationFailure && reason == "Resource creation cancelled" {
			// Ignore resource cancellation errors if there's another failure reported, those failures
			// would just be a consequence of that failure. If all the failures are resource cancellation,
			// then there was likely a user initiated delete of the whole stack based on a timeout
			// waiting for one of the resources to create
			continue
		}
		enhancedDetails = append(enhancedDetails, fmt.Sprintf("%s: %s", strings.Join(resources, ","), reason))
	}
	return fmt.Errorf("%w: %s", createStackErr, strings.Join(enhancedDetails, "--"))
}


================================================
FILE: internal/util/exec.go
================================================
package util

import (
	"os"
	"os/exec"
)

// ExecuteCommand runs the named program with args, sending the child's stdout
// and stderr to this process's stdout and stderr, and returns the error from
// running it.
func ExecuteCommand(name string, args ...string) error {
	child := exec.Command(name, args...)
	child.Stdout, child.Stderr = os.Stdout, os.Stderr
	return child.Run()
}


================================================
FILE: internal/util/http.go
================================================
package util

import (
	"fmt"
	"strings"

	"github.com/aws/smithy-go/middleware"
	smithyhttp "github.com/aws/smithy-go/transport/http"
)

const httpHeaderBoundary = ": "

// NewHTTPHeaderAPIOptions returns a slice of middleware options that adds the
// specified HTTP headers to an API request.
// Each header should be of the format `Header-Key: Header-Value`, in the same manner
// as headers are passed with `curl`-s `-H` flag.
// The header is split at the first ": "; a header without that separator is
// rejected with an error and no options are returned.
func NewHTTPHeaderAPIOptions(headers []string) ([]func(*middleware.Stack) error, error) {
	var options []func(*middleware.Stack) error
	for _, raw := range headers {
		name, value, found := strings.Cut(raw, httpHeaderBoundary)
		if !found {
			return nil, fmt.Errorf("malformed HTTP header: '%s'", raw)
		}
		options = append(options, smithyhttp.AddHeaderValue(name, value))
	}
	return options, nil
}


================================================
FILE: internal/util/http_test.go
================================================
package util

import (
	"testing"
)

// Test_NewHTTPHeaderAPIOptions checks that well-formed header lists are
// accepted and that a header without the ": " separator is rejected.
func Test_NewHTTPHeaderAPIOptions(t *testing.T) {
	type scenario struct {
		name        string
		headers     []string
		expectError bool
	}

	scenarios := []scenario{
		{name: "empty", headers: []string{}},
		{name: "single valid header", headers: []string{"Content-Type: application/json"}},
		{name: "multiple valid headers", headers: []string{"Content-Type: application/json", "Accept: application/json"}},
		{name: "invalid header", headers: []string{"Invalid header"}, expectError: true},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			_, err := NewHTTPHeaderAPIOptions(sc.headers)
			switch {
			case err != nil && !sc.expectError:
				t.Errorf("unexpected error: %v", err)
			case err == nil && sc.expectError:
				t.Error("expected error but got none")
			}
		})
	}
}


================================================
FILE: internal/util/lang.go
================================================
package util

// Must returns t when err is nil and panics with err otherwise. It is meant
// to wrap calls returning (T, error) where a failure is not recoverable.
func Must[T any](t T, err error) T {
	if err == nil {
		return t
	}
	panic(err)
}


================================================
FILE: internal/util/path.go
================================================
package util

import (
	"errors"
	"os"
	"path/filepath"
	"syscall"
)

// ErrFileNotFoundInPath is returned by LookPath when no directory on $PATH
// contains the requested file.
var ErrFileNotFoundInPath = errors.New("file not found in $PATH")

// LookPath finds a file on the PATH.
// It uses a similar process to exec.LookPath, but can find regular files.
// The first $PATH entry containing a non-directory named file wins; if none
// does, ErrFileNotFoundInPath is returned.
func LookPath(file string) (string, error) {
	for _, dir := range filepath.SplitList(os.Getenv("PATH")) {
		if dir == "" {
			// Unix shell semantics: path element "" means "."
			dir = "."
		}
		candidate := filepath.Join(dir, file)
		if checkFile(candidate) != nil {
			continue
		}
		return candidate, nil
	}
	return "", ErrFileNotFoundInPath
}

// checkFile reports whether file exists and is not a directory. It returns
// the os.Stat error if the file cannot be stat'ed, syscall.EISDIR if it is a
// directory, and nil otherwise.
func checkFile(file string) error {
	info, statErr := os.Stat(file)
	switch {
	case statErr != nil:
		return statErr
	case info.Mode().IsDir():
		return syscall.EISDIR
	default:
		return nil
	}
}


================================================
FILE: internal/util/version.go
================================================
package util

import (
	"fmt"
	"os"
	"strings"
)

// KubernetesVersionFile is the name of the file that DetectKubernetesVersion
// searches for on $PATH and reads the version tag from.
const KubernetesVersionFile = "kubernetes-version.txt"

// DetectKubernetesVersion locates KubernetesVersionFile on $PATH and returns
// the version it contains without the leading "v" (for example "v1.2.3"
// becomes "1.2.3"). Surrounding whitespace, such as a trailing newline, is
// stripped. It returns an error if the file cannot be found or read.
func DetectKubernetesVersion() (string, error) {
	versionFile, err := LookPath(KubernetesVersionFile)
	if err != nil {
		return "", err
	}
	contents, err := os.ReadFile(versionFile)
	if err != nil {
		return "", err
	}
	// "v1.2.3"
	versionTag := strings.TrimSpace(string(contents))
	// Only the leading "v" is part of the tag syntax; any other 'v' (e.g. in a
	// "-dev" suffix) belongs to the version and must be kept.
	return strings.TrimPrefix(versionTag, "v"), nil
}

// ParseMinorVersion returns the "<major>.<minor>" prefix of a dot-separated
// version string. It returns an error when the input has fewer than two
// dot-separated components.
func ParseMinorVersion(semanticVersion string) (string, error) {
	// Only the first two components matter; anything after stays in the tail.
	components := strings.SplitN(semanticVersion, ".", 3)
	if len(components) >= 2 {
		return components[0] + "." + components[1], nil
	}
	return "", fmt.Errorf("malformed semantic version: '%s'", semanticVersion)
}


================================================
FILE: internal/version.go
================================================
package internal

// Version is the version string of this project.
// NOTE(review): presumably injected at build time (e.g. via -ldflags) — confirm.
var Version string


================================================
FILE: test/cases/disruptive/graceful_reboot_test.go
================================================
//go:build e2e

package disruptive

import (
	"context"
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/awssdk"
	fwext "github.com/aws/aws-k8s-tester/internal/e2e"

	"github.com/aws/aws-sdk-go-v2/service/ec2"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// getSleepPodTemplate returns a pod in the "default" namespace, named name,
// with a single container of the same name that runs "sleep infinity" on the
// Amazon Linux 2023 image and is never restarted.
func getSleepPodTemplate(name string) corev1.Pod {
	sleeper := corev1.Container{
		Name:    name,
		Image:   "public.ecr.aws/amazonlinux/amazonlinux:2023",
		Command: []string{"sleep", "infinity"},
	}

	var pod corev1.Pod
	pod.ObjectMeta = metav1.ObjectMeta{
		Name:      name,
		Namespace: "default",
	}
	pod.Spec = corev1.PodSpec{
		Containers:    []corev1.Container{sleeper},
		RestartPolicy: corev1.RestartPolicyNever,
	}
	return pod
}

// TestGracefulReboot reboots the EC2 instance behind a healthy node and
// verifies that the node comes back and can run pods again.
//
// A canary pod picks the target node, the kubelet /healthz endpoint is used
// to observe the shutdown, and a second pod pinned to the same node signals
// that the node is schedulable again after the reboot. Both pods are deleted
// on teardown.
func TestGracefulReboot(t *testing.T) {
	terminationCanaryPodName := fmt.Sprintf("termination-canary-%d", time.Now().Unix())
	canaryPod := getSleepPodTemplate(terminationCanaryPodName)
	bootIndicatorPodName := fmt.Sprintf("boot-detection-%d", time.Now().Unix())
	bootIndicatorPod := getSleepPodTemplate(bootIndicatorPodName)

	feat := features.New("graceful-reboot").
		WithLabel("suite", "disruptive").
		Assess("Node gracefully reboots", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			// Create an initial pod to allow the default scheduler to do the work of identifying a healthy node.
			// Starting with a healthy node is essential to the test, as the only expectation is for the node to
			// return to its same initial state after the reboot.
			if err := cfg.Client().Resources().Create(ctx, &canaryPod); err != nil {
				t.Fatalf("Failed to create heartbeat pod: %v", err)
			}

			if err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).PodRunning(&canaryPod),
				wait.WithContext(ctx),
				wait.WithTimeout(5*time.Minute),
			); err != nil {
				t.Fatalf("Failed to wait for pod %s to go into running status: %v", terminationCanaryPodName, err)
			}

			var targetNode corev1.Node
			if err := cfg.Client().Resources().Get(ctx, canaryPod.Spec.NodeName, "", &targetNode); err != nil {
				t.Fatalf("Failed to get node %s: %v", canaryPod.Spec.NodeName, err)
			}

			t.Logf("Pod %s is running on node %s", terminationCanaryPodName, targetNode.Name)

			// Do an initial check of the /healthz endpoint reachability to ensure we can rely on it later.
			// This might fail even if the node is healthy if, for example, the node's security group rules
			// do not allow ingress traffic from the control plane.
			// Retry for up to 5 minutes to handle transient TLS errors during cert rotation.
			var kubeletResponsive bool
			var err error
			healthCheckCtx, healthCheckCancel := context.WithTimeout(ctx, 5*time.Minute)
			defer healthCheckCancel()
			for {
				kubeletResponsive, err = fwext.KubeletIsResponsive(healthCheckCtx, cfg.Client().RESTConfig(), targetNode.Name)
				if err == nil && kubeletResponsive {
					break
				}
				select {
				case <-healthCheckCtx.Done():
					t.Fatalf("Node %s is not responding to initial /healthz checks: %v", targetNode.Name, err)
				case <-time.After(5 * time.Second):
					t.Logf("Retrying /healthz check for node %s (last error: %v, responsive: %v)", targetNode.Name, err, kubeletResponsive)
				}
			}

			// The instance ID is the last path segment of the node's provider ID.
			providerIDParts := strings.Split(targetNode.Spec.ProviderID, "/")
			instanceID := providerIDParts[len(providerIDParts)-1]
			t.Logf("Rebooting underlying instance %s for node %s...", instanceID, targetNode.Name)

			ec2Client := ec2.NewFromConfig(awssdk.NewConfig())
			if _, err := ec2Client.RebootInstances(ctx, &ec2.RebootInstancesInput{
				InstanceIds: []string{instanceID},
			}); err != nil {
				t.Fatalf("Failed to reboot instance %s: %v", instanceID, err)
			}

			t.Logf("Successfully triggered reboot of instance %s, waiting for kubelet to become unresponsive...", instanceID)

			kubeletShutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
			defer cancel()

			// Poll with a single ticker instead of creating a new one on every
			// loop iteration.
			pollTicker := time.NewTicker(1 * time.Second)
			defer pollTicker.Stop()

			// Use kubelet health probes as the signal for instance shutdown. Since the health endpoint
			// could previously be reached, a refused connection implies kubelet was killed.
			for kubeletResponsive {
				select {
				case <-kubeletShutdownCtx.Done():
					// Report the error of the context that actually expired.
					t.Fatalf("Failed to wait for kubelet to become unresponsive: %v", kubeletShutdownCtx.Err())
				case <-pollTicker.C:
					// Bound each probe by the shutdown deadline so a hung request cannot outlive it.
					if kubeletResponsive, err = fwext.KubeletIsResponsive(kubeletShutdownCtx, cfg.Client().RESTConfig(), targetNode.Name); err != nil {
						t.Fatalf("Unexpected error while monitoring kubelet on node %s: %v", targetNode.Name, err)
					}
				}
			}

			t.Logf("Node %s has become unresponsive, waiting for the node to become schedulable again...", targetNode.Name)

			// Create a second pod, we will rely on this pod starting to run as an indication of a healthy state.
			// Since kubelet was killed at this point, we know the reboot must complete and kubelet must start
			// again for this pod to start running.
			bootIndicatorPod.Spec.NodeSelector = map[string]string{
				"kubernetes.io/hostname": targetNode.Name,
			}
			if err := cfg.Client().Resources().Create(ctx, &bootIndicatorPod); err != nil {
				t.Fatalf("Failed to create boot indicator pod: %v", err)
			}

			if err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).PodRunning(&bootIndicatorPod),
				wait.WithContext(ctx),
				wait.WithTimeout(10*time.Minute), // TODO: bring down this value after collecting some more data
			); err != nil {
				t.Fatalf("Failed to wait for pod to go into running status %s: %v", bootIndicatorPodName, err)
			}

			t.Logf("Node %s became ready and schedulable within %v!", targetNode.Name, time.Since(bootIndicatorPod.CreationTimestamp.Time))
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			// Cleanup is best effort: failures are logged, not fatal.
			if err := cfg.Client().Resources().Delete(ctx, &canaryPod); err != nil {
				t.Logf("Failed to delete pod %s: %v", terminationCanaryPodName, err)
			}

			if err := cfg.Client().Resources().Delete(ctx, &bootIndicatorPod); err != nil {
				t.Logf("Failed to delete pod %s: %v", bootIndicatorPodName, err)
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, feat)
}


================================================
FILE: test/cases/disruptive/graceful_shutdown_test.go
================================================
//go:build e2e

package disruptive

import (
	"context"
	"fmt"
	"io"
	"log"
	"regexp"
	"strings"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/awssdk"
	"github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-sdk-go-v2/service/ec2"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/utils/pointer"

	"sigs.k8s.io/e2e-framework/klient/k8s"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// getPodLogs streams the logs of the named pod through a freshly built
// kubernetes clientset and returns them as one string.
//
// An empty PodLogOptions is passed, so no log filtering is requested. Any
// failure to build the client, open the stream, or read from it is returned
// as-is together with an empty string.
func getPodLogs(ctx context.Context, cfg *envconf.Config, podName, namespace string) (string, error) {
	clientset, err := kubernetes.NewForConfig(cfg.Client().RESTConfig())
	if err != nil {
		return "", err
	}

	stream, err := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{}).Stream(ctx)
	if err != nil {
		return "", err
	}
	defer stream.Close()

	var buf strings.Builder
	if _, err := io.Copy(&buf, stream); err != nil {
		return "", err
	}
	return buf.String(), nil
}

// checkLogPattern reports whether the regular expression pattern matches
// anywhere in the logs currently available for the given pod.
//
// It returns an error when the logs cannot be fetched or when pattern is not
// a valid regular expression.
func checkLogPattern(ctx context.Context, cfg *envconf.Config, podName, namespace, pattern string) (bool, error) {
	podLogs, err := getPodLogs(ctx, cfg, podName, namespace)
	if err != nil {
		return false, err
	}

	found, matchErr := regexp.MatchString(pattern, podLogs)
	if matchErr != nil {
		return false, matchErr
	}
	return found, nil
}

// countLogMatches returns the number of non-overlapping matches of the regular
// expression pattern in the logs currently available for the given pod.
//
// The logs are fetched before the pattern is compiled, so a log retrieval
// error takes precedence over an invalid pattern.
func countLogMatches(ctx context.Context, cfg *envconf.Config, podName, namespace, pattern string) (int, error) {
	podLogs, err := getPodLogs(ctx, cfg, podName, namespace)
	if err != nil {
		return 0, err
	}

	matcher, err := regexp.Compile(pattern)
	if err != nil {
		return 0, err
	}

	return len(matcher.FindAllString(podLogs, -1)), nil
}

// TestKubeletGracefulShutdown verifies that a pod keeps running after SIGTERM
// while its node's EC2 instance is being terminated.
//
// The test starts a heartbeat pod, terminates the EC2 instance backing the
// node the pod landed on, and then inspects the pod logs to confirm that:
//   - the container received SIGTERM within 60 seconds, and
//   - it kept logging heartbeats for roughly two minutes afterwards.
func TestKubeletGracefulShutdown(t *testing.T) {
	// podName is shared between Assess and Teardown through the closure rather
	// than through the context: a t.Fatalf inside Assess prevents the updated
	// context from being returned, which would otherwise hide the pod from
	// Teardown and leak it.
	var podName string

	feat := features.New("kubelet-graceful-shutdown").
		WithLabel("suite", "disruptive").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[Setup] Setting up Kubelet Graceful Shutdown test...")
			return ctx
		}).
		Assess("Kubelet gracefully shuts down pods during node termination", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			// Create heartbeat pod that will log its status
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      fmt.Sprintf("graceful-shutdown-test-%d", time.Now().Unix()),
					Namespace: "default",
					Labels: map[string]string{
						"app": "graceful-shutdown-test",
					},
				},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{
						{
							Name:    "heartbeat-container",
							Image:   "public.ecr.aws/amazonlinux/amazonlinux:2023",
							Command: []string{"/usr/bin/bash", "-c"},
							Args: []string{`
								set -x
								echo "[GRACEFUL-TEST] Starting graceful shutdown test pod..."
								
								function handle_sigterm() {
									echo "[GRACEFUL-TEST] $(date): SIGTERM-RECEIVED - starting graceful shutdown period"
									# Continue heartbeating until we are SIGKILL-d
									start_time=$(date +%s)
									while true; do
										current_time=$(date +%s)
										elapsed=$((current_time - start_time))
										echo "[GRACEFUL-TEST] $(date): HEARTBEAT-AFTER-SIGTERM elapsed=${elapsed}s"
										sleep 1
									done
								}
								
								trap handle_sigterm TERM
								
								# Initial heartbeat to show pod is running
								echo "[GRACEFUL-TEST] $(date): POD-STARTED - pod started successfully"
								
								# Keep running and heartbeating until terminated
								counter=0
								while true; do
									echo "[GRACEFUL-TEST] $(date): NORMAL-HEARTBEAT counter=$counter"
									counter=$((counter + 1))
									sleep 10
								done
							`},
						},
					},
					RestartPolicy:                 corev1.RestartPolicyNever,
					TerminationGracePeriodSeconds: pointer.Int64(150), // 2.5 minutes to allow for graceful shutdown testing
				},
			}

			if err := cfg.Client().Resources().Create(ctx, pod); err != nil {
				t.Fatalf("[Assess] Failed to create heartbeat pod: %v", err)
			}
			log.Printf("[Assess] Created heartbeat pod: %s", pod.Name)

			// Record the pod name for Teardown before anything below can fail.
			podName = pod.Name

			log.Printf("[Assess] Waiting for pod %s to start running...", pod.Name)
			err := wait.For(
				e2e.NewConditionExtension(cfg.Client().Resources()).ResourceMatch(pod, func(object k8s.Object) bool {
					p := object.(*corev1.Pod)
					return p.Status.Phase == corev1.PodRunning
				}),
				wait.WithContext(ctx),
				wait.WithTimeout(2*time.Minute),
			)
			if err != nil {
				t.Fatalf("[Assess] Pod did not start running: %v", err)
			}

			// Wait a bit for initial heartbeats
			log.Printf("[Assess] Waiting for initial heartbeats...")
			time.Sleep(30 * time.Second)

			// Verify pod started successfully by checking logs
			podStarted, err := checkLogPattern(ctx, cfg, pod.Name, pod.Namespace, `POD-STARTED`)
			if err != nil {
				t.Fatalf("[Assess] Failed to check pod logs: %v", err)
			}
			if !podStarted {
				t.Fatalf("[Assess] Pod did not log successful startup")
			}
			log.Printf("[Assess] ✓ Pod startup confirmed via logs")

			// Get the node the pod is running on
			if err := cfg.Client().Resources().Get(ctx, pod.Name, pod.Namespace, pod); err != nil {
				t.Fatalf("[Assess] Failed to get pod details: %v", err)
			}

			nodeName := pod.Spec.NodeName
			if nodeName == "" {
				t.Fatalf("[Assess] Pod is not scheduled to any node")
			}
			log.Printf("[Assess] Pod is running on node: %s", nodeName)

			// Get the EC2 instance ID for this node; it is taken to be the last
			// "/"-separated segment of the node's providerID.
			var node corev1.Node
			if err := cfg.Client().Resources().Get(ctx, nodeName, "", &node); err != nil {
				t.Fatalf("[Assess] Failed to get node %s: %v", nodeName, err)
			}
			providerID := node.Spec.ProviderID
			if providerID == "" {
				t.Fatalf("[Assess] Node %s has no providerID", nodeName)
			}
			parts := strings.Split(providerID, "/")
			if len(parts) < 2 {
				t.Fatalf("[Assess] Invalid providerID format: %s", providerID)
			}
			instanceID := parts[len(parts)-1]
			log.Printf("[Assess] Node %s corresponds to EC2 instance: %s", nodeName, instanceID)

			// Terminate the EC2 instance
			log.Printf("[Assess] Terminating EC2 instance %s to test graceful shutdown...", instanceID)
			ec2Client := ec2.NewFromConfig(awssdk.NewConfig())
			_, err = ec2Client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
				InstanceIds: []string{instanceID},
			})
			if err != nil {
				t.Fatalf("[Assess] Failed to terminate EC2 instance %s: %v", instanceID, err)
			}
			log.Printf("[Assess] Successfully initiated termination of instance %s", instanceID)

			// Wait and monitor the graceful shutdown process via logs
			log.Printf("[Assess] Monitoring graceful shutdown process for 3 minutes...")

			// Wait for SIGTERM to be received (should happen within 60 seconds):
			// 30 polls, 2 seconds apart. Log retrieval errors are tolerated here
			// because the node is going away and the API may be briefly flaky.
			sigtermReceived := false
			for i := 0; i < 30; i++ {
				received, err := checkLogPattern(ctx, cfg, pod.Name, pod.Namespace, `SIGTERM-RECEIVED`)
				if err != nil {
					log.Printf("[Assess] Warning: Failed to check logs: %v", err)
				} else if received {
					sigtermReceived = true
					log.Printf("[Assess] ✓ SIGTERM received by pod (detected after %d seconds)", i*2)
					break
				}
				time.Sleep(2 * time.Second)
			}

			if !sigtermReceived {
				t.Fatalf("[Assess] Pod did not receive SIGTERM within 60 seconds of instance termination")
			}

			// Monitor heartbeats for the next 2+ minutes to verify graceful shutdown behavior
			log.Printf("[Assess] Verifying pod continues running during graceful shutdown period...")
			gracefulShutdownStart := time.Now()

			// heartbeatsAfterSigterm keeps the most recent successful count; a
			// failed poll leaves the previous value in place.
			var heartbeatsAfterSigterm int
			for time.Since(gracefulShutdownStart) < 2*time.Minute { // Monitor for 2 minutes
				// Count heartbeats after SIGTERM
				matches, err := countLogMatches(ctx, cfg, pod.Name, pod.Namespace, `HEARTBEAT-AFTER-SIGTERM`)
				if err != nil {
					log.Printf("[Assess] Warning: Failed to count heartbeats: %v", err)
				} else if matches > 0 {
					log.Printf("[Assess] ✓ Pod still running after SIGTERM (%d heartbeats logged)", matches)
					heartbeatsAfterSigterm = matches
				}

				time.Sleep(1 * time.Second)
			}

			// Verify we got heartbeats during the graceful shutdown period
			// These happen once a second, so we should observe at least 110 of them for a 2 minute grace period
			// NOTE(review): the script runs with `set -x`, so each echo is presumably
			// also traced to stderr; if the pod logs include stderr the pattern may
			// match twice per heartbeat — confirm the threshold accounts for that.
			if heartbeatsAfterSigterm < 110 {
				t.Fatalf("[Assess] Expected at least 110 heartbeats during graceful shutdown, got %d", heartbeatsAfterSigterm)
			}

			log.Printf("[Assess] ✓ Pod continued running and heartbeating for graceful shutdown period")
			log.Printf("[Assess] ✓ Total heartbeats after SIGTERM: %d", heartbeatsAfterSigterm)

			// Check for graceful exit. The script above never prints a
			// GRACEFUL-EXIT marker, so this check is informational only and
			// never fails the test.
			gracefulExit, err := checkLogPattern(ctx, cfg, pod.Name, pod.Namespace, `GRACEFUL-EXIT`)
			if err != nil {
				log.Printf("[Assess] Warning: Failed to check for graceful exit: %v", err)
			} else if gracefulExit {
				log.Printf("[Assess] ✓ Pod logged graceful exit")
			}

			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			if podName == "" {
				log.Printf("[Teardown] No pod was created, nothing to clean up")
				return ctx
			}

			log.Printf("[Teardown] Cleaning up test pod %s...", podName)

			// Get final logs for debugging if needed
			logs, err := getPodLogs(ctx, cfg, podName, "default")
			if err != nil {
				log.Printf("[Teardown] Warning: Failed to get final logs: %v", err)
			} else {
				log.Printf("[Teardown] Final pod logs:\n%s", logs)
			}

			// Delete the pod (it may already be terminated)
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: "default",
				},
			}
			if err := cfg.Client().Resources().Delete(ctx, pod); err != nil {
				log.Printf("[Teardown] Warning: Failed to delete pod %s: %v", podName, err)
			} else {
				log.Printf("[Teardown] Successfully cleaned up pod %s", podName)
			}

			return ctx
		}).
		Feature()

	testenv.Test(t, feat)
}


================================================
FILE: test/cases/disruptive/main_test.go
================================================
//go:build e2e

package disruptive

import (
	"context"
	_ "embed"
	"log"
	"os"
	"os/signal"
	"testing"

	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

var (
	testenv env.Environment
)

// TestMain builds the shared e2e test environment for the disruptive suite
// from command-line flags and runs every test in the package against it.
//
// The environment context is cancelled when the process receives an
// interrupt signal.
func TestMain(m *testing.M) {
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Starting disruptive test suite...")
		return ctx, nil
	})

	// os.Exit does not run deferred functions, so release the signal handler
	// explicitly before exiting.
	exitCode := testenv.Run(m)
	cancel()
	os.Exit(exitCode)
}


================================================
FILE: test/cases/dra/dra_example_driver_test.go
================================================
//go:build e2e

package dra

import (
	"context"
	_ "embed"
	"fmt"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"

	"github.com/stretchr/testify/assert"
	appsv1 "k8s.io/api/apps/v1"

	corev1 "k8s.io/api/core/v1"
	rbacv1 "k8s.io/api/rbac/v1"
	"k8s.io/api/resource/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/k8s"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// TestDraExampleDriver installs the DRA example driver together with a device
// class and a resource claim template, then runs a pod that claims a device
// and checks that the pod's exported environment names the expected driver.
//
// see: https://github.com/kubernetes-sigs/dra-example-driver
func TestDraExampleDriver(t *testing.T) {
	driverObjects := draDriverResources()
	deviceClass, claimTemplate, testPod := testResources()

	feature := features.New("dra-example-driver").
		WithLabel("feature", "dra").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			res := cfg.Client().Resources()
			for i := range driverObjects {
				assert.NoError(t, res.Create(ctx, driverObjects[i]))
			}
			assert.NoError(t, res.Create(ctx, &deviceClass))
			assert.NoError(t, res.Create(ctx, &claimTemplate))
			return ctx
		}).
		Assess("device driver present", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			res := cfg.Client().Resources()

			assert.NoError(t, res.Create(ctx, &testPod))
			// Always remove the pod again and wait until it is really gone.
			defer func() {
				assert.NoError(t, res.Delete(ctx, &testPod))
				gone := conditions.New(res).ResourceDeleted(&testPod)
				assert.NoError(t, wait.For(gone, wait.WithTimeout(time.Minute), wait.WithContext(ctx)))
			}()

			running := conditions.New(res).PodRunning(&testPod)
			assert.NoError(t, wait.For(running, wait.WithTimeout(time.Minute), wait.WithContext(ctx)))

			containerName := testPod.Spec.Containers[0].Name
			podLogs, err := e2e.ReadPodLogs(ctx, cfg.Client().RESTConfig(), testPod.Namespace, testPod.Name, containerName)
			if !assert.NoErrorf(t, err, "skipping error getting pod logs %q: %v", testPod.Name, err) {
				return ctx
			}

			t.Logf("Logs for %q\n%s", testPod.Name, podLogs)
			// The container runs `export`, so the driver name is expected to
			// show up as an environment variable in its output.
			wantEnv := fmt.Sprintf(`DRA_RESOURCE_DRIVER_NAME="%s"`, deviceClass.Name)
			assert.Contains(t, podLogs, wantEnv)

			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			res := cfg.Client().Resources()
			assert.NoError(t, res.Delete(ctx, &deviceClass))
			assert.NoError(t, res.Delete(ctx, &claimTemplate))
			for i := range driverObjects {
				assert.NoError(t, res.Delete(ctx, driverObjects[i]))
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, feature)
}

// testResources builds the objects used by the DRA example driver test:
//   - a DeviceClass named "gpu.example.com" selecting devices of that driver,
//   - a ResourceClaimTemplate "single-gpu" requesting one device of that class,
//   - a pod "pod0" whose single container claims the device through the template.
//
// All namespaced objects live in the default namespace.
func testResources() (v1beta1.DeviceClass, v1beta1.ResourceClaimTemplate, corev1.Pod) {
	const resourceAPIVersion = "resource.k8s.io/v1beta1"

	deviceClass := v1beta1.DeviceClass{
		TypeMeta:   metav1.TypeMeta{APIVersion: resourceAPIVersion, Kind: "DeviceClass"},
		ObjectMeta: metav1.ObjectMeta{Name: "gpu.example.com"},
		Spec: v1beta1.DeviceClassSpec{
			Selectors: []v1beta1.DeviceSelector{
				{CEL: &v1beta1.CELDeviceSelector{Expression: "device.driver == 'gpu.example.com'"}},
			},
		},
	}

	gpuRequest := v1beta1.DeviceRequest{
		Name:            "gpu",
		DeviceClassName: deviceClass.Name,
	}

	claimTemplate := v1beta1.ResourceClaimTemplate{
		TypeMeta:   metav1.TypeMeta{APIVersion: resourceAPIVersion, Kind: "ResourceClaimTemplate"},
		ObjectMeta: metav1.ObjectMeta{Name: "single-gpu", Namespace: corev1.NamespaceDefault},
		Spec: v1beta1.ResourceClaimTemplateSpec{
			Spec: v1beta1.ResourceClaimSpec{
				Devices: v1beta1.DeviceClaim{Requests: []v1beta1.DeviceRequest{gpuRequest}},
			},
		},
	}

	// The container prints its environment and then idles until SIGTERM.
	container := corev1.Container{
		Name:    "ctr0",
		Image:   "public.ecr.aws/amazonlinux/amazonlinux:latest",
		Command: []string{"bash", "-c"},
		Args:    []string{"export; trap 'exit 0' TERM; sleep infinity & wait"},
		Resources: corev1.ResourceRequirements{
			Claims: []corev1.ResourceClaim{{Name: gpuRequest.Name}},
		},
	}

	podClaim := corev1.PodResourceClaim{
		Name:                      gpuRequest.Name,
		ResourceClaimTemplateName: &claimTemplate.Name,
	}

	pod := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "pod0", Namespace: corev1.NamespaceDefault},
		Spec: corev1.PodSpec{
			Containers:     []corev1.Container{container},
			ResourceClaims: []corev1.PodResourceClaim{podClaim},
		},
	}

	return deviceClass, claimTemplate, pod
}

// draDriverResources returns the objects needed to run the DRA example
// driver's kubelet plugin: a ClusterRoleBinding, a ClusterRole, a
// ServiceAccount and the plugin DaemonSet, in that order.
//
// Namespaced objects are placed in the default namespace. The cluster-scoped
// ClusterRole and ClusterRoleBinding deliberately carry no namespace.
func draDriverResources() []k8s.Object {
	// ServiceAccount belongs to the core "v1" API group, not to RBAC.
	serviceAccount := corev1.ServiceAccount{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "v1",
			Kind:       "ServiceAccount",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "dra-service-account",
			Namespace: corev1.NamespaceDefault,
		},
	}

	clusterRole := rbacv1.ClusterRole{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "rbac.authorization.k8s.io/v1",
			Kind:       "ClusterRole",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name: "dra-example-driver-role",
		},
		Rules: []rbacv1.PolicyRule{
			{
				APIGroups: []string{"resource.k8s.io"},
				Resources: []string{"resourceclaims"},
				Verbs:     []string{"get"},
			},
			{
				APIGroups: []string{""},
				Resources: []string{"nodes"},
				Verbs:     []string{"get"},
			},
			{
				APIGroups: []string{"resource.k8s.io"},
				Resources: []string{"resourceslices"},
				Verbs:     []string{"get", "list", "watch", "create", "update", "patch", "delete"},
			},
		},
	}

	clusterRoleBinding := rbacv1.ClusterRoleBinding{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "rbac.authorization.k8s.io/v1",
			Kind:       "ClusterRoleBinding",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name: "dra-example-driver-role-binding",
		},
		Subjects: []rbacv1.Subject{
			{
				Kind:      serviceAccount.Kind,
				Name:      serviceAccount.Name,
				Namespace: serviceAccount.Namespace,
			},
		},
		RoleRef: rbacv1.RoleRef{
			Name:     clusterRole.Name,
			Kind:     clusterRole.Kind,
			APIGroup: "rbac.authorization.k8s.io",
		},
	}

	driverDaemonset := appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "dra-example-driver-kubeletplugin",
			Namespace: corev1.NamespaceDefault,
			Labels: map[string]string{
				"app.kubernetes.io/name":      "dra-example-driver",
				"app.kubernetes.io/instance":  "dra-example-driver",
				"app.kubernetes.io/component": "kubeletplugin",
			},
		},
		Spec: appsv1.DaemonSetSpec{
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app.kubernetes.io/name":      "dra-example-driver",
					"app.kubernetes.io/instance":  "dra-example-driver",
					"app.kubernetes.io/component": "kubeletplugin",
				},
			},
			UpdateStrategy: appsv1.DaemonSetUpdateStrategy{
				Type: appsv1.RollingUpdateDaemonSetStrategyType,
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{
						"app.kubernetes.io/name":      "dra-example-driver",
						"app.kubernetes.io/instance":  "dra-example-driver",
						"app.kubernetes.io/component": "kubeletplugin",
					},
				},
				Spec: corev1.PodSpec{
					ServiceAccountName: serviceAccount.Name,
					PriorityClassName:  "system-node-critical",
					Containers: []corev1.Container{
						{
							Name:            "plugin",
							SecurityContext: &corev1.SecurityContext{Privileged: &[]bool{true}[0]},
							Image:           "registry.k8s.io/dra-example-driver/dra-example-driver:v0.1.0",
							ImagePullPolicy: corev1.PullIfNotPresent,
							Command:         []string{"dra-example-kubeletplugin"},
							Env: []corev1.EnvVar{
								{Name: "CDI_ROOT", Value: "/var/run/cdi"},
								{Name: "NODE_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "spec.nodeName"}}},
								{Name: "NAMESPACE", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.namespace"}}},
								// NOTE: this is what arbitrarily decides the
								// number of GPUs being mocked on the node.
								{Name: "NUM_DEVICES", Value: "8"},
							},
							// Each mount pairs with the host path volume of the
							// same name declared below.
							VolumeMounts: []corev1.VolumeMount{
								{Name: "plugins-registry", MountPath: "/var/lib/kubelet/plugins_registry"},
								{Name: "plugins", MountPath: "/var/lib/kubelet/plugins"},
								{Name: "cdi", MountPath: "/var/run/cdi"},
							},
						},
					},
					Volumes: []corev1.Volume{
						{Name: "plugins-registry", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/var/lib/kubelet/plugins_registry"}}},
						{Name: "plugins", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/var/lib/kubelet/plugins"}}},
						{Name: "cdi", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/var/run/cdi"}}},
					},
				},
			},
		},
	}

	return []k8s.Object{
		&clusterRoleBinding,
		&clusterRole,
		&serviceAccount,
		&driverDaemonset,
	}
}


================================================
FILE: test/cases/dra/main_test.go
================================================
//go:build e2e

package dra

import (
	"context"
	_ "embed"
	"log"
	"os"
	"os/signal"
	"testing"

	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

var (
	testenv env.Environment
)

// TestMain builds the shared e2e test environment for the DRA suite from
// command-line flags and runs every test in the package against it.
//
// The environment context is cancelled when the process receives an
// interrupt signal.
func TestMain(m *testing.M) {
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	// os.Exit does not run deferred functions, so release the signal handler
	// explicitly before exiting.
	exitCode := testenv.Run(m)
	cancel()
	os.Exit(exitCode)
}


================================================
FILE: test/cases/efa/commons.go
================================================
//go:build e2e

package efa

import (
	"context"
	_ "embed"
	"fmt"
	"log"

	"github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-sdk-go-v2/aws"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var (
	testenv   env.Environment
	ec2Client e2e.EC2Client

	testImage *string

	pingPongSize            *string
	pingPongIters           *int
	pingPongDeadlineSeconds *int

	nodeType               *string
	expectedEFADeviceCount *int

	verbose *bool
)

const (
	EFA_RESOURCE_NAME   = "vpc.amazonaws.com/efa"
	TEST_NAMESPACE_NAME = "efa-tests"
)

// getEfaCapacity returns the EFA device capacity advertised in the node's
// status, or 0 when the node does not list the EFA resource at all.
func getEfaCapacity(node corev1.Node) int {
	if quantity, found := node.Status.Capacity[v1.ResourceName(EFA_RESOURCE_NAME)]; found {
		return int(quantity.Value())
	}
	return 0
}

// getEfaNodes lists the cluster's nodes and returns those that expose EFA
// devices and match the test configuration.
//
// A node is skipped (with a log line) when the nodeType flag is set and the
// node's instance-type label differs, or when it reports no EFA capacity.
// For every remaining node the advertised EFA capacity must equal the
// expected device count: the expectedEFADeviceCount flag when it is
// non-negative, otherwise the maximum number of EFA interfaces EC2 reports
// for the node's instance type.
//
// It returns an error when the cluster cannot be queried, when it has no
// nodes, when a node's EFA capacity is unexpected, when EC2 reports no EFA
// information for an instance type, or when no EFA node remains.
func getEfaNodes(ctx context.Context, config *envconf.Config) ([]corev1.Node, error) {
	clientset, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return nil, fmt.Errorf("failed to create Kubernetes client: %w", err)
	}

	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to list nodes: %w", err)
	}

	if len(nodes.Items) == 0 {
		return nil, fmt.Errorf("no nodes found in the cluster")
	}

	targetType := aws.ToString(nodeType)

	var efaNodes []corev1.Node
	for _, node := range nodes.Items {
		instanceType := node.Labels["node.kubernetes.io/instance-type"]

		if targetType != "" && instanceType != targetType {
			log.Printf("[INFO] Skipping node %s (type: %s), node is not of target type %s", node.Name, instanceType, targetType)
			continue
		}

		numEfaDevices, err := e2e.GetNonZeroResourceCapacity(&node, EFA_RESOURCE_NAME)
		if err != nil {
			log.Printf("[INFO] Skipping node %s (type: %s): %v", node.Name, instanceType, err)
			continue
		}

		expectedDeviceCount := aws.ToInt(expectedEFADeviceCount)
		if expectedDeviceCount < 0 {
			// No explicit expectation was given: fall back to what EC2
			// reports for this instance type.
			instanceInfo, err := ec2Client.DescribeInstanceType(instanceType)
			if err != nil {
				return nil, fmt.Errorf("failed to describe instance type %s: %w", instanceType, err)
			}
			// Both fields are pointers; guard against a missing EFA section
			// instead of panicking on a nil dereference.
			networkInfo := instanceInfo.NetworkInfo
			if networkInfo == nil || networkInfo.EfaInfo == nil {
				return nil, fmt.Errorf("instance type %s does not report EFA information", instanceType)
			}
			expectedDeviceCount = int(aws.ToInt32(networkInfo.EfaInfo.MaximumEfaInterfaces))
		}

		if expectedDeviceCount != numEfaDevices {
			return nil, fmt.Errorf("unexpected EFA device capacity on node %s: expected %d, got %d", node.Name, expectedDeviceCount, numEfaDevices)
		}

		efaNodes = append(efaNodes, node)
	}

	if len(efaNodes) == 0 {
		return nil, fmt.Errorf("no nodes with EFA capacity found in the cluster")
	}

	return efaNodes, nil
}


================================================
FILE: test/cases/efa/main_test.go
================================================
//go:build e2e

package efa

import (
	"context"
	_ "embed"
	"flag"
	"log"
	"os"
	"os/signal"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/manifests"
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// getTestNamespace returns a new Namespace object named TEST_NAMESPACE_NAME.
// Each call builds a fresh object, so callers may hand it to create and
// delete requests independently.
func getTestNamespace() *corev1.Namespace {
	ns := &corev1.Namespace{}
	ns.ObjectMeta = metav1.ObjectMeta{Name: TEST_NAMESPACE_NAME}
	return ns
}

// deployEFAPlugin applies the EFA device plugin manifest and then waits up to
// five minutes for the plugin DaemonSet in kube-system to become ready.
//
// The incoming context is returned unchanged in every case; the error is
// non-nil when applying the manifest or waiting for readiness fails.
func deployEFAPlugin(ctx context.Context, config *envconf.Config) (context.Context, error) {
	if err := e2e.ApplyManifests(config.Client().RESTConfig(), manifests.EfaDevicePluginManifest); err != nil {
		return ctx, err
	}

	pluginDS := appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: "aws-efa-k8s-device-plugin-daemonset", Namespace: "kube-system"},
	}
	ready := e2e.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&pluginDS)
	if err := wait.For(ready, wait.WithContext(ctx), wait.WithTimeout(5*time.Minute)); err != nil {
		return ctx, err
	}

	return ctx, nil
}

// TestMain registers the EFA suite's flags, builds the shared e2e test
// environment and runs every test in the package against it.
//
// Setup deploys the EFA device plugin, waits briefly for it to publish node
// resources and creates the test namespace. Finish removes the namespace and
// the plugin again. The whole run is bounded by a 55 minute timeout and is
// cancelled on an interrupt signal.
func TestMain(m *testing.M) {
	testImage = flag.String("testImage", "", "container image to use for tests")
	pingPongSize = flag.String("pingPongSize", "all", "sizes to use for ping pong")
	pingPongIters = flag.Int("pingPongIters", 10000, "number of iterations to use for ping pong")
	pingPongDeadlineSeconds = flag.Int("pingPongDeadlineSeconds", 120, "maximum run time for a ping pong attempt")
	nodeType = flag.String("nodeType", "", "instance type to target for tests")
	expectedEFADeviceCount = flag.Int("expectedEFADeviceCount", -1, "expected number of efa devices for the target nodes")
	verbose = flag.Bool("verbose", true, "use verbose mode for tests")

	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	if *testImage == "" {
		log.Fatal("--testImage must be set, use https://github.com/aws/aws-k8s-tester/blob/main/test/efa/Dockerfile to build the image")
	}

	ctx, stopSignals := signal.NotifyContext(context.Background(), os.Interrupt)
	timedCtx, cancelTimeout := context.WithTimeout(ctx, 55*time.Minute)

	testenv = env.NewWithConfig(cfg).WithContext(timedCtx)

	ec2Client = e2e.NewEC2Client()

	testenv.Setup(
		deployEFAPlugin,
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			select {
			case <-ctx.Done():
			// Cooldown to let device plugin update node object with resources
			case <-time.After(15 * time.Second):
			}

			return ctx, config.Client().Resources().Create(ctx, getTestNamespace())
		},
	)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Namespace removal is best-effort and uses a fresh context
			// (presumably so cleanup still runs once the suite context has
			// expired); report a failure instead of dropping it silently.
			if err := config.Client().Resources().Delete(context.TODO(), getTestNamespace()); err != nil {
				log.Printf("[WARN] failed to delete namespace %s: %v", TEST_NAMESPACE_NAME, err)
			}
			if err := e2e.DeleteManifests(config.Client().RESTConfig(), manifests.EfaDevicePluginManifest); err != nil {
				return ctx, err
			}
			return ctx, nil
		},
	)

	// os.Exit does not run deferred functions, so release the timeout and the
	// signal handler explicitly before exiting.
	exitCode := testenv.Run(m)
	cancelTimeout()
	stopSignals()
	os.Exit(exitCode)
}


================================================
FILE: test/cases/efa/pingpong_test.go
================================================
//go:build e2e

package efa

import (
	"context"
	_ "embed"
	"fmt"
	"log"
	"testing"
	"time"

	"k8s.io/apimachinery/pkg/api/resource"

	"github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
	PING_PONG_SERVICE_NAME = "pingpong-service"
	SERVER_POD_NAME        = "pingpong-server"
	CLIENT_POD_NAME        = "pingpong-client"
	PINGPONG_COMMAND       = "fi_pingpong"
)

func getPingPongPodName(server bool) string {
	if server {
		return SERVER_POD_NAME
	} else {
		return CLIENT_POD_NAME
	}
}

// getPingPongArgs assembles the command-line arguments for the ping pong
// binary from the suite's flags.
//
// The size ("-S") and iteration ("-I") flags and the "efa" provider ("-p") are
// always present, and "-v" is appended in verbose mode. The client additionally
// receives the server's address, "<server pod name>.<service name>", as the
// final argument.
func getPingPongArgs(server bool) (args []string) {
	size := aws.ToString(pingPongSize)
	iterations := fmt.Sprint(aws.ToInt(pingPongIters))

	args = []string{"-S", size, "-I", iterations, "-p", "efa"}
	if aws.ToBool(verbose) {
		args = append(args, "-v")
	}
	if server {
		return args
	}

	serverAddress := fmt.Sprintf("%s.%s", SERVER_POD_NAME, PING_PONG_SERVICE_NAME)
	return append(args, serverAddress)
}

// getPingPongResourceLabels returns the labels attached to ping pong
// resources: a fixed "test-suite" label plus a "pingpong-server" label whose
// value is "true" for the server and "false" for the client.
func getPingPongResourceLabels(server bool) map[string]string {
	labels := make(map[string]string, 2)
	labels["test-suite"] = "pingpong"
	labels["pingpong-server"] = fmt.Sprint(server)
	return labels
}

// generatePingPongServiceManifest builds the Service for the ping pong test in
// the test namespace. ClusterIP is set to "None" and the selector uses the
// server labels.
func generatePingPongServiceManifest() corev1.Service {
	meta := metav1.ObjectMeta{
		Name:      PING_PONG_SERVICE_NAME,
		Namespace: TEST_NAMESPACE_NAME,
	}
	spec := v1.ServiceSpec{
		Selector:  getPingPongResourceLabels(true),
		ClusterIP: "None",
	}
	return corev1.Service{ObjectMeta: meta, Spec: spec}
}

// generatePingPongPodManifest builds the server or client pod of the ping pong
// test.
//
// A required node affinity on the hostname label restricts the pod to the
// given node, and the pod requests (and is limited to) all EFA devices the
// node advertises. The container wraps the ping pong command in `timeout`,
// bounded by the pingPongDeadlineSeconds flag. Hostname and subdomain are set
// to the pod name and the ping pong service name respectively.
func generatePingPongPodManifest(server bool, node corev1.Node) corev1.Pod {
	podName := getPingPongPodName(server)
	efaQuantity := resource.MustParse(fmt.Sprint(getEfaCapacity(node)))

	// Requests and limits each get their own list holding the same quantity.
	efaOnly := func() corev1.ResourceList {
		return corev1.ResourceList{EFA_RESOURCE_NAME: efaQuantity}
	}

	// TODO: centralize re-usable logic for pod spec formatting
	onTargetNode := &corev1.Affinity{
		NodeAffinity: &corev1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
				NodeSelectorTerms: []corev1.NodeSelectorTerm{
					{
						MatchExpressions: []corev1.NodeSelectorRequirement{
							{
								Key:      "kubernetes.io/hostname",
								Operator: "In",
								Values:   []string{node.Name},
							},
						},
					},
				},
			},
		},
	}

	deadline := fmt.Sprintf("%ds", aws.ToInt(pingPongDeadlineSeconds))
	container := corev1.Container{
		Name:    "pingpong",
		Image:   aws.ToString(testImage),
		Command: []string{"timeout", deadline, PINGPONG_COMMAND},
		Args:    getPingPongArgs(server),
		Resources: corev1.ResourceRequirements{
			Requests: efaOnly(),
			Limits:   efaOnly(),
		},
	}

	return corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      podName,
			Namespace: TEST_NAMESPACE_NAME,
			Labels:    getPingPongResourceLabels(server),
		},
		Spec: corev1.PodSpec{
			Hostname:      podName,
			Subdomain:     PING_PONG_SERVICE_NAME,
			RestartPolicy: v1.RestartPolicyOnFailure,
			Affinity:      onTargetNode,
			Containers:    []corev1.Container{container},
		},
	}
}

// getPingPongPods picks the first two EFA-capable nodes and builds one ping
// pong pod for each.
//
// The first returned pod is the server (on the first node) and the second is
// the client (on the second node). An error is returned when the EFA nodes
// cannot be determined or fewer than two are available.
func getPingPongPods(ctx context.Context, config *envconf.Config) (corev1.Pod, corev1.Pod, error) {
	const instanceTypeLabel = "node.kubernetes.io/instance-type"
	var none corev1.Pod

	efaNodes, err := getEfaNodes(ctx, config)
	if err != nil {
		return none, none, err
	}
	if count := len(efaNodes); count < 2 {
		return none, none, fmt.Errorf("need at least 2 nodes with EFA capacity, got %d", count)
	}

	serverNode, clientNode := efaNodes[0], efaNodes[1]
	log.Printf("[INFO] Using node %s (type: %s), as server", serverNode.Name, serverNode.Labels[instanceTypeLabel])
	log.Printf("[INFO] Using node %s (type: %s), as client", clientNode.Name, clientNode.Labels[instanceTypeLabel])

	serverPod := generatePingPongPodManifest(true, serverNode)
	clientPod := generatePingPongPodManifest(false, clientNode)
	return serverPod, clientPod, nil
}

// TestPingPong creates the pingpong service plus one server and one client
// pod on two different nodes, then waits for the server pod to reach
// PodSucceeded. The server's logs are printed during teardown before all
// created resources are deleted.
func TestPingPong(t *testing.T) {
	var err error
	var pingPongService corev1.Service
	var client, server corev1.Pod
	pingpong := features.New("pingpong").
		WithLabel("suite", "efa").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			pingPongService = generatePingPongServiceManifest()
			// getPingPongPods returns the server manifest first and the
			// client manifest second; keep the assignment in that order so
			// the wait and the log dump below really target the server pod.
			server, client, err = getPingPongPods(ctx, cfg)
			if err != nil {
				t.Fatal(err)
			}

			assert.NoError(t, cfg.Client().Resources().Create(ctx, &pingPongService))
			assert.NoError(t, cfg.Client().Resources().Create(ctx, &server))
			assert.NoError(t, cfg.Client().Resources().Create(ctx, &client))
			return ctx
		}).
		Assess("Pingpong between nodes succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			assert.NoError(t, wait.For(conditions.New(cfg.Client().Resources()).PodPhaseMatch(&server, v1.PodSucceeded),
				wait.WithTimeout(15*time.Minute),
				wait.WithContext(ctx),
			))

			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			// Log retrieval is best-effort: a failure is reported but never
			// prevents the cleanup below.
			serverPodLogs, err := e2e.ReadPodLogs(ctx, cfg.Client().RESTConfig(), server.Namespace, server.Name, server.Spec.Containers[0].Name)
			if err != nil {
				t.Logf("Could not get logs for server: %v", err)
			} else {
				t.Logf("Logs for server\n%s", serverPodLogs)
			}

			assert.NoError(t, cfg.Client().Resources().Delete(ctx, &pingPongService))
			assert.NoError(t, cfg.Client().Resources().Delete(ctx, &server))
			assert.NoError(t, cfg.Client().Resources().Delete(ctx, &client))
			return ctx
		}).
		Feature()
	testenv.Test(t, pingpong)
}


================================================
FILE: test/cases/efa/unit_test.go
================================================
//go:build e2e

package efa

import (
	"context"
	_ "embed"
	"fmt"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// generateUnitTestManifest returns the pod manifest that runs
// ./scripts/unit-test.sh on the given node.
//
// The pod is named "efa-unit-<testIndex>", is pinned to the node through a
// required kubernetes.io/hostname node affinity, and requests (and is limited
// to) the EFA quantity reported by getEfaCapacity for that node. The same
// value is passed to the script as EXPECTED_EFA_DEVICE_COUNT, together with
// the node's instance-type label as EC2_INSTANCE_TYPE.
func generateUnitTestManifest(node corev1.Node, testIndex int) corev1.Pod {
	efaCount := fmt.Sprint(getEfaCapacity(node))
	efaQuantity := resource.MustParse(efaCount)

	// TODO: centralize re-usable logic for pod spec formatting
	pinToNode := &corev1.Affinity{
		NodeAffinity: &corev1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
				NodeSelectorTerms: []corev1.NodeSelectorTerm{{
					MatchExpressions: []corev1.NodeSelectorRequirement{{
						Key:      "kubernetes.io/hostname",
						Operator: "In",
						Values:   []string{node.Name},
					}},
				}},
			},
		},
	}

	testContainer := corev1.Container{
		Name:    "unit-test",
		Image:   aws.ToString(testImage),
		Command: []string{"./scripts/unit-test.sh"},
		Env: []v1.EnvVar{
			{Name: "EXPECTED_EFA_DEVICE_COUNT", Value: efaCount},
			{Name: "EC2_INSTANCE_TYPE", Value: node.Labels["node.kubernetes.io/instance-type"]},
		},
		Resources: corev1.ResourceRequirements{
			Requests: corev1.ResourceList{EFA_RESOURCE_NAME: efaQuantity},
			Limits:   corev1.ResourceList{EFA_RESOURCE_NAME: efaQuantity},
		},
	}

	meta := metav1.ObjectMeta{
		Name:      fmt.Sprintf("efa-unit-%d", testIndex),
		Namespace: TEST_NAMESPACE_NAME,
	}

	return corev1.Pod{
		ObjectMeta: meta,
		Spec: corev1.PodSpec{
			RestartPolicy: v1.RestartPolicyOnFailure,
			Affinity:      pinToNode,
			Containers:    []corev1.Container{testContainer},
		},
	}
}

// getUnitTestPodManifests returns one unit-test pod manifest per node reported
// by getEfaNodes. Each manifest is indexed by the node's position in that
// list. On lookup failure an empty slice and the error are returned.
func getUnitTestPodManifests(ctx context.Context, config *envconf.Config) ([]corev1.Pod, error) {
	efaNodes, err := getEfaNodes(ctx, config)
	if err != nil {
		return []corev1.Pod{}, err
	}

	var manifests []corev1.Pod
	for idx := range efaNodes {
		manifests = append(manifests, generateUnitTestManifest(efaNodes[idx], idx))
	}
	return manifests, nil
}

// TestUnit runs the EFA unit-test pod on every node returned by
// getUnitTestPodManifests and expects each pod to reach PodSucceeded.
// Pod logs are printed during teardown before the pods are deleted.
func TestUnit(t *testing.T) {
	var (
		err  error
		pods []corev1.Pod
	)

	// createPods generates the manifests and submits them to the cluster.
	createPods := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		if pods, err = getUnitTestPodManifests(ctx, cfg); err != nil {
			t.Fatalf("Failed to generate unit test manifests: %v", err)
		}

		for _, manifest := range pods {
			assert.NoError(t, cfg.Client().Resources().Create(ctx, &manifest))
		}

		return ctx
	}

	// awaitSuccess waits for every pod in turn, all under one shared
	// 20-minute context.
	awaitSuccess := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		suiteCtx, cancel := context.WithTimeout(ctx, 20*time.Minute)
		defer cancel()

		for _, target := range pods {
			succeeded := conditions.New(cfg.Client().Resources()).PodPhaseMatch(&target, v1.PodSucceeded)
			assert.NoError(t, wait.For(succeeded, wait.WithContext(suiteCtx)))
		}

		return ctx
	}

	// removePods dumps the logs of every pod first and only then deletes them.
	removePods := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		for _, target := range pods {
			output, readErr := e2e.ReadPodLogs(ctx, cfg.Client().RESTConfig(), target.Namespace, target.Name, target.Spec.Containers[0].Name)
			if readErr != nil {
				t.Logf("Could not get logs for pod %q", target.Name)
				continue
			}
			t.Logf("Logs for pod %q\n%s", target.Name, output)
		}

		for _, target := range pods {
			assert.NoError(t, cfg.Client().Resources().Delete(ctx, &target))
		}
		return ctx
	}

	unit := features.New("unit").
		WithLabel("suite", "efa").
		Setup(createPods).
		Assess("Unit test succeeds", awaitSuccess).
		Teardown(removePods).
		Feature()
	testenv.Test(t, unit)
}


================================================
FILE: test/cases/fips/README.md
================================================
# FIPS TLS Compliance Test

This test validates that FIPS-enabled EKS nodes enforce FIPS-compliant TLS cipher suites when pulling container images.

## What It Does

1. Deploys two local container registries as DaemonSets on each node:
   - `registry-fips` (port 5000) — serves TLS using the node's default (FIPS-compliant) cipher suites
   - `registry-nonfips` (port 5001) — an nginx reverse proxy configured to only offer `ECDHE-RSA-CHACHA20-POLY1305`, a non-FIPS cipher
2. Seeds both registries with a test image via `skopeo`
3. Runs two test pods:
   - `test-pull-fips` — pulls from `localhost:5000` and expects success
   - `test-pull-nonfips` — pulls from `localhost:5001` and expects `ImagePullBackOff` (TLS handshake failure)

## Prerequisites

- An EKS cluster with FIPS-enabled nodes
- TLS certificates available on each node at `/mnt/server-conf/certs/`:
  - `server.crt` — server certificate
  - `server.key` — private key
- `kubeconfig` configured for the target cluster
- Go 1.21+

## Host Setup

### Amazon Linux 2023

FIPS mode must be enabled at launch time via the EKS AMI. Use a FIPS-enabled AL2023 AMI when creating the nodegroup:

```bash
# Create a FIPS-enabled nodegroup with eksctl
kubetest2 eksctl \
  --kubernetes-version=X.XX \
  --ami-family=AmazonLinux2023 \
  --up \
  --down \
  --test=exec \
  -- <test command>
```

Verify FIPS is active on a node:
```bash
# SSH into a node and check
cat /proc/sys/crypto/fips_enabled
# Expected output: 1

# Or check via sysctl
sysctl crypto.fips_enabled
# Expected output: crypto.fips_enabled = 1
```

Generate the TLS certificates on each node:
```bash
sudo mkdir -p /mnt/server-conf/certs
sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
  -keyout /mnt/server-conf/certs/server.key \
  -out /mnt/server-conf/certs/server.crt \
  -subj "/CN=localhost" \
  -addext "subjectAltName=DNS:localhost,IP:127.0.0.1"
```

Add the certificate to the node's trust store so containerd trusts the local registries:
```bash
sudo cp /mnt/server-conf/certs/server.crt /etc/pki/ca-trust/source/anchors/
sudo update-ca-trust
sudo systemctl restart containerd
```

Without this, containerd rejects the self-signed certificate and both test pods fail with `ImagePullBackOff`.

### Bottlerocket

Bottlerocket is an immutable OS — you can't SSH in and run `openssl` directly. Certs must be provisioned via a bootstrap container that runs before kubelet starts.

**1. Build the bootstrap container image**

The Dockerfile is minimal — it runs a user-data script at boot:

```dockerfile

FROM public.ecr.aws/docker/library/alpine:latest
RUN apk add --no-cache openssl curl
ENTRYPOINT ["/bin/sh", "/.bottlerocket/bootstrap-containers/gen-certs/user-data"]
```

Build and push to ECR:
```bash
docker build -t <your-account-id>.dkr.ecr.<region>.amazonaws.com/cert-bootstrap:v1 .
docker push <your-account-id>.dkr.ecr.<region>.amazonaws.com/cert-bootstrap:v1
```

**2. Prepare the cert generation script**

The cert generation script generates a CA + server cert, writes them to the host at `/mnt/server-conf/certs/`, and registers the CA with Bottlerocket's trust store via `apiclient`:

```bash
#!/bin/sh
set -xe

WORK_DIR=$(mktemp -d)
CERTS_DIR=/.bottlerocket/rootfs/mnt/server-conf/certs
CSR_CONF=${WORK_DIR}/csr.conf
CA_CRT=${WORK_DIR}/ca.crt
CA_KEY=${WORK_DIR}/ca.key

mkdir -p ${CERTS_DIR}

# Generate CA
openssl genrsa -out ${CA_KEY} 2048
openssl req -x509 -new -nodes -key ${CA_KEY} \
  -subj "/CN=Bottlerocket Test CA/C=US/ST=WASHINGTON/L=Seattle/O=Bottlerocket" \
  -days 1825 -out ${CA_CRT}

# Get instance metadata
TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
DOMAIN=$(curl -H "X-aws-ec2-metadata-token: ${TOKEN}" http://169.254.169.254/latest/meta-data/public-hostname)
IP=$(curl -H "X-aws-ec2-metadata-token: ${TOKEN}" http://169.254.169.254/latest/meta-data/public-ipv4)

# Generate CSR config with real values
cat > ${CSR_CONF} <<EOF
[ req ]
default_bits = 2048
prompt = no
default_md = sha256
distinguished_name = dn
req_extensions = req_ext

[ dn ]
C = US
ST = WASHINGTON
L = Seattle
O = Bottlerocket
OU = Bottlerocket Dev

[ req_ext ]
subjectAltName = @alt_names

[ alt_names ]
DNS.1 = localhost
DNS.2 = ${DOMAIN}
IP.1 = 127.0.0.1
IP.2 = ${IP}
EOF

# Generate server cert signed by CA
openssl genrsa -out ${CERTS_DIR}/server.key 2048
openssl req -new -key ${CERTS_DIR}/server.key -out ${WORK_DIR}/server.csr -config ${CSR_CONF}
openssl x509 -req -in ${WORK_DIR}/server.csr -CA ${CA_CRT} -CAkey ${CA_KEY} \
  -CAcreateserial -out ${CERTS_DIR}/server.crt -days 10000 \
  -extensions req_ext -extfile ${CSR_CONF}

# Push CA to Bottlerocket trust store
BUNDLE=$(base64 -w0 ${CA_CRT})
apiclient set pki.local-registry.data=${BUNDLE}
apiclient set pki.local-registry.trusted=true

rm -rf ${WORK_DIR}
```

Once you've created your script, you'll need to base64-encode it and set it as the value of the bootstrap container's user-data setting.

**3. Configure the bootstrap container in Bottlerocket TOML**

Add this to your Bottlerocket user data:

```toml
[settings.bootstrap-containers.gen-certs]
source = "<your-account-id>.dkr.ecr.<region>.amazonaws.com/cert-bootstrap:v1"
mode = "once"
essential = true
user-data = "<paste the base64-encoded cert generation script here>"
```

- `mode = "once"` — runs only on first boot
- `essential = true` — node won't start if cert generation fails
- The script runs before kubelet, so certs are ready when pods start

**4. Launch with a FIPS Bottlerocket AMI**

```bash
kubetest2 eksctl \
  --kubernetes-version=X.XX \
  --ami-family=Bottlerocket \
  --up \
  --down \
  --test=exec \
  -- <test command>
```

Verify FIPS on Bottlerocket (via the admin container):
```bash
cat /proc/sys/crypto/fips_enabled
# Expected output: 1
```

## Running the Test

```bash
# Run all FIPS test cases
go test -tags e2e -v ./test/cases/fips/ --kubeconfig=$HOME/.kube/config

# Run a specific test case by label
go test -tags e2e -v ./test/cases/fips/ --kubeconfig=$HOME/.kube/config -labels="suite=fips"
```

Or via `kubetest2`:
```bash
kubetest2 eksctl \
  --kubernetes-version=X.XX \
  --ami-family=<AMI_Family> \
  --up \
  --down \
  --test=exec \
  -- fips.test -v
```

## Test Cases

| Test | Description | Expected Result |
|------|-------------|-----------------|
| `fips-tls-pull` | Pull image from FIPS-cipher registry (port 5000) | Pod succeeds |
| `nonfips-tls-pull` | Pull image from non-FIPS-cipher registry (port 5001) | `ImagePullBackOff` — TLS handshake rejected |


================================================
FILE: test/cases/fips/fips_test.go
================================================
//go:build e2e

package fips

import (
	"context"
	_ "embed"
	"io"
	"strings"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

const (
	// pullTimeout bounds the wait for the test-pull-fips pod to reach PodSucceeded.
	pullTimeout   = 5 * time.Minute
	// rejectTimeout bounds the polling for ImagePullBackOff/ErrImagePull on
	// the test-pull-nonfips pod.
	rejectTimeout = 2 * time.Minute
)

// Embedded Kubernetes manifests applied and deleted by TestMain.
var (
	// registryFIPSManifest is the registry-fips DaemonSet manifest.
	//go:embed manifests/registry-fips.yaml
	registryFIPSManifest []byte
	// registryNonFIPSManifest is the registry-nonfips manifest (nginx config
	// plus DaemonSet).
	//go:embed manifests/registry-nonfips.yaml
	registryNonFIPSManifest []byte

	// testPodsManifest holds the test-pull-fips and test-pull-nonfips pods.
	//go:embed manifests/test-pods.yaml
	testPodsManifest []byte
)

// verifyNonfipsCipherRejection checks that the nginx container of at least one
// registry-nonfips pod in the default namespace logged "no shared cipher".
//
// Only the last 50 log lines of each pod are inspected, and the whole lookup
// is bounded by logFetchTimeout. The test is failed (t.Fatal) when the
// clientset cannot be created, the pods cannot be listed, no pod exists, or
// no inspected log contains the marker. Per-pod log retrieval problems are
// logged and the next pod is tried.
func verifyNonfipsCipherRejection(ctx context.Context, t *testing.T, cfg *envconf.Config) {
	t.Helper()
	clientset, err := kubernetes.NewForConfig(cfg.Client().RESTConfig())
	if err != nil {
		t.Fatalf("could not create clientset for log verification: %v", err)
	}
	logCtx, logCancel := context.WithTimeout(ctx, logFetchTimeout)
	defer logCancel()
	pods, err := clientset.CoreV1().Pods("default").List(logCtx, metav1.ListOptions{
		LabelSelector: "name=registry-nonfips",
	})
	if err != nil {
		t.Fatalf("failed to list registry-nonfips pods: %v", err)
	}
	if len(pods.Items) == 0 {
		t.Fatal("no registry-nonfips pods found for log verification")
	}
	for _, pod := range pods.Items {
		req := clientset.CoreV1().Pods("default").GetLogs(pod.Name, &v1.PodLogOptions{
			Container: "nginx",
			TailLines: int64Ptr(50),
		})
		stream, err := req.Stream(logCtx)
		if err != nil {
			// Surface the reason instead of skipping silently, so a final
			// "not found" failure can be told apart from a fetch problem.
			t.Logf("could not stream nginx logs from pod %s: %v", pod.Name, err)
			continue
		}
		body, err := io.ReadAll(stream)
		stream.Close()
		if err != nil {
			// Whatever was read is still inspected below.
			t.Logf("incomplete read of nginx logs from pod %s: %v", pod.Name, err)
		}
		logs := string(body)
		t.Logf("registry-nonfips nginx logs:\n%s", logs)
		if strings.Contains(logs, "no shared cipher") {
			t.Log("Verified: FIPS node rejected non-FIPS cipher suite (no shared cipher)")
			return
		}
	}
	t.Fatal("Expected 'no shared cipher' in registry-nonfips nginx logs but not found")
}

// TestFIPSTLS runs two features against the pods named test-pull-fips and
// test-pull-nonfips in the default namespace:
//
//   - fips-tls-pull expects test-pull-fips to reach PodSucceeded within
//     pullTimeout.
//   - nonfips-tls-pull expects test-pull-nonfips to get stuck in
//     ImagePullBackOff/ErrImagePull within rejectTimeout, and then confirms
//     the cipher rejection through verifyNonfipsCipherRejection.
//
// Each feature deletes its pod during teardown; deletion failures are logged
// but do not fail the test.
func TestFIPSTLS(t *testing.T) {
	fipsPull := features.New("fips-tls-pull").
		WithLabel("suite", "fips").
		Assess("Pull from FIPS-cipher registry succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "test-pull-fips", Namespace: "default"},
			}
			err := wait.For(
				conditions.New(cfg.Client().Resources()).PodPhaseMatch(pod, v1.PodSucceeded),
				wait.WithContext(ctx),
				wait.WithTimeout(pullTimeout),
			)
			if err != nil {
				t.Fatalf("test-pull-fips pod did not succeed: %v", err)
			}
			t.Log("FIPS TLS pull succeeded as expected")
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			err := cfg.Client().Resources().Delete(ctx, &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "test-pull-fips", Namespace: "default"},
			})
			if err != nil {
				t.Logf("failed to delete test-pull-fips pod: %v", err)
			}
			return ctx
		}).
		Feature()

	nonfipsPull := features.New("nonfips-tls-pull").
		WithLabel("suite", "fips").
		Assess("Pull from non-FIPS-cipher registry fails on FIPS node", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "test-pull-nonfips", Namespace: "default"},
			}
			// Poll for ImagePullBackOff/ErrImagePull — pod won't reach PodFailed phase
			deadline := time.Now().Add(rejectTimeout)
			for time.Now().Before(deadline) {
				select {
				case <-ctx.Done():
					t.Fatalf("context cancelled while waiting for ImagePullBackOff: %v", ctx.Err())
				default:
				}
				err := cfg.Client().Resources().Get(ctx, "test-pull-nonfips", "default", pod)
				if err != nil {
					t.Fatalf("failed to get test-pull-nonfips pod: %v", err)
				}
				// Log the pod and container state on every poll to ease debugging.
				t.Logf("Polling test-pull-nonfips: Phase=%s", pod.Status.Phase)
				for _, cs := range pod.Status.ContainerStatuses {
					if cs.State.Waiting != nil {
						t.Logf("  Container %s: Waiting (Reason=%s)", cs.Name, cs.State.Waiting.Reason)
					} else if cs.State.Running != nil {
						t.Logf("  Container %s: Running", cs.Name)
					} else if cs.State.Terminated != nil {
						t.Logf("  Container %s: Terminated (Reason=%s)", cs.Name, cs.State.Terminated.Reason)
					}
				}
				// A successful pull means the non-FIPS cipher was accepted.
				if pod.Status.Phase == v1.PodSucceeded {
					t.Fatal("test-pull-nonfips pod succeeded — expected ImagePullBackOff. Is this a FIPS node?")
				}
				for _, cs := range pod.Status.ContainerStatuses {
					if cs.State.Running != nil && cs.Ready {
						t.Fatal("test-pull-nonfips container is running — image pull succeeded. Is this a FIPS node?")
					}
					if cs.State.Waiting != nil && (cs.State.Waiting.Reason == "ImagePullBackOff" || cs.State.Waiting.Reason == "ErrImagePull") {
						verifyNonfipsCipherRejection(ctx, t, cfg)
						t.Log("Non-FIPS TLS pull correctly rejected (ImagePullBackOff)")
						return ctx
					}
				}
				// Wait for the next poll, but wake up early on cancellation;
				// the check at the top of the loop then reports it.
				select {
				case <-ctx.Done():
				case <-time.After(pollInterval):
				}
			}
			t.Fatal("test-pull-nonfips did not reach ImagePullBackOff within timeout")
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			err := cfg.Client().Resources().Delete(ctx, &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "test-pull-nonfips", Namespace: "default"},
			})
			if err != nil {
				t.Logf("failed to delete test-pull-nonfips pod: %v", err)
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, fipsPull, nonfipsPull)
}


================================================
FILE: test/cases/fips/main_test.go
================================================
//go:build e2e

package fips

import (
	"context"
	"fmt"
	"io"
	"log"
	"os"
	"os/signal"
	"strings"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// Timing knobs shared by TestMain's setup and the test polling loops.
const (
	pollInterval     = 5 * time.Second  // polling interval for waitForSeed and status checks
	seedTimeout      = 5 * time.Minute  // per DaemonSet; apk install + skopeo copy can be slow on first pull
	daemonSetTimeout = 2 * time.Minute  // per DaemonSet; image pulls vary by network
	logFetchTimeout  = 30 * time.Second // timeout for fetching pod logs
	// Worst-case Setup: 2x daemonSetTimeout (4m) + 2x seedTimeout (10m) = ~14m
)

var testenv env.Environment

// int64Ptr returns a pointer to a copy of i, for API fields that take *int64.
func int64Ptr(i int64) *int64 {
	value := i
	return &value
}


// logDaemonSetDiagnostics writes the state of every pod labelled
// name=<dsName> in the default namespace to the standard logger: pod phase,
// pod conditions and per-container status. For containers that are in
// CrashLoopBackOff or have restarted at least once, the last 20 log lines are
// printed as well. Failures to fetch those logs are ignored.
func logDaemonSetDiagnostics(ctx context.Context, clientset *kubernetes.Clientset, dsName string) {
	log.Printf("=== Diagnostics for DaemonSet %s ===", dsName)

	podClient := clientset.CoreV1().Pods("default")
	podList, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: "name=" + dsName})
	if err != nil {
		log.Printf("Failed to list pods: %v", err)
		return
	}

	for _, p := range podList.Items {
		log.Printf("Pod %s: Phase=%s", p.Name, p.Status.Phase)
		for _, c := range p.Status.Conditions {
			log.Printf("  Condition %s: %s (Reason: %s)", c.Type, c.Status, c.Reason)
		}

		for _, status := range p.Status.ContainerStatuses {
			log.Printf("  Container %s: Ready=%v, RestartCount=%d", status.Name, status.Ready, status.RestartCount)

			waiting, terminated := status.State.Waiting, status.State.Terminated
			if waiting != nil {
				log.Printf("    Waiting: %s - %s", waiting.Reason, waiting.Message)
			}
			if terminated != nil {
				log.Printf("    Terminated: %s - %s", terminated.Reason, terminated.Message)
			}

			// Only unhealthy containers get their logs dumped.
			crashLooping := waiting != nil && waiting.Reason == "CrashLoopBackOff"
			if !crashLooping && status.RestartCount <= 0 {
				continue
			}

			stream, err := podClient.GetLogs(p.Name, &v1.PodLogOptions{
				Container: status.Name,
				TailLines: int64Ptr(20),
			}).Stream(ctx)
			if err != nil {
				continue
			}
			tail, _ := io.ReadAll(stream)
			stream.Close()
			log.Printf("    Last logs:\n%s", string(tail))
		}
	}
}


// logNodeInfo logs the name and OS image of every node in the cluster.
// The FIPS value it prints is only a hint: it is true when the lower-cased OS
// image string contains "fips". A node listing failure is logged as a warning.
func logNodeInfo(ctx context.Context, clientset *kubernetes.Clientset) {
	nodeList, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		log.Printf("Warning: could not list nodes: %v", err)
		return
	}

	for i := range nodeList.Items {
		n := &nodeList.Items[i]
		image := n.Status.NodeInfo.OSImage
		looksFIPS := strings.Contains(strings.ToLower(image), "fips")
		log.Printf("Node %s: OS=%s, FIPS=%v", n.Name, image, looksFIPS)
	}
}

// waitForSeed polls the seed-image container logs of every pod labelled
// name=<dsName> in the default namespace until all of them contain
// "Image seeded successfully".
//
// It returns nil once every pod reports success. It returns an error when
// the context is cancelled, the pod listing fails, any pod's log contains
// "level=fatal", or seedTimeout elapses (the last incomplete log is dumped
// first in that case). Waiting between polls is cancellable through ctx.
// Normally this only takes a couple of seconds.
func waitForSeed(ctx context.Context, clientset *kubernetes.Clientset, dsName string) error {
	log.Printf("Waiting for %s seed container to complete...", dsName)
	deadline := time.Now().Add(seedTimeout)
	var lastLogs string

	// pause waits one poll interval but returns early when ctx is cancelled.
	pause := func() error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(pollInterval):
			return nil
		}
	}

	for time.Now().Before(deadline) {
		if err := ctx.Err(); err != nil {
			return err
		}
		pods, err := clientset.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
			LabelSelector: "name=" + dsName,
		})
		if err != nil {
			return err
		}
		if len(pods.Items) == 0 {
			log.Printf("%s: no pods found yet, waiting...", dsName)
			if err := pause(); err != nil {
				return err
			}
			continue
		}
		allSeeded := true
		for _, pod := range pods.Items {
			req := clientset.CoreV1().Pods("default").GetLogs(pod.Name, &v1.PodLogOptions{
				Container: "seed-image",
			})
			logCtx, logCancel := context.WithTimeout(ctx, logFetchTimeout)
			stream, err := req.Stream(logCtx)
			if err != nil {
				logCancel()
				log.Printf("Failed to get logs for %s/%s: %v", dsName, pod.Name, err)
				allSeeded = false
				continue
			}
			body, readErr := io.ReadAll(stream)
			stream.Close()
			logCancel()
			if readErr != nil {
				// Whatever was read is still inspected below.
				log.Printf("Incomplete log read for %s/%s: %v", dsName, pod.Name, readErr)
			}
			logs := string(body)
			if strings.Contains(logs, "level=fatal") {
				return fmt.Errorf("%s seed failed: %s", dsName, logs)
			}
			if !strings.Contains(logs, "Image seeded successfully") {
				allSeeded = false
				lastLogs = logs
			}
		}
		if allSeeded {
			log.Printf("%s seed completed successfully on all %d pods", dsName, len(pods.Items))
			return nil
		}
		log.Printf("%s seed still waiting... (got %d bytes of logs)", dsName, len(lastLogs))
		if err := pause(); err != nil {
			return err
		}
	}
	// Dump last logs on timeout
	if lastLogs != "" {
		log.Printf("%s seed timeout - last logs:\n%s", dsName, lastLogs)
	}
	return fmt.Errorf("%s seed did not complete within timeout", dsName)
}

// TestMain prepares the shared environment for the FIPS tests.
//
// Setup logs node information, applies both registry DaemonSet manifests,
// waits for each DaemonSet to become ready and for its seed container to
// finish, and finally applies the test pods. Finish deletes all three
// manifests. The environment context is cancelled on os.Interrupt.
func TestMain(m *testing.M) {
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	defer cancel()
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	// Registries in deployment order, paired with their manifests.
	registries := []struct {
		name     string
		manifest []byte
	}{
		{name: "registry-fips", manifest: registryFIPSManifest},
		{name: "registry-nonfips", manifest: registryNonFIPSManifest},
	}

	deploy := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		clientset, err := kubernetes.NewForConfig(config.Client().RESTConfig())
		if err != nil {
			return ctx, fmt.Errorf("failed to create Kubernetes client: %w", err)
		}
		logNodeInfo(ctx, clientset)

		// Apply both registries before waiting on either of them.
		for _, reg := range registries {
			if err := fwext.ApplyManifests(config.Client().RESTConfig(), reg.manifest); err != nil {
				return ctx, fmt.Errorf("failed to apply %s manifest: %w", reg.name, err)
			}
			log.Printf("%s DaemonSet deployed", reg.name)
		}

		for _, reg := range registries {
			ds := appsv1.DaemonSet{
				ObjectMeta: metav1.ObjectMeta{Name: reg.name, Namespace: "default"},
			}
			log.Printf("Waiting for %s DaemonSet to be ready...", reg.name)
			ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&ds)
			if err := wait.For(ready, wait.WithContext(ctx), wait.WithTimeout(daemonSetTimeout)); err != nil {
				logDaemonSetDiagnostics(ctx, clientset, reg.name)
				return ctx, fmt.Errorf("%s DaemonSet not ready: %w", reg.name, err)
			}
			log.Printf("%s DaemonSet is ready", reg.name)
		}

		for _, reg := range registries {
			if err := waitForSeed(ctx, clientset, reg.name); err != nil {
				return ctx, fmt.Errorf("seed verification failed for %s: %w", reg.name, err)
			}
		}

		// The test pods are only created once both registries hold the image.
		if err := fwext.ApplyManifests(config.Client().RESTConfig(), testPodsManifest); err != nil {
			return ctx, fmt.Errorf("failed to apply test-pods manifest: %w", err)
		}
		log.Println("test pods deployed")

		return ctx, nil
	}

	// cleanup removes everything deploy created; results are not inspected.
	cleanup := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		for _, manifest := range [][]byte{registryFIPSManifest, registryNonFIPSManifest, testPodsManifest} {
			fwext.DeleteManifests(config.Client().RESTConfig(), manifest)
		}
		return ctx, nil
	}

	testenv.Setup(deploy)
	testenv.Finish(cleanup)

	os.Exit(testenv.Run(m))
}


================================================
FILE: test/cases/fips/manifests/registry-fips.yaml
================================================
# registry-fips: a per-node container registry that serves TLS on port 5000
# using the certificate and key mounted from the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: registry-fips
spec:
  selector:
    matchLabels:
      name: registry-fips
  template:
    metadata:
      labels:
        name: registry-fips
    spec:
      # Host networking makes the registry reachable on the node's own ports.
      hostNetwork: true
      containers:
        - name: registry
          image: registry:2
          env:
            - name: REGISTRY_HTTP_ADDR
              value: "0.0.0.0:5000"
            - name: REGISTRY_HTTP_TLS_CERTIFICATE
              value: "/certs/server.crt"
            - name: REGISTRY_HTTP_TLS_KEY
              value: "/certs/server.key"
          volumeMounts:
            - name: certs
              mountPath: /certs
        # Sidecar that installs skopeo, copies the alpine image into the local
        # registry as test:latest, prints a success marker and then idles.
        - name: seed-image
          image: public.ecr.aws/docker/library/alpine:latest
          command:
            - /bin/sh
            - -c
            - |
              apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community skopeo
              sleep 5
              skopeo copy --dest-tls-verify=false docker://public.ecr.aws/docker/library/alpine:latest docker://127.0.0.1:5000/test:latest
              echo "Image seeded successfully"
              sleep infinity
      volumes:
        # The certificate and key must already exist on the node.
        - name: certs
          hostPath:
            path: /mnt/server-conf/certs


================================================
FILE: test/cases/fips/manifests/registry-nonfips.yaml
================================================
# nginx configuration for the non-FIPS registry front end: TLS 1.2 on port
# 5001 offering only ECDHE-RSA-CHACHA20-POLY1305, proxying to the plain-HTTP
# registry on 127.0.0.1:5002.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-nonfips-config
data:
  nginx.conf: |
    events {}
    http {
      error_log /dev/stderr debug;
      server {
        listen 5001 ssl;
        ssl_certificate /certs/server.crt;
        ssl_certificate_key /certs/server.key;
        ssl_protocols TLSv1.2;
        ssl_ciphers ECDHE-RSA-CHACHA20-POLY1305;
        location / {
          proxy_pass http://127.0.0.1:5002;
          proxy_set_header Host $host;
          proxy_set_header X-Real-IP $remote_addr;
        }
      }
    }
---
# registry-nonfips: per-node nginx TLS proxy + local registry + seed sidecar.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: registry-nonfips
spec:
  selector:
    matchLabels:
      name: registry-nonfips
  template:
    metadata:
      labels:
        name: registry-nonfips
    spec:
      # Host networking makes the proxy reachable on the node's own ports.
      hostNetwork: true
      containers:
        # TLS front end; the container name "nginx" is what the test reads logs from.
        - name: nginx
          image: public.ecr.aws/nginx/nginx:stable-alpine
          volumeMounts:
            - name: certs
              mountPath: /certs
            - name: nginx-config
              mountPath: /etc/nginx/nginx.conf
              subPath: nginx.conf
        # Backing registry, bound to loopback only and served without TLS.
        - name: registry
          image: registry:2
          env:
            - name: REGISTRY_HTTP_ADDR
              value: "127.0.0.1:5002"
        # Sidecar that seeds test:latest directly into the backing registry.
        - name: seed-image
          image: public.ecr.aws/docker/library/alpine:latest
          command:
            - /bin/sh
            - -c
            - |
              apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community skopeo
              sleep 5
              skopeo copy --dest-tls-verify=false docker://public.ecr.aws/docker/library/alpine:latest docker://127.0.0.1:5002/test:latest
              echo "Image seeded successfully"
              sleep infinity
      volumes:
        - name: certs
          hostPath:
            path: /mnt/server-conf/certs
        - name: nginx-config
          configMap:
            name: nginx-nonfips-config


================================================
FILE: test/cases/fips/manifests/test-pods.yaml
================================================
# Pulls the seeded image through the registry on port 5000; the pod runs a
# single echo and is never restarted.
apiVersion: v1
kind: Pod
metadata:
  name: test-pull-fips
spec:
  containers:
    - name: test
      image: localhost:5000/test:latest
      command: ["echo", "FIPS cipher works"]
  restartPolicy: Never
---
# Pulls the same image through the nginx proxy on port 5001; the test expects
# this pull to fail, so the command should never run.
apiVersion: v1
kind: Pod
metadata:
  name: test-pull-nonfips
spec:
  containers:
    - name: test
      image: localhost:5001/test:latest
      command: ["echo", "should not reach here"]
  restartPolicy: Never


================================================
FILE: test/cases/netpol/main_test.go
================================================
//go:build e2e

package netpol

import (
	"context"
	"flag"
	"log"
	"os"
	"testing"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/eks"
	"github.com/aws/aws-sdk-go-v2/service/eks/types"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/pkg/errors"
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// Package-level state shared by TestMain and the helpers in this file.
var (
	// testenv is the e2e-framework environment created in TestMain.
	testenv           env.Environment
	// clusterName is set from the -cluster-name flag.
	clusterName       string
	// endPointUrl is set from the -endpoint-url flag.
	endPointUrl       string
	// kubernetesVersion is filled in during Setup from getClusterVersion.
	kubernetesVersion string
	// addonName is the EKS addon that the install/uninstall helpers operate on.
	addonName         string = "vpc-cni"
)

func TestMain(m *testing.M) {

	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	config, err := config.LoadDefaultConfig(context.TODO())
	eksclient := eks.NewFromConfig(config)
	testenv = env.NewWithConfig(cfg)

	flag.StringVar(&clusterName, "cluster-name", "", "Name of the cluster")
	flag.StringVar(&endPointUrl, "endpoint-url", "", "Endpoint url to use")
	flag.Parse()

	namespaces := []string{"a", "b", "c"}

	testenv.Setup(

		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			client, err := config.NewClient()
			if err != nil {
				return ctx, err
			}

			servers := map[string]string{
				"a": "a-server",
				"b": "b-server",
				"c": "c-server",
			}

			// 1. Install Latest CNI version
			log.Print("Install the latest VPC-CNI on the cluster")
			kubernetesVersion, err = getClusterVersion(ctx, eksclient)
			if err != nil {
				return ctx, err
			}

			err = installLatestCNIVersion(ctx, config, eksclient)
			if err != nil {
				return ctx, err
			}

			// 2. Create three namespaces
			log.Print("Creating the test namespaces")
			for _, ns := range namespaces {
				err = createNamespace(ns, client, ctx)
				if err != nil {
					return ctx, errors.Wrapf(err, "Failed to create namespace %s", ns)
				}
			}

			// 3. Create deployment and service
			log.Print("Creating the test deployment and service")
			for ns, server := range servers {
				err = createServerAndService(ns, server, 1, client, ctx)
				if err != nil {
					return ctx, errors.Wrapf(err, "Failed to create deployment and service for %s", server)
				}
			}

			return ctx, nil
		},
	)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			client, err := config.NewClient()

			if err != nil {
				return ctx, err
			}

			log.Print("Deleting the test namespaces")
			for _, ns := range namespaces {
				client.Resources().Delete(ctx, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns, Namespace: ns}})
			}

			log.Print("Installing the Default version of VPC-CNI on the cluster")
			err = installDefaultCNIVersion(ctx, config, eksclient)
			if err != nil {
				return ctx, err
			}
			return ctx, nil
		},
	)

	os.Exit(testenv.Run(m))
}

// installDefaultCNIVersion replaces the currently installed VPC-CNI addon with
// the default addon version.
//
// The existing addon is removed first on a best-effort basis: a failed
// uninstall is logged instead of being returned, and the install is still
// attempted. An error is returned only when the install itself fails.
func installDefaultCNIVersion(ctx context.Context, config *envconf.Config, eksclient *eks.Client) error {

	// Uninstall the currently installed addon. Failure here is not fatal, but
	// it is surfaced in the log so a broken cleanup does not go unnoticed.
	if err := uninstallCNIAddon(ctx, config, eksclient); err != nil {
		log.Printf("Could not uninstall the current addon, continuing with install: %v", err)
	}

	// Passing addonVersion empty installs the default version of addon
	err := installCNIAddon(ctx, config, eksclient, "", "")
	if err != nil {
		return errors.Wrap(err, "Could not install the default addon version")
	}

	return nil
}

// installLatestCNIVersion installs the addon version reported by
// getLatestCNIAddon, with the network policy feature switched on through the
// addon's configuration values.
func installLatestCNIVersion(ctx context.Context, config *envconf.Config, eksclient *eks.Client) error {
	latest, lookupErr := getLatestCNIAddon(ctx, eksclient)
	if lookupErr != nil {
		return lookupErr
	}

	// The addon configuration is handed to EKS as a JSON document.
	const networkPolicyConfig = `{"enableNetworkPolicy": "true"}`

	if installErr := installCNIAddon(ctx, config, eksclient, latest, networkPolicyConfig); installErr != nil {
		return installErr
	}
	return nil
}

// uninstallCNIAddon deletes the addon named addonName from the cluster named
// clusterName and then waits (up to five minutes) for the kube-system/aws-node
// DaemonSet to disappear.
//
// It returns an error when the DeleteAddon call fails or when the DaemonSet is
// still present after the timeout.
func uninstallCNIAddon(ctx context.Context, config *envconf.Config, eksclient *eks.Client) error {

	cniDS := &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "aws-node", Namespace: "kube-system"}}

	_, err := eksclient.DeleteAddon(ctx, &eks.DeleteAddonInput{
		AddonName:   aws.String(addonName),
		ClusterName: aws.String(clusterName),
	})
	// Without this check the API error was overwritten by the wait below and
	// the caller only ever saw a five minute timeout.
	if err != nil {
		return errors.Wrap(err, "Failed to delete addon")
	}

	err = wait.For(conditions.New(config.Client().Resources()).ResourceDeleted(cniDS), wait.WithTimeout(time.Minute*5))
	if err != nil {
		return errors.Wrap(err, "Daemonset could not be deleted")
	}

	return nil
}

// getLatestCNIAddon asks EKS for the versions of the addon named addonName
// that are compatible with kubernetesVersion and returns the first version of
// the first addon in the response.
//
// NOTE(review): this relies on DescribeAddonVersions listing the newest
// version first — confirm against the EKS API documentation.
//
// An error is returned when the API call fails or when the response does not
// contain any usable version.
func getLatestCNIAddon(ctx context.Context, eksclient *eks.Client) (string, error) {

	addonVersions, err := eksclient.DescribeAddonVersions(ctx, &eks.DescribeAddonVersionsInput{
		AddonName:         aws.String(addonName),
		KubernetesVersion: aws.String(kubernetesVersion),
	})

	if err != nil {
		return "", err
	}

	// Guard every level that is indexed or dereferenced below so an unexpected
	// (empty) response yields an error instead of a panic.
	if len(addonVersions.Addons) == 0 || len(addonVersions.Addons[0].AddonVersions) == 0 {
		return "", errors.Errorf("Addon versions not available")
	}

	version := addonVersions.Addons[0].AddonVersions[0].AddonVersion
	if version == nil {
		return "", errors.Errorf("Addon versions not available")
	}

	return *version, nil
}

// installCNIAddon creates the addon named addonName on the cluster named
// clusterName, using the given addonVersion and configurationValues, and then
// waits up to five minutes for the kube-system/aws-node DaemonSet to be ready.
//
// Any pre-existing aws-node DaemonSet is deleted first; the outcome of that
// delete is ignored. Conflicts are resolved with the overwrite strategy.
func installCNIAddon(ctx context.Context, config *envconf.Config, eksclient *eks.Client, addonVersion string, configurationValues string) error {
	awsNode := &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: "kube-system",
			Name:      "aws-node",
		},
	}

	// Remove the old DaemonSet if there is one.
	config.Client().Resources().Delete(ctx, awsNode)

	request := &eks.CreateAddonInput{
		ClusterName:         aws.String(clusterName),
		AddonName:           aws.String(addonName),
		AddonVersion:        aws.String(addonVersion),
		ConfigurationValues: aws.String(configurationValues),
		ResolveConflicts:    types.ResolveConflictsOverwrite,
	}
	if _, createErr := eksclient.CreateAddon(ctx, request); createErr != nil {
		return errors.Wrap(createErr, "Failed to create addon")
	}

	daemonSetReady := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(awsNode)
	if waitErr := wait.For(daemonSetReady, wait.WithTimeout(5*time.Minute)); waitErr != nil {
		return errors.Wrap(waitErr, "Daemonset failed to reach running state")
	}

	return nil
}

// getClusterVersion returns the Kubernetes version reported by DescribeCluster
// for the cluster named clusterName.
//
// An error is returned when the API call fails or when the response carries no
// cluster or no version, rather than dereferencing a nil pointer.
func getClusterVersion(ctx context.Context, eksclient *eks.Client) (string, error) {

	cluster, err := eksclient.DescribeCluster(ctx, &eks.DescribeClusterInput{
		Name: aws.String(clusterName),
	})

	if err != nil {
		return "", err
	}

	if cluster.Cluster == nil || cluster.Cluster.Version == nil {
		return "", errors.Errorf("Cluster version not available for cluster %s", clusterName)
	}

	return *cluster.Cluster.Version, nil
}

// createNamespace creates a Namespace called name that carries the label
// ns=<name>, and returns whatever error the create call reports.
func createNamespace(name string, client klient.Client, ctx context.Context) error {
	meta := metav1.ObjectMeta{
		Name:      name,
		Namespace: name,
		Labels:    map[string]string{"ns": name},
	}

	createErr := client.Resources().Create(ctx, &v1.Namespace{ObjectMeta: meta})
	if createErr != nil {
		return createErr
	}
	return nil
}

// createServerAndService creates, in the given namespace, a Service and an
// nginx Deployment that share the same name and the label app=<name>.
//
// The Service exposes TCP port 80 and selects the Deployment's pods. After
// both objects are created the function blocks for up to five minutes until
// the Deployment reports the Available condition. The first error encountered
// is returned unchanged.
func createServerAndService(namespace string, name string, replicas int32, client klient.Client, ctx context.Context) error {
	appLabels := map[string]string{"app": name}

	svc := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name},
		Spec: v1.ServiceSpec{
			Selector: appLabels,
			Ports: []v1.ServicePort{
				{Name: name, Protocol: "TCP", Port: 80},
			},
		},
	}
	if createErr := client.Resources().Create(ctx, svc); createErr != nil {
		return createErr
	}

	podTemplate := corev1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{Labels: appLabels},
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{
				{Name: name, Image: "nginx"},
			},
		},
	}
	server := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name},
		Spec: appsv1.DeploymentSpec{
			Replicas: &replicas,
			Selector: &metav1.LabelSelector{MatchLabels: appLabels},
			Template: podTemplate,
		},
	}
	if createErr := client.Resources().Create(ctx, server); createErr != nil {
		return createErr
	}

	// Block until the Deployment is reported as available.
	available := conditions.New(client.Resources()).DeploymentConditionMatch(server, appsv1.DeploymentAvailable, v1.ConditionTrue)
	if waitErr := wait.For(available, wait.WithTimeout(5*time.Minute)); waitErr != nil {
		return waitErr
	}

	return nil
}


================================================
FILE: test/cases/netpol/np_test.go
================================================
//go:build e2e

package netpol

import (
	"bytes"
	"context"
	"log"
	"strings"
	"testing"
	"time"

	corev1 "k8s.io/api/core/v1"
	networking "k8s.io/api/networking/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// TestNetworkPolicyCases checks HTTP connectivity between the server pods in
// namespaces a, b and c, first without any NetworkPolicy and then with a
// policy on the a-server pods.
//
// The policy allows TCP/80 ingress to a-server only from b-server pods in the
// namespace labelled ns=b, allows TCP/80 egress only to those same pods, and
// additionally allows UDP/53 egress to any destination. With the policy
// applied, A -> B must still work while C -> A must be blocked.
func TestNetworkPolicyCases(t *testing.T) {

	protocolTCP := corev1.ProtocolTCP
	protocolUDP := corev1.ProtocolUDP
	networkPolicy := networking.NetworkPolicy{
		ObjectMeta: metav1.ObjectMeta{Name: "block-c-to-a", Namespace: "a"},
		Spec: networking.NetworkPolicySpec{
			PodSelector: metav1.LabelSelector{MatchLabels: map[string]string{"app": "a-server"}},
			PolicyTypes: []networking.PolicyType{networking.PolicyTypeIngress, networking.PolicyTypeEgress},
			Ingress: []networking.NetworkPolicyIngressRule{
				{
					From: []networking.NetworkPolicyPeer{
						{
							PodSelector:       &metav1.LabelSelector{MatchLabels: map[string]string{"app": "b-server"}},
							NamespaceSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"ns": "b"}},
						},
					},
					Ports: []networking.NetworkPolicyPort{
						{
							Protocol: &protocolTCP,
							Port:     &intstr.IntOrString{IntVal: 80},
						},
					},
				},
			},
			Egress: []networking.NetworkPolicyEgressRule{
				{
					To: []networking.NetworkPolicyPeer{
						{
							PodSelector:       &metav1.LabelSelector{MatchLabels: map[string]string{"app": "b-server"}},
							NamespaceSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"ns": "b"}},
						},
					},
					Ports: []networking.NetworkPolicyPort{
						{
							Protocol: &protocolTCP,
							Port:     &intstr.IntOrString{IntVal: 80},
						},
					},
				},
				{
					// Second egress rule: UDP/53 with no peer restriction.
					Ports: []networking.NetworkPolicyPort{
						{
							Protocol: &protocolUDP,
							Port:     &intstr.IntOrString{IntVal: 53},
						},
					},
				},
			},
		},
	}

	// curlStatusLine runs `curl -m 2 -I <url>` inside the first pod listed in
	// namespace and returns the first line of stdout, the captured stderr and
	// the exec error. Failing to build a client or to find a pod aborts the
	// test: otherwise the step would either panic on an empty pod list or be
	// reported as passing without having checked anything.
	curlStatusLine := func(ctx context.Context, t *testing.T, cfg *envconf.Config, namespace, containerName, url string) (string, string, error) {
		t.Helper()
		client, err := cfg.NewClient()
		if err != nil {
			t.Fatalf("error while creating client: %v", err)
		}
		pods := &corev1.PodList{}
		if err := client.Resources(namespace).List(ctx, pods); err != nil {
			t.Fatalf("error while getting pods in namespace %q: %v", namespace, err)
		}
		if len(pods.Items) == 0 {
			t.Fatalf("error while getting pods: no pods found in namespace %q", namespace)
		}
		podName := pods.Items[0].Name

		var stdout, stderr bytes.Buffer
		command := []string{"curl", "-m", "2", "-I", url}
		execErr := client.Resources().ExecInPod(ctx, namespace, podName, containerName, command, &stdout, &stderr)

		httpStatus := strings.Split(stdout.String(), "\n")[0]
		return httpStatus, stderr.String(), execErr
	}

	// assertCurl builds an assessment step that curls url from the given
	// namespace/container and fails with failureMsg when the observed
	// reachability (a status line containing "200") differs from wantReachable.
	// The exec error is only reported, never asserted on, because a blocked
	// connection is expected to make curl itself fail.
	assertCurl := func(namespace, containerName, url string, wantReachable bool, failureMsg string) func(context.Context, *testing.T, *envconf.Config) context.Context {
		return func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			httpStatus, stderr, execErr := curlStatusLine(ctx, t, cfg, namespace, containerName, url)
			if reachable := strings.Contains(httpStatus, "200"); reachable != wantReachable {
				t.Fatalf("%s (status line %q, stderr %q, exec error: %v)", failureMsg, httpStatus, stderr, execErr)
			}
			return ctx
		}
	}

	allowAll := features.New("allowAll").
		WithLabel("suite", "netpol").
		WithLabel("policy", "none").
		Assess("curl from A to B succeeds", assertCurl("a", "a-server", "http://b-server.b:80", true, "Couldn't connect to server B")).
		Assess("curl from C to A succeeds", assertCurl("c", "c-server", "http://a-server.a:80", true, "Couldn't connect to server A")).
		Feature()

	blockCToA := features.New("blockCToA").
		WithLabel("suite", "netpol").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			client, err := cfg.NewClient()
			if err != nil {
				// Without the policy the assessments below are meaningless.
				t.Fatalf("error while creating client: %v", err)
			}

			log.Print("Applying Network Policy")
			if err := client.Resources().Create(ctx, &networkPolicy); err != nil {
				t.Error("error while applying Network Policy", err)
				return ctx
			}

			// This time-wait is to account for Network Policy Controller to start up, run leader election in the control plane
			// and to apply the network policy
			time.Sleep(1 * time.Minute)

			return ctx

		}).
		Assess("curl from A to B succeeds", assertCurl("a", "a-server", "http://b-server.b:80", true, "Couldn't connect to server B")).
		Assess("curl from C to A fails", assertCurl("c", "c-server", "http://a-server.a:80", false, "Network Policy didn't block connection to server A")).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			client, err := cfg.NewClient()
			if err != nil {
				t.Error("error while creating client", err)
				return ctx
			}

			if err := client.Resources().Delete(ctx, &networkPolicy); err != nil {
				t.Error("error while deleting Network Policy", err)
				return ctx
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, allowAll, blockCToA)
}


================================================
FILE: test/cases/neuron/main_test.go
================================================
//go:build e2e

package neuron

import (
	"context"
	_ "embed"
	"flag"
	"fmt"
	"log"
	"os"
	"os/signal"
	"slices"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"
	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/manifests"
	"github.com/aws/aws-sdk-go-v2/aws"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// Package-level state shared by TestMain, the setup functions and the tests.
var (
	// testenv is the e2e-framework environment built in TestMain.
	testenv             env.Environment
	// nodeType is the -nodeType flag; when left empty checkNodeTypes replaces
	// it with the instance type label of the first listed node.
	nodeType            *string
	// efaEnabled is the -efaEnabled flag; it gates the EFA device plugin
	// deployment and the EFA capacity check.
	efaEnabled          *bool
	// nodeCount is the number of nodes whose instance type matches nodeType,
	// as counted by checkNodeTypes.
	nodeCount           int
	// neuronPerNode is the total aws.amazon.com/neuron capacity divided by
	// nodeCount (integer division), set by checkNodeTypes.
	neuronPerNode       int
	// neuronCorePerNode is the total aws.amazon.com/neuroncore capacity
	// divided by nodeCount, set by checkNodeTypes.
	neuronCorePerNode   int
	// efaPerNode is the total vpc.amazonaws.com/efa capacity divided by
	// nodeCount; it stays 0 when efaEnabled is false.
	efaPerNode          int
	// neuronTestImage is the -neuronTestImage flag.
	neuronTestImage     *string
	// installDevicePlugin is the -installDevicePlugin flag (default true).
	installDevicePlugin *bool
)

// deployNeuronDevicePlugin waits for the
// kube-system/neuron-device-plugin-daemonset DaemonSet to become ready. It
// does not apply any manifest itself.
//
// The incoming ctx is returned unchanged; the error is wrapped with the plugin
// name so setup failures identify the component, matching the other deploy
// helpers in this file.
func deployNeuronDevicePlugin(ctx context.Context, config *envconf.Config) (context.Context, error) {
	ds := appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: "neuron-device-plugin-daemonset", Namespace: "kube-system"},
	}
	err := wait.For(fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&ds),
		wait.WithContext(ctx))
	if err != nil {
		return ctx, fmt.Errorf("failed to deploy neuron-device-plugin: %w", err)
	}
	return ctx, nil
}

// deployMPIOperator waits for the mpi-operator/mpi-operator Deployment to
// report the Available condition. It does not apply any manifest itself.
//
// The incoming ctx is returned unchanged. The underlying error is wrapped with
// %w so callers can still inspect it with errors.Is / errors.As.
func deployMPIOperator(ctx context.Context, config *envconf.Config) (context.Context, error) {
	dep := appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: "mpi-operator", Namespace: "mpi-operator"},
	}
	err := wait.For(conditions.New(config.Client().Resources()).DeploymentConditionMatch(&dep, appsv1.DeploymentAvailable, v1.ConditionTrue),
		wait.WithContext(ctx))
	if err != nil {
		return ctx, fmt.Errorf("failed to deploy mpi-operator: %w", err)
	}
	return ctx, nil
}

// deployEFAPlugin applies the EFA device plugin manifest and waits for the
// kube-system/aws-efa-k8s-device-plugin-daemonset DaemonSet to become ready.
//
// The incoming ctx is returned unchanged. Both failure modes are wrapped with
// %w so the cause stays inspectable and the failing step is named.
func deployEFAPlugin(ctx context.Context, config *envconf.Config) (context.Context, error) {
	err := fwext.ApplyManifests(config.Client().RESTConfig(), manifests.EfaDevicePluginManifest)
	if err != nil {
		return ctx, fmt.Errorf("failed to apply efa-device-plugin manifest: %w", err)
	}

	ds := appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: "aws-efa-k8s-device-plugin-daemonset", Namespace: "kube-system"},
	}
	err = wait.For(fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&ds),
		wait.WithContext(ctx))
	if err != nil {
		return ctx, fmt.Errorf("failed to deploy efa-device-plugin: %w", err)
	}

	return ctx, nil
}

// checkNodeTypes inspects the cluster's nodes and records, in package-level
// variables, how many nodes match nodeType and how much Neuron, NeuronCore and
// (when efaEnabled is set) EFA capacity each of them offers on average.
//
// When nodeType is empty it is set to the instance type label of the first
// listed node. Nodes of any other instance type are ignored.
//
// The incoming ctx is always returned, including on error, so later
// environment steps never receive a nil context. An error is returned when the
// wait is cancelled, the nodes cannot be listed, no node matches, or a
// matching node fails the e2e.GetNonZeroResourceCapacity lookup.
func checkNodeTypes(ctx context.Context, config *envconf.Config) (context.Context, error) {
	// give node info time to populate, but stop waiting once ctx is cancelled
	select {
	case <-time.After(time.Minute):
	case <-ctx.Done():
		return ctx, ctx.Err()
	}

	clientset, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return ctx, fmt.Errorf("failed to create Kubernetes client: %w", err)
	}

	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return ctx, fmt.Errorf("failed to list nodes: %w", err)
	}

	if len(nodes.Items) == 0 {
		return ctx, fmt.Errorf("no nodes found in the cluster")
	}

	if *nodeType == "" {
		nodeType = aws.String(nodes.Items[0].Labels["node.kubernetes.io/instance-type"])
		log.Printf("No node type specified. Using the node type %s in the node groups.", *nodeType)
	}

	// Count into locals and publish the result once at the end, so a failed or
	// repeated run cannot leave the globals half updated or double counted.
	var matchedNodes, totalEfaCount, totalNeuronCoreCount, totalNeuronCount int
	for i := range nodes.Items {
		node := &nodes.Items[i]
		if node.Labels["node.kubernetes.io/instance-type"] != *nodeType {
			continue
		}
		neuron, err := e2e.GetNonZeroResourceCapacity(node, "aws.amazon.com/neuron")
		if err != nil {
			return ctx, err
		}
		totalNeuronCount += neuron

		// Check for NeuronCore capacity
		neuronCore, err := e2e.GetNonZeroResourceCapacity(node, "aws.amazon.com/neuroncore")
		if err != nil {
			return ctx, err
		}
		totalNeuronCoreCount += neuronCore

		// Check for EFA capacity
		if *efaEnabled {
			efa, err := e2e.GetNonZeroResourceCapacity(node, "vpc.amazonaws.com/efa")
			if err != nil {
				return ctx, err
			}
			totalEfaCount += efa
		}
		matchedNodes++
	}

	if matchedNodes == 0 {
		return ctx, fmt.Errorf("no nodes of type %q found", *nodeType)
	}

	// Update global capacities
	nodeCount = matchedNodes
	neuronPerNode = totalNeuronCount / nodeCount
	neuronCorePerNode = totalNeuronCoreCount / nodeCount
	efaPerNode = totalEfaCount / nodeCount

	log.Printf("[INFO] Total Nodes: %d", nodeCount)
	log.Printf("[INFO] Total Neuron Count: %d, Neuron Per Node: %d", totalNeuronCount, neuronPerNode)
	log.Printf("[INFO] Total Neuron Core Count: %d, Neuron Core Per Node: %d", totalNeuronCoreCount, neuronCorePerNode)
	log.Printf("[INFO] Total EFA Count: %d, EFA Per Node: %d", totalEfaCount, efaPerNode)

	return ctx, nil
}

// TestMain wires up the neuron e2e environment.
//
// It registers the test flags, builds the environment from the e2e-framework
// flags, and schedules the setup steps: apply the deployment manifests, wait
// for the MPI operator, optionally wait for the Neuron device plugin, optionally
// deploy the EFA device plugin, and finally run checkNodeTypes. The finish step
// removes what setup applied, in reverse order.
func TestMain(m *testing.M) {
	nodeType = flag.String("nodeType", "", "node type for the tests")
	efaEnabled = flag.Bool("efaEnabled", false, "enable efa tests")
	neuronTestImage = flag.String("neuronTestImage", "", "image for neuron single node test")
	installDevicePlugin = flag.Bool("installDevicePlugin", true, "install neuron device plugin")
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}
	testenv = env.NewWithConfig(cfg)
	// cancel is invoked explicitly before os.Exit below; a deferred call would
	// never run because os.Exit skips deferred functions.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = testenv.WithContext(ctx)

	deploymentManifests := [][]byte{
		manifests.MpiOperatorManifest,
	}
	setUpFunctions := []env.Func{
		// The closure reads deploymentManifests when setup runs, so manifests
		// appended further down are applied as well.
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			err := fwext.ApplyManifests(config.Client().RESTConfig(), deploymentManifests...)
			if err != nil {
				return ctx, err
			}
			return ctx, nil
		},
		deployMPIOperator,
	}

	if *installDevicePlugin {
		deploymentManifests = append(deploymentManifests, manifests.NeuronDevicePluginManifest, manifests.NeuronDevicePluginRbacManifest)
		setUpFunctions = append(setUpFunctions, deployNeuronDevicePlugin)
	}

	if *efaEnabled {
		setUpFunctions = append(setUpFunctions, deployEFAPlugin)
	}

	setUpFunctions = append(setUpFunctions, checkNodeTypes)
	testenv.Setup(setUpFunctions...)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Only remove the EFA plugin when setup deployed it; deleting it
			// unconditionally could fail and skip the remaining cleanup.
			if *efaEnabled {
				err := fwext.DeleteManifests(config.Client().RESTConfig(), manifests.EfaDevicePluginManifest)
				if err != nil {
					return ctx, err
				}
			}
			// Delete in the opposite order of application.
			slices.Reverse(deploymentManifests)
			err := fwext.DeleteManifests(config.Client().RESTConfig(), deploymentManifests...)
			if err != nil {
				return ctx, err
			}
			return ctx, nil
		},
	)

	exitCode := testenv.Run(m)
	cancel()
	os.Exit(exitCode)
}


================================================
FILE: test/cases/neuron/manifests/multi-node-test-neuron.yaml
================================================
# MPIJob template for the multi-node nccom test.
#
# This file is a template, not plain YAML: the placeholders in double curly
# braces (NeuronPerNode, NeuronCorePerNode, WorkerNodeCount, NeuronTestImage,
# EfaInterfacePerNode) are filled in before the manifest is applied.
#
# The launcher resolves the IP of every worker pod by name, exports the first
# one as the root communicator address and runs nccom-test across all workers.
# Each worker runs sshd in the foreground and requests the per-node Neuron,
# NeuronCore and EFA resources.
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: multi-node-nccom-test
spec:
  slotsPerWorker: {{.NeuronPerNode}}
  runPolicy:
    backoffLimit: 20
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          restartPolicy: OnFailure
          containers:
          - image: {{.NeuronTestImage}}
            imagePullPolicy: Always
            name: nccom-test-launcher
            env:
            # POD_IP is passed to nccom-test as the data collector host.
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            command:
            - /bin/bash
            args:
            - -c
            - |
                WORKER_IPS=()
                for i in $(seq 0 $(({{.WorkerNodeCount}} - 1))); do
                  WORKER_IP=$(getent hosts multi-node-nccom-test-worker-$i.multi-node-nccom-test | awk '{print $1}')
                  WORKER_IPS+=("$WORKER_IP")
                done

                export CCOM_SOCKET_IFNAME=eth0
                export NEURON_RT_ROOT_COMM_ID=${WORKER_IPS[0]}:63182
                nccom-test -r $(({{.NeuronCorePerNode}}*{{.WorkerNodeCount}})) -N {{.WorkerNodeCount}} -b "8" -e "2G" -f "2" -n "5" -w "5" -d "fp32" allr --hosts ${WORKER_IPS[*]} --data-collector-host $POD_IP --data-collector-port 60006 --debug
    Worker:
      replicas: {{.WorkerNodeCount}}
      template:
        spec:
          securityContext:
            runAsUser: 1000
            runAsGroup: 2000
            fsGroup: 3000
          containers:
          - image: {{.NeuronTestImage}}
            name: nccom-test-worker
            command: ["/bin/bash"]
            args: ["-c", "echo password | sudo -S /usr/sbin/sshd -D"]
            imagePullPolicy: Always
            resources:
              limits:
                aws.amazon.com/neuron: {{.NeuronPerNode}}
                aws.amazon.com/neuroncore: {{.NeuronCorePerNode}}
                vpc.amazonaws.com/efa: {{.EfaInterfacePerNode}}
              requests:
                aws.amazon.com/neuron: {{.NeuronPerNode}}
                aws.amazon.com/neuroncore: {{.NeuronCorePerNode}}
                vpc.amazonaws.com/efa: {{.EfaInterfacePerNode}}

================================================
FILE: test/cases/neuron/manifests/single-node-test-neuronx.yaml
================================================
# Job template for the single-node Neuron test.
#
# This file is a template: the NeuronTestImage placeholder is filled in before
# the manifest is applied. The Job runs ./tests/singleNodeTest.sh from that
# image in one container that requests a single aws.amazon.com/neuron device.
kind: Job
apiVersion: batch/v1
metadata:
  name: neuronx-single-node
  labels:
    app: neuronx-single-node
spec:
  template:
    metadata:
      labels:
        app: neuronx-single-node
    spec:
      containers:
      - name: neuronx-single-node-test
        image: {{.NeuronTestImage}}
        command:
        - /bin/bash
        - ./tests/singleNodeTest.sh
        imagePullPolicy: Always
        resources:
          limits:
            cpu: "4"
            memory: 4Gi
            aws.amazon.com/neuron: "1"
          requests:
            cpu: "1"
            memory: 1Gi
            aws.amazon.com/neuron: "1"
      # Pods are never restarted in place; retries are bounded by backoffLimit.
      restartPolicy: Never
      securityContext:
        runAsUser: 1000
        runAsGroup: 2000
        fsGroup: 3000
  backoffLimit: 4


================================================
FILE: test/cases/neuron/neuron_test.go
================================================
//go:build e2e

package neuron

import (
	"context"
	_ "embed"
	"fmt"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/internal/e2e/mpijobs"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"

	batchv1 "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Embedded manifest templates and their rendered forms. The rendered variables
// are package-level so that a feature's Teardown can delete exactly what its
// Setup applied.
var (
	// neuronSingleNodeManifest is the raw single node Job template.
	//go:embed manifests/single-node-test-neuronx.yaml
	neuronSingleNodeManifest []byte
	// neuronMultiNodeManifest is the raw multi node MPIJob template.
	//go:embed manifests/multi-node-test-neuron.yaml
	neuronMultiNodeManifest          []byte
	// renderedNeuronSingleNodeManifest is assigned in the single-node Setup.
	renderedNeuronSingleNodeManifest []byte
	// renderedNeuronMultiNodeManifest is read by the multi-node Teardown.
	renderedNeuronMultiNodeManifest  []byte
)

// neuronSingleNodeManifestTplVars holds the values passed to
// fwext.RenderManifests for the single node manifest template.
type neuronSingleNodeManifestTplVars struct {
	// NeuronTestImage is the container image, taken from -neuronTestImage.
	NeuronTestImage string
}

// neuronMultiNodeTestManifestTplVars holds the values passed to
// fwext.RenderManifests for the multi node manifest template.
type neuronMultiNodeTestManifestTplVars struct {
	// WorkerNodeCount is the number of worker replicas; the test sets it to
	// nodeCount.
	WorkerNodeCount       int
	// WorkerNodeNeuronCount is set to nodeCount * neuronPerNode by the test.
	// NOTE(review): manifests/multi-node-test-neuron.yaml does not appear to
	// reference this field — confirm whether it is still needed.
	WorkerNodeNeuronCount int
	// NeuronPerNode is used for slotsPerWorker and the neuron resource amounts.
	NeuronPerNode         int
	// NeuronCorePerNode is used for the neuroncore resource amounts and the
	// nccom-test rank count.
	NeuronCorePerNode     int
	// NeuronTestImage is the container image, taken from -neuronTestImage.
	NeuronTestImage       string
	// EfaInterfacePerNode is used for the vpc.amazonaws.com/efa amounts.
	EfaInterfacePerNode   int
}

// TestNeuronNodes runs two features against the cluster:
//
//   - single-node: renders and applies the single node Job manifest, waits up
//     to 20 minutes for the Job to succeed, then logs the Job output and
//     deletes the manifest.
//   - multi-node: renders and applies the MPIJob manifest sized from the
//     capacities recorded by checkNodeTypes, waits up to 30 minutes for the
//     MPIJob to succeed, logs its output and deletes the manifest.
//
// Both features require -neuronTestImage to be set.
func TestNeuronNodes(t *testing.T) {
	singleNode := features.New("single-node").
		WithLabel("suite", "neuron").
		WithLabel("hardware", "neuron").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			if *neuronTestImage == "" {
				t.Fatal(fmt.Errorf("neuronTestImage must be set to run neuron single node test, use https://github.com/aws/aws-k8s-tester/blob/main/test/images/neuron/Dockerfile to build the image and -neuronTestImage to set the image url"))
			}
			var err error
			renderedNeuronSingleNodeManifest, err = fwext.RenderManifests(neuronSingleNodeManifest, neuronSingleNodeManifestTplVars{
				NeuronTestImage: *neuronTestImage,
			})
			if err != nil {
				t.Fatal(err)
			}
			t.Log("Applying single node manifest")
			err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedNeuronSingleNodeManifest)
			if err != nil {
				t.Fatal(err)
			}
			t.Log("Manifest applied successfully")
			return ctx
		}).
		Assess("Single node test Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "neuronx-single-node", Namespace: "default"},
			}
			t.Log("Waiting for single node job to complete")
			err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
				wait.WithContext(ctx),
				wait.WithTimeout(time.Minute*20),
			)
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			// Logs are collected in Teardown so they are printed whether or
			// not the assessment passed.
			jobLog, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "neuronx-single-node", Namespace: "default"},
			})
			if err != nil {
				t.Error(err)
			} else {
				t.Log("Test log for neuronx-single-node:")
				t.Log(jobLog)
			}
			err = fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedNeuronSingleNodeManifest)
			if err != nil {
				t.Error(err)
			}
			return ctx
		}).
		Feature()

	multiNode := features.New("multi-node").
		WithLabel("suite", "neuron").
		WithLabel("hardware", "neuron").
		WithLabel("hardware", "efa").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			if *neuronTestImage == "" {
				t.Fatal(fmt.Errorf("neuronTestImage must be set to run unit test, use https://github.com/aws/aws-k8s-tester/blob/main/test/images/neuron/Dockerfile to build the image and -neuronTestImage to set the image url"))
			}
			// Assign with "=" rather than ":=" so the package-level
			// renderedNeuronMultiNodeManifest is populated. A short variable
			// declaration here would shadow it and leave Teardown with a nil
			// manifest, i.e. nothing would be cleaned up.
			var err error
			renderedNeuronMultiNodeManifest, err = fwext.RenderManifests(neuronMultiNodeManifest, neuronMultiNodeTestManifestTplVars{
				// every node matching the node type runs one worker
				WorkerNodeCount:       nodeCount,
				WorkerNodeNeuronCount: nodeCount * neuronPerNode,
				NeuronPerNode:         neuronPerNode,
				NeuronCorePerNode:     neuronCorePerNode,
				NeuronTestImage:       *neuronTestImage,
				EfaInterfacePerNode:   efaPerNode,
			})
			if err != nil {
				t.Fatal(err)
			}
			t.Log("Applying multi node manifest")
			err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedNeuronMultiNodeManifest)
			if err != nil {
				t.Fatal(err)
			}
			t.Log("Applied manifest successfully")
			return ctx
		}).
		Assess("NCCOM test succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			mpiJob := mpijobs.NewUnstructured("multi-node-nccom-test", "default")
			ctx = context.WithValue(ctx, "mpiJob", mpiJob)
			t.Log("Waiting for MPIJob to complete")
			err := wait.For(conditions.New(cfg.Client().Resources()).ResourceMatch(mpiJob, mpijobs.MPIJobSucceeded),
				wait.WithContext(ctx),
				wait.WithTimeout(time.Minute*30),
			)
			if err != nil {
				t.Fatal(err)
			}

			jobLog, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), mpiJob)
			if err != nil {
				t.Fatal(err)
			}
			t.Log("Test log for multi-node-nccom-test:")
			t.Log(jobLog)
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			err := fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedNeuronMultiNodeManifest)
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, singleNode, multiNode)
}


================================================
FILE: test/cases/neuron-dra/main_test.go
================================================
//go:build e2e

package neuron_dra

import (
	"context"
	"embed"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"os/signal"
	"path/filepath"
	"slices"
	"strings"
	"sync"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/common"
	"github.com/aws/aws-k8s-tester/test/manifests"
	"golang.org/x/sync/errgroup"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// rctsFS holds the embedded rcts directory. TestMain loads manifests from the
// sub-directory rcts/<topology RCTSubDir> through common.LoadRCTManifests.
//
//go:embed rcts
var rctsFS embed.FS

// Package-level state shared by TestMain, the setup functions and the tests.
var (
	// testenv is the e2e-framework environment built in TestMain.
	testenv                   env.Environment
	// clientset is a Kubernetes client; it is not assigned in the visible
	// part of this file — presumably set during setup, confirm.
	clientset                 kubernetes.Interface
	// nodeType is the required -nodeType flag; validateConfig resolves it to
	// a topology via GetTopologyForNodeType.
	nodeType                  *string
	// rdmaDeviceDraDriverImage is the required -rdmaDeviceDraDriverImage flag.
	rdmaDeviceDraDriverImage  *string
	// acceleratorDraDriverImage is the optional -acceleratorDraDriverImage
	// flag; when set it overrides the Helm chart's DRA driver image.
	acceleratorDraDriverImage *string
	// containerTestImage is the required -containerTestImage flag.
	containerTestImage        *string
	// nodeCount is not assigned in the visible part of this file —
	// presumably the number of matching nodes, confirm.
	nodeCount                 int
)

// supportedRdmaTypes lists the recognized RDMA device types.
var supportedRdmaTypes = []string{"efa"}

func validateConfig() error {
	if err := common.ValidateRequiredFlags(map[string]string{
		"rdmaDeviceDraDriverImage": *rdmaDeviceDraDriverImage,
		"containerTestImage":       *containerTestImage,
		"nodeType":                 *nodeType,
	}); err != nil {
		return err
	}
	// Validate that nodeType maps to a known topology (and thus a known RDMA type)
	topo, err := GetTopologyForNodeType(*nodeType)
	if err != nil {
		return fmt.Errorf("invalid -nodeType: %w", err)
	}
	if !slices.Contains(supportedRdmaTypes, topo.RdmaType) {
		return fmt.Errorf("instance family %q has unsupported RDMA type %q; supported: %v", topo.Family, topo.RdmaType, supportedRdmaTypes)
	}
	// Verify helm is available on the PATH.
	if _, err := exec.LookPath("helm"); err != nil {
		return fmt.Errorf("helm is required but not found on PATH: %w", err)
	}
	return nil
}

// Helm coordinates used to install and uninstall the Neuron DRA driver.
const (
	// neuronHelmReleaseName is the Helm release name.
	neuronHelmReleaseName = "neuron-helm-chart"
	// neuronHelmChartOCI is the OCI reference of the chart that is installed.
	neuronHelmChartOCI    = "oci://public.ecr.aws/neuron/neuron-helm-chart"
	// neuronDRANamespace is the namespace of the release and of the
	// DaemonSet that deployNeuronDRADriver waits for.
	neuronDRANamespace    = "neuron-dra-driver"
)

// installNeuronDRADriverHelm installs the Neuron DRA driver via the public Helm chart.
// If acceleratorDraDriverImage is non-empty, it splits on the last ":" to extract
// repository and tag and passes them as --set overrides.
// installNeuronDRADriverHelm runs `helm upgrade --install` for the Neuron chart
// with only the DRA driver component enabled, waiting up to five minutes for
// the release. When acceleratorDraDriverImage is set, the repository and tag
// returned by common.SplitImageRepoTag are passed as --set overrides for the
// driver image. helm's output is streamed to this process's stdout/stderr.
func installNeuronDRADriverHelm(ctx context.Context, config *envconf.Config) (context.Context, error) {
	helmArgs := []string{"upgrade", "--install", neuronHelmReleaseName, neuronHelmChartOCI}
	helmArgs = append(helmArgs, "--namespace", neuronDRANamespace, "--create-namespace")
	for _, override := range []string{
		"devicePlugin.enabled=false",
		"npd.enabled=false",
		"draDriver.enabled=true",
	} {
		helmArgs = append(helmArgs, "--set", override)
	}
	helmArgs = append(helmArgs, "--wait", "--timeout", "5m")

	if image := *acceleratorDraDriverImage; image != "" {
		repo, tag := common.SplitImageRepoTag(image)
		helmArgs = append(helmArgs,
			"--set", fmt.Sprintf("draDriver.image.repository=%s", repo),
			"--set", fmt.Sprintf("draDriver.image.tag=%s", tag),
		)
	}

	log.Printf("[INFO] Installing Neuron DRA driver via Helm: helm %s", strings.Join(helmArgs, " "))
	helm := exec.CommandContext(ctx, "helm", helmArgs...)
	helm.Stdout, helm.Stderr = os.Stdout, os.Stderr
	if runErr := helm.Run(); runErr != nil {
		return ctx, fmt.Errorf("helm install neuron-dra-driver failed: %w", runErr)
	}

	log.Println("Neuron DRA driver Helm release installed successfully.")
	return ctx, nil
}

// uninstallNeuronDRADriverHelm uninstalls the Neuron DRA driver Helm release.
// uninstallNeuronDRADriverHelm runs `helm uninstall` for the Neuron release.
// A failing uninstall is only logged as a warning; the function never returns
// an error.
func uninstallNeuronDRADriverHelm(ctx context.Context, config *envconf.Config) (context.Context, error) {
	helmArgs := []string{"uninstall", neuronHelmReleaseName, "--namespace", neuronDRANamespace}
	log.Printf("[INFO] Uninstalling Neuron DRA driver Helm release: helm %s", strings.Join(helmArgs, " "))

	helm := exec.CommandContext(ctx, "helm", helmArgs...)
	helm.Stdout, helm.Stderr = os.Stdout, os.Stderr

	runErr := helm.Run()
	if runErr != nil {
		log.Printf("[WARN] helm uninstall neuron-dra-driver failed (may already be removed): %v", runErr)
	}
	return ctx, nil
}

// deployNeuronDRADriver blocks until the neuron-dra-driver-kubelet-plugin
// DaemonSet in neuronDRANamespace is ready, giving up after five minutes or
// when ctx is done. It does not install anything itself.
func deployNeuronDRADriver(ctx context.Context, config *envconf.Config) (context.Context, error) {
	kubeletPlugin := appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: neuronDRANamespace,
			Name:      "neuron-dra-driver-kubelet-plugin",
		},
	}

	ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&kubeletPlugin)
	if waitErr := wait.For(ready, wait.WithTimeout(5*time.Minute), wait.WithContext(ctx)); waitErr != nil {
		return ctx, fmt.Errorf("neuron-dra-driver daemonset is not ready: %w", waitErr)
	}

	log.Println("neuron-dra-driver daemonset is ready.")
	return ctx, nil
}

// TestMain registers the suite's flags, builds the e2e-framework environment,
// and wires the cluster-level setup and teardown steps before running the
// tests.
//
// Setup runs three steps in order:
//  1. concurrently deploy the MPI operator, (for "efa" topologies) dranet plus
//     the resource claim templates, and the Neuron DRA driver Helm release;
//  2. wait for the Neuron DRA driver DaemonSet to become ready;
//  3. create the shared clientset and count the nodes of the requested type.
//
// Finish uninstalls the Helm release and deletes the recorded manifests in
// reverse order.
func TestMain(m *testing.M) {
	nodeType = flag.String("nodeType", "", "instance type for the cluster (e.g. trn1.32xlarge)")
	rdmaDeviceDraDriverImage = flag.String("rdmaDeviceDraDriverImage", "", "container image for the dranet DRA driver")
	acceleratorDraDriverImage = flag.String("acceleratorDraDriverImage", "", "container image for the Neuron DRA driver")
	containerTestImage = flag.String("containerTestImage", "", "container image for the nccom test workload")

	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	if err := validateConfig(); err != nil {
		log.Fatalf("invalid configuration: %v", err)
	}

	// Resolve topology to determine RDMA type from nodeType. This happens
	// before the signal context is created so that every log.Fatalf exit
	// occurs while there is nothing that needs to be released.
	topo, err := GetTopologyForNodeType(*nodeType)
	if err != nil {
		log.Fatalf("failed to resolve topology: %v", err)
	}

	// cancel is invoked explicitly before os.Exit at the end of this function:
	// os.Exit terminates the process without running deferred calls.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	// Build the manifest list and setup functions dynamically. manifestsList
	// records everything that Finish must delete.
	manifestsList := [][]byte{
		manifests.MpiOperatorManifest,
	}
	setUpFunctions := []env.Func{
		// Run independent setup steps concurrently.
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// mu guards manifestsList, which is appended to from goroutines.
			var mu sync.Mutex
			g, gctx := errgroup.WithContext(ctx)

			// Deploy MPI operator.
			g.Go(func() error {
				return common.DeployMPIOperator(gctx, config)
			})

			// Deploy dranet and RCTs based on topology's RDMA type.
			if topo.RdmaType == "efa" {
				rctManifests, err := common.LoadRCTManifests(rctsFS, filepath.Join("rcts", topo.RCTSubDir))
				if err != nil {
					return ctx, fmt.Errorf("failed to load RCT manifests: %w", err)
				}
				mu.Lock()
				manifestsList = append(manifestsList, rctManifests...)
				mu.Unlock()

				g.Go(func() error {
					renderedDranet, err := common.DeployDranet(gctx, config, *rdmaDeviceDraDriverImage)
					if err != nil {
						return err
					}
					mu.Lock()
					manifestsList = append(manifestsList, renderedDranet)
					mu.Unlock()
					return nil
				})

				g.Go(func() error {
					return fwext.ApplyManifests(config.Client().RESTConfig(), rctManifests...)
				})
			}

			// Install Neuron DRA driver via Helm chart.
			g.Go(func() error {
				_, err := installNeuronDRADriverHelm(gctx, config)
				return err
			})

			if err := g.Wait(); err != nil {
				return ctx, err
			}
			return ctx, nil
		},
		deployNeuronDRADriver,
		// Populate the package-level clientset and nodeCount used by the tests.
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			var err error
			clientset, err = kubernetes.NewForConfig(config.Client().RESTConfig())
			if err != nil {
				return ctx, err
			}
			nodeCount, err = common.CountNodesByType(ctx, clientset, *nodeType)
			return ctx, err
		},
	}
	testenv.Setup(setUpFunctions...)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Uninstall Neuron DRA driver Helm release first. The helper is
			// best-effort and always returns a nil error, so it is dropped.
			ctx, _ = uninstallNeuronDRADriverHelm(ctx, config)
			// Delete remaining manifests in reverse order.
			slices.Reverse(manifestsList)
			if err := fwext.DeleteManifests(config.Client().RESTConfig(), manifestsList...); err != nil {
				return ctx, fmt.Errorf("failed to delete manifests: %w", err)
			}
			return ctx, nil
		},
	)

	exitCode := testenv.Run(m)
	cancel()
	os.Exit(exitCode)
}


================================================
FILE: test/cases/neuron-dra/neuron_dra_test.go
================================================
//go:build e2e

package neuron_dra

import (
	"embed"
	"path/filepath"
	"testing"

	"github.com/aws/aws-k8s-tester/test/common"
)

//go:embed testcases
var embeddedTestCases embed.FS

// TestNeuronDRAMultiNode builds the features for the test cases found under
// testcases/<TestCaseSubDir> of the configured node type's topology and runs
// them in the shared test environment. Each test case is rendered into an
// MPIJob manifest from the parameters computed for it. When discovery yields
// no features the test logs that fact and returns without failing.
func TestNeuronDRAMultiNode(t *testing.T) {
	topo, err := GetTopologyForNodeType(*nodeType)
	if err != nil {
		t.Fatalf("resolving topology for %s: %v", *nodeType, err)
	}

	rctDir := filepath.Join("rcts", topo.RCTSubDir)
	tcDir := filepath.Join("testcases", topo.TestCaseSubDir)

	rctIndex, err := common.LoadRCTIndex(rctsFS, rctDir)
	if err != nil {
		t.Fatalf("loading RCT index from %s: %v", rctDir, err)
	}

	// renderMPIJob turns one test case into the MPIJob YAML applied for it.
	renderMPIJob := func(tc *common.TestCaseSpec, index map[string]*common.ResourceClaimTemplateSpec) ([]byte, error) {
		jobParams, paramsErr := ComputeMPIJobParamsFromTestCase(tc, index, topo, nodeCount, *containerTestImage)
		if paramsErr != nil {
			return nil, paramsErr
		}
		return RenderMPIJobYAML(*jobParams)
	}

	featureList, err := common.DiscoverAndBuildFeatures(
		embeddedTestCases,
		tcDir,
		rctIndex,
		"neuron-dra",
		"multi-node-nccom-test",
		nodeCount,
		renderMPIJob,
		clientset,
	)
	if err != nil {
		t.Fatalf("discovering and building features: %v", err)
	}

	if len(featureList) > 0 {
		testenv.Test(t, featureList...)
		return
	}
	t.Logf("no test cases found under %s, skipping", tcDir)
}


================================================
FILE: test/cases/neuron-dra/rcts/trn1/rct-2-efas-4-neurons-wrong-match.yaml
================================================
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: default
  name: rct-2-efas-4-neurons-wrong-match
spec:
  spec:
    devices:
      requests:
      - name: 4-neurons
        deviceClassName: neuron.aws.com
        allocationMode: ExactCount
        count: 4
      - name: 2-efas
        deviceClassName: efa.networking.k8s.aws
        allocationMode: ExactCount
        count: 2
      constraints:
      - requests: ["4-neurons", "2-efas"]
        matchAttribute: "resource.aws.com/devicegroup1_id"


================================================
FILE: test/cases/neuron-dra/rcts/trn1/rct-all-efas-all-neurons.yaml
================================================
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: default
  name: rct-all-efas-all-neurons
spec:
  spec:
    devices:
      requests:
      - name: all-neurons
        deviceClassName: neuron.aws.com
        allocationMode: All
      - name: all-efas
        deviceClassName: efa.networking.k8s.aws
        allocationMode: All


================================================
FILE: test/cases/neuron-dra/templates/nccom-test-mpijob.yaml.tmpl
================================================
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: multi-node-nccom-test
spec:
  slotsPerWorker: {{.SlotsPerWorker}}
  runPolicy:
    backoffLimit: 20
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: nccom-test-launcher
              image: {{.ContainerTestImage}}
              imagePullPolicy: Always
              env:
                - name: POD_IP
                  valueFrom:
                    fieldRef:
                      fieldPath: status.podIP
              command:
                - /bin/bash
                - -lc
              args:
                - |
                  set -euo pipefail

                  WORKER_IPS=()
                  for i in $(seq 0 $(({{.WorkerReplicas}} - 1))); do
                    WORKER_IP=$(getent hosts multi-node-nccom-test-worker-$i.multi-node-nccom-test | awk '{print $1}')
                    WORKER_IPS+=("$WORKER_IP")
                  done

                  export NCCOM_SOCKET_IFNAME=eth0
                  export NEURON_RT_ROOT_COMM_ID=${WORKER_IPS[0]}:63182

                  nccom-test \
                    -r {{.TotalRanks}} \
                    -N {{.WorkerReplicas}} \
                    -b 8 \
                    -e 2G \
                    -f 2 \
                    -n 5 \
                    -w 5 \
                    -d fp32 \
                    allr \
                    --hosts ${WORKER_IPS[*]} \
                    --data-collector-host "${POD_IP}" \
                    --data-collector-port 60006 \
                    --debug

    Worker:
      replicas: {{.WorkerReplicas}}
      template:
        spec:
          restartPolicy: OnFailure
          securityContext:
            runAsUser: 0
          containers:
            - name: nccom-test-worker
              image: {{.ContainerTestImage}}
              imagePullPolicy: Always
              securityContext:
                capabilities:
                  add: ["NET_ADMIN"]
              env:
                - name: FI_EFA_USE_DEVICE_RDMA
                  value: "1"
              command:
                - /bin/bash
                - -lc
              args:
                - |
                  set -euo pipefail

                  MY_IP=$(hostname -i)
                  ip addr add ${MY_IP}/16 dev eth0 label eth0:ccom
                  ip route del 192.168.0.0/16 dev eth0 2>/dev/null || true
                  /usr/sbin/sshd -D
              resources:
                claims:
{{- range .ResourceClaims}}
                - name: {{.Name}}
{{- end}}
          resourceClaims:
{{- range .ResourceClaims}}
          - name: {{.Name}}
            resourceClaimTemplateName: {{.TemplateName}}
{{- end}}


================================================
FILE: test/cases/neuron-dra/testcases/trn1/2-efas-4-neurons-wrong-match.yaml
================================================
expectFailure: true
resourceClaims:
- name: 2-efas-4-neurons-wrong-match
  resourceClaimTemplateName: rct-2-efas-4-neurons-wrong-match


================================================
FILE: test/cases/neuron-dra/testcases/trn1/all-efas-all-neurons.yaml
================================================
resourceClaims:
- name: all-efas-all-neurons
  resourceClaimTemplateName: rct-all-efas-all-neurons


================================================
FILE: test/cases/neuron-dra/topology.go
================================================
package neuron_dra

import (
	"bytes"
	_ "embed"
	"fmt"
	"slices"
	"strings"
	"text/template"

	"github.com/aws/aws-k8s-tester/test/common"
)

//go:embed templates/nccom-test-mpijob.yaml.tmpl
var mpijobTemplate string

// ---------------------------------------------------------------------------
// Instance topology
// ---------------------------------------------------------------------------

// InstanceTopology describes the Neuron/EFA hardware topology for an instance family.
// Entries are registered in instanceTopologies and retrieved with
// GetTopologyForNodeType.
type InstanceTopology struct {
	Family               string // instance family name (e.g. "trn1")
	NeuronCoresPerDevice int    // multiplied by the neuron device count to derive MPI slots per worker
	AllNeuronCount       int    // neuron device count used when a request has AllocationMode "All"
	RdmaType             string // RDMA device type (e.g. "efa")
	RCTSubDir            string // subdirectory under rcts/
	TestCaseSubDir       string // subdirectory under testcases/
}

// instanceTopologies is the registry of supported instance families, keyed by
// the family name that GetTopologyForNodeType derives from a node type.
var instanceTopologies = map[string]InstanceTopology{
	"trn1": {
		Family:               "trn1",
		NeuronCoresPerDevice: 2,
		AllNeuronCount:       16,
		RdmaType:             "efa",
		RCTSubDir:            "trn1",
		TestCaseSubDir:       "trn1",
	},
}

// GetTopologyForNodeType looks up the InstanceTopology registered for the
// family of nodeType (e.g. "trn1.32xlarge"); the family is derived with
// common.ExtractFamily.
//
// On success it returns a pointer to a copy of the registry entry. When the
// family is not registered it returns an error that names the family, the
// original node type, and the supported families.
func GetTopologyForNodeType(nodeType string) (*InstanceTopology, error) {
	family := common.ExtractFamily(nodeType)
	if entry, found := instanceTopologies[family]; found {
		return &entry, nil
	}
	return nil, fmt.Errorf("unsupported instance family %q (from %q); supported: %s",
		family, nodeType, supportedFamilies())
}

// supportedFamilies returns the registered instance family names as a
// comma-separated list in ascending order. The names are sorted because map
// iteration order is unspecified; sorting keeps the resulting error text
// stable from one call to the next.
func supportedFamilies() string {
	families := make([]string, 0, len(instanceTopologies))
	for family := range instanceTopologies {
		families = append(families, family)
	}
	slices.Sort(families)
	return strings.Join(families, ", ")
}

// ---------------------------------------------------------------------------
// MPIJob rendering
// ---------------------------------------------------------------------------

// MPIJobParams holds all template parameters for rendering the MPIJob YAML.
//
// In the embedded template, SlotsPerWorker fills spec.slotsPerWorker,
// TotalRanks is passed to nccom-test as -r, WorkerReplicas sets the worker
// replica count and the nccom-test -N argument, ContainerTestImage is the
// image of both the launcher and worker containers, and ResourceClaims lists
// the claims attached to the worker pods.
type MPIJobParams struct {
	SlotsPerWorker     int
	TotalRanks         int
	WorkerReplicas     int
	ContainerTestImage string
	ResourceClaims     []common.ResourceClaimRef
}

// RenderMPIJobYAML executes the embedded MPIJob template (mpijobTemplate) with
// params and returns the rendered YAML. The template is parsed on every call;
// parse and execution failures are returned as wrapped errors.
func RenderMPIJobYAML(params MPIJobParams) ([]byte, error) {
	parsed, parseErr := template.New("mpijob").Parse(mpijobTemplate)
	if parseErr != nil {
		return nil, fmt.Errorf("parsing MPIJob template: %w", parseErr)
	}

	rendered := new(bytes.Buffer)
	if execErr := parsed.Execute(rendered, params); execErr != nil {
		return nil, fmt.Errorf("rendering MPIJob template: %w", execErr)
	}
	return rendered.Bytes(), nil
}

// ---------------------------------------------------------------------------
// Neuron-specific helpers
// ---------------------------------------------------------------------------

// getNeuronCount reports how many neuron devices an RCT asks for. Only the
// first request whose device class is "neuron.aws.com" is considered: with
// AllocationMode "All" the topology's AllNeuronCount is returned, otherwise
// the request's explicit Count. An RCT without a neuron request yields 0.
func getNeuronCount(rct *common.ResourceClaimTemplateSpec, topo *InstanceTopology) int {
	for _, request := range rct.Spec.Spec.Devices.Requests {
		switch {
		case request.DeviceClassName != "neuron.aws.com":
			// Not a neuron request (e.g. an EFA request); keep looking.
			continue
		case request.AllocationMode == "All":
			return topo.AllNeuronCount
		default:
			return request.Count
		}
	}
	return 0
}

// ComputeMPIJobParamsFromTestCase computes MPIJob parameters from a test case spec.
// It resolves each claim's resourceClaimTemplateName against the RCT index to
// get the neuron count, then calculates SlotsPerWorker and TotalRanks:
//
//	SlotsPerWorker = (sum of neuron counts over all claims) * topo.NeuronCoresPerDevice
//	TotalRanks     = SlotsPerWorker * workerReplicas
//
// An error is returned when topo or tc is nil, workerReplicas is not positive,
// containerTestImage is empty, or a claim references a template that is
// missing from (or nil in) rctIndex.
func ComputeMPIJobParamsFromTestCase(tc *common.TestCaseSpec, rctIndex map[string]*common.ResourceClaimTemplateSpec, topo *InstanceTopology, workerReplicas int, containerTestImage string) (*MPIJobParams, error) {
	if topo == nil {
		return nil, fmt.Errorf("instance topology is required")
	}
	if workerReplicas <= 0 {
		return nil, fmt.Errorf("workerReplicas must be positive, got %d", workerReplicas)
	}
	if containerTestImage == "" {
		return nil, fmt.Errorf("containerTestImage is required")
	}
	// Guard the dereference below the same way topo is guarded above.
	if tc == nil {
		return nil, fmt.Errorf("test case spec is required")
	}

	totalNeurons := 0
	var claims []common.ResourceClaimRef

	for _, tcClaim := range tc.ResourceClaims {
		rct, ok := rctIndex[tcClaim.ResourceClaimTemplateName]
		if !ok {
			return nil, fmt.Errorf("resource claim template %q not found in RCT index", tcClaim.ResourceClaimTemplateName)
		}
		// A present-but-nil entry would otherwise panic inside getNeuronCount.
		if rct == nil {
			return nil, fmt.Errorf("resource claim template %q is nil in RCT index", tcClaim.ResourceClaimTemplateName)
		}

		totalNeurons += getNeuronCount(rct, topo)

		claims = append(claims, common.ResourceClaimRef{
			Name:         tcClaim.Name,
			TemplateName: tcClaim.ResourceClaimTemplateName,
		})
	}

	slotsPerWorker := totalNeurons * topo.NeuronCoresPerDevice
	totalRanks := slotsPerWorker * workerReplicas

	return &MPIJobParams{
		SlotsPerWorker:     slotsPerWorker,
		TotalRanks:         totalRanks,
		WorkerReplicas:     workerReplicas,
		ContainerTestImage: containerTestImage,
		ResourceClaims:     claims,
	}, nil
}


================================================
FILE: test/cases/neuron-inference/bert_inference_test.go
================================================
//go:build e2e

package inference

import (
	"context"
	_ "embed"
	"fmt"
	"io"
	"log"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

//go:embed manifests/neuron-bert-inference.yaml
var neuronBertInferenceManifest []byte

var renderedManifest []byte

// TestNeuronInference renders the BERT inference Job manifest from the suite's
// flags and the discovered per-node Neuron capacity, applies it, and waits up
// to 60 minutes for the "neuron-inference" Job in the "default" namespace to
// succeed. The Job's pod logs are printed on both success and failure, and
// the rendered manifest is deleted during teardown.
func TestNeuronInference(t *testing.T) {
	if *bertInferenceImage == "" {
		t.Fatal("bertInferenceImage must be set to run the test")
	}

	log.Printf("[INFO] Using nodeType=%s, inferenceMode=%s", *nodeType, *inferenceMode)
	log.Printf("[INFO] Discovered neuronPerNode=%d, neuronCorePerNode=%d", neuronPerNode, neuronCorePerNode)

	renderVars := map[string]string{
		"BertInferenceImage": *bertInferenceImage,
		"NodeType":           *nodeType,      // e.g. "inf2.xlarge"
		"InferenceMode":      *inferenceMode, // "throughput" or "latency"
		"NeuronPerNode":      fmt.Sprintf("%d", neuronPerNode),
		"NeuronCorePerNode":  fmt.Sprintf("%d", neuronCorePerNode),
	}

	// Render the manifest. Plain assignment is used so the package-level
	// renderedManifest is populated; `:=` here would declare a shadowing local
	// and leave the package-level variable nil.
	var err error
	renderedManifest, err = fwext.RenderManifests(neuronBertInferenceManifest, renderVars)
	if err != nil {
		t.Fatalf("[ERROR] Failed to render Neuron inference manifest: %v", err)
	}

	feature := features.New("neuron-inference").
		WithLabel("suite", "neuron").
		WithLabel("hardware", "neuron").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[INFO] Applying rendered Neuron inference manifest.")
			err := fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedManifest)
			if err != nil {
				t.Fatalf("[ERROR] Failed to apply Neuron inference manifest: %v", err)
			}
			log.Println("[INFO] Successfully applied Neuron inference manifest.")
			return ctx
		}).
		Assess("BERT inference Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[INFO] Checking 'neuron-inference' job completion...")

			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "neuron-inference", Namespace: "default"},
			}
			// Bind the wait to ctx so a cancelled test run does not keep
			// polling for the full timeout.
			if err := wait.For(
				fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
				wait.WithTimeout(60*time.Minute),
				wait.WithContext(ctx),
			); err != nil {
				log.Println("[ERROR] Neuron inference job failed. Gathering logs...")
				if err := printJobLogs(ctx, cfg, "default", "neuron-inference"); err != nil {
					t.Logf("[WARNING] Failed to retrieve neuron-inference job logs: %v", err)
				}
				t.Fatalf("[ERROR] Neuron inference job did not succeed: %v", err)
			}

			log.Println("[INFO] Neuron inference job succeeded. Gathering logs...")
			// The duration is reported only when the context carries an
			// "applyTime" value of type time.Time.
			applyTime := ctx.Value("applyTime")
			if applyTime != nil {
				if start, ok := applyTime.(time.Time); ok {
					duration := time.Since(start)
					log.Printf("[INFO] Neuron inference job completed in %s", duration)
				}
			}

			if err := printJobLogs(ctx, cfg, "default", "neuron-inference"); err != nil {
				t.Logf("[WARNING] Failed to retrieve neuron-inference job logs: %v", err)
			}
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[INFO] Cleaning up neuron-inference job resources...")
			if err := fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedManifest); err != nil {
				t.Fatalf("[ERROR] Failed to delete inference job resources: %v", err)
			}
			log.Println("[INFO] Inference job cleanup complete.")
			return ctx
		}).
		Feature()

	testenv.Test(t, feature)
}

// printJobLogs writes the logs of every pod labelled job-name=<jobName> in
// namespace to the standard logger, reading each pod's log stream in chunks
// of up to 4096 bytes.
//
// It returns an error when the clientset cannot be created, the pods cannot
// be listed, no pod carries the label, or a log stream cannot be opened or
// read. Processing stops at the first failing pod.
func printJobLogs(ctx context.Context, cfg *envconf.Config, namespace, jobName string) error {
	cs, err := getClientset(cfg.Client().RESTConfig())
	if err != nil {
		return fmt.Errorf("[ERROR] failed to create kubernetes client: %w", err)
	}

	pods, err := cs.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
		LabelSelector: fmt.Sprintf("job-name=%s", jobName),
	})
	if err != nil {
		return fmt.Errorf("[ERROR] failed to list pods for job %s: %w", jobName, err)
	}
	if len(pods.Items) == 0 {
		return fmt.Errorf("[ERROR] no pods found for job %s", jobName)
	}

	// printPodLogs handles a single pod. Scoping the deferred Close to this
	// closure releases each stream as soon as its pod is done; a defer placed
	// directly in the loop below would hold every stream open until
	// printJobLogs returns.
	printPodLogs := func(pod *v1.Pod) error {
		log.Printf("[INFO] Pod %s is on node %s", pod.Name, pod.Spec.NodeName)
		stream, err := cs.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{}).Stream(ctx)
		if err != nil {
			return fmt.Errorf("[ERROR] failed to get logs from pod %s: %w", pod.Name, err)
		}
		defer stream.Close()

		buf := make([]byte, 4096)
		for {
			n, readErr := stream.Read(buf)
			// A Read may return data together with an error, so the bytes
			// are logged before the error is inspected.
			if n > 0 {
				log.Printf("[INFO] Logs from Pod %s:\n%s", pod.Name, string(buf[:n]))
			}
			if readErr == io.EOF {
				log.Printf("[INFO] Completed log stream for pod %s.", pod.Name)
				return nil
			}
			if readErr != nil {
				return fmt.Errorf("[ERROR] reading logs from pod %s: %w", pod.Name, readErr)
			}
		}
	}

	for i := range pods.Items {
		if err := printPodLogs(&pods.Items[i]); err != nil {
			return err
		}
	}
	return nil
}

// getClientset builds a Kubernetes clientset from restConfig. A construction
// failure is returned as a wrapped error together with a nil clientset.
func getClientset(restConfig *rest.Config) (*kubernetes.Clientset, error) {
	clientset, buildErr := kubernetes.NewForConfig(restConfig)
	if buildErr == nil {
		return clientset, nil
	}
	return nil, fmt.Errorf("cannot create kubernetes clientset: %w", buildErr)
}


================================================
FILE: test/cases/neuron-inference/main_test.go
================================================
//go:build e2e

package inference

import (
	"context"
	_ "embed"
	"flag"
	"fmt"
	"log"
	"os"
	"slices"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/manifests"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// TestMain prepares the shared environment for the Neuron inference suite.
//
// Setup applies the Neuron device plugin RBAC and DaemonSet manifests, waits
// up to five minutes for the DaemonSet to become ready, and then runs
// discoverNeuronCoreCapacity and getNodeCapacity. Finish deletes the same
// manifests in reverse order. The process exits with the code returned by the
// test environment.
func TestMain(m *testing.M) {

	flag.Parse()

	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("[ERROR] Failed to create test environment: %v", err)
	}
	testenv = env.NewWithConfig(cfg)

	deploymentManifests := [][]byte{
		manifests.NeuronDevicePluginRbacManifest,
		manifests.NeuronDevicePluginManifest,
	}

	// applyDevicePlugin installs the device plugin RBAC and DaemonSet.
	applyDevicePlugin := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Applying Neuron device plugin RBAC and Neuron device plugin manifests.")
		if applyErr := fwext.ApplyManifests(config.Client().RESTConfig(), deploymentManifests...); applyErr != nil {
			return ctx, fmt.Errorf("failed to apply manifests: %w", applyErr)
		}
		log.Println("Successfully applied Neuron device plugin RBAC and Neuron device plugin manifests.")
		return ctx, nil
	}

	// waitForDevicePlugin blocks until the device plugin DaemonSet is ready.
	waitForDevicePlugin := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Waiting for Neuron Device Plugin daemonset to be ready.")
		plugin := appsv1.DaemonSet{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "neuron-device-plugin-daemonset",
				Namespace: "kube-system",
			},
		}
		ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&plugin)
		if waitErr := wait.For(ready, wait.WithTimeout(5*time.Minute)); waitErr != nil {
			return ctx, fmt.Errorf("Neuron Device Plugin daemonset is not ready: %w", waitErr)
		}
		log.Println("Neuron Device Plugin daemonset is ready.")
		return ctx, nil
	}

	// removeDevicePlugin deletes what applyDevicePlugin created, last first.
	removeDevicePlugin := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("[INFO] Cleaning up Neuron device plugin.")
		slices.Reverse(deploymentManifests)
		if deleteErr := fwext.DeleteManifests(config.Client().RESTConfig(), deploymentManifests...); deleteErr != nil {
			return ctx, fmt.Errorf("failed to delete neuron device plugin: %w", deleteErr)
		}
		log.Println("[INFO] Neuron device plugin cleanup complete.")
		return ctx, nil
	}

	// Setup steps: apply the device plugin, wait for DS readiness, discover capacity
	testenv.Setup(applyDevicePlugin, waitForDevicePlugin, discoverNeuronCoreCapacity, getNodeCapacity)

	// Finish steps: remove the device plugin
	testenv.Finish(removeDevicePlugin)

	exitCode := testenv.Run(m)
	log.Printf("[INFO] Test environment finished with exit code %d", exitCode)
	os.Exit(exitCode)
}

// discoverNeuronCoreCapacity verifies that the cluster advertises Neuron
// capacity. It waits, polling every 5 seconds for up to 60 seconds per
// resource, first for "aws.amazon.com/neuron" and then for
// "aws.amazon.com/neuroncore" to satisfy the
// AllNodesHaveNonZeroResourceCapacity condition. It does not record any
// per-node numbers itself; it only returns an error when a check fails.
func discoverNeuronCoreCapacity(ctx context.Context, config *envconf.Config) (context.Context, error) {
	log.Println("[INFO] Discovering cluster's Neuron capacity...")

	// Check Neuron devices
	log.Println("[INFO] Checking Neuron device capacity on nodes")
	if deviceErr := wait.For(
		fwext.NewConditionExtension(config.Client().Resources()).AllNodesHaveNonZeroResourceCapacity("aws.amazon.com/neuron"),
		wait.WithTimeout(60*time.Second),
		wait.WithInterval(5*time.Second),
	); deviceErr != nil {
		return ctx, fmt.Errorf("failed to verify Neuron device capacity on nodes: %w", deviceErr)
	}
	log.Println("[INFO] Neuron devices check passed - all nodes have non-zero capacity")

	// Check Neuron cores
	log.Println("[INFO] Checking Neuron core capacity on nodes")
	if coreErr := wait.For(
		fwext.NewConditionExtension(config.Client().Resources()).AllNodesHaveNonZeroResourceCapacity("aws.amazon.com/neuroncore"),
		wait.WithTimeout(60*time.Second),
		wait.WithInterval(5*time.Second),
	); coreErr != nil {
		return ctx, fmt.Errorf("failed to verify Neuron core capacity on nodes: %w", coreErr)
	}
	log.Println("[INFO] Neuron cores check passed - all nodes have non-zero capacity")

	log.Println("[INFO] Neuron capacity discovery complete.")
	return ctx, nil
}

// getNodeCapacity computes neuronPerNode and neuronCorePerNode from the
// cluster's nodes. When the nodeType flag is empty it is set to the instance
// type label of the first listed node. Capacities are summed over the nodes
// whose instance type matches nodeType and divided (integer division) by the
// number of such nodes. Nodes missing either capacity entry are logged with a
// warning and contribute zero. An error is returned when the node list cannot
// be retrieved, is empty, or contains no node of the requested type.
func getNodeCapacity(ctx context.Context, config *envconf.Config) (context.Context, error) {
	cs, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return ctx, fmt.Errorf("failed to create kubernetes client: %w", err)
	}

	nodes, err := cs.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return ctx, fmt.Errorf("failed to list nodes: %w", err)
	}
	if len(nodes.Items) == 0 {
		return ctx, fmt.Errorf("no nodes found in the cluster")
	}

	const instanceTypeLabel = "node.kubernetes.io/instance-type"

	// if nodeType not set, use the instance type discovered
	if *nodeType == "" {
		*nodeType = nodes.Items[0].Labels[instanceTypeLabel]
	}

	var matched, neuronSum, coreSum int
	for _, node := range nodes.Items {
		instanceType := node.Labels[instanceTypeLabel]
		if instanceType != *nodeType {
			continue
		}
		matched++

		if devices, ok := node.Status.Capacity["aws.amazon.com/neuron"]; ok {
			neuronSum += int(devices.Value())
		} else {
			log.Printf("[WARN] Node %s (type=%s) lacks 'aws.amazon.com/neuron'.", node.Name, instanceType)
		}

		if cores, ok := node.Status.Capacity["aws.amazon.com/neuroncore"]; ok {
			coreSum += int(cores.Value())
		} else {
			log.Printf("[WARN] Node %s (type=%s) lacks 'aws.amazon.com/neuroncore'.", node.Name, instanceType)
		}
	}

	if matched == 0 {
		return ctx, fmt.Errorf("no nodes with %s node type found in the cluster", *nodeType)
	}
	neuronPerNode = neuronSum / matched
	neuronCorePerNode = coreSum / matched

	log.Printf("[INFO] Discovered neuronPerNode=%d, neuronCorePerNode=%d (across %d node(s))", neuronPerNode, neuronCorePerNode, matched)
	return ctx, nil
}


================================================
FILE: test/cases/neuron-inference/manifests/neuron-bert-inference.yaml
================================================
apiVersion: batch/v1
kind: Job
metadata:
  name: neuron-inference
spec:
  backoffLimit: 4
  template:
    spec:
      restartPolicy: OnFailure
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: neuron-inference
          image: {{.BertInferenceImage}}
          imagePullPolicy: Always
          command: ["python", "/app/infer.py"]
          env:
            - name: INFERENCE_MODE
              value: "{{.InferenceMode}}"
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
          resources:
            requests:
              aws.amazon.com/neuroncore: "{{.NeuronCorePerNode}}"
            limits:
              aws.amazon.com/neuroncore: "{{.NeuronCorePerNode}}"
      nodeSelector:
        node.kubernetes.io/instance-type: {{.NodeType}}


================================================
FILE: test/cases/neuron-inference/vars.go
================================================
//go:build e2e

package inference

import (
	"flag"

	"sigs.k8s.io/e2e-framework/pkg/env"
)

// Shared global variables
var (
	// The e2e-framework environment
	testenv env.Environment

	// Passed in as flags
	bertInferenceImage *string
	nodeType           *string
	inferenceMode      *string

	// Discovered in main_test.go
	neuronPerNode     int
	neuronCorePerNode int
)

// init() runs before TestMain and sets up the flags
func init() {
	bertInferenceImage = flag.String("bertInferenceImage", "",
		"[REQUIRED] Docker image used for Neuron-based BERT inference")
	nodeType = flag.String("nodeType", "",
		"Node type label for K8s nodes, e.g., trn1.32xlarge or inf2.xlarge")
	inferenceMode = flag.String("inferenceMode", "throughput",
		"Inference mode for BERT (throughput or latency)")
}


================================================
FILE: test/cases/neuron-training/bert_training_test.go
================================================
//go:build e2e

package training

import (
	"bytes"
	"context"
	_ "embed"
	"fmt"
	"log"
	"regexp"
	"strconv"
	"testing"
	"time"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/k8s"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

var (
	//go:embed manifests/bert-training.yaml
	bertTrainingJobManifest []byte

	//go:embed manifests/training-comm-service.yaml
	trainingPodCommServiceManifest []byte

	// Regex to match lines like:
	// local_throughput=5.00 samples/s
	rankThroughputRegex = regexp.MustCompile(
		`local_throughput\s*=\s*([\d\.]+)\s+samples\/s`,
	)

	// Regex to match lines like:
	// local_avg_epoch_time=12.50s
	rankEpochTimeRegex = regexp.MustCompile(
		`local_avg_epoch_time=([\d\.]+)s`,
	)
)

// TestBertTraining renders the BERT training Job and its pod communication
// Service manifests and runs the training Job, retrying the whole
// apply/wait/process cycle up to *retries additional times. The manifests are
// cleaned up after every attempt, successful or not, and the test fails once
// all attempts are exhausted.
func TestBertTraining(t *testing.T) {
	if *bertTrainingImage == "" {
		t.Fatal("bertTrainingImage must be set to run the test")
	}

	// Values substituted into the templated manifests.
	templateVars := map[string]string{
		"BertTrainingImage": *bertTrainingImage,
		"NodeType":          *nodeType,
		"SlotsPerWorker":    fmt.Sprintf("%d", nodeCount),
		"NodeCount":         fmt.Sprintf("%d", nodeCount),
		"NeuronPerNode":     fmt.Sprintf("%d", neuronPerNode),
		"NeuronCorePerNode": fmt.Sprintf("%d", neuronCorePerNode),
		"EFAPerNode":        fmt.Sprintf("%d", efaPerNode),
	}

	jobManifest, err := fwext.RenderManifests(bertTrainingJobManifest, templateVars)
	if err != nil {
		t.Fatalf("failed to render neuron BERT training manifest: %v", err)
	}

	commServiceManifest, err := fwext.RenderManifests(trainingPodCommServiceManifest, templateVars)
	if err != nil {
		t.Fatalf("failed to render pod communication manifest: %v", err)
	}

	// runAttempt performs one apply/wait/process cycle and reports whether it
	// succeeded. Each failure is logged here; cleanup is left to the caller.
	runAttempt := func(ctx context.Context, cfg *envconf.Config, jobManifests [][]byte) bool {
		if err := applyManifests(cfg, jobManifests); err != nil {
			log.Printf("Failed to apply manifests: %v", err)
			return false
		}

		job, err := waitForJobCreation(cfg)
		if err != nil {
			log.Printf("Failed to detect job creation: %v", err)
			return false
		}

		if err := waitForJobCompletion(job, cfg); err != nil {
			log.Printf("Job did not complete successfully: %v", err)
			if logsBuf, logsErr := gatherJobLogs(ctx, cfg, "default", "bert-training"); logsErr != nil {
				log.Printf("failed to get logs: %v", logsErr)
			} else {
				log.Println(logsBuf.String())
			}
			return false
		}

		// Job completed successfully
		if err := processJobLogs(ctx, cfg); err != nil {
			log.Printf("Failed to process job logs: %v", err)
			return false
		}
		return true
	}

	// Define a feature for the Neuron BERT training
	neuronTraining := features.New("bert-training").
		WithLabel("suite", "neuron").
		WithLabel("hardware", "neuron").
		Assess("Neuron training Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			jobManifests := [][]byte{commServiceManifest, jobManifest}
			maxAttempts := (*retries) + 1

			for attempt := 1; attempt <= maxAttempts; attempt++ {
				log.Printf("Applying manifests for BERT training test (Attempt #%d)", attempt)

				succeeded := runAttempt(ctx, cfg, jobManifests)
				// Every attempt ends with a cleanup, whatever its outcome.
				cleanupManifests(cfg, jobManifests)
				if succeeded {
					log.Printf("BERT training test succeeded on attempt #%d", attempt)
					return ctx
				}
			}

			// If we've exhausted all attempts
			t.Fatalf("BERT training test did not succeed after %d attempts", maxAttempts)
			return ctx
		}).
		Feature()

	// Run the feature
	testenv.Test(t, neuronTraining)
}

// gatherJobLogs retrieves logs from all pods of the specified jobName
// (selected by the job-name=<jobName> label in namespace), returning them
// concatenated in a single buffer.
//
// It returns a nil buffer when the clientset cannot be created, the pods
// cannot be listed, or no pods match. When a pod's log stream cannot be
// opened or read, it returns the buffer filled so far together with the
// error.
func gatherJobLogs(ctx context.Context, cfg *envconf.Config, namespace, jobName string) (*bytes.Buffer, error) {
	clientset, err := getClientset(cfg.Client().RESTConfig())
	if err != nil {
		return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err)
	}

	podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
		LabelSelector: fmt.Sprintf("job-name=%s", jobName),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to list pods for job %s: %w", jobName, err)
	}
	if len(podList.Items) == 0 {
		return nil, fmt.Errorf("no pods found for job %s", jobName)
	}

	var out bytes.Buffer

	// appendPodLogs copies one pod's logs into out. Scoping the deferred
	// Close to this closure releases each stream as soon as its pod has been
	// read; a defer placed directly in the loop below would hold every stream
	// open until gatherJobLogs returns.
	appendPodLogs := func(pod *v1.Pod) error {
		req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{})
		logStream, err := req.Stream(ctx)
		if err != nil {
			return fmt.Errorf("failed to get logs from pod %s: %w", pod.Name, err)
		}
		defer logStream.Close()

		// Copy logs into our buffer
		if _, err := out.ReadFrom(logStream); err != nil {
			return fmt.Errorf("failed to read logs from pod %s: %w", pod.Name, err)
		}
		return nil
	}

	for i := range podList.Items {
		if err := appendPodLogs(&podList.Items[i]); err != nil {
			return &out, err
		}
	}

	return &out, nil
}

// aggregateMetricFromLogs scans the log output for lines based on a provided RegEx.
// The RegEx is assumed to take a sufficiently unique form like <metric>=<value> to avoid
// collisions, but also to simplify parsing. The value must be exposed as the first
// capture group of the RegEx.
//
// Matches that carry no capture group, or whose captured text does not parse as a
// float64, are skipped rather than counted.
//
// returns the average, sum, and count for all occurrences of the metric. All three
// are zero when nothing usable was found.
func aggregateMetricFromLogs(metricRegex *regexp.Regexp, logs string) (avg float64, sum float64, count int) {
	for _, match := range metricRegex.FindAllStringSubmatch(logs, -1) {
		// match[0] is the whole match; without a capture group there is no
		// match[1] to read, so skip instead of panicking on the index.
		if len(match) < 2 {
			continue
		}
		val, err := strconv.ParseFloat(match[1], 64)
		if err != nil {
			continue
		}
		sum += val
		count++
	}
	if count > 0 {
		avg = sum / float64(count)
	}
	return avg, sum, count
}

// applyManifests applies the given rendered manifests to the cluster described
// by cfg. It returns the error reported by fwext.ApplyManifests, so callers can
// tell a failed apply from a successful one; the success line is only logged
// when the apply actually succeeded.
func applyManifests(cfg *envconf.Config, manifests [][]byte) error {
	if err := fwext.ApplyManifests(cfg.Client().RESTConfig(), manifests...); err != nil {
		return fmt.Errorf("applying test manifests: %w", err)
	}
	log.Println("Successfully applied test manifests.")
	return nil
}

// waitForJobCreation waits up to five minutes for the "bert-training" Job in
// the "default" namespace to match a predicate that accepts any fetched object.
// The Job object handed to the waiter is returned together with the wait result.
func waitForJobCreation(cfg *envconf.Config) (*batchv1.Job, error) {
	target := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "bert-training"},
	}

	log.Println("Waiting for the 'bert-training' Job resource to be created...")

	// The predicate never rejects the object it is given.
	acceptAny := func(_ k8s.Object) bool { return true }
	created := conditions.New(cfg.Client().Resources()).ResourceMatch(target, acceptAny)

	err := wait.For(created, wait.WithTimeout(time.Minute*5))
	return target, err
}

// waitForJobCompletion waits up to 30 minutes for the JobSucceeded condition on
// job and returns the result of that wait.
func waitForJobCompletion(job *batchv1.Job, cfg *envconf.Config) error {
	log.Println("Waiting for 'bert-training' Job to succeed...")

	succeeded := fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job)
	timeout := wait.WithTimeout(30 * time.Minute)
	return wait.For(succeeded, timeout)
}

// processJobLogs fetches the logs of the "bert-training" Job pods in the
// "default" namespace, prints them, and passes them to processMetrics.
//
// It returns an error only when the logs cannot be retrieved; the underlying
// error is wrapped with %w so callers can inspect it with errors.Is / errors.As.
func processJobLogs(ctx context.Context, cfg *envconf.Config) error {
	logsBuf, err := gatherJobLogs(ctx, cfg, "default", "bert-training")
	if err != nil {
		return fmt.Errorf("failed to retrieve bert-training job logs: %w", err)
	}

	// Materialize the buffer once; it is both printed and parsed below.
	logs := logsBuf.String()

	log.Println("== Raw Logs from the launcher pods ==")
	log.Println(logs)

	processMetrics(logs)
	return nil
}

// processMetrics extracts the throughput and epoch-time metrics from logs using
// rankThroughputRegex and rankEpochTimeRegex and logs a summary of each. A
// metric with no matching lines is reported as missing; nothing is returned.
func processMetrics(logs string) {
	// Throughput summary.
	thruAvg, thruSum, thruCount := aggregateMetricFromLogs(rankThroughputRegex, logs)
	if thruCount > 0 {
		log.Printf("Parsed throughput from %d ranks. Total=%.2f samples/s, Average=%.2f samples/s",
			thruCount, thruSum, thruAvg)
		log.Printf("Average Throughput: %.2f samples/second", thruAvg)
	} else {
		log.Printf("No throughput lines found. Possibly missing in logs.")
	}

	// Epoch-time summary.
	epochAvg, epochSum, epochCount := aggregateMetricFromLogs(rankEpochTimeRegex, logs)
	if epochCount > 0 {
		log.Printf("Parsed average epoch time from %d ranks. Sum=%.2fs, Average=%.2fs",
			epochCount, epochSum, epochAvg)
	} else {
		log.Printf("No epoch time lines found. Possibly missing in logs.")
	}
}

// cleanupManifests deletes the given manifests from the cluster described by
// cfg. A deletion failure is logged and otherwise ignored, so callers can use
// it on both success and failure paths.
func cleanupManifests(cfg *envconf.Config, manifests [][]byte) {
	log.Println("Deleting test manifests.")

	err := fwext.DeleteManifests(cfg.Client().RESTConfig(), manifests...)
	if err == nil {
		return
	}
	log.Printf("Failed to delete manifests: %v", err)
}

// getClientset builds a Kubernetes clientset for restConfig. On failure it
// returns a nil clientset and the wrapped construction error.
func getClientset(restConfig *rest.Config) (*kubernetes.Clientset, error) {
	cs, err := kubernetes.NewForConfig(restConfig)
	if err == nil {
		return cs, nil
	}
	return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err)
}


================================================
FILE: test/cases/neuron-training/main_test.go
================================================
//go:build e2e

package training

import (
	"context"
	_ "embed"
	"fmt"
	"log"
	"os"
	"slices"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/manifests"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// TestMain wires up the shared test environment for the package.
//
// Setup applies the Neuron device plugin RBAC, Neuron device plugin and EFA
// device plugin manifests, waits up to five minutes for each plugin DaemonSet
// in kube-system to become ready, then runs checkNonZeroResourceCapacity and
// checkNodeTypes. Finish deletes the same manifests in reverse order. The
// process exits with the code returned by the environment's Run.
func TestMain(m *testing.M) {
	envCfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}
	testenv = env.NewWithConfig(envCfg)

	pluginManifests := [][]byte{
		manifests.NeuronDevicePluginRbacManifest,
		manifests.NeuronDevicePluginManifest,
		manifests.EfaDevicePluginManifest,
	}

	// applyPlugins installs every device plugin manifest.
	applyPlugins := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Applying Neuron device plugin RBAC, Neuron device plugin and EFA device plugin manifests.")
		if err := fwext.ApplyManifests(config.Client().RESTConfig(), pluginManifests...); err != nil {
			return ctx, fmt.Errorf("failed to apply manifests: %w", err)
		}
		log.Println("Successfully applied Neuron device plugin RBAC, Neuron device plugin and EFA device plugin manifests.")
		return ctx, nil
	}

	// awaitDaemonSet returns a setup step that blocks until the named
	// kube-system DaemonSet is ready; displayName is only used in messages.
	awaitDaemonSet := func(displayName, resourceName string) env.Func {
		return func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			log.Printf("Waiting for %s daemonset to be ready.", displayName)
			ds := appsv1.DaemonSet{
				ObjectMeta: metav1.ObjectMeta{Name: resourceName, Namespace: "kube-system"},
			}
			ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&ds)
			if err := wait.For(ready, wait.WithTimeout(time.Minute*5)); err != nil {
				return ctx, fmt.Errorf("%s daemonset is not ready: %w", displayName, err)
			}
			log.Printf("%s daemonset is ready.", displayName)
			return ctx, nil
		}
	}

	// removePlugins tears the manifests down in the opposite order they were applied.
	removePlugins := func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Deleting Neuron device plugin and EFA device plugin manifests.")
		slices.Reverse(pluginManifests)
		if err := fwext.DeleteManifests(config.Client().RESTConfig(), pluginManifests...); err != nil {
			return ctx, fmt.Errorf("failed to delete manifests: %w", err)
		}
		log.Println("Successfully deleted Neuron device plugin and EFA device plugin manifests.")
		return ctx, nil
	}

	testenv.Setup(
		applyPlugins,
		awaitDaemonSet("Neuron Device Plugin", "neuron-device-plugin-daemonset"),
		awaitDaemonSet("EFA Device Plugin", "aws-efa-k8s-device-plugin-daemonset"),
		checkNonZeroResourceCapacity,
		checkNodeTypes,
	)
	testenv.Finish(removePlugins)

	log.Println("Starting tests...")
	code := testenv.Run(m)
	log.Printf("Tests finished with exit code %d", code)
	os.Exit(code)
}

// checkNodeTypes verifies that every node in the cluster has the same
// "node.kubernetes.io/instance-type" label and records cluster capacity in the
// package-level variables used when rendering the training manifests.
//
// Side effects:
//   - *nodeType is set to the discovered instance type when the flag was left
//     empty (this also works for single-node clusters);
//   - nodeCount is set to the number of nodes;
//   - neuronPerNode, neuronCorePerNode and efaPerNode are set to the summed
//     capacity of the respective resource divided by nodeCount (integer division).
//
// It returns an error when the client cannot be created, nodes cannot be
// listed, the cluster has no nodes, or the instance types differ. A node that
// lacks one of the resources only produces a warning.
func checkNodeTypes(ctx context.Context, config *envconf.Config) (context.Context, error) {
	const instanceTypeLabel = "node.kubernetes.io/instance-type"

	clientset, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return ctx, fmt.Errorf("failed to create Kubernetes client: %w", err)
	}

	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return ctx, fmt.Errorf("failed to list nodes: %w", err)
	}

	if len(nodes.Items) == 0 {
		return ctx, fmt.Errorf("no nodes found in the cluster")
	}

	// Check if all nodes have the same instance type as the first one.
	discoveredType := nodes.Items[0].Labels[instanceTypeLabel]
	for i := 1; i < len(nodes.Items); i++ {
		if nodes.Items[i].Labels[instanceTypeLabel] != discoveredType {
			return ctx, fmt.Errorf("inconsistent node types detected, all nodes must have the same instance type")
		}
	}

	// Discovery must not depend on the comparison loop above, which does not
	// execute at all when the cluster has exactly one node.
	if *nodeType == "" {
		log.Printf("[INFO] nodeType was not set, discovered type %s", discoveredType)
		*nodeType = discoveredType
	}

	// Calculate capacities for all nodes
	totalNeuronCount := 0
	totalNeuronCoreCount := 0
	totalEfaCount := 0
	nodeCount = len(nodes.Items) // Store global node count

	for i := range nodes.Items {
		node := &nodes.Items[i]
		log.Printf("[INFO] Processing node %s", node.Name)

		// Check for Neuron capacity
		if neuron, ok := node.Status.Capacity["aws.amazon.com/neuron"]; ok {
			totalNeuronCount += int(neuron.Value())
		} else {
			log.Printf("[WARN] Node %s does not have 'aws.amazon.com/neuron' capacity", node.Name)
		}

		// Check for NeuronCore capacity
		if neuronCore, ok := node.Status.Capacity["aws.amazon.com/neuroncore"]; ok {
			totalNeuronCoreCount += int(neuronCore.Value())
		} else {
			log.Printf("[WARN] Node %s does not have 'aws.amazon.com/neuroncore' capacity", node.Name)
		}

		// Check for EFA capacity
		if efa, ok := node.Status.Capacity["vpc.amazonaws.com/efa"]; ok {
			totalEfaCount += int(efa.Value())
		} else {
			log.Printf("[WARN] Node %s does not have 'vpc.amazonaws.com/efa' capacity", node.Name)
		}
	}

	// Update global capacities. nodeCount is at least 1 here because an empty
	// node list was rejected above, so the divisions are safe.
	neuronPerNode = totalNeuronCount / nodeCount
	neuronCorePerNode = totalNeuronCoreCount / nodeCount
	efaPerNode = totalEfaCount / nodeCount

	log.Printf("[INFO] Total Nodes: %d", nodeCount)
	log.Printf("[INFO] Total Neuron Count: %d, Neuron Per Node: %d", totalNeuronCount, neuronPerNode)
	log.Printf("[INFO] Total Neuron Core Count: %d, Neuron Core Per Node: %d", totalNeuronCoreCount, neuronCorePerNode)
	log.Printf("[INFO] Total EFA Count: %d, EFA Per Node: %d", totalEfaCount, efaPerNode)

	return ctx, nil
}

// checkNonZeroResourceCapacity waits, in order, for the
// AllNodesHaveNonZeroResourceCapacity condition on the Neuron device, Neuron
// core and EFA device resources. Each wait polls every 5 seconds for up to 60
// seconds; the first one that fails aborts the sequence with a wrapped error.
func checkNonZeroResourceCapacity(ctx context.Context, config *envconf.Config) (context.Context, error) {
	log.Println("[INFO] Starting resource capacity checks")

	// Each entry pairs the label used in messages with the wait to perform.
	// Resource names stay as literals at the call site.
	checks := []struct {
		label string
		run   func() error
	}{
		{
			label: "Neuron device",
			run: func() error {
				return wait.For(
					fwext.NewConditionExtension(config.Client().Resources()).AllNodesHaveNonZeroResourceCapacity("aws.amazon.com/neuron"),
					wait.WithTimeout(time.Second*60),
					wait.WithInterval(time.Second*5),
				)
			},
		},
		{
			label: "Neuron core",
			run: func() error {
				return wait.For(
					fwext.NewConditionExtension(config.Client().Resources()).AllNodesHaveNonZeroResourceCapacity("aws.amazon.com/neuroncore"),
					wait.WithTimeout(time.Second*60),
					wait.WithInterval(time.Second*5),
				)
			},
		},
		{
			label: "EFA device",
			run: func() error {
				return wait.For(
					fwext.NewConditionExtension(config.Client().Resources()).AllNodesHaveNonZeroResourceCapacity("vpc.amazonaws.com/efa"),
					wait.WithTimeout(time.Second*60),
					wait.WithInterval(time.Second*5),
				)
			},
		},
	}

	for _, check := range checks {
		log.Printf("Checking %s capacity on nodes", check.label)
		if err := check.run(); err != nil {
			return ctx, fmt.Errorf("failed to verify %s capacity on nodes: %w", check.label, err)
		}
		log.Printf("%ss check passed - all nodes have non-zero capacity", check.label)
	}

	log.Println("[INFO] All resource capacity checks completed successfully")
	return ctx, nil
}


================================================
FILE: test/cases/neuron-training/manifests/bert-training.yaml
================================================
# Indexed batch Job that launches the BERT training script with torchrun.
#
# This file is a template: the double-brace placeholders (NodeCount,
# BertTrainingImage, NeuronPerNode, NeuronCorePerNode, EFAPerNode, NodeType)
# are substituted before the manifest is applied.
apiVersion: batch/v1
kind: Job
metadata:
  labels:
    app: bert-training
  name: bert-training
spec:
  # completions and parallelism use the same value, and torchrun below is told
  # the same number of nodes.
  completionMode: Indexed
  completions: {{.NodeCount}}
  parallelism: {{.NodeCount}}
  # No pod-level retries are requested from the Job controller.
  backoffLimit: 0
  template:
    spec:
      restartPolicy: Never
      containers:
      - image: {{.BertTrainingImage}}
        name: bert-training
        env:
        # Rendezvous address handed to torchrun via --master_addr.
        # NOTE(review): presumably the DNS name of the index-0 pod under the
        # "training" subdomain set further down - confirm against the Service.
        - name: MASTER_ADDR
          value: bert-training-0.training
        args:
        - sh
        - -c 
        - |
          # Enable EFA https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-runtime/nrt-troubleshoot.html#fi-efa-fork-safe (AL2 legacy requirement)
          export FI_EFA_FORK_SAFE=1
          export CCOM_SOCKET_IFNAME=eth0
          export NCCL_DEBUG=ERROR
          torchrun --nproc_per_node {{.NeuronCorePerNode}} --nnodes {{.NodeCount}} --node_rank $JOB_COMPLETION_INDEX --master_addr $MASTER_ADDR train.py
        volumeMounts:
        # Memory-backed emptyDir (declared under volumes) mounted at /dev/shm.
        - name: dshm
          mountPath: /dev/shm 
        resources:
          # Requests and limits carry identical values for every device resource.
          requests:
            aws.amazon.com/neuron: {{.NeuronPerNode}}
            aws.amazon.com/neuroncore: {{.NeuronCorePerNode}}
            vpc.amazonaws.com/efa: {{.EFAPerNode}}
          limits:
            aws.amazon.com/neuron: {{.NeuronPerNode}}
            aws.amazon.com/neuroncore: {{.NeuronCorePerNode}}
            vpc.amazonaws.com/efa: {{.EFAPerNode}}
      # Restrict scheduling to nodes of the rendered instance type.
      nodeSelector:
        node.kubernetes.io/instance-type: {{.NodeType}}
      subdomain: training
      volumes:
      - name: dshm
        emptyDir:
          medium: Memory


================================================
FILE: test/cases/neuron-training/manifests/training-comm-service.yaml
================================================
# Service named "training" with no cluster IP (clusterIP: None). Its selector
# matches pods labeled job-name=bert-training.
# NOTE(review): presumably this is what makes the "bert-training-0.training"
# address used by the training Job resolvable - confirm.
apiVersion: v1
kind: Service
metadata:
  name: training
  labels:
    app: training
spec:
  clusterIP: None
  selector:
    job-name: bert-training


================================================
FILE: test/cases/neuron-training/vars.go
================================================
package training

import (
	"flag"

	"sigs.k8s.io/e2e-framework/pkg/env"
)

// Shared global variables
var (
	// testenv is the shared e2e-framework environment for this package.
	testenv env.Environment

	// bertTrainingImage is bound to the -bertTrainingImage flag in init.
	bertTrainingImage *string
	// efaEnabled is bound to the -efaEnabled flag in init.
	efaEnabled        *bool
	// nodeType is bound to the -nodeType flag in init.
	nodeType          *string
	// nodeCount and the per-node capacities below are not flags; they start at
	// zero and are expected to be filled in at runtime before use.
	nodeCount         int
	efaPerNode        int
	neuronPerNode     int
	neuronCorePerNode int
	// retries is bound to the -retries flag in init.
	retries           *int
)

// init registers this package's command-line flags on the default flag set
// and stores the returned pointers in the package-level variables. The values
// are only meaningful after the flags have been parsed.
func init() {
	// Define command-line flags
	bertTrainingImage = flag.String("bertTrainingImage", "", "Docker image used for BERT training workload")
	efaEnabled = flag.Bool("efaEnabled", false, "Enable Elastic Fabric Adapter (EFA)")
	nodeType = flag.String("nodeType", "", "Instance type for cluster nodes (e.g., inf1.24xlarge)")
	// Defaults to 2 retries when the flag is not given.
	retries = flag.Int("retries", 2, "Number of retries to attempt before marking the test as failed.")
}


================================================
FILE: test/cases/nvidia/capabilities_test.go
================================================
//go:build e2e

package nvidia

import (
	"context"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"k8s.io/apimachinery/pkg/util/wait"
	e2ewait "sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"

	_ "embed"
)

// capabilitiesCheckPod holds the contents of
// manifests/nvidia-driver-capabilities-check.yaml, embedded at build time.
//
//go:embed manifests/nvidia-driver-capabilities-check.yaml
var capabilitiesCheckPod []byte

// Name and namespace of the pod that TestNvidiaDriverCapabilities waits on.
// NOTE(review): these presumably have to match the pod declared in the
// embedded capabilities-check manifest - confirm against that file.
const (
	PodName      = "moderngl-pod"
	PodNamespace = "default"
)

// TestNvidiaDriverCapabilities applies the embedded capabilities-check pod
// manifest, waits up to five minutes for the pod named PodName in PodNamespace
// to satisfy the PodSucceeded condition, and deletes the manifest on teardown.
//
// A timed-out or cancelled wait is reported separately from any other wait
// error, and the underlying error is included in both failure messages.
func TestNvidiaDriverCapabilities(t *testing.T) {
	feat := features.New("nvidia-driver-capabilities-check").
		WithLabel("suite", "nvidia").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Log("Applying nvidia driver capabilities check pod manifest.")
			// capabilitiesCheckPod only runs moderngl.create_standalone_context() with
			// NVIDIA_DRIVER_CAPABILITIES=all to load every capability enabled by the nvidia driver.
			// If a library required by any of those capabilities is missing, it fails with an exception.
			if err := e2e.ApplyManifests(cfg.Client().RESTConfig(), capabilitiesCheckPod); err != nil {
				t.Fatalf("Failed to apply capabilities check pod manifest: %v", err)
			}
			return ctx
		}).
		Assess("Check Pod becomes ready", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Log("Waiting up to 5 minute for pod to complete...")
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      PodName,
					Namespace: PodNamespace,
				},
			}
			err := e2ewait.For(
				e2e.NewConditionExtension(cfg.Client().Resources()).PodSucceeded(pod),
				e2ewait.WithTimeout(5*time.Minute),
			)
			if err != nil {
				// wait.Interrupted recognizes the deprecated ErrWaitTimeout sentinel as
				// well as context deadline/cancellation errors, so a timeout is detected
				// regardless of which of these the waiter surfaces.
				if wait.Interrupted(err) {
					t.Fatalf("nvidia capabilities check pod not in compeleted phase (succeeded or failed) within 5 minute and waiter timeout: %v", err)
				}
				t.Fatalf("nvidia capabilities pod in Failed status, ModernGL check failed. Could be caused by required library missing: %v", err)
			}
			t.Log("nvidia driver capabilities check succeeded.")
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Log("Removing nvidia driver capabilities check pod.")
			if err := e2e.DeleteManifests(cfg.Client().RESTConfig(), capabilitiesCheckPod); err != nil {
				t.Errorf("Failed to delete pod: %v", err)
				// Do not claim success after a failed delete.
				return ctx
			}
			t.Log("all test resources removed successfully.")
			return ctx
		}).
		Feature()

	testenv.Test(t, feat)
}


================================================
FILE: test/cases/nvidia/containerd_test.go
================================================
//go:build e2e

package nvidia

import (
	"context"
	"log"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"

	_ "embed"
)

// containerdCheckDS holds the contents of
// manifests/daemonset-containerd-check.yaml, embedded at build time.
//
//go:embed manifests/daemonset-containerd-check.yaml
var containerdCheckDS []byte

func TestContainerdConfig(t *testing.T) {
	feat := features.New("containerd-config-check").
		WithLabel("suite", "nvidia").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[Setup] Applying containerd-check DaemonSet manifest.")
			if err := e2e.ApplyManifests(cfg.Client().RESTConfig(), containerdCheckDS); err != nil {
				t.Fatalf("Failed to apply containerd-check DS: %v", err)
			}
			return ctx
		}).
		Assess("DaemonSet becomes ready", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			dsName := "containerd-check"
			dsNS := "default"

			log.Println("[Assess] Waiting up to 1 minute for containerd-check DS to become Ready...")
			ds := &appsv1.DaemonSet{
				ObjectMeta: metav1.ObjectMeta{
					Name:      dsName,
					Namespace: dsNS,
				},
			}
			err := wait.For(
				e2e.NewConditionExtension(cfg.Client().Resources()).DaemonSetReady(ds),
				wait.WithTimeout(1*time.Minute),
			)
			if err != nil {
				t.Logf("[Assess] containerd-check DS did not become Ready: %v", err)
				e2e.PrintDaemonSetPodLogs(t, ctx, cfg.Client().RESTConfig(), dsNS, "app=containerd-check")
				t.Fatalf("containerd-check DS not Ready within 1 minute")
			}

			log.Println("[Assess] containerd-check DS is Ready.")
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Log("[Teardown] Removing containerd-check DS (no additional logs).")
			if err := e2e.DeleteManifests(cfg.Client().RESTConfig(), containerdCheckDS); err != nil {
				t.Fatalf("Failed to delete containerd-check DS: %v", err)
			}
			t.Log("[Teardown] containerd-check DS removed successfully.")
			return ctx
		}).
		Feature()

	testenv.Test(t, feat)
}


================================================
FILE: test/cases/nvidia/main_test.go
================================================
//go:build e2e

package nvidia

import (
	"context"
	_ "embed"
	"fmt"
	"log"
	"os"
	"os/signal"
	"slices"
	"testing"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/common"
	"github.com/aws/aws-k8s-tester/test/manifests"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// Config collects the command-line options of the nvidia test suite. Each
// field carries a `flag` tag naming its command-line flag and a `desc` tag with
// the help text; TestMain fills the struct through common.ParseFlags.
type Config struct {
	// MetricOps is embedded from the common package; TestMain reads its
	// MetricDimensions field.
	common.MetricOps
	NodeType               string `flag:"nodeType" desc:"node type for the tests"`
	InstallDevicePlugin    bool   `flag:"installDevicePlugin" desc:"install nvidia device plugin"`
	EfaEnabled             bool   `flag:"efaEnabled" desc:"enable efa tests"`
	NvidiaTestImage        string `flag:"nvidiaTestImage" desc:"nccl test image for nccl tests"`
	PytorchImage           string `flag:"pytorchImage" desc:"pytorch cuda image for single node tests"`
	SkipUnitTestSubcommand string `flag:"skipUnitTestSubcommand" desc:"optional command to skip specified unit test"`
}

// Package-level state shared by the tests in this package.
var (
	// testenv is the e2e-framework environment created in TestMain.
	testenv    env.Environment
	// testConfig holds the parsed command-line options.
	testConfig Config
	// nodeCount, gpuPerNode and efaPerNode are populated by checkNodeTypes.
	nodeCount  int
	gpuPerNode int
	efaPerNode int
)

// deployMPIOperator waits until the "mpi-operator" Deployment in the
// "mpi-operator" namespace reports the Available condition as true. Despite its
// name it does not apply anything itself; it only waits, bounded by ctx.
//
// The wait error is wrapped with %w so callers can inspect the cause with
// errors.Is / errors.As.
func deployMPIOperator(ctx context.Context, config *envconf.Config) (context.Context, error) {
	dep := appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: "mpi-operator", Namespace: "mpi-operator"},
	}
	err := wait.For(conditions.New(config.Client().Resources()).DeploymentConditionMatch(&dep, appsv1.DeploymentAvailable, v1.ConditionTrue),
		wait.WithContext(ctx))
	if err != nil {
		return ctx, fmt.Errorf("failed to deploy mpi-operator: %w", err)
	}
	return ctx, nil
}

// checkNodeTypes verifies that all cluster nodes share one
// "node.kubernetes.io/instance-type" label and records the node count and the
// per-node GPU and EFA capacity in nodeCount, gpuPerNode and efaPerNode.
//
// When testConfig.NodeType is set, only nodes of that type are counted and the
// capacities come from the last matching node. When it is empty, it is filled
// from the first node, every node is counted, and the capacities come from the
// first node.
//
// It returns an error when the client cannot be created, nodes cannot be
// listed, the cluster has no nodes, or the instance types differ.
func checkNodeTypes(ctx context.Context, config *envconf.Config) (context.Context, error) {
	const instanceTypeLabel = "node.kubernetes.io/instance-type"

	clientset, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return ctx, err
	}

	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return ctx, err
	}

	// Guard the nodes.Items[0] accesses below.
	if len(nodes.Items) == 0 {
		return ctx, fmt.Errorf("no nodes found in the cluster")
	}

	// Compare every node with its predecessor, including the last one.
	for i := 1; i < len(nodes.Items); i++ {
		if nodes.Items[i].Labels[instanceTypeLabel] != nodes.Items[i-1].Labels[instanceTypeLabel] {
			return ctx, fmt.Errorf("Node types are not the same, all node types must be the same in the cluster")
		}
	}

	if testConfig.NodeType != "" {
		for i := range nodes.Items {
			node := &nodes.Items[i]
			if node.Labels[instanceTypeLabel] != testConfig.NodeType {
				continue
			}
			nodeCount++
			gpu := node.Status.Capacity["nvidia.com/gpu"]
			gpuPerNode = int(gpu.Value())
			efa := node.Status.Capacity["vpc.amazonaws.com/efa"]
			efaPerNode = int(efa.Value())
		}
		if nodeCount == 0 {
			// Kept non-fatal to preserve existing behavior, but make the
			// mismatch visible instead of silently proceeding with zeros.
			log.Printf("Warning: no nodes of type %s found; cluster nodes are of type %s.",
				testConfig.NodeType, nodes.Items[0].Labels[instanceTypeLabel])
		}
	} else {
		first := &nodes.Items[0]
		log.Printf("No node type specified. Using the node type %s in the node groups.", first.Labels[instanceTypeLabel])
		testConfig.NodeType = first.Labels[instanceTypeLabel]
		nodeCount = len(nodes.Items)
		gpu := first.Status.Capacity["nvidia.com/gpu"]
		gpuPerNode = int(gpu.Value())
		efa := first.Status.Capacity["vpc.amazonaws.com/efa"]
		efaPerNode = int(efa.Value())
	}

	return ctx, nil
}

// TestMain parses the suite flags, builds the shared test environment and
// registers setup and teardown steps before running the tests.
//
// Setup always applies the MPI operator manifest and waits for its Deployment.
// Depending on the flags it additionally installs the NVIDIA device plugin
// (InstallDevicePlugin, on by default), the EFA device plugin (EfaEnabled), and
// the DCGM exporter plus CloudWatch agent (when MetricDimensions is non-empty),
// waiting for each DaemonSet. checkNodeTypes runs last. Finish deletes every
// registered manifest in reverse order.
//
// The environment context is cancelled on os.Interrupt. A failure to render the
// CloudWatch agent manifest aborts the run immediately: its DaemonSet is waited
// on during setup, so continuing without a rendered manifest cannot succeed.
func TestMain(m *testing.M) {
	testConfig = Config{
		InstallDevicePlugin: true,
		PytorchImage:        "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:2.1.0-gpu-py310-cu121-ubuntu20.04-ec2",
	}

	_, err := common.ParseFlags(&testConfig)
	if err != nil {
		log.Fatalf("failed to parse flags: %v", err)
	}
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	// cancel is invoked explicitly before os.Exit at the bottom; a deferred
	// call would never run because os.Exit skips deferred functions.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	manifestsList := [][]byte{
		manifests.MpiOperatorManifest,
	}

	setUpFunctions := []env.Func{
		// manifestsList is read when this step runs, i.e. after all the
		// conditional appends below, so it applies the complete list.
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			err := fwext.ApplyManifests(config.Client().RESTConfig(), manifestsList...)
			if err != nil {
				return ctx, err
			}
			return ctx, nil
		},
		deployMPIOperator,
	}

	if testConfig.InstallDevicePlugin {
		manifestsList = append(manifestsList, manifests.NvidiaDevicePluginManifest)
		setUpFunctions = append(setUpFunctions, func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			return common.DeployDaemonSet("nvidia-device-plugin-daemonset", "kube-system")(ctx, config)
		})
	}

	if testConfig.EfaEnabled {
		manifestsList = append(manifestsList, manifests.EfaDevicePluginManifest)
		setUpFunctions = append(setUpFunctions, func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			return common.DeployDaemonSet("aws-efa-k8s-device-plugin-daemonset", "kube-system")(ctx, config)
		})
	}

	if len(testConfig.MetricDimensions) > 0 {
		renderedCloudWatchAgentManifest, err := manifests.RenderCloudWatchAgentManifest(testConfig.MetricDimensions)
		if err != nil {
			// Fail fast: the setup step below waits for the cwagent DaemonSet,
			// which cannot appear without a rendered manifest.
			log.Fatalf("failed to render CloudWatch Agent manifest: %v", err)
		}
		manifestsList = append(manifestsList, manifests.DCGMExporterManifest, renderedCloudWatchAgentManifest)
		setUpFunctions = append(setUpFunctions, func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			if ctx, err := common.DeployDaemonSet("dcgm-exporter", "kube-system")(ctx, config); err != nil {
				return ctx, err
			}
			if ctx, err := common.DeployDaemonSet("cwagent", "amazon-cloudwatch")(ctx, config); err != nil {
				return ctx, err
			}
			return ctx, nil
		})
	}

	setUpFunctions = append(setUpFunctions, checkNodeTypes)
	testenv.Setup(setUpFunctions...)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Delete in the opposite order of application.
			slices.Reverse(manifestsList)
			err := fwext.DeleteManifests(config.Client().RESTConfig(), manifestsList...)
			if err != nil {
				return ctx, err
			}
			return ctx, nil
		},
	)

	exitCode := testenv.Run(m)
	cancel()
	os.Exit(exitCode)
}


================================================
FILE: test/cases/nvidia/manifests/daemonset-containerd-check.yaml
================================================
# DaemonSet that validates the host's containerd configuration.
#
# The container mounts the host's /etc/containerd read-only at
# /host-etc/containerd and runs the inline script against config.toml. The
# script exits non-zero on the first failed check; when every check passes it
# blocks on `tail -f /dev/null` so the container keeps running.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: containerd-check
  namespace: default
  labels:
    app: containerd-check
spec:
  selector:
    matchLabels:
      app: containerd-check
  template:
    metadata:
      labels:
        app: containerd-check
    spec:
      containers:
      - name: containerd-check
        image: public.ecr.aws/amazonlinux/amazonlinux:latest
        # NOTE(review): the script uses `set -o pipefail` and `[[ ... ]]`, so it
        # relies on `sh` in this image behaving like bash - confirm.
        command:
        - sh
        - -c
        - |
          # 1. Ensure the script fails on any command or pipeline error
          set -e
          set -o pipefail

          echo "=== content read by the container ==="
          cat /host-etc/containerd/config.toml

          # 2. Check containerd config version and look for appropriate sandbox field
          #    In containerd config version = 2 expect to find pattern `sandbox_image = "registry.k8s.io/pause:3.10.1"`
          #    In containerd config version = 3 expect to find pattern `sandbox = "registry.k8s.io/pause:3.10.1"`
          #    For more details: https://github.com/containerd/containerd/blob/main/docs/cri/config.md
          version_line=$(grep -E '^version\s*=' /host-etc/containerd/config.toml || true)
          if [ -z "$version_line" ]; then
            echo "FAIL: no version line found in containerd config"
            exit 1
          fi

          version=$(echo "$version_line" | cut -d'=' -f2 | tr -d ' ')
          echo "INFO: containerd config version = $version"
          if [ "$version" = "2" ]; then
            sandbox_line=$(grep -E 'sandbox_image\s*=' /host-etc/containerd/config.toml || true)
          elif [ "$version" = "3" ]; then
            sandbox_line=$(grep -E 'sandbox\s*=' /host-etc/containerd/config.toml || true)
          else
            echo "FAIL: unsupported containerd config version: $version"
            exit 1
          fi

          # 3. If no sandbox configuration is found, fail explicitly
          if [ -z "$sandbox_line" ]; then
            echo "FAIL: no sandbox_image or sandbox line found"
            echo "=== debug ==="
            exit 1
          fi
          sandbox_image=$(echo "$sandbox_line" | cut -d'"' -f2)

          # 4. Check that $sandbox_image references .ecr. or is provided on the instance
          if [[ "$sandbox_image" == "localhost"* ]]; then
            echo "INFO: skipping .ecr. check for localhost sandbox image"
          else
            if [[ "$sandbox_image" != *".ecr."* ]]; then
              echo "FAIL: no .ecr. reference in $sandbox_image"
              echo "=== debug ==="
              exit 1
            fi
          fi

          # 5. Check for 'nvidia-container-runtime'
          if ! grep -q "nvidia-container-runtime" /host-etc/containerd/config.toml; then
            echo "FAIL: no nvidia-container-runtime found"
            echo "=== debug ==="
            exit 1
          fi

          # 6. Check for 'systemd_cgroup = true' or 'SystemdCgroup = true'
          if ! ( grep -q 'systemd_cgroup = true' /host-etc/containerd/config.toml || \
                 grep -q 'SystemdCgroup = true' /host-etc/containerd/config.toml ); then
            echo "FAIL: no systemd cgroup setting"
            echo "=== debug ==="
            exit 1
          fi

          echo "containerd config check PASSED."
          # Keep container running so DS can be marked Ready
          tail -f /dev/null
        volumeMounts:
        # Host containerd directory, mounted read-only.
        - name: containerd-config
          mountPath: /host-etc/containerd
          readOnly: true
      volumes:
      - name: containerd-config
        hostPath:
          path: /etc/containerd


================================================
FILE: test/cases/nvidia/manifests/job-hpc-benchmarks.yaml
================================================
# Single-pod Job that runs hpl.sh from the NVIDIA hpc-benchmarks image under
# mpirun, with one MPI process per GPU and multi-node mode disabled.
#
# This file is a template: the GpuPerNode placeholder is substituted before the
# manifest is applied.
# NOTE(review): "benckmarks" in the name and labels looks like a typo of
# "benchmarks"; renaming it would have to be coordinated with whatever refers
# to this Job by name or label.
kind: Job
apiVersion: batch/v1
metadata:
  name: hpc-benckmarks-job
  labels:
    app: hpc-benckmarks-job
spec:
  completions: 1
  parallelism: 1
  template:
    metadata:
      labels:
        app: hpc-benckmarks-job
    spec:
      volumes:
        # Memory-backed emptyDir mounted at /dev/shm below.
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
      - name: hpc-benchmarks
        image: "nvcr.io/nvidia/hpc-benchmarks:25.04"
        command:
        - mpirun
        - --allow-run-as-root
        # Number of MPI processes; rendered to the same value as the GPU limit.
        - -np
        - "{{.GpuPerNode}}"
        - -bind-to
        - none
        # Each -x exports one environment variable to the launched processes.
        - -x
        - NCCL_DEBUG=INFO
        - -x 
        - HPL_FCT_COMM_POLICY=1 
        - -x 
        - HPL_USE_NVSHMEM=0
        # TODO: for arm it will be
        # - hpl-aarch64.sh
        - hpl.sh 
        - --mem-affinity 
        - 0:0:0:0:1:1:1:1 
        # --cpu-affinity needs to be tuned depending on the number of CPUs
        # available on the instance type.
        - --cpu-affinity 
        - 0-13:14-27:28-41:42-55:56-69:70-83:84-97:98-111
        - --no-multinode 
        - --dat 
        - hpl-linux-x86_64/sample-dat/HPL-dgx-1N.dat
        # TODO: the path differs for arm64
        # - hpl-linux-aarch64-gpu/sample-dat/HPL-dgx-1N.dat
        volumeMounts:
        - mountPath: /dev/shm
          name: dshm
        imagePullPolicy: Always
        resources:
          limits:
            nvidia.com/gpu: {{.GpuPerNode}}
        env:
        - name: UCX_TLS
          value: "^sysv"
      restartPolicy: Never
  # Pod failures are retried up to four times.
  backoffLimit: 4


================================================
FILE: test/cases/nvidia/manifests/job-unit-test-single-node.yaml
================================================
# Job that runs ./gpu_unit_tests/unit_test with bash inside the NVIDIA test
# image on a single pod.
#
# This file is a template: the NvidiaTestImage, SkipTestSubcommand, NodeType
# and GpuPerNode placeholders are substituted before the manifest is applied.
kind: Job
apiVersion: batch/v1
metadata:
  name: unit-test-job
  labels:
    app: unit-test-job
spec:
  template:
    metadata:
      labels:
        app: unit-test-job
    spec:
      containers:
      - name: unit-test-container
        image: {{.NvidiaTestImage}}
        command: 
        - /bin/bash
        - ./gpu_unit_tests/unit_test
        env:
          # NOTE(review): presumably read by the unit_test script to skip
          # selected tests - confirm against the script.
          - name: SKIP_TESTS_SUBCOMMAND
            value: {{.SkipTestSubcommand}}
          # because we started building these from source, this is just a
          # regular binary.
          - name: DEMO_SUITE_DIR
            value: /usr/bin
          - name: EC2_INSTANCE_TYPE
            value: {{.NodeType}}
        imagePullPolicy: Always
        resources:
          limits:
            nvidia.com/gpu: {{.GpuPerNode}}
          requests:
            cpu: "1"
            memory: 1Gi
      restartPolicy: Never
  # A failed pod is retried at most once.
  backoffLimit: 1


================================================
FILE: test/cases/nvidia/manifests/mpi-job-nccl-test-multi-node.yaml
================================================
# Multi-node NCCL test MPIJob. This file is a Go template: JobName, TestName,
# NvidiaTestImage, the GPU/EFA/worker counts, MaxBytes and NcclBuffSize are
# substituted before the manifest is applied.
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: {{.JobName}}
spec:
  slotsPerWorker: {{.GpuPerNode}}
  runPolicy:
    # it may take a bit for the workers to get ready (the container image is heavy)
    # and we don't want the launcher to reach its CrashLoopBackoff limit in the meantime
    backoffLimit: 20
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    # The launcher only runs mpirun; it declares no GPU or EFA resources.
    Launcher:
      replicas: 1
      template:
        spec:
          restartPolicy: OnFailure
          containers:
          - image: {{.NvidiaTestImage}}
            imagePullPolicy: Always
            name: nccl-test-launcher
            env:
            command:
            - mpirun
            - --allow-run-as-root
            - --tag-output
            # total number of MPI ranks across all workers
            - -np
            - "{{.WorkerNodeGpuCount}}"
            - -bind-to
            - none
            - -map-by
            - slot
            # each "-x" exports the following environment variable to the ranks
            - -x
            - PATH
            - -x
            - LD_LIBRARY_PATH
            - -x
            - NCCL_DEBUG=INFO
            - -x
            - NCCL_BUFFSIZE={{.NcclBuffSize}}
            - -x
            - NCCL_TUNER_PLUGIN=/opt/aws-ofi-nccl/install/lib/libnccl-ofi-tuner.so
            - --mca
            - pml
            - ^cm,ucx
            - --mca
            - btl
            - tcp,self
            - --mca
            - btl_tcp_if_exclude
            - lo,docker0,veth_def_agent
            # the NCCL test binary to run, followed by its arguments
            - /opt/nccl-tests/build/{{.TestName}}
            - -b
            - "8"
            - -e
            - {{.MaxBytes}}
            - -f
            - "2"
            - -c
            - "1"
            - -n
            - "10"
    # Workers hold the GPUs and EFA interfaces.
    Worker:
      replicas: {{.WorkerNodeCount}}
      template:
        spec:
          volumes:
          # memory-backed emptyDir, mounted at /dev/shm in the worker container
          - name: dshm
            emptyDir:
              medium: Memory
          containers:
          - image: {{.NvidiaTestImage}}
            imagePullPolicy: Always
            name: nccl-test-worker
            volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            resources:
              requests:
                nvidia.com/gpu: {{.GpuPerNode}}
                vpc.amazonaws.com/efa: {{.EfaInterfacePerNode}}
              limits:
                nvidia.com/gpu: {{.GpuPerNode}}
                vpc.amazonaws.com/efa: {{.EfaInterfacePerNode}}


================================================
FILE: test/cases/nvidia/manifests/mpi-job-pytorch-training-single-node.yaml
================================================
---
# Single-node PyTorch training MPIJob. This file is a Go template:
# PytorchTestImage is substituted before the manifest is applied.
# container image from: https://github.com/aws/deep-learning-containers/blob/master/available_images.md
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: pytorch-training-single-node
spec:
  slotsPerWorker: 4
  runPolicy:
    cleanPodPolicy: Running
  mpiImplementation: OpenMPI
  mpiReplicaSpecs:
    # Only a launcher is defined; it runs a single rank and holds the one GPU.
    Launcher:
      replicas: 1
      template:
         spec:
           restartPolicy: OnFailure
           containers:
           - image: {{.PytorchTestImage}}
             name: gpu-test
             command:
              - mpirun
              - --allow-run-as-root
              - -np
              - "1"
              - -mca
              - btl_tcp_if_exclude
              - lo
              - -mca
              - pml
              - ob1
              - -mca
              - btl
              - ^openib
              - --bind-to
              - none
              - -map-by
              - slot
              - -x
              - LD_LIBRARY_PATH
              - -x
              - PATH
              - -x
              - NCCL_SOCKET_IFNAME=eth0
              - -x
              - NCCL_DEBUG=INFO
              - -x
              - MXNET_CUDNN_AUTOTUNE_DEFAULT=0
              # clones pytorch/examples at a pinned commit and runs one MNIST epoch
              - python
              - -c
              - import os; os.system("git clone https://github.com/pytorch/examples.git pytorch-examples"); os.system("git -C pytorch-examples checkout 0f0c9131ca5c79d1332dce1f4c06fe942fbdc665"); os.system("python pytorch-examples/mnist/main.py --epochs 1")
             resources:
               limits:
                 nvidia.com/gpu: 1


================================================
FILE: test/cases/nvidia/manifests/nvidia-driver-capabilities-check.yaml
================================================
# Pod that installs Mesa/EGL packages and the moderngl Python module, then
# creates a standalone EGL context with NVIDIA_DRIVER_CAPABILITIES=all.
# NOTE(review): presumably the context creation only succeeds when the driver's
# graphics capability is exposed to the container - confirm against the test.
apiVersion: v1
kind: Pod
metadata:
  name: moderngl-pod
spec:
  restartPolicy: Never
  tolerations:
  - key: "nvidia.com/gpu"
    operator: "Exists"
    effect: "NoSchedule"
  containers:
  - name: moderngl-container
    env:
    - name: NVIDIA_DRIVER_CAPABILITIES
      value: "all"
    image: public.ecr.aws/ubuntu/ubuntu:22.04
    command: ["/bin/bash"]
    # the script runs with "set -e", so any failing step fails the container
    args:
      - -c
      - |
        set -e
        apt-get update
        apt-get install -y \
          python3 \
          python3-pip \
          libgl1-mesa-glx \
          libegl1-mesa-dev \
          libgles2-mesa-dev \
          mesa-utils \
          xvfb
        pip3 install moderngl
        sleep 60
        cat <<'EOF' > moderngl-script.py
        import moderngl
        moderngl.create_standalone_context(backend='egl')
        EOF
        python3 moderngl-script.py
    resources:
      requests:
        memory: "50Gi"
        cpu: "15"
        "nvidia.com/gpu": "1"
      limits:
        memory: "50Gi"
        "nvidia.com/gpu": "1"


================================================
FILE: test/cases/nvidia/mpi_test.go
================================================
//go:build e2e

package nvidia

import (
	"context"
	_ "embed"
	"fmt"
	"regexp"
	"strings"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/internal/e2e/mpijobs"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/utils/strings/slices"
)

var (
	// instanceSupportsRdmaRead lists the instance types the multi-node NCCL
	// test treats as RDMA-capable: for these it raises the message size and
	// NCCL buffer size (except all-to-all on P4) and, when EFA is enabled,
	// requires GDRDMA to show up in the job log.
	instanceSupportsRdmaRead = []string{"p5.48xlarge", "p4d.24xlarge", "p4de.24xlarge", "p5e.48xlarge", "p5en.48xlarge"}
)

// Embedded MPIJob manifest templates; both are rendered with
// fwext.RenderManifests before being applied to the cluster.
var (
	//go:embed manifests/mpi-job-pytorch-training-single-node.yaml
	mpiJobPytorchTrainingSingleNodeManifest []byte
	//go:embed manifests/mpi-job-nccl-test-multi-node.yaml
	mpiJobNcclTestMultiNodeManifest []byte
)

// ncclTestManifestTplVars carries the values substituted into
// manifests/mpi-job-nccl-test-multi-node.yaml:
//
//   - WorkerNodeCount: number of Worker replicas.
//   - WorkerNodeGpuCount: total MPI rank count passed to mpirun -np.
//   - GpuPerNode: slotsPerWorker and the per-worker nvidia.com/gpu request/limit.
//   - NvidiaTestImage: image used by both launcher and workers.
//   - EfaInterfacePerNode: per-worker vpc.amazonaws.com/efa request/limit.
//   - MaxBytes: value of the NCCL test's -e argument.
//   - NcclBuffSize: value exported as NCCL_BUFFSIZE.
//   - TestName: binary under /opt/nccl-tests/build to execute.
//   - JobName: metadata.name of the MPIJob.
type ncclTestManifestTplVars struct {
	WorkerNodeCount     int
	WorkerNodeGpuCount  int
	GpuPerNode          int
	NvidiaTestImage     string
	EfaInterfacePerNode int
	MaxBytes            string
	NcclBuffSize        string
	TestName            string
	JobName             string
}

func TestMPIJobPytorchTraining(t *testing.T) {
	testenv.Test(t,
		singleNode(),
		multiNode("all_reduce_perf"),
		multiNode("all_gather_perf"),
		multiNode("alltoall_perf"),
	)
}

// multiNode builds the feature that runs the nccl-tests binary named testName
// as a multi-node MPIJob: Setup renders and applies the manifest, the
// assessment waits for the job to succeed and inspects its log, and Teardown
// deletes what Setup applied.
func multiNode(testName string) features.Feature {
	// Underscores in the test name are replaced with dashes for the job name.
	jobName := "multi-node-" + strings.ReplaceAll(testName, "_", "-")

	// Filled in by Setup and reused by Teardown.
	var renderedMpiJobNcclTestMultiNodeManifest []byte

	setup := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		if testConfig.NvidiaTestImage == "" {
			t.Fatal(fmt.Errorf("nvidiaTestImage must be set to run unit test, use https://github.com/aws/aws-k8s-tester/blob/main/test/images/nvidia/Dockerfile to build the image and -nvidiaTestImage to set the image url"))
		}

		// Every node runs a worker; the launcher in the manifest declares no
		// GPU or EFA resources of its own.
		tplVars := ncclTestManifestTplVars{
			WorkerNodeCount:     nodeCount,
			WorkerNodeGpuCount:  nodeCount * gpuPerNode,
			GpuPerNode:          gpuPerNode,
			NvidiaTestImage:     testConfig.NvidiaTestImage,
			EfaInterfacePerNode: efaPerNode,
			MaxBytes:            "2G",
			NcclBuffSize:        "4194304",
			TestName:            testName,
			JobName:             jobName,
		}
		if slices.Contains(instanceSupportsRdmaRead, testConfig.NodeType) {
			t.Log("Instance supports RDMA")
			// TODO: revisit this with some kind of per-instance optimizer, or maybe use the defaults for all instance types unless specified
			// P4 running all-to-all keeps the default sizes set above.
			p4AllToAll := testName == "alltoall_perf" && strings.Contains(testConfig.NodeType, "p4")
			if !p4AllToAll {
				tplVars.MaxBytes = "16G"
				tplVars.NcclBuffSize = "8388608"
			}
		}

		var err error
		if renderedMpiJobNcclTestMultiNodeManifest, err = fwext.RenderManifests(mpiJobNcclTestMultiNodeManifest, tplVars); err != nil {
			t.Fatal(err)
		}
		t.Log("Applying multi node manifest")
		if err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedMpiJobNcclTestMultiNodeManifest); err != nil {
			t.Fatal(err)
		}
		t.Log("Manifest applied successfully")
		return ctx
	}

	assess := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		mpiJob := mpijobs.NewUnstructured(jobName, "default")
		t.Log("Waiting for multi node job to complete")
		succeeded := conditions.New(cfg.Client().Resources()).ResourceMatch(mpiJob, mpijobs.MPIJobSucceeded)
		if err := wait.For(succeeded, wait.WithContext(ctx), wait.WithTimeout(60*time.Minute)); err != nil {
			t.Error(err)
		}
		t.Logf("final mpijob resource: %v", mpiJob)

		// Logs are fetched and printed even if the wait failed.
		jobLog, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), mpiJob)
		if err != nil {
			t.Errorf("failed to get job logs: %v", err)
		}
		t.Logf("Test log for %s:", jobName)
		t.Log(jobLog)

		if t.Failed() {
			return ctx
		}
		t.Log("Multi node job completed")

		// Verify GPU Direct RDMA is used on P4/P5
		if !testConfig.EfaEnabled || !slices.Contains(instanceSupportsRdmaRead, testConfig.NodeType) {
			return ctx
		}
		gdrdma := regexp.MustCompile(`\[send\] via NET/.*Libfabric/.*/GDRDMA`)
		if !gdrdma.MatchString(jobLog) {
			t.Errorf("GPU Direct RDMA is not utilized for inter-node communication in NCCL tests on instances that support GDRDMA: %s", testConfig.NodeType)
		}
		return ctx
	}

	teardown := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		if err := fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedMpiJobNcclTestMultiNodeManifest); err != nil {
			t.Fatal(err)
		}
		return ctx
	}

	return features.New(fmt.Sprintf("multi-node:%s", testName)).
		WithLabel("suite", "nvidia").
		WithLabel("hardware", "gpu").
		WithLabel("hardware", "efa").
		Setup(setup).
		Assess("MPIJob succeeds", assess).
		Teardown(teardown).
		Feature()
}

// singleNode builds the feature that runs the single-node PyTorch training
// MPIJob: Setup renders and applies the manifest, the assessment waits up to
// 30 minutes for the job to succeed, and Teardown prints the job log (when the
// assessment recorded the job) and deletes the applied manifest.
func singleNode() features.Feature {
	// mpiJobCtxKey is the private context key under which the assessment stores
	// the MPIJob for Teardown; an unexported struct type cannot collide with
	// keys set by other packages, unlike a plain string.
	type mpiJobCtxKey struct{}

	// Filled in by Setup and reused by Teardown.
	var renderedSingleNodeManifest []byte

	return features.New("single-node").
		WithLabel("suite", "nvidia").
		WithLabel("hardware", "gpu").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Log("Applying single node manifest")
			var err error
			renderedSingleNodeManifest, err = fwext.RenderManifests(mpiJobPytorchTrainingSingleNodeManifest, struct {
				PytorchTestImage string
			}{
				PytorchTestImage: testConfig.PytorchImage,
			})
			if err != nil {
				t.Fatal(err)
			}
			err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedSingleNodeManifest)
			if err != nil {
				t.Fatal(err)
			}
			t.Log("Manifest applied successfully")
			return ctx
		}).
		Assess("MPIJob succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			mpiJob := mpijobs.NewUnstructured("pytorch-training-single-node", "default")
			ctx = context.WithValue(ctx, mpiJobCtxKey{}, mpiJob)
			t.Log("Waiting for single node job to complete")
			err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).ResourceMatch(mpiJob, mpijobs.MPIJobSucceeded),
				wait.WithContext(ctx),
				wait.WithTimeout(30*time.Minute),
			)
			if err != nil {
				t.Error(err)
			} else {
				t.Log("Single node job completed")
			}
			t.Logf("final mpijob resource: %v", mpiJob)
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			// Log collection is best effort and only possible when the
			// assessment stored the job; it must never block manifest cleanup.
			if job := ctx.Value(mpiJobCtxKey{}); job != nil {
				if u, ok := job.(*unstructured.Unstructured); !ok {
					// Do not call GetJobLogs with the nil pointer left by the
					// failed assertion.
					t.Errorf("mpiJob in context is not unstructured: %v", job)
				} else {
					log, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), u)
					if err != nil {
						t.Errorf("failed to get job logs: %v", err)
					}
					t.Log("Test log for pytorch-training-single-node:")
					t.Log(log)
				}
			}
			// Always delete the applied manifest, matching the other features'
			// teardown steps.
			if err := fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedSingleNodeManifest); err != nil {
				t.Error(err)
			}
			return ctx
		}).
		Feature()

}


================================================
FILE: test/cases/nvidia/unit_test.go
================================================
//go:build e2e

package nvidia

import (
	"context"
	_ "embed"
	"fmt"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"

	batchv1 "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Embedded Job manifest templates and their rendered forms. Each rendered
// variable is assigned in the matching feature's Setup step and passed to
// DeleteManifests in its Teardown step.
var (
	//go:embed manifests/job-unit-test-single-node.yaml
	jobUnitTestSingleNodeManifest         []byte
	renderedJobUnitTestSingleNodeManifest []byte
	//go:embed manifests/job-hpc-benchmarks.yaml
	jobHpcBenchmarksSingleNodeManifest         []byte
	renderedJobHpcBenchmarksSingleNodeManifest []byte
)

// unitTestManifestTplVars carries the values substituted into
// manifests/job-unit-test-single-node.yaml: the container image, the value of
// the SKIP_TESTS_SUBCOMMAND env var, the nvidia.com/gpu limit, and the value
// of the EC2_INSTANCE_TYPE env var.
type unitTestManifestTplVars struct {
	NvidiaTestImage    string
	SkipTestSubcommand string
	GpuPerNode         int
	NodeType           string
}

// hpcTestManifestTplVars carries the value substituted into
// manifests/job-hpc-benchmarks.yaml, where GpuPerNode sets both the mpirun
// rank count and the nvidia.com/gpu limit.
type hpcTestManifestTplVars struct {
	GpuPerNode int
}

func TestSingleNodeUnitTest(t *testing.T) {
	unitTest := features.New("unit-test").
		WithLabel("suite", "nvidia").
		WithLabel("hardware", "gpu").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			if testConfig.NvidiaTestImage == "" {
				t.Fatal(fmt.Errorf("nvidiaTestImage must be set to run unit test, use https://github.com/aws/aws-k8s-tester/blob/main/test/images/nvidia/Dockerfile to build the image and -nvidiaTestImage to set the image url"))
			}
			var err error
			renderedJobUnitTestSingleNodeManifest, err = fwext.RenderManifests(jobUnitTestSingleNodeManifest, unitTestManifestTplVars{
				NvidiaTestImage:    testConfig.NvidiaTestImage,
				SkipTestSubcommand: testConfig.SkipUnitTestSubcommand,
				GpuPerNode:         gpuPerNode,
				NodeType:           testConfig.NodeType,
			})
			if err != nil {
				t.Fatal(err)
			}
			err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedJobUnitTestSingleNodeManifest)
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Assess("Unit test Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "unit-test-job", Namespace: "default"},
			}
			err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
				wait.WithContext(ctx),
				wait.WithTimeout(60*time.Minute))
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "unit-test-job", Namespace: "default"},
			})
			if err != nil {
				t.Error(err)
			}
			t.Log("Test log for unit-test-job:")
			t.Log(log)
			err = fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedJobUnitTestSingleNodeManifest)
			if err != nil {
				t.Error(err)
			}
			return ctx
		}).
		Feature()

	hpcTest := features.New("hpc-benckmarks").
		WithLabel("suite", "nvidia").
		WithLabel("hardware", "gpu").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			var err error
			renderedJobHpcBenchmarksSingleNodeManifest, err = fwext.RenderManifests(jobHpcBenchmarksSingleNodeManifest, hpcTestManifestTplVars{
				GpuPerNode: gpuPerNode,
			})
			if err != nil {
				t.Fatal(err)
			}
			err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedJobHpcBenchmarksSingleNodeManifest)
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Assess("HPC test Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "hpc-benckmarks-job", Namespace: "default"},
			}
			err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
				wait.WithContext(ctx))
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "hpc-benckmarks-job", Namespace: "default"},
			})
			if err != nil {
				t.Error(err)
			}
			t.Log("Test log for hpc-benckmarks-job:")
			t.Log(log)
			err = fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedJobHpcBenchmarksSingleNodeManifest)
			if err != nil {
				t.Error(err)
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, unitTest, hpcTest)
}


================================================
FILE: test/cases/nvidia-dra/main_test.go
================================================
//go:build e2e

package nvidia_dra

import (
	"context"
	"embed"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"os/signal"
	"path/filepath"
	"slices"
	"strings"
	"sync"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/common"
	"github.com/aws/aws-k8s-tester/test/manifests"
	"golang.org/x/sync/errgroup"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// rctsFS holds the embedded rcts/ directory of ResourceClaimTemplate
// manifests, read per instance family via topo.RCTSubDir.
//
//go:embed rcts
var rctsFS embed.FS

// Package-level test state. testenv and the four flag pointers are set at the
// start of TestMain; clientset and nodeCount are populated by the last
// environment setup step.
var (
	testenv                   env.Environment
	clientset                 kubernetes.Interface
	nodeType                  *string
	rdmaDeviceDraDriverImage  *string
	acceleratorDraDriverImage *string
	containerTestImage        *string
	nodeCount                 int
)

// supportedRdmaTypes lists the recognized RDMA device types. validateConfig
// rejects any node type whose topology reports an RdmaType outside this list.
var supportedRdmaTypes = []string{"efa"}

func validateConfig() error {
	if err := common.ValidateRequiredFlags(map[string]string{
		"rdmaDeviceDraDriverImage": *rdmaDeviceDraDriverImage,
		"containerTestImage":       *containerTestImage,
		"nodeType":                 *nodeType,
	}); err != nil {
		return err
	}
	// Validate that nodeType maps to a known topology (and thus a known RDMA type).
	topo, err := GetTopologyForNodeType(*nodeType)
	if err != nil {
		return fmt.Errorf("invalid -nodeType: %w", err)
	}
	if !slices.Contains(supportedRdmaTypes, topo.RdmaType) {
		return fmt.Errorf("instance family %q has unsupported RDMA type %q; supported: %v", topo.Family, topo.RdmaType, supportedRdmaTypes)
	}
	// Verify helm is available on the PATH.
	if _, err := exec.LookPath("helm"); err != nil {
		return fmt.Errorf("helm is required but not found on PATH: %w", err)
	}
	// Verify kubectl is available on the PATH.
	if _, err := exec.LookPath("kubectl"); err != nil {
		return fmt.Errorf("kubectl is required but not found on PATH: %w", err)
	}
	return nil
}

// Helm coordinates for the NVIDIA DRA driver: the release name (also used as
// the chart name within the repo), the local repo alias and its URL, the
// namespace the release is installed into, and the pinned chart version.
const (
	nvidiaDRAHelmReleaseName = "nvidia-dra-driver-gpu"
	nvidiaDRAHelmRepoName    = "nvidia-dra"
	nvidiaDRAHelmRepoURL     = "https://helm.ngc.nvidia.com/nvidia"
	nvidiaDRANamespace       = "nvidia-dra-driver-gpu"
	nvidiaDRAHelmChartVer    = "25.8.1"
)

// labelNodesGPUPresent runs "kubectl label nodes --all" to set
// nvidia.com/gpu.present=true on every node, overwriting any existing value.
// kubectl's output is streamed to this process's stdout/stderr.
func labelNodesGPUPresent(ctx context.Context) error {
	kubectlArgs := []string{"label", "nodes", "--all", "nvidia.com/gpu.present=true", "--overwrite"}
	log.Printf("[INFO] Labeling nodes: kubectl %s", strings.Join(kubectlArgs, " "))

	labelCmd := exec.CommandContext(ctx, "kubectl", kubectlArgs...)
	labelCmd.Stdout, labelCmd.Stderr = os.Stdout, os.Stderr
	if runErr := labelCmd.Run(); runErr != nil {
		return fmt.Errorf("kubectl label nodes failed: %w", runErr)
	}

	log.Println("All nodes labeled with nvidia.com/gpu.present=true.")
	return nil
}

// installNvidiaDRADriverHelm registers the NVIDIA Helm repo and then runs
// "helm upgrade --install" for the pinned NVIDIA DRA driver chart version.
// When -acceleratorDraDriverImage is non-empty, the image reference is split
// by common.SplitImageRepoTag and passed as image.repository / image.tag
// overrides. The incoming ctx is returned unchanged.
func installNvidiaDRADriverHelm(ctx context.Context, config *envconf.Config) (context.Context, error) {
	// Step 1: make the chart repository known to helm.
	addRepoArgs := []string{"repo", "add", nvidiaDRAHelmRepoName, nvidiaDRAHelmRepoURL}
	log.Printf("[INFO] Adding NVIDIA Helm repo: helm %s", strings.Join(addRepoArgs, " "))
	addRepo := exec.CommandContext(ctx, "helm", addRepoArgs...)
	addRepo.Stdout, addRepo.Stderr = os.Stdout, os.Stderr
	if runErr := addRepo.Run(); runErr != nil {
		return ctx, fmt.Errorf("helm repo add nvidia-dra failed: %w", runErr)
	}

	// Step 2: install the release, or upgrade it if it already exists.
	chartRef := nvidiaDRAHelmRepoName + "/" + nvidiaDRAHelmReleaseName
	installArgs := []string{"upgrade", "--install", nvidiaDRAHelmReleaseName, chartRef}
	installArgs = append(installArgs,
		"--version", nvidiaDRAHelmChartVer,
		"--create-namespace",
		"--namespace", nvidiaDRANamespace,
		"--set", "resources.gpus.enabled=true",
		"--set", "gpuResourcesEnabledOverride=true",
		"--timeout", "5m",
	)
	if image := *acceleratorDraDriverImage; image != "" {
		repo, tag := common.SplitImageRepoTag(image)
		installArgs = append(installArgs,
			"--set", fmt.Sprintf("image.repository=%s", repo),
			"--set", fmt.Sprintf("image.tag=%s", tag),
		)
	}

	log.Printf("[INFO] Installing NVIDIA DRA driver via Helm: helm %s", strings.Join(installArgs, " "))
	install := exec.CommandContext(ctx, "helm", installArgs...)
	install.Stdout, install.Stderr = os.Stdout, os.Stderr
	if runErr := install.Run(); runErr != nil {
		return ctx, fmt.Errorf("helm install nvidia-dra-driver-gpu failed: %w", runErr)
	}

	log.Println("NVIDIA DRA driver Helm release installed successfully.")
	return ctx, nil
}

// uninstallNvidiaDRADriverHelm runs "helm uninstall" for the NVIDIA DRA driver
// release. It is best effort: a failing uninstall is logged as a warning and
// the function still returns a nil error with ctx unchanged.
func uninstallNvidiaDRADriverHelm(ctx context.Context, config *envconf.Config) (context.Context, error) {
	uninstallArgs := []string{"uninstall", nvidiaDRAHelmReleaseName, "--namespace", nvidiaDRANamespace}
	log.Printf("[INFO] Uninstalling NVIDIA DRA driver Helm release: helm %s", strings.Join(uninstallArgs, " "))

	uninstall := exec.CommandContext(ctx, "helm", uninstallArgs...)
	uninstall.Stdout, uninstall.Stderr = os.Stdout, os.Stderr
	runErr := uninstall.Run()
	if runErr != nil {
		log.Printf("[WARN] helm uninstall nvidia-dra-driver-gpu failed (may already be removed): %v", runErr)
	}
	return ctx, nil
}

// waitForNvidiaDRADriverReady waits up to five minutes for the
// nvidia-dra-driver-gpu-kubelet-plugin DaemonSet in the driver namespace to
// satisfy the DaemonSetReady condition. The incoming ctx is returned unchanged.
func waitForNvidiaDRADriverReady(ctx context.Context, config *envconf.Config) (context.Context, error) {
	kubeletPlugin := &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "nvidia-dra-driver-gpu-kubelet-plugin",
			Namespace: nvidiaDRANamespace,
		},
	}
	ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(kubeletPlugin)

	if waitErr := wait.For(ready, wait.WithTimeout(5*time.Minute), wait.WithContext(ctx)); waitErr != nil {
		return ctx, fmt.Errorf("nvidia-dra-driver daemonset is not ready: %w", waitErr)
	}

	log.Println("nvidia-dra-driver daemonset is ready.")
	return ctx, nil
}

// TestMain registers the suite's flags, validates the configuration, and wires
// the environment's setup and finish steps before running the tests.
//
// Setup deploys the MPI operator, the dranet driver and ResourceClaimTemplates
// (for EFA topologies), labels the nodes, and installs the NVIDIA DRA driver
// concurrently; it then waits for the driver DaemonSet and counts the nodes of
// the requested type. Finish uninstalls the Helm release and deletes every
// manifest recorded in manifestsList in reverse order.
func TestMain(m *testing.M) {
	nodeType = flag.String("nodeType", "", "instance type for the cluster (e.g. p5.48xlarge)")
	rdmaDeviceDraDriverImage = flag.String("rdmaDeviceDraDriverImage", "", "container image for the dranet DRA driver")
	acceleratorDraDriverImage = flag.String("acceleratorDraDriverImage", "", "container image for the NVIDIA DRA driver")
	containerTestImage = flag.String("containerTestImage", "", "container image for the NCCL test workload")

	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	if err := validateConfig(); err != nil {
		log.Fatalf("invalid configuration: %v", err)
	}

	// Resolve topology to determine RDMA type from nodeType. This is done
	// before the signal context is created so that every log.Fatalf happens
	// while there is nothing to clean up.
	topo, err := GetTopologyForNodeType(*nodeType)
	if err != nil {
		log.Fatalf("failed to resolve topology: %v", err)
	}

	// cancel is called explicitly before os.Exit below: os.Exit does not run
	// deferred functions.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	manifestsList := [][]byte{
		manifests.MpiOperatorManifest,
	}
	setUpFunctions := []env.Func{
		// Run independent setup steps concurrently.
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Load the RCT manifests before any goroutine is started, so a load
			// failure returns without leaving deployments running unobserved.
			var rctManifests [][]byte
			if topo.RdmaType == "efa" {
				var err error
				rctManifests, err = common.LoadRCTManifests(rctsFS, filepath.Join("rcts", topo.RCTSubDir))
				if err != nil {
					return ctx, fmt.Errorf("failed to load RCT manifests: %w", err)
				}
				manifestsList = append(manifestsList, rctManifests...)
			}

			// mu guards manifestsList against appends from the goroutines below.
			var mu sync.Mutex
			g, gctx := errgroup.WithContext(ctx)

			// Deploy MPI operator.
			g.Go(func() error {
				return common.DeployMPIOperator(gctx, config)
			})

			// Deploy dranet and RCTs based on topology's RDMA type.
			if topo.RdmaType == "efa" {
				g.Go(func() error {
					renderedDranet, err := common.DeployDranet(gctx, config, *rdmaDeviceDraDriverImage)
					if err != nil {
						return err
					}
					mu.Lock()
					manifestsList = append(manifestsList, renderedDranet)
					mu.Unlock()
					return nil
				})

				g.Go(func() error {
					return fwext.ApplyManifests(config.Client().RESTConfig(), rctManifests...)
				})
			}

			// Label all nodes with nvidia.com/gpu.present=true.
			g.Go(func() error {
				return labelNodesGPUPresent(gctx)
			})

			// Add NVIDIA Helm repo and install NVIDIA DRA driver.
			g.Go(func() error {
				_, err := installNvidiaDRADriverHelm(gctx, config)
				return err
			})

			if err := g.Wait(); err != nil {
				return ctx, err
			}
			return ctx, nil
		},
		waitForNvidiaDRADriverReady,
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			var err error
			clientset, err = kubernetes.NewForConfig(config.Client().RESTConfig())
			if err != nil {
				return ctx, err
			}
			nodeCount, err = common.CountNodesByType(ctx, clientset, *nodeType)
			return ctx, err
		},
	}
	testenv.Setup(setUpFunctions...)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Uninstall NVIDIA DRA driver Helm release first. The error is
			// ignored on purpose: the helper logs failures itself and always
			// returns nil.
			ctx, _ = uninstallNvidiaDRADriverHelm(ctx, config)
			// Delete remaining manifests in reverse order.
			slices.Reverse(manifestsList)
			if err := fwext.DeleteManifests(config.Client().RESTConfig(), manifestsList...); err != nil {
				return ctx, fmt.Errorf("failed to delete manifests: %w", err)
			}
			return ctx, nil
		},
	)

	code := testenv.Run(m)
	cancel()
	os.Exit(code)
}


================================================
FILE: test/cases/nvidia-dra/nvidia_dra_test.go
================================================
//go:build e2e

package nvidia_dra

import (
	"embed"
	"path/filepath"
	"testing"

	"github.com/aws/aws-k8s-tester/test/common"
)

// embeddedTestCases holds the embedded testcases/ directory of test case
// specs, read per instance family via topo.TestCaseSubDir.
//
//go:embed testcases
var embeddedTestCases embed.FS

// TestNvidiaDRAMultiNode builds one feature per embedded test case for the
// configured node type's instance family and runs them. Each test case is
// rendered into an MPIJob manifest from the family's ResourceClaimTemplate
// index. When no test cases are found the test logs that and returns.
func TestNvidiaDRAMultiNode(t *testing.T) {
	topo, err := GetTopologyForNodeType(*nodeType)
	if err != nil {
		t.Fatalf("resolving topology for %s: %v", *nodeType, err)
	}

	rctDir := filepath.Join("rcts", topo.RCTSubDir)
	tcDir := filepath.Join("testcases", topo.TestCaseSubDir)

	claimTemplates, err := common.LoadRCTIndex(rctsFS, rctDir)
	if err != nil {
		t.Fatalf("loading RCT index from %s: %v", rctDir, err)
	}

	// renderJob turns one test case into the MPIJob YAML to apply.
	renderJob := func(tc *common.TestCaseSpec, rctIndex map[string]*common.ResourceClaimTemplateSpec) ([]byte, error) {
		params, paramErr := ComputeNvidiaMPIJobParams(tc, rctIndex, topo, nodeCount, *containerTestImage)
		if paramErr != nil {
			return nil, paramErr
		}
		return RenderNvidiaMPIJobYAML(*params)
	}

	featureList, err := common.DiscoverAndBuildFeatures(
		embeddedTestCases,
		tcDir,
		claimTemplates,
		"nvidia-dra",
		"multi-node-nccl-test",
		nodeCount,
		renderJob,
		clientset,
	)
	if err != nil {
		t.Fatalf("discovering and building features: %v", err)
	}

	if len(featureList) > 0 {
		testenv.Test(t, featureList...)
		return
	}
	t.Logf("no test cases found under %s, skipping", tcDir)
}


================================================
FILE: test/cases/nvidia-dra/rcts/p5/rct-all-efas.yaml
================================================
# ResourceClaimTemplate requesting every device of the
# efa.networking.k8s.aws device class (allocationMode: All).
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: default
  name: rct-all-efas
spec:
  spec:
    devices:
      requests:
      - name: all-efas
        deviceClassName: efa.networking.k8s.aws
        allocationMode: All


================================================
FILE: test/cases/nvidia-dra/rcts/p5/rct-all-gpus.yaml
================================================
# ResourceClaimTemplate requesting every device of the gpu.nvidia.com device
# class (allocationMode: All).
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: default
  name: rct-all-gpus
spec:
  spec:
    devices:
      requests:
      - name: all-gpus
        deviceClassName: gpu.nvidia.com
        allocationMode: All


================================================
FILE: test/cases/nvidia-dra/rcts/p5/rct-five-efas-one-gpu.yaml
================================================
# ResourceClaimTemplate requesting exactly five EFA devices and one GPU, with
# all six required to share the same resource.kubernetes.io/pcieRoot attribute.
# It is referenced by the five-efas-one-gpu negative test case.
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: default
  name: rct-five-efas-one-gpu
spec:
  spec:
    devices:
      requests:
      - name: five-efas
        deviceClassName: efa.networking.k8s.aws
        allocationMode: ExactCount
        count: 5
      - name: one-gpu
        deviceClassName: gpu.nvidia.com
        allocationMode: ExactCount
        count: 1
      constraints:
      # both requests must resolve to devices with a matching attribute value
      - requests: ["five-efas", "one-gpu"]
        matchAttribute: "resource.kubernetes.io/pcieRoot"


================================================
FILE: test/cases/nvidia-dra/templates/nccl-test-mpijob.yaml.tmpl
================================================
# Go text/template for the DRA-based multi-node NCCL all_reduce_perf MPIJob.
# Inputs: SlotsPerWorker, TotalProcesses, WorkerReplicas, ContainerTestImage and
# the ResourceClaims list (Name / TemplateName pairs).
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: multi-node-nccl-test
spec:
  slotsPerWorker: {{.SlotsPerWorker}}
  runPolicy:
    backoffLimit: 20
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: nccl-test-launcher
              image: {{.ContainerTestImage}}
              imagePullPolicy: IfNotPresent
              env:
                # NOTE(review): Kubernetes expands only $(VAR) references in env
                # values, so "$PATH" is presumably passed through literally
                # rather than expanded - confirm this is intended.
                - name: PATH
                  value: $PATH:/opt/amazon/efa/bin:/usr/bin
              command:
                - /opt/amazon/openmpi/bin/mpirun
                - --allow-run-as-root
                - --tag-output
                # total rank count, then ranks per node
                - -np
                - "{{.TotalProcesses}}"
                - -N
                - "{{.SlotsPerWorker}}"
                - --bind-to
                - none
                # each "-x" exports the following environment variable to the ranks
                - -x
                - PATH
                - -x
                - LD_LIBRARY_PATH
                - -x
                - NCCL_DEBUG=INFO
                - -x
                - NCCL_BUFFSIZE=8388608
                - -x
                - NCCL_P2P_NET_CHUNKSIZE=524288
                - -x
                - NCCL_TUNER_PLUGIN=/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu/libnccl-ofi-tuner.so
                - --mca
                - pml
                - ^cm,ucx
                - --mca
                - btl
                - tcp,self
                - --mca
                - btl_tcp_if_exclude
                - lo,docker0,veth_def_agent
                # the NCCL test binary, followed by its arguments
                - /opt/nccl-tests/build/all_reduce_perf
                - -b
                - "8"
                - -e
                - "16G"
                - -f
                - "2"
                - -g
                - "1"
                - -c
                - "1"
                - -n
                - "100"
    Worker:
      replicas: {{.WorkerReplicas}}
      template:
        spec:
          containers:
            - name: nccl-tests-worker
              image: {{.ContainerTestImage}}
              imagePullPolicy: IfNotPresent
              volumeMounts:
                - name: shmem
                  mountPath: /dev/shm
              # devices come from DRA claims; one entry is emitted per ResourceClaims element
              resources:
                claims:
{{- range .ResourceClaims}}
                - name: {{.Name}}
{{- end}}
          resourceClaims:
{{- range .ResourceClaims}}
          - name: {{.Name}}
            resourceClaimTemplateName: {{.TemplateName}}
{{- end}}
          volumes:
            # the host's /dev/shm is mounted into the worker
            - name: shmem
              hostPath:
                path: /dev/shm


================================================
FILE: test/cases/nvidia-dra/testcases/p5/all-efas-all-gpus.yaml
================================================
# Test case: each worker claims all EFA devices and all GPUs through the
# rct-all-efas and rct-all-gpus ResourceClaimTemplates.
resourceClaims:
- name: all-efas
  resourceClaimTemplateName: rct-all-efas
- name: all-gpus
  resourceClaimTemplateName: rct-all-gpus


================================================
FILE: test/cases/nvidia-dra/testcases/p5/five-efas-one-gpu-negative-test.yaml
================================================
# Negative test case (expectFailure: true): each worker uses a single claim
# from rct-five-efas-one-gpu, which requires five EFA devices and one GPU to
# share the same PCIe root.
expectFailure: true
resourceClaims:
- name: five-efas-one-gpu
  resourceClaimTemplateName: rct-five-efas-one-gpu


================================================
FILE: test/cases/nvidia-dra/topology.go
================================================
package nvidia_dra

import (
	"bytes"
	_ "embed"
	"fmt"
	"log"
	"slices"
	"strings"
	"text/template"

	"github.com/aws/aws-k8s-tester/test/common"
)

// mpijobTemplate is the embedded text/template source for the NCCL test
// MPIJob; RenderNvidiaMPIJobYAML parses and executes it.
//
//go:embed templates/nccl-test-mpijob.yaml.tmpl
var mpijobTemplate string

// ---------------------------------------------------------------------------
// Instance topology
// ---------------------------------------------------------------------------

// NvidiaInstanceTopology describes the GPU/EFA hardware topology for an NVIDIA instance family.
// Values are registered in instanceTopologies and handed out by
// GetTopologyForNodeType.
type NvidiaInstanceTopology struct {
	Family         string // instance family name, also the registry key (e.g. "p5")
	GPUsPerNode    int    // total GPUs per node (e.g. 8 for p5.48xlarge)
	AllGPUCount    int    // GPU count getGPUCount reports for a request with AllocationMode "All"
	RdmaType       string // RDMA device type (e.g. "efa")
	RCTSubDir      string // subdirectory under rcts/
	TestCaseSubDir string // subdirectory under testcases/
}

// instanceTopologies is the registry of supported instance families, keyed by
// family name. GetTopologyForNodeType rejects any family that is not listed
// here; "p5" is the only entry at present.
var instanceTopologies = map[string]NvidiaInstanceTopology{
	"p5": {
		Family:         "p5",
		GPUsPerNode:    8,
		AllGPUCount:    8,
		RdmaType:       "efa",
		RCTSubDir:      "p5",
		TestCaseSubDir: "p5",
	},
}

// GetTopologyForNodeType resolves the registered topology for a node type
// such as "p5.48xlarge". The family is derived with common.ExtractFamily and
// looked up in instanceTopologies.
//
// The returned pointer refers to a copy of the registry entry, so callers
// cannot modify the registry through it. An unregistered family yields an
// error that lists the supported families.
func GetTopologyForNodeType(nodeType string) (*NvidiaInstanceTopology, error) {
	family := common.ExtractFamily(nodeType)
	if entry, registered := instanceTopologies[family]; registered {
		return &entry, nil
	}
	return nil, fmt.Errorf("unsupported instance family %q (from %q); supported: %s",
		family, nodeType, supportedFamilies())
}

// supportedFamilies returns the registered instance family names as a
// comma-separated list for use in error messages.
//
// Go randomizes map iteration order, so the names are sorted to keep the
// message stable from one call to the next.
func supportedFamilies() string {
	families := make([]string, 0, len(instanceTopologies))
	for family := range instanceTopologies {
		families = append(families, family)
	}
	slices.Sort(families)
	return strings.Join(families, ", ")
}

// ---------------------------------------------------------------------------
// MPIJob rendering
// ---------------------------------------------------------------------------

// NvidiaMPIJobParams holds all template parameters for rendering the NCCL MPIJob YAML.
//
// As filled in by ComputeNvidiaMPIJobParams: SlotsPerWorker is the GPU count
// summed over the test case's resource claims, TotalProcesses is
// SlotsPerWorker multiplied by WorkerReplicas, and ResourceClaims carries one
// name/template-name pair per claim. WorkerReplicas and ContainerTestImage
// are passed through from the caller.
type NvidiaMPIJobParams struct {
	SlotsPerWorker     int
	TotalProcesses     int
	WorkerReplicas     int
	ContainerTestImage string
	ResourceClaims     []common.ResourceClaimRef
}

// RenderNvidiaMPIJobYAML executes the embedded NCCL MPIJob template against
// params and returns the rendered YAML.
//
// The template is parsed on every call. Parse failures and execution failures
// are returned as wrapped errors with distinct prefixes, and the byte slice is
// nil in both cases.
func RenderNvidiaMPIJobYAML(params NvidiaMPIJobParams) ([]byte, error) {
	parsed, parseErr := template.New("mpijob").Parse(mpijobTemplate)
	if parseErr != nil {
		return nil, fmt.Errorf("parsing MPIJob template: %w", parseErr)
	}

	rendered := new(bytes.Buffer)
	if execErr := parsed.Execute(rendered, params); execErr != nil {
		return nil, fmt.Errorf("rendering MPIJob template: %w", execErr)
	}
	return rendered.Bytes(), nil
}

// ---------------------------------------------------------------------------
// NVIDIA-specific helpers
// ---------------------------------------------------------------------------

// getGPUCount reports the GPU count requested by an RCT.
//
// Only the first device request whose DeviceClassName is "gpu.nvidia.com" is
// considered. If that request uses AllocationMode "All" the topology's
// AllGPUCount is returned; otherwise the request's explicit Count is returned
// as-is, with a warning logged when it is not positive. An RCT without any
// gpu.nvidia.com request logs a warning and yields 0.
func getGPUCount(rct *common.ResourceClaimTemplateSpec, topo *NvidiaInstanceTopology) int {
	requests := rct.Spec.Spec.Devices.Requests

	// Locate the first GPU request; later ones are intentionally ignored.
	gpuIdx := -1
	for i := range requests {
		if requests[i].DeviceClassName == "gpu.nvidia.com" {
			gpuIdx = i
			break
		}
	}
	if gpuIdx < 0 {
		log.Printf("[WARN] no gpu.nvidia.com device request found in RCT, returning GPU count 0")
		return 0
	}

	gpuReq := requests[gpuIdx]
	switch {
	case gpuReq.AllocationMode == "All":
		return topo.AllGPUCount
	case gpuReq.Count <= 0:
		log.Printf("[WARN] gpu.nvidia.com request has non-positive count: %d", gpuReq.Count)
	}
	return gpuReq.Count
}

// ComputeNvidiaMPIJobParams computes MPIJob parameters from a test case spec.
//
// Each claim's resourceClaimTemplateName is resolved against rctIndex to get
// its GPU count (see getGPUCount). SlotsPerWorker is the sum of those counts
// and TotalProcesses is SlotsPerWorker multiplied by workerReplicas.
//
// An error is returned when tc or topo is nil, workerReplicas is not
// positive, containerTestImage is empty, or a claim references a template
// that is missing from rctIndex or mapped to a nil entry.
func ComputeNvidiaMPIJobParams(tc *common.TestCaseSpec, rctIndex map[string]*common.ResourceClaimTemplateSpec, topo *NvidiaInstanceTopology, workerReplicas int, containerTestImage string) (*NvidiaMPIJobParams, error) {
	// Reject a nil spec up front instead of panicking on tc.ResourceClaims.
	if tc == nil {
		return nil, fmt.Errorf("test case spec is required")
	}
	if topo == nil {
		return nil, fmt.Errorf("instance topology is required")
	}
	if workerReplicas <= 0 {
		return nil, fmt.Errorf("workerReplicas must be positive, got %d", workerReplicas)
	}
	if containerTestImage == "" {
		return nil, fmt.Errorf("containerTestImage is required")
	}

	totalGPUs := 0
	var claims []common.ResourceClaimRef

	for _, tcClaim := range tc.ResourceClaims {
		rct, ok := rctIndex[tcClaim.ResourceClaimTemplateName]
		if !ok {
			return nil, fmt.Errorf("resource claim template %q not found in RCT index", tcClaim.ResourceClaimTemplateName)
		}
		// A nil entry would make getGPUCount dereference a nil pointer.
		if rct == nil {
			return nil, fmt.Errorf("resource claim template %q has a nil entry in RCT index", tcClaim.ResourceClaimTemplateName)
		}

		totalGPUs += getGPUCount(rct, topo)

		claims = append(claims, common.ResourceClaimRef{
			Name:         tcClaim.Name,
			TemplateName: tcClaim.ResourceClaimTemplateName,
		})
	}

	// One MPI slot per GPU on each worker.
	slotsPerWorker := totalGPUs
	totalProcesses := slotsPerWorker * workerReplicas

	return &NvidiaMPIJobParams{
		SlotsPerWorker:     slotsPerWorker,
		TotalProcesses:     totalProcesses,
		WorkerReplicas:     workerReplicas,
		ContainerTestImage: containerTestImage,
		ResourceClaims:     claims,
	}, nil
}


================================================
FILE: test/cases/nvidia-inference/bert_inference_test.go
================================================
//go:build e2e

package inference

import (
	"context"
	_ "embed"
	"fmt"
	"io"
	"log"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// bertInferenceManifest is the embedded, unrendered Job template.
//
//go:embed manifests/bert-inference.yaml
var bertInferenceManifest []byte

// renderedBertInferenceManifest is the template after rendering. It is set in
// TestBertInference's Setup step and reused by its Teardown step to delete
// what was applied.
var renderedBertInferenceManifest []byte

// bertInferenceManifestTplVars carries the values substituted into
// manifests/bert-inference.yaml: the container image, the INFERENCE_MODE
// environment value, and the nvidia.com/gpu request and limit.
type bertInferenceManifestTplVars struct {
	BertInferenceImage string
	InferenceMode      string
	GPUPerNode         string
}

// TestBertInference runs the single-node BERT inference Job end to end.
//
// Setup renders manifests/bert-inference.yaml from testConfig, applies it and
// records the apply time in the context. The assessment waits up to 20 minutes
// for the "bert-inference" Job in the "default" namespace to succeed, then
// logs the elapsed time and the pod logs. Teardown deletes the rendered
// manifest.
func TestBertInference(t *testing.T) {
	// applyTimeKey is a private context key type. Using a dedicated type
	// instead of a bare string avoids collisions with values stored in the
	// context by other packages.
	type applyTimeKey struct{}

	feature := features.New("bert-inference").
		WithLabel("suite", "nvidia").
		WithLabel("hardware", "gpu").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			if testConfig.BertInferenceImage == "" {
				t.Fatalf("[ERROR] bertInferenceImage must be set")
			}

			log.Println("[INFO] Rendering BERT inference manifest...")
			var err error
			renderedBertInferenceManifest, err = fwext.RenderManifests(
				bertInferenceManifest,
				bertInferenceManifestTplVars{
					BertInferenceImage: testConfig.BertInferenceImage,
					InferenceMode:      testConfig.InferenceMode,
					GPUPerNode:         fmt.Sprintf("%d", testConfig.GpuRequested),
				},
			)
			if err != nil {
				t.Fatalf("[ERROR] Failed to render BERT inference manifest: %v", err)
			}

			log.Println("[INFO] Applying BERT inference manifest...")
			if applyErr := fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedBertInferenceManifest); applyErr != nil {
				t.Fatalf("[ERROR] Failed to apply BERT inference manifest: %v", applyErr)
			}
			log.Println("[INFO] BERT inference manifest applied successfully.")

			// Record time after applying the manifest
			return context.WithValue(ctx, applyTimeKey{}, time.Now())
		}).
		Assess("BERT inference Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[INFO] Checking BERT inference job completion...")
			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "bert-inference", Namespace: "default"},
			}
			if err := wait.For(
				fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
				wait.WithTimeout(20*time.Minute),
				// Stop waiting as soon as the test context is cancelled (e.g. on interrupt).
				wait.WithContext(ctx),
			); err != nil {
				log.Println("[ERROR] BERT inference job failed. Gathering logs...")
				if logErr := printJobLogs(ctx, cfg, "default", "bert-inference"); logErr != nil {
					t.Logf("[WARNING] Failed to retrieve bert-inference job logs: %v", logErr)
				}
				t.Fatalf("[ERROR] BERT inference job did not succeed: %v", err)
			}

			log.Println("[INFO] BERT inference job succeeded. Gathering logs...")
			// Compute duration from manifest apply to job success. The comma-ok
			// assertion is false when Setup did not store an apply time.
			if applyTime, ok := ctx.Value(applyTimeKey{}).(time.Time); ok {
				log.Printf("[INFO] BERT inference job completed in %s", time.Since(applyTime))
			}

			// Print logs (including node name) for the Pod
			if err := printJobLogs(ctx, cfg, "default", "bert-inference"); err != nil {
				t.Logf("[WARNING] Failed to retrieve BERT inference job logs: %v", err)
			}
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[INFO] Cleaning up BERT inference job resources...")
			if err := fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedBertInferenceManifest); err != nil {
				t.Fatalf("[ERROR] Failed to delete BERT inference manifest: %v", err)
			}
			log.Println("[INFO] BERT inference job resources cleaned up.")
			return ctx
		}).
		Feature()

	testenv.Test(t, feature)
}

// printJobLogs writes the logs of every pod belonging to the given Job to the
// standard logger, together with the node each pod was scheduled on.
//
// Pods are selected with the "job-name=<jobName>" label in namespace. An error
// is returned when the clientset cannot be built, the pod list fails or is
// empty, or a pod's log stream cannot be opened or read.
func printJobLogs(ctx context.Context, cfg *envconf.Config, namespace, jobName string) error {
	cs, err := getClientset(cfg.Client().RESTConfig())
	if err != nil {
		return fmt.Errorf("[ERROR] Failed to create kubernetes clientset: %w", err)
	}

	pods, err := cs.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
		LabelSelector: fmt.Sprintf("job-name=%s", jobName),
	})
	if err != nil {
		return fmt.Errorf("[ERROR] Failed to list pods for job %s: %w", jobName, err)
	}
	if len(pods.Items) == 0 {
		return fmt.Errorf("[ERROR] No pods found for job %s", jobName)
	}

	for i := range pods.Items {
		if err := logPodOutput(ctx, cs, &pods.Items[i]); err != nil {
			return err
		}
	}
	return nil
}

// logPodOutput streams one pod's logs to the standard logger in 4 KiB chunks.
// It is a separate function so the deferred Close runs as soon as this pod is
// done, rather than keeping every stream open until printJobLogs returns.
func logPodOutput(ctx context.Context, cs *kubernetes.Clientset, pod *v1.Pod) error {
	log.Printf("[INFO] Pod %s is running on node %s", pod.Name, pod.Spec.NodeName)

	log.Printf("[INFO] Retrieving logs from pod %s...", pod.Name)
	stream, err := cs.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{}).Stream(ctx)
	if err != nil {
		return fmt.Errorf("[ERROR] Failed to get logs from pod %s: %w", pod.Name, err)
	}
	defer stream.Close()

	buf := make([]byte, 4096)
	for {
		n, readErr := stream.Read(buf)
		// A Read may return data together with an error, so emit the data first.
		if n > 0 {
			log.Printf("[INFO] Logs from Pod %s:\n%s", pod.Name, string(buf[:n]))
		}
		if readErr == io.EOF {
			log.Printf("[INFO] Completed log stream for pod %s.", pod.Name)
			return nil
		}
		if readErr != nil {
			return fmt.Errorf("[ERROR] Failed to read logs from pod %s: %w", pod.Name, readErr)
		}
	}
}

// getClientset builds a typed Kubernetes clientset from restConfig, wrapping
// any construction failure in a descriptive error.
func getClientset(restConfig *rest.Config) (*kubernetes.Clientset, error) {
	clientset, buildErr := kubernetes.NewForConfig(restConfig)
	if buildErr == nil {
		return clientset, nil
	}
	return nil, fmt.Errorf("[ERROR] Cannot create kubernetes clientset: %w", buildErr)
}


================================================
FILE: test/cases/nvidia-inference/main_test.go
================================================
//go:build e2e

package inference

import (
	"context"
	_ "embed"
	"fmt"
	"log"
	"os"
	"os/signal"
	"slices"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/common"
	"github.com/aws/aws-k8s-tester/test/manifests"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// TestConfig holds the command-line configuration of the NVIDIA inference
// suite. It is populated by common.ParseFlags in TestMain, which first seeds
// InferenceMode and GpuRequested with defaults. The embedded common.MetricOps
// supplies MetricDimensions, which TestMain uses to decide whether to deploy
// the DCGM exporter and CloudWatch Agent.
type TestConfig struct {
	common.MetricOps
	BertInferenceImage string `flag:"bertInferenceImage" desc:"BERT inference container image"`
	InferenceMode      string `flag:"inferenceMode" desc:"Inference mode for BERT (throughput or latency)"`
	GpuRequested       int    `flag:"gpuRequested" desc:"Number of GPUs required for inference"`
}

// Package-level state shared by TestMain and the tests in this package.
var (
	// testenv is the e2e-framework environment created in TestMain.
	testenv env.Environment
	// testConfig holds the parsed command-line flags.
	testConfig TestConfig
)

// TestMain configures and runs the NVIDIA inference e2e suite.
//
// It parses flags into testConfig, builds the test environment with a context
// that is cancelled on os.Interrupt, and registers:
//   - setup steps that apply the NVIDIA device plugin (plus the DCGM exporter
//     and CloudWatch Agent when metric dimensions are configured), wait for
//     the corresponding DaemonSets, and verify GPU capacity;
//   - a finish step that deletes the applied manifests in reverse order.
//
// If the CloudWatch Agent manifest cannot be rendered, a warning is logged and
// the suite runs without the metrics stack rather than applying an unusable
// manifest and then waiting for DaemonSets that were never created.
func TestMain(m *testing.M) {
	// Initialize testConfig with default values
	testConfig = TestConfig{
		InferenceMode: "throughput",
		GpuRequested:  1,
	}

	_, err := common.ParseFlags(&testConfig)
	if err != nil {
		log.Fatalf("[ERROR] Failed to parse flags: %v", err)
	}
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("[ERROR] Failed to initialize test environment: %v", err)
	}

	// cancel is called explicitly before os.Exit below, because os.Exit does
	// not run deferred functions.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	manifestsList := [][]byte{
		manifests.NvidiaDevicePluginManifest,
	}

	// metricsEnabled is true only when the metrics manifests were actually
	// added to manifestsList.
	metricsEnabled := false
	if len(testConfig.MetricDimensions) > 0 {
		// Render CloudWatch Agent manifest with dynamic dimensions
		renderedCloudWatchAgentManifest, err := manifests.RenderCloudWatchAgentManifest(testConfig.MetricDimensions)
		if err != nil {
			log.Printf("Warning: Failed to render CloudWatch Agent manifest: %v", err)
			log.Println("[WARN] Continuing without DCGM Exporter and CloudWatch Agent.")
		} else {
			manifestsList = append(manifestsList, manifests.DCGMExporterManifest, renderedCloudWatchAgentManifest)
			metricsEnabled = true
		}
	}

	testenv.Setup(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			log.Println("[INFO] Applying manifests.")
			err := fwext.ApplyManifests(config.Client().RESTConfig(), manifestsList...)
			if err != nil {
				return ctx, fmt.Errorf("[ERROR] Failed to apply manifests: %w", err)
			}
			log.Println("[INFO] Successfully applied manifests.")
			return ctx, nil
		},
		common.DeployDaemonSet("nvidia-device-plugin-daemonset", "kube-system"),
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			// Only wait for the metrics DaemonSets when their manifests were applied.
			if metricsEnabled {
				if ctx, err := common.DeployDaemonSet("dcgm-exporter", "kube-system")(ctx, config); err != nil {
					return ctx, err
				}
				if ctx, err := common.DeployDaemonSet("cwagent", "amazon-cloudwatch")(ctx, config); err != nil {
					return ctx, err
				}
			}
			return ctx, nil
		},
		checkGpuCapacity,
	)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			log.Println("[INFO] Deleting manifests.")
			// Delete in the opposite order of creation.
			slices.Reverse(manifestsList)
			err := fwext.DeleteManifests(config.Client().RESTConfig(), manifestsList...)
			if err != nil {
				return ctx, fmt.Errorf("[ERROR] failed to delete manifests: %w", err)
			}
			log.Println("[INFO] Successfully deleted manifests.")
			return ctx, nil
		},
	)

	exitCode := testenv.Run(m)
	log.Printf("[INFO] Tests finished with exit code %d", exitCode)
	cancel()
	os.Exit(exitCode)
}

// checkGpuCapacity ensures at least one node has >= the requested number of GPUs,
// and logs each node's instance type.
//
// Nodes are polled every 10 seconds for up to 5 minutes, because the
// nvidia.com/gpu capacity may not be reported yet when setup reaches this
// step. Polling stops early if ctx is cancelled. A node list failure or an
// empty node list aborts the wait immediately. The returned error wraps the
// underlying wait error so the cause (timeout, cancellation, API failure)
// stays visible.
func checkGpuCapacity(ctx context.Context, config *envconf.Config) (context.Context, error) {
	log.Printf("[INFO] Validating cluster has at least %d GPU(s).", testConfig.GpuRequested)

	cs, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return ctx, fmt.Errorf("failed to create kubernetes client: %w", err)
	}

	err = wait.For(func(ctx context.Context) (bool, error) {
		nodes, err := cs.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return false, fmt.Errorf("failed to list nodes: %w", err)
		}
		if len(nodes.Items) == 0 {
			return false, fmt.Errorf("no nodes found in the cluster")
		}
		for _, node := range nodes.Items {
			instanceType := node.Labels["node.kubernetes.io/instance-type"]
			gpuCap, ok := node.Status.Capacity["nvidia.com/gpu"]
			if !ok {
				log.Printf("[INFO] Node %s (type: %s) has no GPU capacity.", node.Name, instanceType)
				continue
			}
			gpus := int(gpuCap.Value())
			if gpus >= testConfig.GpuRequested {
				log.Printf("[INFO] Node %s (type: %s) meets the request of %d GPU(s).",
					node.Name, instanceType, testConfig.GpuRequested)
				return true, nil
			}
			// Distinguish "too few GPUs" from "no GPU capacity reported".
			log.Printf("[INFO] Node %s (type: %s) has %d GPU(s), fewer than the %d requested.",
				node.Name, instanceType, gpus, testConfig.GpuRequested)
		}
		log.Printf("[INFO] No node meets the GPU requirement. The GPU info might not be propagated yet. Retrying...")
		return false, nil
	}, wait.WithTimeout(5*time.Minute), wait.WithInterval(10*time.Second), wait.WithContext(ctx))

	if err != nil {
		return ctx, fmt.Errorf("no node has >= %d GPU(s): %w", testConfig.GpuRequested, err)
	}

	log.Println("[INFO] GPU capacity check passed.")
	return ctx, nil
}


================================================
FILE: test/cases/nvidia-inference/manifests/bert-inference.yaml
================================================
# Single-node BERT inference job with GPU. Memory-backed volume for /dev/shm
#
# This file is a Go template: the BertInferenceImage, InferenceMode and
# GPUPerNode placeholders are filled in by TestBertInference before the
# manifest is applied. The test then waits for the Job named bert-inference
# in the default namespace.
apiVersion: batch/v1
kind: Job
metadata:
  name: bert-inference
spec:
  backoffLimit: 4
  template:
    spec:
      restartPolicy: OnFailure
      volumes:
      # In-memory emptyDir, mounted at /dev/shm in the container below.
      - name: dshm
        emptyDir:
          medium: Memory
      containers:
      - name: bert-inference
        image: {{.BertInferenceImage}}
        imagePullPolicy: Always
        command: ["python", "infer.py"]
        env:
        - name: INFERENCE_MODE
          value: "{{.InferenceMode}}"
        volumeMounts:
        - mountPath: /dev/shm
          name: dshm
        resources:
          # Requests and limits use the same GPU count.
          requests:
            nvidia.com/gpu: {{.GPUPerNode}}
          limits:
            nvidia.com/gpu: {{.GPUPerNode}}


================================================
FILE: test/cases/nvidia-training/bert_training_test.go
================================================
//go:build e2e

package training

import (
	"context"
	_ "embed"
	"fmt"
	"testing"
	"time"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// bertTrainingManifest is the embedded, parameterized MPIJob template
// (manifests/bert-training.yaml). TestBertTraining renders it with
// fwext.RenderManifests before applying it.
var (
	//go:embed manifests/bert-training.yaml
	bertTrainingManifest []byte
)

// TestBertTraining runs the multi-node BERT training MPIJob end to end.
//
// The MPI layout is derived from the package-level gpuPerNode, nodeCount and
// efaPerNode values: one slot per GPU, one worker per node, and a single EFA
// device per worker when EFA is enabled and available. Setup renders and
// applies manifests/bert-training.yaml; the assessment waits up to 20 minutes
// for the "bert-training-launcher" Job in the "default" namespace to succeed
// and prints its pod logs in both the success and failure case.
func TestBertTraining(t *testing.T) {
	if testConfig.BertTrainingImage == "" {
		t.Fatal(fmt.Errorf("bertTrainingImage must be set to run the test"))
	}

	slotsPerWorker := gpuPerNode
	workerReplicas := nodeCount
	np := slotsPerWorker * workerReplicas
	efaRequested := 0
	if testConfig.EfaEnabled && efaPerNode > 0 {
		efaRequested = 1
	}

	bertTraining := features.New("bert-training").
		WithLabel("suite", "nvidia").
		WithLabel("hardware", "gpu").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			renderVars := map[string]string{
				"BertTrainingImage": testConfig.BertTrainingImage,
				"SlotsPerWorker":    fmt.Sprintf("%d", slotsPerWorker),
				"NP":                fmt.Sprintf("%d", np),
				"WorkerReplicas":    fmt.Sprintf("%d", workerReplicas),
				"GPUPerNode":        fmt.Sprintf("%d", gpuPerNode),
				"EFARequested":      fmt.Sprintf("%d", efaRequested),
			}

			renderedManifest, err := fwext.RenderManifests(bertTrainingManifest, renderVars)
			if err != nil {
				t.Fatal(err)
			}

			err = fwext.ApplyManifests(cfg.Client().RESTConfig(), renderedManifest)
			if err != nil {
				t.Fatal(err)
			}
			return ctx
		}).
		Assess("BERT training Job succeeds", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{Name: "bert-training-launcher", Namespace: "default"},
			}
			if err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
				wait.WithTimeout(time.Minute*20),
				wait.WithContext(ctx),
			); err != nil {
				t.Logf("[ERROR] BERT training job failed. Gathering logs...")
				// Use a separate variable here: assigning to err would replace
				// the wait failure with the log-retrieval result, and the fatal
				// message below would report "<nil>" whenever logs were fetched
				// successfully.
				if logErr := printJobLogs(ctx, cfg, "default", "bert-training-launcher"); logErr != nil {
					t.Logf("Warning: failed to retrieve bert-training job logs: %v", logErr)
				}
				t.Fatalf("[ERROR] BERT training job did not succeed: %v", err)
			}
			t.Logf("[INFO] BERT training job succeeded. Gathering logs...")

			err := printJobLogs(ctx, cfg, "default", "bert-training-launcher")
			if err != nil {
				t.Logf("Warning: failed to retrieve bert-training job logs: %v", err)
			}

			return ctx
		}).
		Feature()

	testenv.Test(t, bertTraining)
}

// printJobLogs prints the logs of every pod belonging to the given Job to
// standard output.
//
// Pods are selected with the "job-name=<jobName>" label in namespace. An error
// is returned when the clientset cannot be built, the pod list fails or is
// empty, or a pod's log stream cannot be opened. Reading is best-effort: a
// read error, including end of stream, simply ends that pod's output.
func printJobLogs(ctx context.Context, cfg *envconf.Config, namespace, jobName string) error {
	clientset, err := getClientset(cfg.Client().RESTConfig())
	if err != nil {
		return fmt.Errorf("failed to create kubernetes clientset: %w", err)
	}

	podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
		LabelSelector: fmt.Sprintf("job-name=%s", jobName),
	})
	if err != nil {
		return fmt.Errorf("failed to list pods for job %s: %w", jobName, err)
	}

	if len(podList.Items) == 0 {
		return fmt.Errorf("no pods found for job %s", jobName)
	}

	for i := range podList.Items {
		if err := printPodLogs(ctx, clientset, &podList.Items[i]); err != nil {
			return err
		}
	}

	return nil
}

// printPodLogs streams one pod's logs to standard output in 4 KiB chunks. It
// is a separate function so the deferred Close runs as soon as this pod is
// done, rather than keeping every stream open until printJobLogs returns.
func printPodLogs(ctx context.Context, clientset *kubernetes.Clientset, pod *v1.Pod) error {
	req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{})
	logStream, err := req.Stream(ctx)
	if err != nil {
		return fmt.Errorf("failed to get logs from pod %s: %w", pod.Name, err)
	}
	defer logStream.Close()

	buf := make([]byte, 4096)
	for {
		n, readErr := logStream.Read(buf)
		// A Read may return data together with an error, so print the data first.
		if n > 0 {
			fmt.Printf("Logs from Pod %s: \n%s\n", pod.Name, string(buf[:n]))
		}
		if readErr != nil {
			// Best-effort: end of stream and read failures both end the output.
			return nil
		}
	}
}

// getClientset builds a typed Kubernetes clientset from restConfig, wrapping
// any construction failure in a descriptive error.
func getClientset(restConfig *rest.Config) (*kubernetes.Clientset, error) {
	cs, buildErr := kubernetes.NewForConfig(restConfig)
	if buildErr == nil {
		return cs, nil
	}
	return nil, fmt.Errorf("failed to create kubernetes clientset: %w", buildErr)
}


================================================
FILE: test/cases/nvidia-training/main_test.go
================================================
//go:build e2e

package training

import (
	"context"
	_ "embed"
	"fmt"
	"github.com/aws/aws-k8s-tester/test/common"
	"log"
	"os"
	"os/signal"
	"slices"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/manifests"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// TestMain configures and runs the NVIDIA training e2e suite.
//
// It parses flags into testConfig, builds the test environment with a context
// that is cancelled on os.Interrupt, and registers:
//   - setup steps that apply the NVIDIA device plugin, MPI operator and EFA
//     device plugin (plus the DCGM exporter and CloudWatch Agent when metric
//     dimensions are configured), wait for the MPI operator Deployment and the
//     DaemonSets, and then inspect the nodes via checkNodeTypes;
//   - a finish step that deletes the applied manifests in reverse order.
//
// If the CloudWatch Agent manifest cannot be rendered, a warning is logged and
// the suite runs without the metrics stack rather than applying an unusable
// manifest and then waiting for DaemonSets that were never created.
func TestMain(m *testing.M) {
	_, err := common.ParseFlags(&testConfig)
	if err != nil {
		log.Fatalf("failed to parse flags: %v", err)
	}
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}

	// cancel is called explicitly before os.Exit below, because os.Exit does
	// not run deferred functions.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = env.NewWithConfig(cfg).WithContext(ctx)

	manifestsList := [][]byte{
		manifests.NvidiaDevicePluginManifest,
		manifests.MpiOperatorManifest,
		manifests.EfaDevicePluginManifest,
	}

	// metricsEnabled is true only when the metrics manifests were actually
	// added to manifestsList.
	metricsEnabled := false
	if len(testConfig.MetricDimensions) > 0 {
		// Render CloudWatch Agent manifest with dynamic dimensions
		renderedCloudWatchAgentManifest, err := manifests.RenderCloudWatchAgentManifest(testConfig.MetricDimensions)
		if err != nil {
			log.Printf("Warning: failed to render CloudWatch Agent manifest: %v", err)
			log.Println("Warning: continuing without DCGM Exporter and CloudWatch Agent.")
		} else {
			manifestsList = append(manifestsList, manifests.DCGMExporterManifest, renderedCloudWatchAgentManifest)
			metricsEnabled = true
		}
	}

	testenv.Setup(
		// Apply all manifests
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			log.Println("Applying manifests.")

			err := fwext.ApplyManifests(config.Client().RESTConfig(), manifestsList...)
			if err != nil {
				return ctx, fmt.Errorf("failed to apply manifests: %w", err)
			}
			log.Println("Successfully applied manifests.")
			return ctx, nil
		},

		// Wait for MPI Operator deployment
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			log.Println("Waiting for MPI Operator deployment to be available.")
			deployment := appsv1.Deployment{
				ObjectMeta: metav1.ObjectMeta{Name: "mpi-operator", Namespace: "mpi-operator"},
			}
			err := wait.For(
				conditions.New(config.Client().Resources()).DeploymentConditionMatch(
					&deployment, appsv1.DeploymentAvailable, v1.ConditionTrue,
				),
				wait.WithTimeout(time.Minute*5),
			)
			if err != nil {
				return ctx, fmt.Errorf("MPI Operator deployment is not available: %w", err)
			}
			log.Println("MPI Operator deployment is available.")
			return ctx, nil
		},

		// Wait for required DaemonSets
		common.DeployDaemonSet("nvidia-device-plugin-daemonset", "kube-system"),
		common.DeployDaemonSet("aws-efa-k8s-device-plugin-daemonset", "kube-system"),

		// Wait for CloudWatch Agent + DCGM only if their manifests were applied
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			if metricsEnabled {
				if ctx, err := common.DeployDaemonSet("dcgm-exporter", "kube-system")(ctx, config); err != nil {
					return ctx, err
				}
				if ctx, err := common.DeployDaemonSet("cwagent", "amazon-cloudwatch")(ctx, config); err != nil {
					return ctx, err
				}
			}
			return ctx, nil
		},

		checkNodeTypes, // Dynamically check node types and capacities after device plugins are ready
	)

	testenv.Finish(
		func(ctx context.Context, config *envconf.Config) (context.Context, error) {
			log.Println("Deleting NVIDIA device plugin, MPI operator, EFA device plugin DCGM Exporter and CloudWatch Agent manifests.")
			// Delete in the opposite order of creation.
			slices.Reverse(manifestsList)
			err := fwext.DeleteManifests(config.Client().RESTConfig(), manifestsList...)
			if err != nil {
				return ctx, fmt.Errorf("failed to delete manifests: %w", err)
			}
			log.Println("Successfully deleted NVIDIA device plugin, MPI operator, EFA device plugin, DCGM Exporter and CloudWatch Agent manifests.")
			return ctx, nil
		},
	)

	log.Println("Starting tests...")
	exitCode := testenv.Run(m)
	log.Printf("Tests finished with exit code %d", exitCode)
	cancel()
	os.Exit(exitCode)
}

// checkNodeTypes inspects the cluster's nodes and fills in the package-level
// nodeCount, gpuPerNode and efaPerNode values used by the training test.
//
// All nodes must share the same instance-type label. When testConfig.NodeType
// is set, only nodes of that type are counted and at least one must exist;
// otherwise NodeType is taken from the first node and every node is counted.
// GPU and EFA counts are read from node capacity and keep their previous
// values when a node does not report the resource.
func checkNodeTypes(ctx context.Context, config *envconf.Config) (context.Context, error) {
	const instanceTypeLabel = "node.kubernetes.io/instance-type"

	clientset, err := kubernetes.NewForConfig(config.Client().RESTConfig())
	if err != nil {
		return ctx, fmt.Errorf("failed to create Kubernetes client: %w", err)
	}

	nodeList, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return ctx, fmt.Errorf("failed to list nodes: %w", err)
	}

	items := nodeList.Items
	if len(items) == 0 {
		return ctx, fmt.Errorf("no nodes found in the cluster")
	}

	// Every node has to carry the same instance type as the first one.
	firstType := items[0].Labels[instanceTypeLabel]
	for i := range items[1:] {
		if items[i+1].Labels[instanceTypeLabel] != firstType {
			return ctx, fmt.Errorf("node types are not the same, all node types must be the same in the cluster")
		}
	}

	// recordCapacity copies the GPU/EFA capacity of node into the package
	// globals, leaving a global untouched when the resource is not reported.
	recordCapacity := func(node *v1.Node) {
		if gpus, found := node.Status.Capacity["nvidia.com/gpu"]; found {
			gpuPerNode = int(gpus.Value())
		}
		if efas, found := node.Status.Capacity["vpc.amazonaws.com/efa"]; found {
			efaPerNode = int(efas.Value())
		}
	}

	if wanted := testConfig.NodeType; wanted == "" {
		testConfig.NodeType = firstType
		nodeCount = len(items)
		recordCapacity(&items[0])
	} else {
		matched := 0
		for i := range items {
			if items[i].Labels[instanceTypeLabel] != wanted {
				continue
			}
			matched++
			recordCapacity(&items[i])
		}
		if matched == 0 {
			return ctx, fmt.Errorf("no nodes match the specified nodeType: %s", testConfig.NodeType)
		}
		nodeCount = matched
	}

	log.Printf("[INFO] Node Type: %s", testConfig.NodeType)
	log.Printf("[INFO] Node Count: %d", nodeCount)
	log.Printf("[INFO] GPU Per Node: %d", gpuPerNode)
	log.Printf("[INFO] EFA Per Node: %d", efaPerNode)

	return ctx, nil
}


================================================
FILE: test/cases/nvidia-training/manifests/bert-training.yaml
================================================
# MPIJob template for the multi-node BERT training test.
#
# This file is a Go template: the SlotsPerWorker, BertTrainingImage, NP,
# WorkerReplicas, GPUPerNode and EFARequested placeholders are filled in by
# TestBertTraining before the manifest is applied. The test then waits for the
# Job named bert-training-launcher in the default namespace.
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: bert-training
spec:
  slotsPerWorker: {{.SlotsPerWorker}}
  runPolicy:
    backoffLimit: 20
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    # Single launcher pod that starts mpirun.
    Launcher:
      replicas: 1
      template:
        spec:
          restartPolicy: OnFailure
          containers:
          - image: {{.BertTrainingImage}}
            imagePullPolicy: Always
            name: bert-training
            env:
            - name: NCCL_DEBUG
              value: "TRACE"
            - name: MASTER_ADDR
              value: "bert-training"
            - name: MASTER_PORT
              value: "12355"
            command:
            - /opt/amazon/openmpi/bin/mpirun
            - --allow-run-as-root
            - --tag-output
            - -np
            - "{{.NP}}"           # Number of processes derived from node/gpu calculations
            - -bind-to
            - none
            - -map-by
            - slot
            # Each -x forwards the named environment variable to the launched processes.
            - -x
            - PATH
            - -x
            - LD_LIBRARY_PATH
            - -x
            - NCCL_DEBUG
            - -x
            - MASTER_ADDR
            - -x
            - MASTER_PORT
            - --mca 
            - pml
            - "^cm"
            - --mca
            - routed
            - direct
            - --oversubscribe
            - --mca
            - orte_base_help_aggregate 
            - "0"
            - python
            - train.py
    Worker:
      replicas: {{.WorkerReplicas}}
      template:
        spec:
          volumes:
          # In-memory emptyDir, mounted at /dev/shm in the worker container.
          - name: dshm
            emptyDir:
              medium: Memory
          containers:
          - image: {{.BertTrainingImage}}
            imagePullPolicy: Always
            name: bert-training-worker
            volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            resources:
              # Requests and limits use the same GPU and EFA counts.
              requests:
                nvidia.com/gpu: {{.GPUPerNode}}
                vpc.amazonaws.com/efa: {{.EFARequested}}
              limits:
                nvidia.com/gpu: {{.GPUPerNode}}
                vpc.amazonaws.com/efa: {{.EFARequested}}


================================================
FILE: test/cases/nvidia-training/vars.go
================================================
//go:build e2e

package training

import (
	"github.com/aws/aws-k8s-tester/test/common"
	"sigs.k8s.io/e2e-framework/pkg/env"
)

// Config holds the command-line configuration of the NVIDIA training suite.
// It is populated by common.ParseFlags in TestMain. The embedded
// common.MetricOps supplies MetricDimensions, which TestMain uses to decide
// whether to deploy the DCGM exporter and CloudWatch Agent. When NodeType is
// left empty, checkNodeTypes fills it in from the first node's instance-type
// label.
type Config struct {
	common.MetricOps
	BertTrainingImage string `flag:"bertTrainingImage" desc:"Docker image used for BERT training workload"`
	EfaEnabled        bool   `flag:"efaEnabled" desc:"Enable Elastic Fabric Adapter (EFA)"`
	NodeType          string `flag:"nodeType" desc:"Instance type for cluster nodes"`
}

// Shared global variables
var (
	// testenv is the e2e-framework environment created in TestMain.
	testenv env.Environment
	// testConfig holds the parsed command-line flags.
	testConfig Config

	// nodeCount, gpuPerNode and efaPerNode are set by checkNodeTypes during
	// environment setup and read by TestBertTraining to size the MPIJob.
	nodeCount  int
	gpuPerNode int
	efaPerNode int
)


================================================
FILE: test/cases/quick/io_uring_test.go
================================================
//go:build e2e

package quick

import (
	"context"
	"log"
	"testing"
	"time"

	"github.com/aws/aws-k8s-tester/internal/e2e"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/e2e-framework/klient/k8s"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// TestNpmInstallWithCPULimits runs a pod that installs npm/nodejs and then
// executes `npm install webpack`, and fails if the pod does not reach the
// Succeeded phase within 10 minutes (reported as a possible io_uring hang).
//
// Setup only lists and logs the cluster nodes, Assess creates the pod and
// waits for it, and Teardown deletes the pod.
func TestNpmInstallWithCPULimits(t *testing.T) {
	// Shared by the Assess and Teardown steps.
	const (
		podName     = "npm-install-test"
		podNS       = "default"
		podSelector = "app=npm-install-test"
	)

	feat := features.New("npm-install").
		WithLabel("suite", "quick").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			log.Println("[Setup] Verifying cluster nodes...")
			var nodeList corev1.NodeList
			if err := cfg.Client().Resources().List(ctx, &nodeList); err != nil {
				t.Fatalf("Failed to list nodes: %v", err)
			}

			// Log node information
			for _, node := range nodeList.Items {
				arch := node.Labels["kubernetes.io/arch"]
				kernelVersion := node.Status.NodeInfo.KernelVersion
				t.Logf("Node: %s, Architecture: %s, Kernel: %s", node.Name, arch, kernelVersion)
			}
			return ctx
		}).
		Assess("Pod can successfully run npm install", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: podNS,
					Labels: map[string]string{
						"app": "npm-install-test",
					},
				},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{
						{
							Name:    "test-container",
							Image:   "public.ecr.aws/ubuntu/ubuntu:noble",
							Command: []string{"/bin/sh", "-c"},
							Args: []string{`
								set -x
								echo "[Test] Starting npm installation test..."
								mkdir asd && 
								cd asd && 
								apt-get update && 
								apt-get install -y npm nodejs && 
								echo "[Test] Starting npm install webpack..."
								npm install webpack --loglevel verbose || exit 1
								echo "[Test] npm install completed successfully"
							`},
						},
					},
					// Never restart: a failed install must not be retried.
					RestartPolicy: corev1.RestartPolicyNever,
				},
			}

			if err := cfg.Client().Resources().Create(ctx, pod); err != nil {
				t.Fatalf("[Assess] Failed to create pod: %v", err)
			}

			log.Printf("[Assess] Waiting up to 10 minutes for pod %s to complete...", podName)
			err := wait.For(
				e2e.NewConditionExtension(cfg.Client().Resources()).ResourceMatch(pod, func(object k8s.Object) bool {
					pod := object.(*corev1.Pod)
					return pod.Status.Phase == corev1.PodSucceeded
				}),
				// Bind the wait to the test context so that cancelling it ends
				// the wait instead of blocking for the whole timeout.
				wait.WithContext(ctx),
				wait.WithTimeout(10*time.Minute),
			)
			if err != nil {
				t.Logf("[Assess] Pod did not complete successfully: %v", err)
				e2e.PrintDaemonSetPodLogs(t, ctx, cfg.Client().RESTConfig(), podNS, podSelector)
				t.Fatal("Pod did not complete within 10 minutes - possible io_uring hang detected")
			}

			log.Printf("[Assess] Pod %s completed successfully", podName)
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Logf("[Teardown] Cleaning up pod %s/%s...", podNS, podName)
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: podNS,
				},
			}
			// Best effort: a failed delete is logged but does not fail the test.
			if err := cfg.Client().Resources().Delete(ctx, pod); err != nil {
				t.Logf("[Teardown] Failed to delete pod: %v", err)
			}
			return ctx
		}).
		Feature()

	testenv.Test(t, feat)
}


================================================
FILE: test/cases/quick/limit_test.go
================================================
//go:build e2e

package quick

import (
	"bytes"
	"context"
	_ "embed"
	"io"
	"strings"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/k8s"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

var (
	// ulimitManifest is the embedded pod manifest that TestUserLimits applies
	// in Setup and deletes in Teardown.
	//go:embed manifests/ulimit.yaml
	ulimitManifest []byte

	// expectedResourceLimit maps the flag extracted from each line of the pod's
	// output (see getMarker) to the value that line is expected to report.
	// NOTE(review): some values (e.g. "-i": "30446") look environment-specific —
	// confirm they hold for every node type the suite runs on.
	expectedResourceLimit = map[string]string{
		"-R": "unlimited",
		"-c": "unlimited",
		"-d": "unlimited",
		"-e": "0",
		"-f": "unlimited",
		"-i": "30446",
		"-l": "unlimited",
		"-m": "unlimited",
		"-n": "1048576",
		"-p": "8",
		"-q": "819200",
		"-r": "0",
		"-s": "10240",
		"-t": "unlimited",
		"-u": "unlimited",
		"-v": "unlimited",
		"-x": "unlimited",
	}
)

// TestUserLimits applies the embedded ulimit pod manifest, waits up to five
// minutes for the pod's container to finish, then reads the logs of the
// "al2023" container and compares every reported limit with
// expectedResourceLimit. Teardown deletes the manifest again.
func TestUserLimits(t *testing.T) {
	f1 := features.New("ulimit pod").
		WithLabel("type", "ulimit").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			err := fwext.ApplyManifests(cfg.Client().RESTConfig(), ulimitManifest)
			if err != nil {
				t.Fatalf("failed to apply manifests: %v", err)
			}
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "ulimit", Namespace: "default"},
			}
			err = wait.For(conditions.New(cfg.Client().Resources()).ResourceMatch(pod, containerTerminated),
				// Bind the wait to the test context so that cancelling it ends
				// the wait instead of blocking for the whole timeout.
				wait.WithContext(ctx),
				wait.WithTimeout(time.Minute*5))
			if err != nil {
				t.Fatalf("encounter error when waiting for container finished running commands: %v", err)
			}
			return ctx
		}).
		Assess("Use default resources limit", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			client, err := kubernetes.NewForConfig(cfg.Client().RESTConfig())
			if err != nil {
				t.Fatal(err)
			}
			// Request at most the last 10000 log lines of the container.
			tailLine := int64(10000)
			podLogOptions := corev1.PodLogOptions{
				Container: "al2023",
				TailLines: &tailLine,
			}
			req := client.CoreV1().Pods("default").GetLogs("ulimit", &podLogOptions)
			logs, err := req.Stream(ctx)
			if err != nil {
				t.Fatalf("error in opening stream: %v", err)
			}
			defer logs.Close()
			compareResourceLimitsWithExpectedValues(t, logs)
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			err := fwext.DeleteManifests(cfg.Client().RESTConfig(), ulimitManifest)
			if err != nil {
				t.Fatalf("failed to delete manifests: %v", err)
			}
			return ctx
		}).Feature()

	// test feature
	testenv.Test(t, f1)
}

// compareResourceLimitsWithExpectedValues reads the whole log stream and checks
// every non-blank line against expectedResourceLimit. For each line the
// second-to-last whitespace-separated field is turned into a flag by getMarker
// and the last field is taken as the reported value. A mismatch, or a line with
// fewer than two fields, is reported with t.Errorf; a read failure is fatal.
//
// Blank lines are skipped explicitly, so a log without a trailing newline no
// longer loses its final entry and an empty line no longer causes an
// out-of-range index.
func compareResourceLimitsWithExpectedValues(t *testing.T, logs io.ReadCloser) {
	buf := new(bytes.Buffer)
	if _, err := io.Copy(buf, logs); err != nil {
		t.Fatalf("error in copy information from podLogs to buf: %v", err)
	}

	for _, line := range strings.Split(buf.String(), "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		// Fields tolerates repeated or trailing whitespace (including "\r").
		info := strings.Fields(line)
		if len(info) < 2 {
			t.Errorf("unexpected ulimit output line %q: want at least a flag and a value", line)
			continue
		}
		marker := getMarker(info[len(info)-2])
		value := info[len(info)-1]
		if expectedResourceLimit[marker] != value {
			t.Errorf("resource limit doesn't match with the default value, limit we get %v, but default value is %v", line, expectedResourceLimit[marker])
		} else {
			t.Logf("resrouce limit fetched from ulimit: %v. Equal to the default value %v", line, expectedResourceLimit[marker])
		}
	}
}

// containerTerminated reports whether obj is a pod whose first container has
// terminated with reason "Completed".
//
// It returns false (instead of panicking) when obj is not a *corev1.Pod, when
// no container status has been reported yet, or when the first container has
// no Terminated state, so it is safe to poll while the pod is still starting.
func containerTerminated(obj k8s.Object) bool {
	pod, ok := obj.(*corev1.Pod)
	if !ok || len(pod.Status.ContainerStatuses) == 0 {
		return false
	}
	terminated := pod.Status.ContainerStatuses[0].State.Terminated
	return terminated != nil && terminated.Reason == "Completed"
}

// getMarker extracts the flag from a token such as "(-r)" or "-c)": it drops
// one leading "(" if present and then the final character, whatever it is.
// It returns "" when nothing is left to trim (e.g. for "" or "("), where the
// unguarded slicing would otherwise panic.
func getMarker(str string) string {
	str = strings.TrimPrefix(str, "(")
	if str == "" {
		return ""
	}
	return str[:len(str)-1]
}


================================================
FILE: test/cases/quick/main_test.go
================================================
//go:build e2e

package quick

import (
	"context"
	_ "embed"
	"log"
	"os"
	"os/signal"
	"testing"

	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

var (
	// testenv is the e2e-framework environment shared by the tests of this
	// package; it is assigned in TestMain.
	testenv env.Environment
)

// TestMain builds the test environment from the command-line flags, binds it
// to a context that is cancelled on os.Interrupt, registers a logging setup
// step and runs the suite, exiting with the suite's result code.
func TestMain(m *testing.M) {
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}
	testenv = env.NewWithConfig(cfg)
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = testenv.WithContext(ctx)

	testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Starting quick test suite...")
		return ctx, nil
	})

	// os.Exit does not run deferred functions, so release the signal
	// registration explicitly before exiting.
	code := testenv.Run(m)
	cancel()
	os.Exit(code)
}


================================================
FILE: test/cases/quick/manifests/ulimit.yaml
================================================
# Pod named "ulimit" with a single container "al2023" that runs `ulimit -a`
# in the Amazon Linux 2023 image. restartPolicy is Never, so the container is
# not restarted after the command finishes. No namespace is set here.
apiVersion: v1
kind: Pod
metadata:
  name: ulimit
spec:
  restartPolicy: Never
  containers:
  - name: al2023
    image: public.ecr.aws/amazonlinux/amazonlinux:2023
    command: ["ulimit"]
    args:
      - -a


================================================
FILE: test/cases/quick/node_topology_test.go
================================================
//go:build e2e

package quick

import (
	"context"
	_ "embed"
	"strconv"
	"strings"
	"testing"

	"github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-sdk-go-v2/aws"
	v1 "k8s.io/api/core/v1"
	cloudprovider "k8s.io/cloud-provider-aws/pkg/providers/v1"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// TestNodeTopology verifies that every node for which EC2 reports an instance
// topology carries the matching network-node labels
// (cloudprovider.LabelNetworkNodePrefix + 1-based index) and the matching
// zone-ID label (cloudprovider.LabelZoneID).
//
// The instance ID of a node is taken from the last "/"-separated element of
// its Spec.ProviderID.
func TestNodeTopology(t *testing.T) {
	topology := features.New("node-topology").
		WithLabel("suite", "node-topology").
		Assess("Nodes have correct network topology labels", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			var nodes v1.NodeList
			// Surface API failures directly instead of misreporting them as
			// "no nodes found".
			if err := cfg.Client().Resources().List(ctx, &nodes); err != nil {
				t.Fatalf("could not list nodes: %v", err)
			}

			if len(nodes.Items) == 0 {
				t.Fatal("no nodes found in the cluster")
			}

			nodeMap := make(map[string]v1.Node, len(nodes.Items))
			instanceIDs := make([]string, 0, len(nodes.Items))
			ec2Client := e2e.NewEC2Client()
			for _, node := range nodes.Items {
				providerIDParts := strings.Split(node.Spec.ProviderID, "/")
				instanceID := providerIDParts[len(providerIDParts)-1]
				instanceIDs = append(instanceIDs, instanceID)
				nodeMap[instanceID] = node
			}

			nodeTopologies, err := ec2Client.DescribeInstanceTopology(instanceIDs)
			if err != nil {
				t.Fatalf("could not describe instance topologies: %v", err)
			}

			t.Logf("checking instance topologies for %d node(s) (out of %d node(s) in the cluster)", len(nodeTopologies), len(instanceIDs))

			for _, nodeTopology := range nodeTopologies {
				instanceID := aws.ToString(nodeTopology.InstanceId)
				node, ok := nodeMap[instanceID]
				if !ok {
					// Without this guard a zero-value node would produce a
					// series of confusing label errors with an empty node name.
					t.Errorf("instance %s in topology response does not match any cluster node", instanceID)
					continue
				}
				instanceType := node.Labels["node.kubernetes.io/instance-type"]

				t.Logf("verifying instance topology for node %s (type: %s)", node.Name, instanceType)

				for i, networkNode := range nodeTopology.NetworkNodes {
					// https://github.com/kubernetes/cloud-provider-aws/blob/b47d2cf2a33ae655cd353ec42ea43362b804c397/pkg/providers/v1/well_known_labels.go#L26
					expectedLabel := cloudprovider.LabelNetworkNodePrefix + strconv.Itoa(i+1)
					if actualValue, ok := node.Labels[expectedLabel]; !ok {
						t.Errorf("node %s (type: %s) does not have expected network label %s", node.Name, instanceType, expectedLabel)
					} else if actualValue != networkNode {
						t.Errorf("node %s (type: %s) has incorrect value for label %s: expected %s, got %s", node.Name, instanceType, expectedLabel, networkNode, actualValue)
					}
				}

				// https://github.com/kubernetes/cloud-provider-aws/blob/b47d2cf2a33ae655cd353ec42ea43362b804c397/pkg/providers/v1/well_known_labels.go#L22C2-L22C13
				if aws.ToString(nodeTopology.ZoneId) != node.Labels[cloudprovider.LabelZoneID] {
					t.Errorf("node %s (type: %s) has incorrect value for label %s: expected %s, got %s", node.Name, instanceType, cloudprovider.LabelZoneID, aws.ToString(nodeTopology.ZoneId), node.Labels[cloudprovider.LabelZoneID])
				}
			}

			return ctx
		}).Feature()

	testenv.Test(t, topology)
}


================================================
FILE: test/cases/workload/main_test.go
================================================
//go:build e2e

package workload

import (
	"context"
	"flag"
	"fmt"
	"log"
	"os"
	"os/signal"
	"testing"
	"time"

	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

const (
	// defaultWorkloadTestTimeout is the default value of the
	// -workloadTestTimeout flag registered in TestMain.
	defaultWorkloadTestTimeout = 10 * time.Minute
)

// testenv is the e2e-framework environment assigned in TestMain. The
// workloadTest* pointers hold the values of the identically named
// command-line flags, which TestMain registers.
var (
	testenv               env.Environment
	workloadTestCommand   *string
	workloadTestImage     *string
	workloadTestName      *string
	workloadTestResources *string
	workloadTestTimeout   *time.Duration
)

// TestMain registers the workload-test flags, builds the test environment from
// the command line, binds it to a context that is cancelled on os.Interrupt,
// registers a logging setup step and runs the suite, exiting with the suite's
// result code.
func TestMain(m *testing.M) {
	// Flags must be registered before envconf.NewFromFlags is called.
	workloadTestCommand = flag.String("workloadTestCommand", "", "command for workload test")
	workloadTestImage = flag.String("workloadTestImage", "", "image for workload test")
	workloadTestName = flag.String("workloadTestName", "workload-test", "name for workload test")
	workloadTestResources = flag.String("workloadTestResources", "", "JSON map of resources for workload test (e.g., '{\"nvidia.com/gpu\": \"1\"}')")
	workloadTestTimeout = flag.Duration("workloadTestTimeout", defaultWorkloadTestTimeout, fmt.Sprintf("timeout for workload test (default: %s)", defaultWorkloadTestTimeout))
	cfg, err := envconf.NewFromFlags()
	if err != nil {
		log.Fatalf("failed to initialize test environment: %v", err)
	}
	testenv = env.NewWithConfig(cfg)
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	testenv = testenv.WithContext(ctx)

	testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Println("Starting workload test suite...")
		return ctx, nil
	})

	// os.Exit does not run deferred functions, so release the signal
	// registration explicitly before exiting.
	code := testenv.Run(m)
	cancel()
	os.Exit(code)
}


================================================
FILE: test/cases/workload/workload_test.go
================================================
//go:build e2e

package workload

import (
	"context"
	"encoding/json"
	"strings"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/smithy-go/ptr"
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// createWorkloadJob assembles (without submitting) a batch Job in the default
// namespace that runs a single container built from image.
//
// name is used for the Job, the container and the "app" label on both the Job
// and its pod template. A non-empty command is split on whitespace and replaces
// the image entrypoint. resources become both requests and limits. timeout,
// truncated to whole seconds, is set as the pod's active deadline. Pods are
// never restarted in place and the Job's backoff limit is 4.
func createWorkloadJob(name, image, command string, resources map[string]string, timeout time.Duration) *batchv1.Job {
	// Each object gets its own label map.
	appLabels := func() map[string]string {
		return map[string]string{"app": name}
	}

	// Leave the entrypoint untouched unless a command was supplied.
	var entrypoint []string
	if command != "" {
		entrypoint = strings.Fields(command)
	}

	deadlineSeconds := int64(timeout.Seconds())

	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: corev1.NamespaceDefault,
			Labels:    appLabels(),
		},
		Spec: batchv1.JobSpec{
			BackoffLimit: ptr.Int32(4),
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: appLabels(),
				},
				Spec: corev1.PodSpec{
					RestartPolicy:         corev1.RestartPolicyNever,
					ActiveDeadlineSeconds: ptr.Int64(deadlineSeconds),
					Containers: []corev1.Container{
						{
							Name:            name,
							Image:           image,
							Command:         entrypoint,
							ImagePullPolicy: corev1.PullAlways,
							Resources:       buildResourceRequirements(resources),
						},
					},
				},
			},
		},
	}
}

// buildResourceRequirements converts a name -> quantity-string map into
// ResourceRequirements whose Requests and Limits are the same list. An empty or
// nil map yields the zero value. Quantities are parsed with resource.MustParse,
// which panics on malformed input, so callers must pass validated strings.
func buildResourceRequirements(resources map[string]string) corev1.ResourceRequirements {
	var requirements corev1.ResourceRequirements
	if len(resources) > 0 {
		quantities := corev1.ResourceList{}
		for resourceName, amount := range resources {
			quantities[corev1.ResourceName(resourceName)] = resource.MustParse(amount)
		}
		requirements.Limits = quantities
		requirements.Requests = quantities
	}
	return requirements
}

// parseResources decodes a JSON object of resource name -> quantity string.
// An empty input returns (nil, nil); invalid JSON returns the decode error.
// Entries whose quantity fails to parse, or parses to zero, are removed from
// the result without an error.
func parseResources(resourcesJSON string) (map[string]string, error) {
	if len(resourcesJSON) == 0 {
		return nil, nil
	}

	var parsed map[string]string
	err := json.Unmarshal([]byte(resourcesJSON), &parsed)
	if err != nil {
		return nil, err
	}

	for key, raw := range parsed {
		quantity, parseErr := resource.ParseQuantity(raw)
		if parseErr == nil && !quantity.IsZero() {
			continue
		}
		// Unparseable or zero: drop the entry.
		delete(parsed, key)
	}
	return parsed, nil
}

// TestWorkload runs the image given by the workloadTest* flags as a Job and
// expects it to succeed within the configured timeout.
//
// The feature is labelled suite=workload, plus hardware=neuron and/or
// hardware=gpu when the matching resource is requested. Setup creates the Job,
// Assess waits for success, and Teardown prints the Job logs and deletes the
// Job with background propagation.
func TestWorkload(t *testing.T) {
	name := ptr.ToString(workloadTestName)
	image := ptr.ToString(workloadTestImage)
	command := ptr.ToString(workloadTestCommand)
	timeout := ptr.ToDuration(workloadTestTimeout)

	switch {
	case name == "":
		t.Fatal("workloadTestName must be set to run the test")
	case image == "":
		t.Fatal("workloadTestImage must be set to run the test")
	}

	resources, parseErr := parseResources(ptr.ToString(workloadTestResources))
	if parseErr != nil {
		t.Fatalf("Failed to parse workloadTestResources: %v", parseErr)
	}

	builder := features.New(name).WithLabel("suite", "workload")
	// Checked in this order: neuron first, then gpu.
	hardwareLabels := []struct{ resourceName, label string }{
		{"aws.amazon.com/neuron", "neuron"},
		{"nvidia.com/gpu", "gpu"},
	}
	for _, hw := range hardwareLabels {
		if _, requested := resources[hw.resourceName]; requested {
			builder = builder.WithLabel("hardware", hw.label)
		}
	}

	// jobRef returns a fresh name/namespace-only reference to the Job.
	jobRef := func() *batchv1.Job {
		return &batchv1.Job{
			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: corev1.NamespaceDefault},
		}
	}

	createJob := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		job := createWorkloadJob(name, image, command, resources, timeout)
		if len(resources) == 0 {
			t.Logf("Creating %s job", name)
		} else {
			t.Logf("Creating %s job with resources: %v", name, resources)
		}
		if err := cfg.Client().Resources().Create(ctx, job); err != nil {
			t.Fatal(err)
		}
		t.Logf("%s job created successfully", name)
		return ctx
	}

	awaitSuccess := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		target := jobRef()
		t.Logf("Waiting for %s job to complete", name)
		succeeded := fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(target)
		if err := wait.For(succeeded, wait.WithContext(ctx), wait.WithTimeout(timeout)); err != nil {
			t.Fatal(err)
		}
		return ctx
	}

	cleanup := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		// A log retrieval failure is reported, but whatever was returned is
		// still printed and the Job is still deleted.
		output, logErr := fwext.GetJobLogs(cfg.Client().RESTConfig(), jobRef())
		if logErr != nil {
			t.Error(logErr)
		}
		t.Logf("Test log for %s:", name)
		t.Log(output)

		background := func(do *metav1.DeleteOptions) {
			policy := metav1.DeletePropagationBackground
			do.PropagationPolicy = &policy
		}
		if delErr := cfg.Client().Resources().Delete(ctx, jobRef(), background); delErr != nil {
			t.Error(delErr)
		}
		return ctx
	}

	workload := builder.
		Setup(createJob).
		Assess("Job succeeds", awaitSuccess).
		Teardown(cleanup).
		Feature()

	testenv.Test(t, workload)
}


================================================
FILE: test/common/dra.go
================================================
//go:build e2e

package common

import (
	"context"
	"fmt"
	"log"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/test/manifests"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// DeployDranet renders the dranet manifest template for the given driver
// image, applies the result, and waits up to five minutes for the
// "dranet-aws-dranet" DaemonSet in kube-system to report ready.
//
// On success the rendered manifest is returned so the caller can delete it
// later. On any failure the manifest result is nil and the error wraps the
// failing step.
func DeployDranet(ctx context.Context, config *envconf.Config, rdmaDeviceDraDriverImage string) (renderedManifest []byte, err error) {
	templateData := struct {
		RdmaDeviceDraDriverImage string
	}{rdmaDeviceDraDriverImage}

	renderedManifest, err = fwext.RenderManifests(manifests.DranetManifest, templateData)
	if err != nil {
		return nil, fmt.Errorf("failed to render dranet manifest: %w", err)
	}

	err = fwext.ApplyManifests(config.Client().RESTConfig(), renderedManifest)
	if err != nil {
		return nil, fmt.Errorf("failed to apply dranet manifest: %w", err)
	}

	daemonSet := appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: "dranet-aws-dranet", Namespace: "kube-system"},
	}
	ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&daemonSet)
	err = wait.For(ready, wait.WithTimeout(5*time.Minute), wait.WithContext(ctx))
	if err != nil {
		return nil, fmt.Errorf("dranet daemonset is not ready: %w", err)
	}

	log.Println("dranet daemonset is ready.")
	return renderedManifest, nil
}

// CountNodesByType returns how many nodes carry the label
// node.kubernetes.io/instance-type=<nodeType>. It returns an error when the
// list call fails or when no node matches.
func CountNodesByType(ctx context.Context, clientset kubernetes.Interface, nodeType string) (int, error) {
	selector := "node.kubernetes.io/instance-type=" + nodeType
	nodeList, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		return 0, fmt.Errorf("failed to list nodes: %w", err)
	}

	if matched := len(nodeList.Items); matched > 0 {
		log.Printf("[INFO] Found %d node(s) of type %s", matched, nodeType)
		return matched, nil
	}
	return 0, fmt.Errorf("no nodes of type %q found", nodeType)
}

// DeployMPIOperator applies the embedded MPI operator manifest and blocks
// until the "mpi-operator" Deployment in the "mpi-operator" namespace has the
// Available condition set to true. No explicit timeout is passed to the wait.
func DeployMPIOperator(ctx context.Context, config *envconf.Config) error {
	applyErr := fwext.ApplyManifests(config.Client().RESTConfig(), manifests.MpiOperatorManifest)
	if applyErr != nil {
		return fmt.Errorf("failed to apply mpi-operator manifest: %w", applyErr)
	}

	operator := appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: "mpi-operator", Namespace: "mpi-operator"},
	}
	available := conditions.New(config.Client().Resources()).
		DeploymentConditionMatch(&operator, appsv1.DeploymentAvailable, v1.ConditionTrue)
	if waitErr := wait.For(available, wait.WithContext(ctx)); waitErr != nil {
		return fmt.Errorf("failed to deploy mpi-operator: %w", waitErr)
	}

	log.Println("mpi-operator deployment is available.")
	return nil
}


================================================
FILE: test/common/dra_features.go
================================================
//go:build e2e

package common

import (
	"context"
	"fmt"
	"io/fs"
	"path/filepath"
	"strings"
	"testing"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	"github.com/aws/aws-k8s-tester/internal/e2e/mpijobs"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
	"sigs.k8s.io/e2e-framework/pkg/features"
)

// Timeouts shared by the positive and negative DRA feature builders.
const (
	// NegativeTestTimeout bounds how long a negative test waits for the
	// expected number of worker pods to show up as Pending.
	NegativeTestTimeout = time.Minute

	// NegativeTestStabilizationTimeout is the pause between first seeing the
	// worker pods Pending and re-checking that they are still Pending.
	NegativeTestStabilizationTimeout = 2 * time.Minute

	// PositiveTestTimeout bounds how long a positive test waits for its
	// MPIJob to succeed.
	PositiveTestTimeout = 20 * time.Minute
)

// ComputeAndRenderFunc is a callback that computes MPIJob parameters and renders
// the MPIJob YAML for a given test case. Each package provides its own implementation
// that calls its package-specific ComputeMPIJobParams and RenderMPIJobYAML functions.
//
// tc is the parsed test case and rctIndex maps ResourceClaimTemplate names to
// their parsed specs. DiscoverAndBuildFeatures invokes the callback once per
// test case file and aborts discovery when it returns an error.
type ComputeAndRenderFunc func(tc *TestCaseSpec, rctIndex map[string]*ResourceClaimTemplateSpec) (renderedYAML []byte, err error)

// BuildPositiveFeature returns a feature labelled suite=<suiteName> and
// type=positive that expects the MPIJob described by manifest to succeed.
//
// Setup applies the manifest. Assess waits up to PositiveTestTimeout for the
// MPIJob named mpiJobName in the default namespace to succeed and then prints
// the job logs, even if the wait failed. Teardown deletes the manifest.
func BuildPositiveFeature(name, suiteName, mpiJobName string, manifest []byte) features.Feature {
	applyManifest := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		t.Logf("Applying MPIJob manifest for %s", name)
		applyErr := fwext.ApplyManifests(cfg.Client().RESTConfig(), manifest)
		if applyErr != nil {
			t.Fatalf("applying MPIJob manifest: %v", applyErr)
		}
		return ctx
	}

	awaitSuccess := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		job := mpijobs.NewUnstructured(mpiJobName, "default")
		t.Log("Waiting for MPIJob to complete")
		succeeded := conditions.New(cfg.Client().Resources()).ResourceMatch(job, mpijobs.MPIJobSucceeded)
		// Errorf rather than Fatalf: the logs below are most useful on failure.
		if waitErr := wait.For(succeeded, wait.WithContext(ctx), wait.WithTimeout(PositiveTestTimeout)); waitErr != nil {
			t.Errorf("MPIJob did not succeed: %v", waitErr)
		}

		output, logErr := fwext.GetJobLogs(cfg.Client().RESTConfig(), job)
		if logErr != nil {
			t.Errorf("failed to get job logs: %v", logErr)
			return ctx
		}
		t.Logf("Test log for %s:", name)
		t.Log(output)
		return ctx
	}

	removeManifest := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
		deleteErr := fwext.DeleteManifests(cfg.Client().RESTConfig(), manifest)
		if deleteErr != nil {
			t.Errorf("deleting MPIJob manifest: %v", deleteErr)
		}
		return ctx
	}

	return features.New(name).
		WithLabel("suite", suiteName).
		WithLabel("type", "positive").
		Setup(applyManifest).
		Assess("MPIJob succeeds", awaitSuccess).
		Teardown(removeManifest).
		Feature()
}

// BuildNegativeFeature constructs an e2e-framework Feature for a negative DRA
// test case, labelled suite=<suiteName> and type=negative.
//
// Setup applies the manifest. Assess first waits up to NegativeTestTimeout for
// at least expectedPendingCount worker pods of the MPIJob mpiJobName (in the
// default namespace) to be Pending, then waits
// NegativeTestStabilizationTimeout and re-checks that at least that many are
// still Pending. Teardown deletes the manifest.
//
// The stabilization wait aborts early (failing the test) when ctx is
// cancelled, instead of sleeping unconditionally.
func BuildNegativeFeature(name, suiteName, mpiJobName string, manifest []byte, expectedPendingCount int, clientset kubernetes.Interface) features.Feature {
	return features.New(name).
		WithLabel("suite", suiteName).
		WithLabel("type", "negative").
		Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Logf("Applying MPIJob manifest for negative test %s", name)
			if err := fwext.ApplyManifests(cfg.Client().RESTConfig(), manifest); err != nil {
				t.Fatalf("applying MPIJob manifest: %v", err)
			}
			return ctx
		}).
		Assess("Worker pods remain Pending", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			t.Log("Waiting for worker pods to be Pending...")
			selector := fmt.Sprintf("training.kubeflow.org/job-name=%s,training.kubeflow.org/job-role=worker", mpiJobName)
			listOpts := metav1.ListOptions{
				LabelSelector: selector,
				FieldSelector: "status.phase=Pending",
			}
			err := wait.For(func(ctx context.Context) (bool, error) {
				pods, err := clientset.CoreV1().Pods("default").List(ctx, listOpts)
				if err != nil {
					// List errors are treated as "not yet" so the poll retries
					// until the timeout.
					return false, nil
				}
				return len(pods.Items) >= expectedPendingCount, nil
			}, wait.WithContext(ctx), wait.WithTimeout(NegativeTestTimeout))
			if err != nil {
				t.Fatalf("expected %d worker pods in Pending state: %v", expectedPendingCount, err)
			}
			t.Logf("Found %d Pending worker pods, waiting %v to confirm they remain unschedulable...", expectedPendingCount, NegativeTestStabilizationTimeout)

			// Wait out the stabilization period, but stop as soon as the test
			// context is cancelled.
			select {
			case <-ctx.Done():
				t.Fatalf("context cancelled while waiting for stabilization: %v", ctx.Err())
			case <-time.After(NegativeTestStabilizationTimeout):
			}

			pods, err := clientset.CoreV1().Pods("default").List(ctx, listOpts)
			if err != nil {
				t.Fatalf("re-checking Pending pods: %v", err)
			}
			if len(pods.Items) < expectedPendingCount {
				t.Fatalf("expected %d Pending worker pods after stabilization, but found %d", expectedPendingCount, len(pods.Items))
			}
			t.Logf("All %d worker pods are still Pending after stabilization (scheduling failure confirmed)", expectedPendingCount)
			return ctx
		}).
		Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
			if err := fwext.DeleteManifests(cfg.Client().RESTConfig(), manifest); err != nil {
				t.Errorf("deleting MPIJob manifest: %v", err)
			}
			return ctx
		}).
		Feature()
}

// DiscoverAndBuildFeatures turns every YAML test case file directly inside
// testCaseDir (read from testCasesFS) into a feature:
//
//  1. the file is parsed with ParseTestCaseSpec;
//  2. computeAndRender produces the MPIJob YAML for it;
//  3. a negative feature is built when ExpectFailure is set (with nodeCount as
//     the expected number of Pending worker pods), otherwise a positive one.
//
// The feature name is the file name without its extension. Subdirectories and
// non-YAML files are ignored. The first read, parse or render error aborts
// discovery and is returned.
func DiscoverAndBuildFeatures(
	testCasesFS fs.FS,
	testCaseDir string,
	rctIndex map[string]*ResourceClaimTemplateSpec,
	suiteName string,
	mpiJobName string,
	nodeCount int,
	computeAndRender ComputeAndRenderFunc,
	clientset kubernetes.Interface,
) ([]features.Feature, error) {
	dirEntries, err := fs.ReadDir(testCasesFS, testCaseDir)
	if err != nil {
		return nil, fmt.Errorf("reading test case directory %s: %w", testCaseDir, err)
	}

	var built []features.Feature
	for _, dirEntry := range dirEntries {
		fileName := dirEntry.Name()
		if dirEntry.IsDir() || !IsYAMLFile(fileName) {
			continue
		}

		caseName := strings.TrimSuffix(fileName, filepath.Ext(fileName))
		casePath := filepath.Join(testCaseDir, fileName)

		raw, err := fs.ReadFile(testCasesFS, casePath)
		if err != nil {
			return nil, fmt.Errorf("reading test case %s: %w", casePath, err)
		}

		spec, err := ParseTestCaseSpec(raw)
		if err != nil {
			return nil, fmt.Errorf("parsing test case %s: %w", casePath, err)
		}

		rendered, err := computeAndRender(spec, rctIndex)
		if err != nil {
			return nil, fmt.Errorf("computing/rendering MPIJob for %s: %w", caseName, err)
		}

		var feature features.Feature
		switch {
		case spec.ExpectFailure:
			feature = BuildNegativeFeature(caseName, suiteName, mpiJobName, rendered, nodeCount, clientset)
		default:
			feature = BuildPositiveFeature(caseName, suiteName, mpiJobName, rendered)
		}
		built = append(built, feature)
	}
	return built, nil
}


================================================
FILE: test/common/dra_types.go
================================================
package common

import (
	"fmt"
	"io/fs"
	"path/filepath"
	"strings"

	yaml "gopkg.in/yaml.v2"
)

// ---------------------------------------------------------------------------
// Test case spec — what the user authors per test
// ---------------------------------------------------------------------------

// TestCaseClaimRef is a single entry in the resourceClaims list of a test case
// YAML file. It pairs a claim name (YAML key "name") with the name of the
// ResourceClaimTemplate it refers to (YAML key "resourceClaimTemplateName").
type TestCaseClaimRef struct {
	Name                      string `yaml:"name"`
	ResourceClaimTemplateName string `yaml:"resourceClaimTemplateName"`
}

// TestCaseSpec is the structure of a test case YAML file.
// Each file defines the resourceClaims that a single MPIJob test should use.
// When ExpectFailure is true, the test runner treats the case as a negative test —
// it expects the MPIJob's worker pods to remain Pending (unschedulable).
//
// ParseTestCaseSpec rejects a spec whose ResourceClaims list is empty.
type TestCaseSpec struct {
	ExpectFailure  bool               `yaml:"expectFailure"`
	ResourceClaims []TestCaseClaimRef `yaml:"resourceClaims"`
}

// ---------------------------------------------------------------------------
// ResourceClaimTemplate parsing
// ---------------------------------------------------------------------------

// ResourceClaimTemplateSpec mirrors the relevant parts of a ResourceClaimTemplate YAML.
// Only metadata.name and the list at spec.spec.devices.requests are decoded;
// every other key of the document has no corresponding field here.
// LoadRCTIndex keys its result by Metadata.Name.
type ResourceClaimTemplateSpec struct {
	Metadata struct {
		Name string `yaml:"name"`
	} `yaml:"metadata"`
	Spec struct {
		Spec struct {
			Devices struct {
				// Requests holds one entry per device request in the template.
				Requests []struct {
					Name            string `yaml:"name"`
					DeviceClassName string `yaml:"deviceClassName"`
					AllocationMode  string `yaml:"allocationMode"`
					Count           int    `yaml:"count"`
				} `yaml:"requests"`
			} `yaml:"devices"`
		} `yaml:"spec"`
	} `yaml:"spec"`
}

// ---------------------------------------------------------------------------
// MPIJob rendering helpers
// ---------------------------------------------------------------------------

// ResourceClaimRef holds the name and template name for a single resource claim
// in the rendered MPIJob. Unlike TestCaseClaimRef it carries no YAML tags.
// NOTE(review): presumably filled by the package-specific render code from a
// TestCaseClaimRef — confirm against the callers.
type ResourceClaimRef struct {
	Name         string
	TemplateName string
}

// ---------------------------------------------------------------------------
// Parsing helpers
// ---------------------------------------------------------------------------

// ParseTestCaseSpec decodes a test case YAML document into a TestCaseSpec.
// It fails when the YAML cannot be decoded or when the decoded spec lists no
// resourceClaims.
func ParseTestCaseSpec(data []byte) (*TestCaseSpec, error) {
	spec := new(TestCaseSpec)
	if decodeErr := yaml.Unmarshal(data, spec); decodeErr != nil {
		return nil, fmt.Errorf("parsing test case YAML: %w", decodeErr)
	}
	if len(spec.ResourceClaims) < 1 {
		return nil, fmt.Errorf("test case has no resourceClaims")
	}
	return spec, nil
}

// IsYAMLFile reports whether name ends in the extension ".yaml" or ".yml".
// The comparison is case-sensitive.
func IsYAMLFile(name string) bool {
	switch filepath.Ext(name) {
	case ".yaml", ".yml":
		return true
	default:
		return false
	}
}

// LoadRCTIndex parses every YAML file directly inside dir (read from fsys) as
// a ResourceClaimTemplateSpec and returns the specs keyed by metadata.name.
// Subdirectories and non-YAML files are skipped. If two files share a name the
// one read later replaces the earlier one. The first read or decode error
// aborts the scan.
func LoadRCTIndex(fsys fs.FS, dir string) (map[string]*ResourceClaimTemplateSpec, error) {
	dirEntries, err := fs.ReadDir(fsys, dir)
	if err != nil {
		return nil, fmt.Errorf("reading RCT directory %s: %w", dir, err)
	}

	byName := map[string]*ResourceClaimTemplateSpec{}
	for _, dirEntry := range dirEntries {
		fileName := dirEntry.Name()
		if dirEntry.IsDir() || !IsYAMLFile(fileName) {
			continue
		}

		raw, readErr := fs.ReadFile(fsys, filepath.Join(dir, fileName))
		if readErr != nil {
			return nil, fmt.Errorf("reading %s: %w", fileName, readErr)
		}

		// A fresh spec per file, so every map value points at its own object.
		spec := new(ResourceClaimTemplateSpec)
		if decodeErr := yaml.Unmarshal(raw, spec); decodeErr != nil {
			return nil, fmt.Errorf("parsing %s: %w", fileName, decodeErr)
		}
		byName[spec.Metadata.Name] = spec
	}
	return byName, nil
}

// ExtractFamily returns the part of nodeType that precedes its first ".",
// i.e. the instance family: "trn1.32xlarge" yields "trn1".
//
// Input without a "." — or with a "." in the very first position — is
// returned unchanged.
func ExtractFamily(nodeType string) string {
	dot := strings.IndexByte(nodeType, '.')
	if dot <= 0 {
		// No separator, or nothing in front of it: there is no prefix to cut.
		return nodeType
	}
	return nodeType[:dot]
}

// ---------------------------------------------------------------------------
// Runtime helpers
// ---------------------------------------------------------------------------

// SplitImageRepoTag splits a container image reference into repository and
// tag on the last ":".
//
// If there is no ":", or the last ":" is followed by a "/" (meaning it is the
// port separator of a registry host such as "localhost:5000/app" rather than a
// tag separator), the entire string is treated as the repository and the tag
// defaults to "latest".
//
// Digest references ("name@sha256:...") are not given special treatment.
func SplitImageRepoTag(image string) (repo, tag string) {
	idx := strings.LastIndex(image, ":")
	// A tag can never contain "/", so a "/" after the last ":" means that
	// colon belongs to the registry host:port prefix.
	if idx < 0 || strings.Contains(image[idx+1:], "/") {
		return image, "latest"
	}
	return image[:idx], image[idx+1:]
}

// ValidateRequiredFlags validates that all flag values in the provided map are
// non-empty. It returns nil if every flag has a value.
//
// When several flags are empty, the error names the one whose flag name sorts
// first lexicographically. Map iteration order is random in Go, so picking the
// smallest name keeps the error message stable between runs.
func ValidateRequiredFlags(flags map[string]string) error {
	var missing string
	found := false
	for name, value := range flags {
		if value != "" {
			continue
		}
		if !found || name < missing {
			missing, found = name, true
		}
	}
	if found {
		return fmt.Errorf("-%s is required and must be non-empty", missing)
	}
	return nil
}

// LoadRCTManifests returns the raw contents of every YAML file found directly
// inside rctSubDir of fsys, one byte slice per file, in the order fs.ReadDir
// lists them. The result is intended to be handed to fwext.ApplyManifests.
//
// Sub-directories and non-YAML files are skipped; if nothing matches, the
// returned slice is nil. The first read failure aborts and is returned wrapped.
//
// NOTE(review): io/fs paths are slash-separated, while filepath.Join uses the
// OS separator; path.Join would be the portable choice here.
func LoadRCTManifests(fsys fs.FS, rctSubDir string) ([][]byte, error) {
	listing, err := fs.ReadDir(fsys, rctSubDir)
	if err != nil {
		return nil, fmt.Errorf("reading RCT directory %s: %w", rctSubDir, err)
	}

	var docs [][]byte
	for _, item := range listing {
		if !item.IsDir() && IsYAMLFile(item.Name()) {
			raw, readErr := fs.ReadFile(fsys, filepath.Join(rctSubDir, item.Name()))
			if readErr != nil {
				return nil, fmt.Errorf("reading %s: %w", item.Name(), readErr)
			}
			docs = append(docs, raw)
		}
	}
	return docs, nil
}


================================================
FILE: test/common/flags.go
================================================
//go:build e2e

package common

import (
	"flag"
	"fmt"
	"github.com/urfave/sflags/gen/gpflag"
	"github.com/spf13/pflag"
	"reflect"
)

// MetricOps carries the CloudWatch metric dimension flag value.
type MetricOps struct {
	// gpflag supports map[string]string but with a different non-standard parsing format (key:val) that doesn't match
	// what the project wants (comma separated key=value pairs). So, we force it to skip parsing under gpflag.Parse.
	MetricDimensions map[string]string `flag:"-"`
}

// ParseFlags builds a pflag.FlagSet from the fields of config via
// gpflag.Parse, adds a "metricDimensions" flag when config has a
// MetricDimensions field, and mirrors every resulting flag onto the standard
// library's flag.CommandLine.
//
// config is expected to be a pointer to a struct. An error is returned when
// gpflag.Parse fails or when a MetricDimensions field exists but is not an
// accessible map[string]string.
func ParseFlags(config interface{}) (*pflag.FlagSet, error) {
	flags, err := gpflag.Parse(config)
	if err != nil {
		return nil, fmt.Errorf("failed to parse flags: %w", err)
	}

	// gpflag supports map[string]string but with a different non-standard parsing format (key:val) that doesn't
	// match what the project wants (key=val,key=val). So, we handle MetricDimensions separately here to accept
	// comma separated key=value pairs.
	//
	// The reflection below is guarded so that an unexpected config shape or
	// field type yields an error instead of a panic.
	cfg := reflect.ValueOf(config)
	if cfg.Kind() == reflect.Ptr && !cfg.IsNil() && cfg.Elem().Kind() == reflect.Struct {
		if field := cfg.Elem().FieldByName("MetricDimensions"); field.IsValid() {
			if !field.CanAddr() || !field.CanInterface() {
				return nil, fmt.Errorf("MetricDimensions field is not accessible")
			}
			metricDims, ok := field.Addr().Interface().(*map[string]string)
			if !ok {
				return nil, fmt.Errorf("MetricDimensions must be a map[string]string, got %s", field.Type())
			}
			flags.StringToStringVar(metricDims, "metricDimensions", nil, "CloudWatch metric dimensions as comma-separated key=value pairs")
		}
	}

	// Register each flag on the global standard-library flag set as well.
	flags.VisitAll(func(pf *pflag.Flag) {
		flag.CommandLine.Var(pf.Value, pf.Name, pf.Usage)
	})

	return flags, nil
}


================================================
FILE: test/common/resources.go
================================================
//go:build e2e

package common

import (
	"context"
	"fmt"
	"log"
	"time"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/pkg/env"
	"sigs.k8s.io/e2e-framework/pkg/envconf"
)

// DeployDaemonSet returns an env.Func that blocks until the DaemonSet
// identified by name and namespace reports ready, giving up after five
// minutes or when ctx is done. The function only waits; it does not create
// the DaemonSet itself.
func DeployDaemonSet(name, namespace string) env.Func {
	const readyTimeout = 5 * time.Minute

	return func(ctx context.Context, config *envconf.Config) (context.Context, error) {
		log.Printf("Waiting for %s daemonset to be ready.", name)

		target := appsv1.DaemonSet{
			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
		}
		ready := fwext.NewConditionExtension(config.Client().Resources()).DaemonSetReady(&target)
		if err := wait.For(ready, wait.WithTimeout(readyTimeout), wait.WithContext(ctx)); err != nil {
			return ctx, fmt.Errorf("%s daemonset is not ready: %w", name, err)
		}

		log.Printf("%s daemonset is ready.", name)
		return ctx, nil
	}
}


================================================
FILE: test/images/efa/Dockerfile
================================================
# Base image: Amazon Linux 2023.
FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Directory that receives efa_test.sh; it is appended to PATH further down.
ARG EFA_BIN_PATH="/opt/amazon/efa/bin"

# Swap gnupg2-minimal for the full gnupg2 package, then install the build
# toolchain and the utilities used by the later build steps and test scripts.
RUN dnf -y swap gnupg2-minimal gnupg2 \
    && dnf install -y \
        gcc \
        gcc-c++ \
        make \
        ca-certificates \
        cmake \
        emacs \
        git \
        jq \
        wget \
        unzip \
        vim \
        zlib-devel \
        openssl \
        openssl-devel \
        sqlite-devel \
        gdbm-devel \
        glibc-devel \
        bzip2-devel \
        ncurses-devel \
        tk-devel \
        libffi-devel \
        libcap-devel \
        tar \
        gnupg2

# Make the binaries under EFA_BIN_PATH resolvable by name.
ENV PATH="$PATH:$EFA_BIN_PATH"

# Download the latest EFA installer, import the publisher key, verify the
# tarball signature with gpg, run the installer, and install efa_test.sh into
# EFA_BIN_PATH. The extracted installer directory is removed afterwards.
RUN cd $HOME \
    && curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
    && wget https://efa-installer.amazonaws.com/aws-efa-installer.key && gpg --import aws-efa-installer.key \
    && cat aws-efa-installer.key | gpg --fingerprint \
    && wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
    && tar -xf aws-efa-installer-latest.tar.gz \
    && cd aws-efa-installer \
    && ./efa_installer.sh -y -d --skip-kmod --skip-limit-conf --no-verify \
    # TODO: remove this in favor of letting the efa installer add it if that ever becomes an option.
    # At the moment, this is only installed if omitting --no-verify, which would require
    # building in a context with EFA available
    && install -T -m 0755 efa_test.sh "${EFA_BIN_PATH}/efa_test.sh" \
    && cd $HOME \
    && rm -rf aws-efa-installer

# Drop the dnf caches.
# NOTE(review): this runs as its own layer, so it cannot shrink the layers
# created by the earlier RUN instructions -- confirm whether that is intended.
RUN dnf clean all

# Install AWS CLI v2 for the build architecture from a throw-away directory.
RUN INSTALL_DIR=$(mktemp -d) \
    && cd $INSTALL_DIR \
    && curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \
    && unzip awscliv2.zip \
    && ./aws/install \
    && cd \
    && rm -rf $INSTALL_DIR

# Ship the EFA test scripts in the image; the source path is relative to the
# build context.
COPY test/images/efa/scripts ./scripts

# Mark everything under ./scripts as executable.
RUN chmod -R +x ./scripts

================================================
FILE: test/images/efa/scripts/unit-test.sh
================================================
#!/usr/bin/env bash

set -eu

# Print the EC2 instance type reported by the instance metadata service.
# A session token is requested first and sent along when one was obtained;
# otherwise the metadata endpoint is queried without a token.
get_instance_type()
{
    local imds_base="http://169.254.169.254/latest"
    local imds_token
    # A failed token request must not abort the function: an empty token simply
    # selects the token-less request below.
    imds_token=$(curl -X PUT "${imds_base}/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" 2>/dev/null) || true

    if [ -z "$imds_token" ]; then
        curl "${imds_base}/meta-data/instance-type"
        return
    fi
    curl -H "X-aws-ec2-metadata-token: $imds_token" "${imds_base}/meta-data/instance-type"
}

# Print the MaximumEfaInterfaces value that the EC2 API reports for
# $EC2_INSTANCE_TYPE.
get_expected_efa_device_count()
{
    local jq_filter='.InstanceTypes[].NetworkInfo.EfaInfo.MaximumEfaInterfaces'
    aws ec2 describe-instance-types --instance-type="$EC2_INSTANCE_TYPE" \
        | jq -r "$jq_filter"
}

# Count the libfabric EFA endpoints of the given type (DGRAM or RDM) and
# compare the count with EXPECTED_EFA_DEVICE_COUNT. Exits the script with
# status 1 when fewer endpoints than expected are found.
verify_endpoint_count()
{
    local endpoint_kind="$1"
    local found
    found=$(fi_info -p efa | grep "type:\sFI_EP_${endpoint_kind}\$" | wc -l)
    if test $EXPECTED_EFA_DEVICE_COUNT -le $found; then
        echo "Verified at least $EXPECTED_EFA_DEVICE_COUNT ${endpoint_kind} endpoint(s) are available (found $found)"
    else
        echo "Expected at least $EXPECTED_EFA_DEVICE_COUNT ${endpoint_kind} endpoint(s) but found $found"
        exit 1
    fi
}

# Both values can be supplied through the environment; otherwise they are
# looked up from instance metadata and the EC2 API respectively.
EC2_INSTANCE_TYPE=${EC2_INSTANCE_TYPE:-$(get_instance_type)}
EXPECTED_EFA_DEVICE_COUNT=${EXPECTED_EFA_DEVICE_COUNT:-$(get_expected_efa_device_count)}

echo "Running test on a $EC2_INSTANCE_TYPE"

# Show the full provider listing once for the logs, then check each type.
fi_info -p efa
verify_endpoint_count DGRAM

verify_endpoint_count RDM


echo "Running single-node efa test"

# Run efa_test.sh, a utility added during the build while installing EFA
efa_test.sh

echo "Success!"

================================================
FILE: test/images/neuron/Dockerfile
================================================
# Base image: Ubuntu 22.04.
FROM public.ecr.aws/docker/library/ubuntu:22.04

# Neuron SDK components version numbers
# https://github.com/aws-neuron/deep-learning-containers/blob/main/docker/pytorch/training/2.5.1/Dockerfile.neuronx
ARG NEURONX_DISTRIBUTED_VERSION=0.16.25997+f431c02e
ARG NEURONX_CC_VERSION=2.22.12471.0+b4a00d10
ARG NEURONX_FRAMEWORK_VERSION=2.9.0.2.11.19912+e48cd891
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.29.41.0-681fef5f5
ARG NEURONX_RUNTIME_LIB_VERSION=2.29.40.0-f954cd7a5
ARG NEURONX_TOOLS_VERSION=2.27.33.0-5d9c0b901

# Python interpreter built from source below, plus the pip and Open MPI
# versions used by the later build steps.
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12
ARG PIP=pip3
ARG OMPI_VERSION=4.1.5

# This arg is required to stop docker build from waiting for region configuration while installing tz data
ARG DEBIAN_FRONTEND=noninteractive

# Python won't try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
# Library search path: Neuron, EFA, Open MPI and /usr/local/lib are appended in turn.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/aws/neuron/lib"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/amazon/efa/lib"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/amazon/efa/lib64"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/amazon/openmpi/lib64"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib"
# Put the Neuron tools first on PATH.
ENV PATH /opt/aws/neuron/bin/:$PATH
# ENV SAGEMAKER_TRAINING_MODULE=sagemaker_pytorch_container.training:main
ENV DGLBACKEND=pytorch

# Upgrade the base system and install the build toolchain and development
# headers; the apt package lists are removed again in the same layer.
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    emacs \
    git \
    jq \
    libopencv-dev \
    software-properties-common \
    wget \
    unzip \
    vim \
    zlib1g-dev \
    openssl \
    libssl-dev \
    libsqlite3-dev \
    libgdbm-dev \
    libc6-dev \
    libbz2-dev \
    libncurses-dev \
    tk-dev \
    libffi-dev \
    libcap-dev \
    gnupg2 \
    gpg-agent \
    libarchive13 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Install the SSH server/client and sudo.
# The index refresh and the install share one layer so that a cached, stale
# package index can never be combined with a fresh install step. apt-get is
# used because the `apt` front end is not intended for scripts.
RUN apt-get update \
    && apt-get install -y openssh-server openssh-client wget gnupg2 sudo

# Install Neuron packages
# Register the Neuron apt repository (suite pinned to "focal") and trust its
# signing key. /etc/os-release is not sourced here: shell variables do not
# survive across RUN instructions and none of its values is used.
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

# Install the pinned Neuron tools, collectives and runtime library packages,
# then remove the apt lists and temporary files in the same layer.
RUN apt-get update \
    && apt-get install -y \
    aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
    aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

# Install Open MPI
# Built from the release tarball for OMPI_VERSION; the build tree under
# /tmp/openmpi is deleted once `make install` and `ldconfig` have run.
RUN mkdir -p /tmp/openmpi \
    && cd /tmp/openmpi \
    && wget --quiet https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OMPI_VERSION}.tar.gz \
    && tar zxf openmpi-${OMPI_VERSION}.tar.gz \
    && cd openmpi-${OMPI_VERSION} \
    && ./configure --enable-orterun-prefix-by-default \
    && make -j $(nproc) all \
    && make install \
    && ldconfig \
    && rm -rf /tmp/openmpi

# install Python
# Builds CPython $PYTHON_VERSION from source into /usr/local, removes the
# tarball and source tree from the directory they were unpacked in, exposes
# `pip` and `python` convenience symlinks, and upgrades pip and setuptools.
RUN wget -q https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
    && tar -xzf Python-$PYTHON_VERSION.tgz \
    && cd Python-$PYTHON_VERSION \
    && ./configure --enable-shared --prefix=/usr/local \
    && make -j $(nproc) && make install \
    && cd .. && rm -rf Python-$PYTHON_VERSION* \
    && ln -s /usr/local/bin/pip3 /usr/bin/pip \
    && ln -s /usr/local/bin/$PYTHON /usr/local/bin/python \
    && ${PIP} --no-cache-dir install --upgrade \
    pip \
    setuptools

WORKDIR /

# The ENV variables declared below are changed in the previous section
# Grouping these ENV variables in the first section causes
# ompi_info to fail. This is only observed in CPU containers
ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"
# Sanity check: the build fails unless ompi_info prints an
# mpi_built_with_cuda_support value line (the value itself is not inspected).
RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value

# General-purpose Python packages used by the tests.
RUN ${PIP} install --no-cache-dir -U \
    "bokeh>=2.3,<3" \
    "awscli<2" \
    scipy \
    click \
    "cryptography" \
    psutil==5.6.7 \
    dataset \
    tenacity \
    transformers==4.36.2 \
    Pillow

# Expose the CA bundle under the /etc/pki path as well, then install the
# pinned torch-neuronx, neuronx-cc and neuronx_distributed packages from the
# Neuron pip repository.
RUN mkdir -p /etc/pki/tls/certs && cp /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt
RUN ${PIP} config set global.extra-index-url https://pip.repos.neuron.amazonaws.com \
    && ${PIP} install --force-reinstall torch-neuronx==$NEURONX_FRAMEWORK_VERSION --extra-index-url https://pip.repos.neuron.amazonaws.com \
    && ${PIP} install --force-reinstall neuronx-cc==$NEURONX_CC_VERSION --extra-index-url https://pip.repos.neuron.amazonaws.com \
    && ${PIP} install --force-reinstall --no-deps neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION --extra-index-url https://pip.repos.neuron.amazonaws.com

# attrs, neuronx-cc required: >=19.2.0, sagemaker <24,>=23.1.0
# protobuf neuronx-cc<4, sagemaker-training >=3.9.2,<=3.20.3
# awscli 1.25.47 has requirement docutils<0.17,>=0.10
# etcd for kubernetes installation
# awscli 1.27.127 has requirement rsa<4.8,>=3.1.2, but you have rsa 4.9.
# awscli 1.27.127 requires urllib3 < 1.27, python-etcd requires urllib3 >= 1.7, latest urllib3 release is 2.0.2
RUN ${PIP} install --no-cache-dir -U \
    "attrs<24,>=23.1.0" \
    "protobuf>=3.18.3,<=3.20.3" \
    "docutils>=0.10,<0.17" \
    "rsa<4.8,>=3.1.2" \
    "urllib3>=1.26.0,<1.27"

# EFA Installer does apt get. Make sure to run apt update before that
RUN apt-get update
# Download the latest EFA installer, verify its signature with gpg, and run it.
RUN cd $HOME \
    && curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
    && wget https://efa-installer.amazonaws.com/aws-efa-installer.key && gpg --import aws-efa-installer.key \
    && cat aws-efa-installer.key | gpg --fingerprint \
    && wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
    && tar -xf aws-efa-installer-latest.tar.gz \
    && cd aws-efa-installer \
    && ./efa_installer.sh -y -g -d --skip-kmod --skip-limit-conf --no-verify \
    && cd $HOME

# Clean up after apt update
RUN rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

# Install some common packages used by training scripts
# torchvision needed for MLP. since it depends on torch and torch neuron/torch
# is already installed install it with nodeps
RUN pip3 install --no-cache-dir --no-deps -U \
    torchvision==0.16.*

# Download the OSS compliance helper, install its test binary, run the
# generator script, and remove the downloaded files again.
RUN HOME_DIR=/root \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
    && rm -rf ${HOME_DIR}/oss_compliance* \
    && rm -rf /tmp/tmp*

# Store the license text at /license.txt.
RUN curl -o /license.txt  https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.1/license.txt

# SSH setup: create the sshd runtime directory, turn off strict host key
# checking and known-hosts persistence on the client side, and configure sshd
# for public-key authentication only with StrictModes disabled.
RUN mkdir -p /var/run/sshd && \
    sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
    echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \
    sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config && \
    sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config

# Pre-download the test resources into /home/ubuntu at build time, then drop
# the helper script from the image.
COPY test/images/neuron/hack/install-test-resources.sh ./hack/install-test-resources.sh
RUN chmod +x ./hack/install-test-resources.sh && \
    ./hack/install-test-resources.sh /home/ubuntu && \
    rm -f ./hack/install-test-resources.sh

# Create the ubuntu user with a fixed password, add it to the sudo group, and
# give it ownership of /home/ubuntu (populated by the step above).
RUN useradd -ms /bin/bash ubuntu
RUN echo 'ubuntu:password' | chpasswd
RUN usermod -aG sudo ubuntu &&\
    chown -R ubuntu /home/ubuntu

# Everything from here on runs as the ubuntu user in its home directory.
WORKDIR /home/ubuntu
USER ubuntu

# Generate a passphrase-less RSA key pair for the ubuntu user and authorize
# its public key for logins as that same user.
RUN mkdir -p /home/ubuntu/.ssh \
    && ssh-keygen -t rsa -f /home/ubuntu/.ssh/id_rsa -N '' \
    && cp /home/ubuntu/.ssh/id_rsa.pub /home/ubuntu/.ssh/authorized_keys

# Ship the Neuron test scripts; the source path is relative to the build context.
COPY test/images/neuron/tests ./tests


================================================
FILE: test/images/neuron/hack/install-test-resources.sh
================================================
#!/bin/bash
#
# Pre-download the MNIST dataset files so that tests do not have to fetch them
# at runtime.
#
# Usage: install-test-resources.sh [USER_DIR]
#   USER_DIR  directory under which MNIST_DATA_train/<rank>/MNIST/raw is
#             created (default: /root)

set -o pipefail
set -o nounset
set -o errexit

USER_DIR=${1:-"/root"}
# attempt to cache dataset to avoid runtime download.
# needs to match https://github.com/pytorch/vision/blob/c0331c5e2933c621db9a44623f4f3981fe2342e0/torchvision/datasets/mnist.py#L42
MNIST_RESOURCES=("train-images-idx3-ubyte.gz" "train-labels-idx1-ubyte.gz" "t10k-images-idx3-ubyte.gz" "t10k-labels-idx1-ubyte.gz")
for i in {0..1}; do
    # we need to populate data for each rank, and we currently always run with 2
    DOWNLOAD_DIR="${USER_DIR}/MNIST_DATA_train/${i}/MNIST/raw"
    mkdir -p "$DOWNLOAD_DIR"
    for RESOURCE in "${MNIST_RESOURCES[@]}"; do
        DEST_FILE="${DOWNLOAD_DIR}/${RESOURCE}"
        SOURCE_URL="https://ossci-datasets.s3.amazonaws.com/mnist/${RESOURCE}"
        echo "Downloading ${SOURCE_URL} to ${DEST_FILE}"
        # --fail makes curl exit non-zero on an HTTP error response, so errexit
        # stops the script instead of an error page being saved as the dataset.
        curl --fail -o "$DEST_FILE" "$SOURCE_URL"
    done
done

================================================
FILE: test/images/neuron/tests/singleNodeTest.sh
================================================
#!/usr/bin/env bash

# Run the single-node Neuron tests one after another, each as a two-process
# torchrun job. Script paths are relative to the current working directory.

# Stop at the first failing test.
set -e

torchrun --nproc_per_node=2 --nnodes=1 tests/testNeuronSingleAllReduce.py
torchrun --nproc_per_node=2 --nnodes=1 tests/testNeuronParallelState.py
torchrun --nproc_per_node=2 --nnodes=1 tests/testNeuronMlp.py

================================================
FILE: test/images/neuron/tests/testNeuronMlp.py
================================================
# Source: https://github.com/aws/deep-learning-containers/blob/master/test/dlc_tests/container_tests/bin/pytorch_tests/testNeuronMlp
import os
import time
import torch

from torchvision.datasets import mnist
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

# XLA imports
import torch_xla.core.xla_model as xm
import torch_xla.runtime as xr

# XLA imports for parallel loader and multi-processing
import torch_xla.distributed.parallel_loader as pl
from torch.utils.data.distributed import DistributedSampler

# Initialize XLA process group for torchrun
import torch_xla.distributed.xla_backend
import torch.nn as nn
import torch.nn.functional as F

# Join the process group using the 'xla' backend (runs at import time).
torch.distributed.init_process_group('xla')

# Global constants
EPOCHS = 4  # number of passes over the training set
WARMUP_STEPS = 2  # leading iterations per epoch excluded from the throughput timing
BATCH_SIZE = 32

# Load MNIST train dataset
# Each process reads from its own directory, ~/MNIST_DATA_train/<global ordinal>;
# download=True lets torchvision fetch the files if they are not already there.
train_dataset = mnist.MNIST(root=os.path.join(os.path.expanduser("~") + '/MNIST_DATA_train', str(xr.global_ordinal())),
                            train=True, download=True, transform=ToTensor())

# Declare 3-layer MLP for MNIST dataset
class MLP(nn.Module):
    """Three fully connected layers with ReLU in between and log-softmax output.

    Args:
        input_size: number of features per flattened input sample.
        output_size: number of output classes.
        layers: widths of the two hidden layers; any indexable sequence whose
            first two entries are ints works.
    """

    # The default is a tuple rather than a list so that no mutable object is
    # shared between calls.
    def __init__(self, input_size=28 * 28, output_size=10, layers=(120, 84)):
        super().__init__()
        self.fc1 = nn.Linear(input_size, layers[0])
        self.fc2 = nn.Linear(layers[0], layers[1])
        self.fc3 = nn.Linear(layers[1], output_size)

    def forward(self, x):
        """Return per-class log-probabilities for a (batch, input_size) tensor."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)


def main():
    """Train the MLP on MNIST for EPOCHS epochs on the XLA device.

    Prints the training throughput of the last epoch (warmup iterations
    excluded) and the final loss, then saves a checkpoint to
    ~/checkpoints/checkpoint.pt.
    """
    # XLA MP: get world size
    world_size = xr.world_size()
    # multi-processing: ensure each worker has same initial weights
    torch.manual_seed(0)

    # Move model to device and declare optimizer and loss function
    device = 'xla'
    model = MLP().to(device)
    # For multiprocessing, scale up learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01 * world_size)
    loss_fn = torch.nn.NLLLoss()

    # Prepare data loader
    train_sampler = None
    if world_size > 1:
        train_sampler = DistributedSampler(train_dataset,
                                           num_replicas=world_size,
                                           rank=xr.global_ordinal(),
                                           shuffle=True)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=train_sampler,
                              shuffle=False if train_sampler else True)
    # XLA MP: use MpDeviceLoader from torch_xla.distributed
    train_device_loader = pl.MpDeviceLoader(train_loader, device)

    # Run the training loop
    print('----------Training ---------------')
    model.train()
    timed_steps = 0
    for epoch in range(EPOCHS):
        start = time.time()
        timed_steps = 0
        for idx, (train_x, train_label) in enumerate(train_device_loader):
            optimizer.zero_grad()
            train_x = train_x.view(train_x.size(0), -1)
            output = model(train_x)
            loss = loss_fn(output, train_label)
            loss.backward()
            xm.optimizer_step(optimizer) # XLA MP: performs grad allreduce and optimizer step
            if idx < WARMUP_STEPS: # skip warmup iterations
                start = time.time()
            else:
                # Count exactly the iterations that ran after the timer was
                # last reset; the final loop index alone is one short of that.
                timed_steps += 1

    # Compute statistics for the last epoch
    throughput = timed_steps / (time.time() - start)
    print("Train throughput (iter/sec): {}".format(throughput))
    print("Final loss is {:0.4f}".format(loss.detach().to('cpu')))

    # Save checkpoint for evaluation (xm.save ensures only one process save)
    os.makedirs(os.path.expanduser("~") + "/checkpoints", exist_ok=True)
    checkpoint = {'state_dict': model.state_dict()}
    xm.save(checkpoint, os.path.expanduser("~") + '/checkpoints/checkpoint.pt')

    print('----------End Training ---------------')

if __name__ == '__main__':
    main()


================================================
FILE: test/images/neuron/tests/testNeuronParallelState.py
================================================
# Source: https://github.com/aws/deep-learning-containers/blob/master/test/dlc_tests/container_tests/bin/pytorch_tests/testNeuronParallelState
import argparse
import atexit
import json
import os
import traceback
from datetime import datetime

import torch
import torch_xla.core.xla_model as xm
import torch_xla.debug.metrics as met
import torch_xla.runtime as xr

from neuronx_distributed.parallel_layers import parallel_state
from neuronx_distributed.parallel_layers.utils import is_pjrt_device

# Timestamp captured once at import time.
datetime_str = str(datetime.now())


# Shared result record; each test below sets "inference_success" to 0 when it fails.
results = {"inference_success": 1}


def test_initialize_model_parallel(tensor_model_parallel_size):
    """Check that initialize_model_parallel builds consistent TP and DP groups.

    The requested tensor-parallel size is clamped to the world size. On any
    failure, results["inference_success"] is set to 0, the traceback is
    printed, and the exception is re-raised.
    """
    def _test_initialize_model_parallel():
        if torch.distributed.get_rank() == 0:
            print("testing initialize_model_parallel with size {}".format(tensor_model_parallel_size))
        tensor_model_parallel_size_ = min(tensor_model_parallel_size, torch.distributed.get_world_size())
        assert not parallel_state.model_parallel_is_initialized()
        parallel_state.initialize_model_parallel(tensor_model_parallel_size=tensor_model_parallel_size_)
        assert parallel_state.model_parallel_is_initialized()

        # Checks.
        def check(group, world_size, rank):
            assert world_size == torch.distributed.get_world_size(group=group)
            assert rank == torch.distributed.get_rank(group=group)

        # Model parallel.
        world_size = tensor_model_parallel_size_
        rank = torch.distributed.get_rank() % tensor_model_parallel_size_
        assert world_size == parallel_state.get_tensor_model_parallel_size()
        assert rank == parallel_state.get_tensor_model_parallel_rank()
        check(parallel_state.get_tensor_model_parallel_group(), world_size, rank)

        # Data parallel.
        # Use the clamped size for the rank as well, so it stays consistent
        # with the size the groups were actually initialized with.
        world_size = torch.distributed.get_world_size() // tensor_model_parallel_size_
        rank = torch.distributed.get_rank() // tensor_model_parallel_size_
        assert world_size == parallel_state.get_data_parallel_size()
        assert rank == parallel_state.get_data_parallel_rank()
        check(parallel_state.get_data_parallel_group(), world_size, rank)

        # Reset groups
        parallel_state.destroy_model_parallel()

        torch.distributed.barrier()
        if torch.distributed.get_rank() == 0:
            print("test passed")

    global results
    try:
        _test_initialize_model_parallel()
    except:
        results["inference_success"] = 0
        print(traceback.format_exc())
        raise


def test_get_tensor_model_parallel_src_rank(tensor_model_parallel_size_):
    """Check get_tensor_model_parallel_src_rank against a locally computed value.

    The requested tensor-parallel size is clamped to the world size. On any
    failure, results["inference_success"] is set to 0, the traceback is
    printed, and the exception is re-raised.
    """
    def _check_src_rank():
        dist = torch.distributed
        if dist.get_rank() == 0:
            print("testing get_tensor_model_parallel_src_rank with size {}".format(tensor_model_parallel_size_))
        clamped_size = min(tensor_model_parallel_size_, dist.get_world_size())

        assert not parallel_state.model_parallel_is_initialized()
        parallel_state.initialize_model_parallel(clamped_size)
        assert parallel_state.model_parallel_is_initialized()

        # The source rank is this rank minus its offset within the TP group.
        expected_src = dist.get_rank() - parallel_state.get_tensor_model_parallel_rank()
        assert parallel_state.get_tensor_model_parallel_src_rank() == expected_src

        # Tear the groups down so the next test starts uninitialized.
        parallel_state.destroy_model_parallel()

        dist.barrier()
        if dist.get_rank() == 0:
            print("test passed")

    global results
    try:
        _check_src_rank()
    except:
        results["inference_success"] = 0
        print(traceback.format_exc())
        raise


if __name__ == "__main__":
    # Pick the process-group init method depending on whether a PJRT device is in use.
    if is_pjrt_device():
        import torch_xla.experimental.pjrt_backend
        torch.distributed.init_process_group("xla", init_method="pjrt://")
    else:
        torch.distributed.init_process_group("xla")
    world_size = xr.world_size()
    # Run both tests for tensor-parallel sizes 1, 2, 4, ... up to the world size.
    tensor_model_parallel_size = 1
    while tensor_model_parallel_size <= world_size:
        test_initialize_model_parallel(tensor_model_parallel_size)
        test_get_tensor_model_parallel_src_rank(tensor_model_parallel_size)
        tensor_model_parallel_size *= 2


================================================
FILE: test/images/neuron/tests/testNeuronSingleAllReduce.py
================================================
# Source: https://github.com/aws/deep-learning-containers/blob/master/test/dlc_tests/container_tests/bin/pytorch_tests/testNeuronSingleAllReduce
import os
import torch
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_backend
import torch_xla.runtime as xr
# Join the process group using the 'xla' backend (runs at import time).
torch.distributed.init_process_group('xla')
import torch_xla.distributed.xla_multiprocessing as xmp
# Runtime and debug settings, exported through the process environment.
# NOTE(review): these are set after the process group is initialized -- confirm
# that the consumers read them late enough for the values to take effect.
os.environ["NEURON_RT_EXEC_TIMEOUT"] = "20"
os.environ["NCCL_DEBUG"] = "WARN"
os.environ["NCCL_DEBUG_SUBSYS"] = "ALL"
def _mp_fn():
    """Sum-all-reduce a 2x3 tensor of ones five times and verify the result.

    Every element of the reduced tensor must equal the world size; 'PASS' is
    printed when the final result matches.
    """
    num_workers = xr.world_size()
    xla_dev = xm.xla_device()
    ordinal = xr.global_ordinal()
    device_ones = torch.ones((2, 3)).to(xla_dev)
    if not num_workers > 0:
        return

    print("running all reduce")
    for step in range(5):
        print(f'at iteration {step}, with local rank {ordinal}', flush=True)
        reduced = xm.all_reduce(xm.REDUCE_SUM, device_ones)
        host_copy = reduced.cpu()
        #xm.mark_step()
        print(host_copy, flush=True)
    want = torch.ones((2, 3)) * num_workers
    assert want.allclose(host_copy)
    print('PASS')


if __name__ == '__main__':
    _mp_fn()
    #xmp.spawn(_mp_fn, args=(),nprocs=2, join=True)


================================================
FILE: test/images/neuron-inference/Dockerfile
================================================
###############################################################################
# 0) Base image, arguments, and environment
###############################################################################
FROM public.ecr.aws/docker/library/ubuntu:22.04

# Disable interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# Ensure Python prints are unbuffered so we see logs in real time
ENV PYTHONUNBUFFERED=1

# Neuron SDK components version numbers
# https://github.com/aws-neuron/deep-learning-containers/blob/main/docker/pytorch/inference/2.5.1/Dockerfile.neuronx
ARG NEURONX_CC_VERSION=2.22.12471.0+b4a00d10
ARG NEURONX_FRAMEWORK_VERSION=2.9.0.2.11.19912+e48cd891
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.29.41.0-681fef5f5
ARG NEURONX_RUNTIME_LIB_VERSION=2.29.40.0-f954cd7a5
ARG NEURONX_TOOLS_VERSION=2.27.33.0-5d9c0b901

# Python
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12

# Python runtime behaviour and locale (PYTHONUNBUFFERED is set again here with
# the same value as above).
ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     PYTHONIOENCODING=UTF-8 \
     LANG=C.UTF-8 \
     LC_ALL=C.UTF-8

# Extend library paths for Neuron
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/aws/neuron/lib"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib"
ENV PATH="/opt/aws/neuron/bin:${PATH}"

###############################################################################
# 1) Base system packages, user setup
###############################################################################
# Upgrade the base system and install the build toolchain and development
# headers; the apt package lists are removed again in the same layer.
RUN apt-get update \
     && apt-get upgrade -y \
     && apt-get install -y --no-install-recommends \
     build-essential \
     ca-certificates \
     curl \
     git \
     jq \
     wget \
     unzip \
     vim \
     zlib1g-dev \
     openssl \
     libssl-dev \
     libsqlite3-dev \
     libgdbm-dev \
     libc6-dev \
     libbz2-dev \
     libncurses-dev \
     tk-dev \
     libffi-dev \
     gnupg2 \
     gpg-agent \
     libarchive13 \
     openssh-server \
     sudo \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean

###############################################################################
# 2) Neuron SDK
###############################################################################
# Register the Neuron apt repository, trust its signing key and install the
# pinned tools, collectives and runtime library packages.
# NOTE(review): /etc/os-release is sourced but the repository suite is
# hard-coded to "focal" -- confirm that this is intended on a 22.04 base.
RUN . /etc/os-release \
     && echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list \
     && wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - \
     && apt-get update -y \
     && apt-get install -y \
     aws-neuronx-tools=${NEURONX_TOOLS_VERSION} \
     aws-neuronx-collectives=${NEURONX_COLLECTIVES_LIB_VERSION} \
     aws-neuronx-runtime-lib=${NEURONX_RUNTIME_LIB_VERSION} \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean

###############################################################################
# 3) Python 3.10 from source
###############################################################################
# Builds CPython ${PYTHON_VERSION} into /usr/local, removes the tarball and
# source tree, exposes `pip` and `python` convenience symlinks, and upgrades
# pip, setuptools and wheel.
RUN wget -q https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \
     && tar -xzf Python-${PYTHON_VERSION}.tgz \
     && cd Python-${PYTHON_VERSION} \
     && ./configure --enable-shared --prefix=/usr/local \
     && make -j $(nproc) && make install \
     && cd .. && rm -rf Python-${PYTHON_VERSION}* \
     && ln -s /usr/local/bin/pip3 /usr/bin/pip \
     && ln -s /usr/local/bin/${PYTHON} /usr/local/bin/python \
     && pip --no-cache-dir install --upgrade pip setuptools wheel

###############################################################################
# 4) Install PyTorch Neuron, Transformers Neuron, etc. via pip
###############################################################################
# The Neuron pip repository is added as an extra index so that the pinned
# torch-neuronx and neuronx-cc versions can be resolved.
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com \
     && pip install --force-reinstall \
     "torch-neuronx==${NEURONX_FRAMEWORK_VERSION}" \
     "neuronx-cc==${NEURONX_CC_VERSION}" \
     "transformers==4.36.2"

###############################################################################
# 5) Application files and Python dependencies
###############################################################################
# infer.py is copied from the root of the build context.
WORKDIR /app
COPY infer.py /app/


================================================
FILE: test/images/neuron-inference/infer.py
================================================
import logging
import os
import sys
import time
import json
import subprocess
import random
import concurrent.futures
import numpy as np
from copy import deepcopy

import torch
import torch_neuronx
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertForPreTraining, BertTokenizer

# Root logging goes to stdout at INFO level with a timestamped, bracketed format.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s',
    level=logging.INFO,
)

# Named logger used by this script; its own level is pinned to INFO as well.
logger = logging.getLogger("BERTNeuronInference")
logger.setLevel(logging.INFO)

def get_neuron_monitor_stats():
    """
    Runs neuron-monitor and returns the NeuronCore-per-device count it reports.

    Only the first line printed by neuron-monitor is read and parsed as JSON.
    The process is always terminated afterwards, including when reading or
    parsing fails. There is no read timeout: the call blocks until the first
    line arrives or neuron-monitor closes its stdout.

    Returns:
        int: ``neuron_hardware_info.neuroncore_per_device_count`` from the report

    Raises:
        RuntimeError: If neuron-monitor is not found, produces no output, prints
            something that is not valid JSON, reports a non-positive NeuronCore
            count, or fails in any other way
    """
    process = None
    try:
        # stderr is discarded because it is never read here; an unread pipe
        # could fill up and stall the child before its first report.
        process = subprocess.Popen(
            ['neuron-monitor'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True
        )

        # Wait for the first line of output
        output = process.stdout.readline()

        if not output:
            raise RuntimeError("No output received from neuron-monitor")

        # Parse JSON output
        stats = json.loads(output)

        # Check for Neuron hardware support
        hardware_info = stats.get('neuron_hardware_info', {})
        neuroncore_per_device_count = hardware_info.get('neuroncore_per_device_count', 0)

        if neuroncore_per_device_count <= 0:
            raise RuntimeError(f"No Neuron devices found (neuroncore_per_device_count: {neuroncore_per_device_count})")

        return neuroncore_per_device_count

    except RuntimeError:
        # Already carries a precise message; do not wrap it a second time.
        raise
    except FileNotFoundError as e:
        raise RuntimeError("neuron-monitor command not found") from e
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Failed to parse JSON output: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error running neuron-monitor: {e}") from e
    finally:
        # Only the first report is needed, so stop the monitor on every path.
        if process is not None:
            process.terminate()
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Did not react to SIGTERM in time; force it down.
                process.kill()
                process.wait()
            if process.stdout is not None:
                process.stdout.close()


def print_info(msg: str):
    """Emit ``msg`` at INFO level on the script logger, tagged with ``[INFO]``."""
    tagged = "[INFO] {}".format(msg)
    logger.info(tagged)


def print_warning(msg: str):
    """Emit ``msg`` at WARNING level on the script logger, tagged with ``[WARNING]``."""
    tagged = "[WARNING] {}".format(msg)
    logger.warning(tagged)


def print_error(msg: str):
    """Emit ``msg`` at ERROR level on the script logger, tagged with ``[ERROR]``."""
    tagged = "[ERROR] {}".format(msg)
    logger.error(tagged)


def create_dummy_data(tokenizer, batch_size, num_samples=10000, max_length=128, seed=42):
    """
    Creates a dummy Next Sentence Prediction (NSP) dataset for BERT model testing.

    Args:
        tokenizer (BertTokenizer): callable used to tokenize the sentence pairs
        batch_size (int): size of each batch; must be at least 1
        num_samples (int): total number of samples to generate (default: 10000)
        max_length (int): sequence length every pair is padded/truncated to (default: 128)
        seed (int): seed for Python's ``random`` module, for reproducibility (default: 42)

    Returns:
        TensorDataset containing:
            - input_ids: tokenized input sequences as returned by the tokenizer
            - attention_mask: attention masks as returned by the tokenizer
            - nsp_labels: torch.long tensor (1 for the following sentence in the
              sample list, 0 for a randomly drawn sentence)

    Raises:
        ValueError: If batch_size is smaller than 1, or if no samples remain
            after rounding num_samples down to a multiple of batch_size

    Notes:
        - num_samples is rounded down to a multiple of batch_size
        - Each pair is labelled 1 or 0 with equal probability
        - A label-0 pair is drawn uniformly from all sample sentences, so it can
          coincide with the true next sentence
        - Uses a predefined set of sample sentences for generating pairs
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    random.seed(seed)

    if num_samples % batch_size != 0:
        adjusted = (num_samples // batch_size) * batch_size
        print_info(
            f"Adjusting num_samples from {num_samples} to {adjusted} "
            "to ensure full batches."
        )
        num_samples = adjusted

    if num_samples < 1:
        raise ValueError(
            f"num_samples must allow at least one full batch of size {batch_size}, "
            f"got {num_samples}"
        )

    sample_sentences = [
        "The dog loves playing fetch in the park.",
        "Artificial intelligence is reshaping the future.",
        "Movies with complex storylines can be very engaging.",
        "This restaurant serves an amazing brunch on weekends.",
        "Many researchers are exploring neural network architectures.",
        "A day at the beach can reduce stress and improve well-being.",
        "ChatGPT is a popular large language model by OpenAI.",
        "The annual developer conference showcased innovative technologies.",
        "Hiking in the mountains offers both challenge and relaxation.",
        "Robotics and automation are revolutionizing many industries.",
    ]
    last_index = len(sample_sentences) - 1

    sentences_a = []
    sentences_b = []
    nsp_labels = []

    for _ in range(num_samples):
        idx_a = random.randint(0, last_index)
        if random.random() < 0.5:
            # "True" next sentence: the following entry, wrapping at the end
            idx_b = (idx_a + 1) % len(sample_sentences)
            nsp_labels.append(1)
        else:
            # Random sentence
            idx_b = random.randint(0, last_index)
            nsp_labels.append(0)

        sentences_a.append(sample_sentences[idx_a])
        sentences_b.append(sample_sentences[idx_b])

    inputs = tokenizer(
        sentences_a,
        sentences_b,
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    return TensorDataset(
        inputs.input_ids,
        inputs.attention_mask,
        torch.tensor(nsp_labels, dtype=torch.long)
    )


def run_inference(model, tokenizer, batch_size, mode, n_models=2, n_threads=2):
    """
    Compiles a BERT model with torch_neuronx and times it on dummy NSP data.

    Args:
        model (BertForPreTraining): model instance to be traced and used for inference
        tokenizer (BertTokenizer): instance for processing input text
        batch_size (int): batch size used for both tracing and inference
        mode (str): inference mode label ('throughput' or 'latency'); only echoed in the metrics
        n_models (int): accepted for call compatibility; not used by this implementation
        n_threads (int): accepted for call compatibility; not used by this implementation

    Returns:
        None, but logs performance metrics including:
        - Duration of the job (seconds)
        - Average time per batch (milliseconds)
        - Throughput (samples per second)
        - P50, P95, P99 latency (milliseconds)
        - Batch Size
        - Total Batches Processed
        - Total Inferences

    Raises:
        Exception: whatever dataset creation or model tracing raised, after logging it

    Notes:
        - Performance metrics are logged with prefix [BERT_INFERENCE_NEURON_METRICS]
        - Uses torch_neuronx.trace inside an experimental multicore context for compilation
        - Runs inference with no gradient computation (torch.no_grad)
    """

    print_info("About to create dummy data...")
    try:
        dataset = create_dummy_data(tokenizer, batch_size=batch_size)
    except Exception as e:
        print_error(f"Failed to create dummy data: {e}")
        raise

    print_info("Dummy data creation completed.")

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size
    )

    # First compile the model for Neuron:
    # the traced model is compiled for one fixed input signature, so the first
    # batch-sized slice of the dataset is used as the example input for .trace().
    _input_ids, _attention_masks, _ = dataset.tensors
    batch_input = (_input_ids[:batch_size], _attention_masks[:batch_size])
    try:
        # Use multicore context for automatic core allocation
        with torch_neuronx.experimental.multicore_context():
            model_neuron = torch_neuronx.trace(model, batch_input)
    except Exception as e:
        logger.exception(f"[ERROR] Failed to trace BERT model. Failed with error: {e}")
        raise

    latencies = []  # per-batch wall time in milliseconds
    rows_processed = 0

    print_info("Starting Neuron inference ...")
    begin = time.time()

    with torch.no_grad():
        for batch in dataloader:
            # The NSP labels (third tensor) are not fed to the model.
            batch_input_tensor, batch_attention_tensor, _ = batch
            batch_input = (batch_input_tensor, batch_attention_tensor)
            start = time.time()
            _ = model_neuron(*batch_input)
            finish = time.time()

            latencies.append((finish - start) * 1000)
            rows_processed += len(batch_input_tensor)

    end = time.time()

    # Compute metrics
    boundaries = [50, 95, 99]
    percentiles = {}

    for boundary in boundaries:
        name = f'latency_p{boundary}'
        percentiles[name] = np.percentile(latencies, boundary)

    duration = end - begin
    inferences = rows_processed
    throughput = inferences / duration
    avg_time_per_batch = np.mean(latencies)

    # Print metrics
    print_info("Neuron inference completed.")

    # Print metrics to support old logging format
    print_info(
        "[BERT_INFERENCE_NEURON_METRICS] "
        f"mode={mode} "
        f"avg_time_per_batch={avg_time_per_batch:.6f} "
        f"throughput_samples_per_sec={throughput:.6f}"
    )

    # performance metrics
    print_info(f"[BERT_INFERENCE_NEURON_METRICS] mode={mode}")
    print_info(f"[BERT_INFERENCE_NEURON_METRICS] duration={duration:.6f}")
    print_info(f"[BERT_INFERENCE_NEURON_METRICS] avg_time_per_batch={avg_time_per_batch:.6f}")
    print_info(f"[BERT_INFERENCE_NEURON_METRICS] throughput_samples_per_sec={throughput:.6f}")

    # latency metrics
    for name, value in percentiles.items():
        print_info(f"[BERT_INFERENCE_NEURON_METRICS] {name}={value:.6f}")

    print_info(f"[BERT_INFERENCE_NEURON_METRICS] batch_size={batch_size}")
    print_info(f"[BERT_INFERENCE_NEURON_METRICS] total_batches_processed={len(latencies)}")
    print_info(f"[BERT_INFERENCE_NEURON_METRICS] total_inferences={inferences}")


def main():
    """
    Script entry point.

    Checks for Neuron hardware via neuron-monitor, picks the inference mode from
    the INFERENCE_MODE environment variable (default 'throughput'), loads the
    bert-base-uncased tokenizer and model, and runs the benchmark. Exits with
    status 1 when the Neuron check or the model load fails.
    """
    print_info("Starting main()...")
    try:
        core_count = get_neuron_monitor_stats()
        print_info(f"Spawing a total of {core_count} models")
    except RuntimeError as err:
        print_error(f"Neuron environment not detected. Failed with error: {err}")
        sys.exit(1)

    known_modes = ("throughput", "latency")
    mode = os.environ.get("INFERENCE_MODE", "throughput").lower()
    if mode not in known_modes:
        print_warning(
            f"Unrecognized INFERENCE_MODE '{mode}'. "
            "Falling back to 'throughput'."
        )
        mode = "throughput"

    # Latency mode runs one sample at a time; throughput mode batches eight.
    batch_size = {"latency": 1}.get(mode, 8)
    print_info(f"Running Neuron inference in {mode} mode with batch size {batch_size}.")

    print_info("Loading tokenizer and model...")
    checkpoint = "bert-base-uncased"
    try:
        tokenizer = BertTokenizer.from_pretrained(checkpoint)
        model = BertForPreTraining.from_pretrained(checkpoint, torchscript=True)
    except Exception as err:
        print_error(f"Failed to load model/tokenizer: {err}")
        sys.exit(1)
    print_info("Model and tokenizer loaded successfully.")

    run_inference(model, tokenizer, batch_size, mode, n_models=core_count)
    print_info("main() completed all steps successfully.")


if __name__ == "__main__":
    main()


================================================
FILE: test/images/neuron-training/Dockerfile
================================================
FROM public.ecr.aws/docker/library/ubuntu:22.04

###############################################################################
# 0) Arguments and environment
###############################################################################
ARG DEBIAN_FRONTEND=noninteractive

# Neuron SDK component versions (pin these precisely)
# https://github.com/aws-neuron/deep-learning-containers/blob/main/docker/pytorch/training/2.5.1/Dockerfile.neuronx
ARG NEURONX_CC_VERSION=2.22.12471.0+b4a00d10
ARG NEURONX_FRAMEWORK_VERSION=2.9.0.2.11.19912+e48cd891
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.29.41.0-681fef5f5
ARG NEURONX_RUNTIME_LIB_VERSION=2.29.40.0-f954cd7a5
ARG NEURONX_TOOLS_VERSION=2.27.33.0-5d9c0b901

# Python
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12

ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     PYTHONIOENCODING=UTF-8 \
     LANG=C.UTF-8 \
     LC_ALL=C.UTF-8

# Extend library paths for Neuron & EFA
ENV LD_LIBRARY_PATH="/opt/aws/neuron/lib:/opt/amazon/efa/lib:/opt/amazon/efa/lib64:/usr/local/lib:${LD_LIBRARY_PATH}"
ENV PATH="/opt/aws/neuron/bin:${PATH}"

###############################################################################
# 1) Base system packages, user setup
###############################################################################
# Upgrade the base image and install the build toolchain, the development
# headers needed to compile CPython, GPG tooling and the SSH client/server,
# then drop the apt caches. Every continuation backslash is the last character
# on its line.
RUN apt-get update \
     && apt-get upgrade -y \
     && apt-get install -y --no-install-recommends \
     build-essential \
     ca-certificates \
     curl \
     git \
     jq \
     wget \
     unzip \
     vim \
     lcov \
     pkg-config \
     zlib1g-dev \
     openssl \
     libssl-dev \
     libsqlite3-dev \
     libgdbm-dev \
     libc6-dev \
     libbz2-dev \
     libncurses-dev \
     tk-dev \
     libffi-dev \
     gnupg2 \
     gpg-agent \
     libarchive13 \
     openssh-server \
     openssh-client \
     sudo \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean

###############################################################################
# 2) Neuron SDK
###############################################################################
# Register the AWS Neuron apt repository and its signing key, install the
# pinned Neuron tools, collectives and runtime packages, then drop the apt caches.
# NOTE(review): /etc/os-release is sourced but none of its variables are used
# in this command, and the repository suite is hard-coded to "focal" although
# the base image is ubuntu:22.04 — confirm this is intentional.
RUN . /etc/os-release \
     && echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list \
     && wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - \
     && apt-get update \
     && apt-get install -y \
     aws-neuronx-tools=${NEURONX_TOOLS_VERSION} \
     aws-neuronx-collectives=${NEURONX_COLLECTIVES_LIB_VERSION} \
     aws-neuronx-runtime-lib=${NEURONX_RUNTIME_LIB_VERSION} \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean

###############################################################################
# 3) EFA installer (for MPI-based jobs)
###############################################################################
# Download the "latest" EFA installer tarball together with the published key
# and detached signature, verify the tarball with gpg, then run the installer
# (kernel module and limits configuration are skipped). The installer files and
# apt lists are removed afterwards.
# NOTE(review): "latest" is unpinned, so rebuilds may pick up a different EFA release.
RUN apt-get update \
     && cd /tmp \
     && curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
     && wget https://efa-installer.amazonaws.com/aws-efa-installer.key \
     && gpg --import aws-efa-installer.key \
     && cat aws-efa-installer.key | gpg --fingerprint \
     && wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig \
     && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
     && tar -xf aws-efa-installer-latest.tar.gz \
     && cd aws-efa-installer \
     && ./efa_installer.sh -y -g -d --skip-kmod --skip-limit-conf --no-verify \
     && cd /tmp \
     && rm -rf aws-efa-installer* \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean

ENV PATH="/opt/amazon/openmpi/bin:${PATH}"
ENV LD_LIBRARY_PATH="/opt/amazon/openmpi/lib64:${LD_LIBRARY_PATH}"

###############################################################################
# 4) Python 3.10 from source
###############################################################################
# Download the CPython ${PYTHON_VERSION} source tarball, build it with a shared
# libpython and install it under /usr/local, then delete the source tree.
# `pip` and `python` are symlinked to the freshly installed interpreter before
# pip, setuptools and wheel are upgraded.
RUN wget -q https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \
     && tar -xzf Python-${PYTHON_VERSION}.tgz \
     && cd Python-${PYTHON_VERSION} \
     && ./configure --enable-shared --prefix=/usr/local \
     && make -j $(nproc) && make install \
     && cd .. && rm -rf Python-${PYTHON_VERSION}* \
     && ln -s /usr/local/bin/pip3 /usr/bin/pip \
     && ln -s /usr/local/bin/${PYTHON} /usr/local/bin/python \
     && pip --no-cache-dir install --upgrade pip setuptools wheel

###############################################################################
# 5) Install pinned Python packages
###############################################################################
# Add the Neuron package index and install the pinned framework, compiler and
# transformers versions. --no-cache-dir keeps pip's download cache out of the
# image layer.
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com \
     && pip install --no-cache-dir --force-reinstall \
     "torch-neuronx==${NEURONX_FRAMEWORK_VERSION}" \
     "neuronx-cc==${NEURONX_CC_VERSION}" \
     "transformers==4.36.2"

###############################################################################
# 6) SSH and finalize
###############################################################################
# Configure SSH (auto-accept new host keys):
#  - create the /var/run/sshd runtime directory
#  - client config: set StrictHostKeyChecking to "no" and point
#    UserKnownHostsFile at /dev/null so host keys are never recorded
#  - server config: uncomment StrictModes and set it to "no"
RUN mkdir -p /var/run/sshd && \
     sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
     echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
     sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config


WORKDIR /app/
COPY train.py /app/


================================================
FILE: test/images/neuron-training/train.py
================================================
import os
import time
import random

import torch
import torch.distributed as dist

# === torch_xla imports for device and parallel loader ===
import torch_xla.core.xla_model as xm
import torch_xla.runtime as xr
import torch_xla.distributed.xla_backend
import torch_xla.distributed.parallel_loader as pl

from torch.utils.data import DataLoader, TensorDataset, DistributedSampler
from transformers import BertForPreTraining, BertTokenizer

RANK = int(os.environ.get("RANK", 0))
WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))

def create_dummy_data(tokenizer, num_samples=100, max_length=128):
    """
    Creates dummy BERT pretraining data (MLM + NSP).

    Args:
        tokenizer: tokenizer used to encode the generated sentences and to
            look up the special/mask tokens
        num_samples (int): number of synthetic sentences to generate
        max_length (int): length every sequence is padded/truncated to

    Returns:
        TensorDataset of (masked input_ids, attention_mask, MLM labels,
        next-sentence labels). The next-sentence labels are random 0/1 values.
    """
    print(f"Creating dummy data: {num_samples} samples, max_length={max_length}")
    sentences = [f"This is a dummy sentence number {i}" for i in range(num_samples)]
    encodings = tokenizer(
        sentences,
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Randomly mask some tokens for MLM; mask_tokens also produces the labels.
    mlm_probability = 0.15
    input_ids, labels = mask_tokens(encodings.input_ids, tokenizer, mlm_probability)

    # Dummy next-sentence prediction labels
    next_sentence_labels = torch.randint(0, 2, (num_samples,))

    return TensorDataset(input_ids, encodings.attention_mask, labels, next_sentence_labels)


def mask_tokens(inputs, tokenizer, mlm_probability):
    """
    Randomly mask tokens for MLM. Unmasked tokens => label = -100
    so we don't compute loss on them.

    Args:
        inputs (torch.Tensor): token ids; left unmodified
        tokenizer: provides get_special_tokens_mask, convert_tokens_to_ids
            and mask_token
        mlm_probability (float): per-token probability of being masked;
            special tokens are never masked

    Returns:
        tuple: (masked_inputs, labels) — a copy of ``inputs`` with the selected
        positions replaced by the mask token id, and a copy holding the original
        id at those positions and -100 everywhere else
    """
    labels = inputs.clone()
    # Work on a copy so the caller's tensor is not modified behind its back.
    masked_inputs = inputs.clone()

    probability_matrix = torch.full(labels.shape, mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True)
        for val in labels.tolist()
    ]
    # Probability 0 for special tokens means they can never be selected.
    probability_matrix.masked_fill_(
        torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0
    )
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100
    masked_inputs[masked_indices] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    return masked_inputs, labels

def complete_epoch(epoch, optimizer, parallel_loader, model):
    """
    Runs one full pass over ``parallel_loader``.

    For every batch the gradients are cleared, the model is run with both MLM
    and next-sentence labels, the loss is back-propagated and the optimizer is
    stepped through ``xm.optimizer_step``. The loss is printed every 10th step.

    Args:
        epoch: epoch number, used only in the progress message
        optimizer: optimizer to zero and step
        parallel_loader: iterable yielding
            (input_ids, attention_mask, mlm_labels, next_sentence_labels)
        model: model returning an object with a ``loss`` attribute
    """
    step_idx = 0
    for batch in parallel_loader:
        step_idx += 1
        optimizer.zero_grad()
        ids, mask, mlm_targets, nsp_targets = batch

        loss = model(
            input_ids=ids,
            attention_mask=mask,
            labels=mlm_targets,
            next_sentence_label=nsp_targets,
        ).loss
        loss.backward()

        xm.optimizer_step(optimizer)

        if step_idx % 10:
            continue
        print(f"[Rank {RANK}] - Epoch {epoch}, Step {step_idx}, Loss={loss.item():.4f}")

def main():
    """
    Entry point: trains BertForPreTraining on dummy data on an XLA device.

    Initializes the "xla" process group, builds a 1000-sample dummy dataset,
    shards it across ranks using the RANK / WORLD_SIZE environment values,
    runs two untimed warmup passes followed by 5 timed epochs, and prints
    per-rank timing and throughput figures.
    """
    dist.init_process_group(
        "xla",
        init_method="xla://"
    )

    # print info with xla runtime functions to sanity check run context correctly propagates to backend
    print(f"Starting train.py with rank={xr.global_ordinal()}, world_size={xr.world_size()}")

    # Seed everything for reproducibility
    SEED = 42
    random.seed(SEED)
    torch.manual_seed(SEED)

    device = xm.xla_device()

    # Preload model + tokenizer
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForPreTraining.from_pretrained("bert-base-uncased")
    print(f"[Rank {RANK}]: Model & tokenizer loaded.")

    # Create dummy dataset
    dataset = create_dummy_data(tokenizer, num_samples=1000, max_length=128)

    # Shard dataset for each RANK
    sampler = DistributedSampler(
        dataset,
        num_replicas=WORLD_SIZE,
        rank=RANK,
        shuffle=True,
        drop_last=False,
    )
    train_loader = DataLoader(dataset, batch_size=512, sampler=sampler)

    # XLA parallel data loader
    parallel_loader = pl.MpDeviceLoader(train_loader, device)

    # Move model to XLA device
    model = model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

    # Let's do 5 epochs
    epochs = 5

    model.train()

    # TODO: precompile the model. This warmup is arbitrary based on observed behavior
    # neuronx-cc seems to recompile for the first 2 runs for some reason tbd
    print(f"[Rank {RANK}] - Starting warmup (2 repetitions of epoch 0)")
    warmup_start = time.time()
    complete_epoch(0, optimizer, parallel_loader, model)
    complete_epoch(0, optimizer, parallel_loader, model)
    warmup_time = time.time() - warmup_start
    print(f"[Rank {RANK}] - Finished warmup in {warmup_time:.2f}s")

    print(f"[Rank {RANK}] - Starting training for {epochs} epochs...")

    start_time = time.time()
    epoch_times = []

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        print(f"[Rank {RANK}] - Epoch {epoch}/{epochs}")

        # With shuffle=True the sampler derives its permutation from the epoch
        # number; without this call every epoch would see the same order.
        sampler.set_epoch(epoch)
        complete_epoch(epoch, optimizer, parallel_loader, model)

        epoch_time = time.time() - epoch_start_time
        epoch_times.append(epoch_time)

        print(f"[Rank {RANK}] - Epoch {epoch} done in {epoch_time:.2f}s")

    # Total training time
    total_time = time.time() - start_time
    print(f"[Rank {RANK}] - All epochs complete in {total_time:.2f}s")

    # Each rank processes (dataset_size / WORLD_SIZE) * epochs samples
    local_samples = (len(dataset) / WORLD_SIZE) * epochs
    local_throughput = local_samples / total_time

    # Average epoch time (local)
    if epoch_times:
        avg_epoch_time = sum(epoch_times) / len(epoch_times)
    else:
        avg_epoch_time = 0.0

    print(
        f"[Rank {RANK}] - local_samples={local_samples:.1f}, total_time={total_time:.2f}s, "
        f"local_throughput={local_throughput:.2f} samples/s, local_avg_epoch_time={avg_epoch_time:.2f}s"
    )

    print(f"[Rank {RANK}] training complete. Exiting main().")

if __name__ == "__main__":
    main()


================================================
FILE: test/images/nvidia/Dockerfile
================================================
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=8

# Start with the NVIDIA CUDA base image
FROM nvidia/cuda:$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.0-devel-ubuntu22.04

ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION

ENV DEBIAN_FRONTEND=noninteractive

# Install necessary dependencies.
# Upgrades the base image, removes the listed InfiniBand/NCCL packages and the
# HPC-X, MPI and UCX directories (presumably so the EFA, NCCL and aws-ofi-nccl
# builds further down are the ones picked up — confirm), then installs build
# tooling, the SSH client/server and the DCGM packages for CUDA 12 and CUDA 13.
RUN apt update -y \
 && apt upgrade -y \
 && apt remove -y --allow-change-held-packages \
      libmlx5-1 \
      ibverbs-utils \
      libibverbs-dev \
      libibverbs1 \
      libnccl2 \
      libnccl-dev \
 && rm -rf /opt/hpcx \
 && rm -rf /usr/local/mpi \
 && rm -rf /usr/local/ucx \
 && rm -f /etc/ld.so.conf.d/hpcx.conf \
 && apt install -y \
      git \
      gcc \
      openssh-client \
      openssh-server \
      build-essential \
      curl \
      autoconf \
      libtool \
      automake \
      cmake \
      apt-utils \
      libhwloc-dev \
      freeglut3-dev \
      libglu1-mesa-dev \
      datacenter-gpu-manager-4-cuda12 \
      datacenter-gpu-manager-4-cuda13

RUN ldconfig

ENV LD_LIBRARY_PATH /opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/opt/aws-ofi-nccl/install/lib:/usr/local/cuda/lib:/usr/local/lib/:/usr/lib64:/usr/lib/x86_64-linux-gnu/:/usr/lib/aarch64-linux-gnu/:$LD_LIBRARY_PATH
ENV PATH /usr/local/cuda/bin:/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/sbin:/usr/bin:/usr/local/bin:$PATH

# SSH setup: create the /var/run/sshd runtime directory, set
# StrictHostKeyChecking to "no" and point UserKnownHostsFile at /dev/null in the
# client config, and uncomment StrictModes and set it to "no" in the server config.
RUN mkdir -p /var/run/sshd \
 && sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config \
 && echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config \
 && sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config

# Build CUDA Samples.
# Clones cuda-samples at the branch matching the CUDA major.minor version,
# builds only the vectorAdd and deviceQuery samples, copies the two binaries
# into /usr/bin and deletes the checkout.
RUN git clone https://github.com/NVIDIA/cuda-samples.git /tmp/cuda-samples \
      --branch v$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
 && cd /tmp/cuda-samples/Samples/0_Introduction/vectorAdd && cmake . && make -j$(nproc) && cp vectorAdd /usr/bin \
 && cd /tmp/cuda-samples/Samples/1_Utilities/deviceQuery && cmake . && make -j$(nproc) && cp deviceQuery /usr/bin \
 && cd && rm -rf /tmp/cuda-samples

# Install EFA.
# The installer tarball for EFA_INSTALLER_VERSION (default "latest") is streamed
# straight into tar and run with the kernel module and limits configuration
# skipped, using the openmpi4 MPI option; the extracted tree is removed afterwards.
# NOTE(review): unlike a download-then-verify flow, no signature check is
# performed on the tarball here — confirm this is acceptable.
ARG EFA_INSTALLER_VERSION=latest
RUN curl -sL https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_INSTALLER_VERSION.tar.gz | tar xvz -C /tmp \
 && cd /tmp/aws-efa-installer \
 && ./efa_installer.sh --yes --enable-gdr --skip-kmod --skip-limit-conf --no-verify --mpi openmpi4 \
 && cd && rm -rf /tmp/aws-efa-installer

# Build nvbandwidth.
# Installs the Boost program_options headers it needs, builds the pinned
# release with multinode support, copies the binary into /usr/bin and removes
# the nvbandwidth checkout so it does not stay in the image.
ARG NVBANDWIDTH_VERSION=v0.8
RUN apt install -y libboost-program-options-dev
RUN git clone https://github.com/NVIDIA/nvbandwidth.git --branch $NVBANDWIDTH_VERSION /tmp/nvbandwidth \
 && cd /tmp/nvbandwidth \
 && cmake -DMULTINODE=1 . && make && cp nvbandwidth /usr/bin \
 && cd && rm -rf /tmp/nvbandwidth

# Install NCCL.
# Builds the pinned NCCL release from source with all available cores, runs
# `make install`, and deletes the checkout.
ARG LIBNCCL_VERSION=2.28.7-1
RUN git clone https://github.com/NVIDIA/nccl.git --branch v$LIBNCCL_VERSION /tmp/nccl \
 && cd /tmp/nccl \
 && make -j $(nproc) \
 && make install \
 && cd && rm -rf /tmp/nccl

# Install AWS-OFI-NCCL plugin.
# Downloads the pinned release tarball, configures it against the Open MPI and
# libfabric trees under /opt/amazon and the CUDA toolkit under /usr/local/cuda,
# installs into /opt/aws-ofi-nccl/install and removes the source tree.
ARG AWS_OFI_NCCL_VERSION=1.17.2
RUN curl -sL https://github.com/aws/aws-ofi-nccl/releases/download/v$AWS_OFI_NCCL_VERSION/aws-ofi-nccl-$AWS_OFI_NCCL_VERSION.tar.gz | tar xvz -C /tmp \
 && cd /tmp/aws-ofi-nccl-$AWS_OFI_NCCL_VERSION \
 && ./configure \
      --prefix=/opt/aws-ofi-nccl/install \
      --with-mpi=/opt/amazon/openmpi \
      --with-libfabric=/opt/amazon/efa \
      --with-cuda=/usr/local/cuda \
      --enable-platform-aws \
      --disable-tests \
 && make -j $(nproc) \
 && make install \
 && cd && rm -rf /tmp/aws-ofi-nccl-$AWS_OFI_NCCL_VERSION

# Install NCCL Tests.
# Clones the default branch of nccl-tests (no version pin), builds it with MPI
# support and moves the build output to /opt/nccl-tests/build.
# TODO: automate pin with version bump
# NOTE(review): NCCL_HOME points at /usr/local/lib rather than an install
# prefix — confirm the nccl-tests Makefile resolves headers/libs as intended.
RUN git clone https://github.com/NVIDIA/nccl-tests /tmp/nccl-tests \
 && cd /tmp/nccl-tests \
 && make \
      MPI=1 \
      MPI_HOME=/opt/amazon/openmpi/ \
      CUDA_HOME=/usr/local/cuda \
      NCCL_HOME=/usr/local/lib \
 && mkdir -p /opt/nccl-tests \
 && mv build /opt/nccl-tests/build \
 && cd && rm -rf /tmp/nccl-tests

# Set a default command for debugging or modify as per requirements
ENV NCCL_PROTO simple
# see https://linux.die.net/man/8/ld.so for usage. replaces LD_PRELOAD env.
RUN echo "/usr/local/lib/libnccl.so" >> /etc/ld.so.preload

RUN rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY test/images/nvidia/gpu_unit_tests ./gpu_unit_tests
RUN chmod +x ./gpu_unit_tests/unit_test


================================================
FILE: test/images/nvidia/gpu_unit_tests/README.md
================================================
# What

gpu_unit_tests is a set of unit tests for GPU-enabled platforms. The idea is to provide a compact
set of tests that covers most of the performance-critical aspects of GPU
platforms. The tests are designed to run on a single instance.
# Usage

```
# Run tests
./unit_test
```

**Generate test data for new instance type**

Step 1: Copy the `gpu_unit_tests` folder to the EC2 instance where you want to generate the data.

Step 2:  Execute the following command in the `gpu_unit_tests` directory on the EC2 instance:
```
GENERATE_DATA=1 ./unit_test
```
Step 3:
Copy the files from `tests/test_sysinfo.sh.data` (e.g., `tests/test_sysinfo.sh.data/p3.2xlarge`) to your local repository.

Step 4:
Create PR with the new `tests/test_sysinfo.sh.data/xxx`

# Test list

-  test_sysinfo.sh :: Validate basic system configuration by comparing it with test config
  - test_numa_topo_topo :: check cpu/numa topology
  - test_nvidia_gpu_count :: fail if one of the GPUs is broken or is not visible
  - test_nvidia_fabric_status :: fail if fabric manager is not active
  - test_nvidia_smi_topo :: fail if the nvidia-smi topology differs
  - test_nvidia_persistence_status :: validate persistence state
  - test_nvidia_gpu_unused :: check that no other processes are using the GPUs; a failure signals a system misconfiguration.


- 10_test_basic_cuda.sh :: Execute trivial CUDA binaries; fail if the CUDA subsystem is not healthy.
  Uses demo-suite binaries https://docs.nvidia.com/cuda/demo-suite/index.html and DCGM Diagnostics https://docs.nvidia.com/datacenter/dcgm/latest/user-guide/dcgm-diagnostics.html#run-levels-and-tests
  If this test suite fails, it is a sign that the CUDA subsystem is not usable at all.
  Usually this is a side effect of system misconfiguration (the driver or fabric manager is not loaded).
  - test_01_device_query
  - test_02_vector_add
  - test_03_nvbandwidth
  - test_04_dcgm_diagnostics


================================================
FILE: test/images/nvidia/gpu_unit_tests/bash_unit
================================================
#!/usr/bin/env bash
#
#   bash unit testing enterprise edition framework for professionals
#   Copyright (C) 2011-2016 Pascal Grange
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 3 of the License, or
#   (at your option) any later version.
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software Foundation,
#   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
#
#  https://github.com/pgrange/bash_unit

VERSION=v2.1.0

ESCAPE=$(printf "\033")
NOCOLOR="${ESCAPE}[0m"
RED="${ESCAPE}[91m"
GREEN="${ESCAPE}[92m"
YELLOW="${ESCAPE}[93m"
BLUE="${ESCAPE}[94m"

# Make bash_unit immune to some basic unix commands faking
# Each variable holds the path `which` reports for the tool when this file is
# loaded; functions in this file call "$SED", "$RM", ... rather than the bare
# command name (presumably so a test that defines its own `sed`/`rm` function
# cannot interfere with the framework — confirm).
CAT="$(which cat)"
SED="$(which sed)"
GREP="$(which grep)"
RM="$(which rm)"
SHUF="$(which shuf)"

# fail [message] [stdout_file] [stderr_file]
# Reports the current test as failed, forwards the captured stdout/stderr files
# to the notifier when they are given and non-empty, prints the stack trace and
# exits the current (sub)shell with status 1.
fail() {
  local message=${1:-}
  local stdout=${2:-}
  local stderr=${3:-}

  notify_test_failed "$__bash_unit_current_test__" "$message"

  if [[ -n $stdout && -s $stdout ]]
  then
    notify_stdout < "$stdout"
  fi
  if [[ -n $stderr && -s $stderr ]]
  then
    notify_stderr < "$stderr"
  fi

  stacktrace | notify_stack
  exit 1
}

# skip [message]
# Reports the current test as skipped, records the reason in the file named by
# $__bash_unit_test_skipped__ and exits the current (sub)shell with status 0.
skip() {
  local message=${1:-}
  notify_test_skipped "$__bash_unit_current_test__" "$message"
  # Quoted so a marker path containing whitespace is not an ambiguous redirect.
  echo "skipped $message" > "$__bash_unit_test_skipped__"
  exit 0
}

# _notify_trace caller_shift message [stdout_file] [stderr_file]
# Appends a "trace:<source>:<line>>  <message>" record to $trace_file, followed
# by the contents of the optional stdout/stderr capture files (each line
# prefixed with "trace-out> " / "trace-err> "). Does nothing when $trace_file
# is empty. caller_shift selects how many extra frames up the call stack the
# reported source location is taken from.
_notify_trace() {
  local caller_shift=$1
  local message=${2}
  local stdout=${3:-}
  local stderr=${4:-}

  [ -z "${trace_file:-}" ] && return

  local caller_hdr=""
  local cl=$((caller_shift + 2))

  # Quoted: an unquoted empty expansion would turn this into `[ -n ]`, which
  # is always true.
  if [ -n "${BASH_SOURCE[$cl]:-}" ]
  then
      caller_hdr="${BASH_SOURCE[$cl]}:${BASH_LINENO[$((cl-1))]}"
  fi
  echo "trace:${caller_hdr}>  $message"  >> "$trace_file"
  [[ -n $stdout ]] && [ -s "$stdout" ] && "$SED" 's:^:trace-out> :' < "$stdout" >> "$trace_file"
  [[ -n $stderr ]] && [ -s "$stderr" ] && "$SED" 's:^:trace-err> :' < "$stderr" >> "$trace_file"
}

# notify_trace_dbg message
# Writes a trace record for `message` through _notify_trace with a caller
# shift of 0. The reported source location depends on call depth, so this
# must stay a direct call.
notify_trace_dbg() {
    _notify_trace 0 "$1"
}

# notify_trace_info [message]
# Appends an "info> <message>" line to $trace_file; does nothing when
# $trace_file is empty.
notify_trace_info() {
  [ -z "${trace_file:-}" ] && return

  local message=${1:-}
  # Quoted so a trace path containing whitespace is not an ambiguous redirect.
  echo "info> $message"  >> "$trace_file"
}

# Assert that a piece of shell code exits with status 0.
#   $1 - shell code to evaluate
#   $2 - message reported on failure (optional)
assert() {
  local assertion="$1" message="${2:-}"

  _assert_expression "$assertion" '[ $status == 0 ]' "\"${message}\""
}

# Assert that a piece of shell code exits with a non-zero status.
#   $1 - shell code to evaluate
#   $2 - message reported on failure (optional)
assert_fails() {
  local assertion="$1" message="${2:-}"

  _assert_expression "$assertion" '[ $status != 0 ]' "\"${message}\""
}

# Deprecated alias kept for existing test files: forwards every argument
# unchanged to assert_fails.
assert_fail() {
  #deprecated, use assert_fails instead
  assert_fails "$@"
}

# Assert that a piece of shell code exits with one specific status.
#   $1 - expected exit status
#   $2 - shell code to evaluate
#   $3 - message reported on failure (optional)
# The failure message also reports the status that was actually observed.
assert_status_code() {
  local expected_status="$1" assertion="$2" message="${3:-}"

  _assert_expression \
    "$assertion" \
    '[ $status == '"$expected_status"' ]' \
    "\"${message}\" expected status code ${expected_status} but was "'$status'
}

# Evaluate an assertion in a subshell and fail the test when the condition on
# its exit status does not hold.
#   $1 - shell code to evaluate; its stdout and stderr go to temporary files
#   $2 - shell condition evaluated afterwards; it can refer to $status
#   $3 - message; it is expanded with eval, so it can refer to $status too
# When the subshell ends with a non-zero status the caller exits with it.
_assert_expression() {
  local assertion=$1
  local condition=$2
  local message=$3
  (
    local stdout=$(mktemp)
    local stderr=$(mktemp)
    # Remove both capture files whenever this subshell exits.
    trap "$RM  -f \"$stdout\" \"$stderr\"" EXIT

    local status
    # Record the exit status whether the assertion succeeded or not.
    eval "($assertion)" >"$stdout" 2>"$stderr" && status=$? || status=$?
    _notify_trace 1 "assert_expression:  exp: '$assertion', cond: '$condition', status: '$status'" "$stdout" "$stderr"

    if ! eval "$condition"
    then
      fail "$(eval echo $message)" "$stdout" "$stderr"
    fi
  ) || exit $?
}

# Assert that two strings are identical.
#   $1 - expected value
#   $2 - actual value
#   $3 - message put in front of the failure report (optional)
assert_equals() {
  local expected="$1" actual="$2" message="${3:-}"
  if [[ -n $message ]]; then
    message="${message}\n"
  fi

  notify_trace_dbg "assert_equals '$expected' == '$actual'"
  if [[ "$expected" == "$actual" ]]; then
    return 0
  fi
  fail "$message expected [$expected] but was [$actual]"
}

# Assert that two strings differ.
#   $1 - value that must not be observed
#   $2 - actual value
#   $3 - message put in front of the failure report (optional)
assert_not_equals() {
  local unexpected="$1" actual="$2" message="${3:-}"
  if [[ -n $message ]]; then
    message="${message}\n"
  fi

  notify_trace_dbg "assert_not_equals: '$unexpected' != '$actual'"
  if [ "$unexpected" = "$actual" ]
  then
    fail "$message expected different value than [$unexpected] but was the same"
  fi
}

# Assert that a string matches a regular expression (bash `=~` operator).
#   $1 - regular expression
#   $2 - actual value
#   $3 - message put in front of the failure report (optional)
assert_matches() {
  local expected="$1" actual="$2" message="${3:-}"
  if [[ -n $message ]]; then
    message="${message}\n"
  fi

  notify_trace_dbg "assert_matches: '$actual' =~ '$expected'"
  # The failure is reported only when the negated match test itself succeeds.
  [[ ! "$actual" =~ $expected ]] || return 0
  fail "$message expected regex [$expected] to match [$actual]"
}

# Assert that a string does not match a regular expression (bash `=~`).
#   $1 - regular expression that must not match
#   $2 - actual value
#   $3 - message put in front of the failure report (optional)
assert_not_matches() {
  local unexpected="$1" actual="$2" message="${3:-}"
  if [[ -n $message ]]; then
    message="${message}\n"
  fi

  _notify_trace 0 "assert_not_matches: ! '$actual' =~ '$unexpected'"
  [[ "$actual" =~ $unexpected ]] || return 0
  fail "$message expected regex [$unexpected] should not match but matched [$actual]"
}

# Assert that two integers differ by at most a given amount.
#   $1 - expected value
#   $2 - actual value
#   $3 - maximum accepted absolute difference
#   $4 - message put in front of the failure report (optional)
# All three numeric arguments are first checked to be integers.
assert_within_delta() {
  # Print the absolute value of the integer given as $1.
  function abs() {
    local value=$1
    local sign=$(( value < 0 ? -1 : 1 ))
    echo $((value * sign))
  }
  # Succeed only when $1 is something `test -eq` accepts as an integer.
  function is_number() {
    local value=$1
    test $value -eq $value 2>/dev/null
  }
  local expected=$1
  local actual=$2
  local max_delta=$3
  # Declared before the argument checks: they interpolate $message, which
  # otherwise would be empty or picked up from an enclosing scope.
  local message=${4:-}
  [[ -z $message ]] || message="$message\n"

  assert "is_number $expected" "$message expected value [$expected] is not a number"
  assert "is_number $actual" "$message actual value [$actual] is not a number"
  assert "is_number $max_delta" "$message max_delta [$max_delta] is not a number"

  local actual_delta="$(abs $(($expected - $actual)))"

  if (( $actual_delta > $max_delta )); then
    fail "$message expected value [$expected] to match [$actual] with a maximum delta of [$max_delta]"
  fi
}

# Assert that `diff` reports no difference between two files.
#   $1 - path of the expected file
#   $2 - path of the actual file
#   $3 - message put in front of the failure report (optional)
assert_no_diff() {
  local expected=$1
  local actual=$2
  local message=${3:-}
  [[ -z $message ]] || message="$message\n"

  # The whole failure message is one quoted word so that paths containing
  # whitespace cannot split it into several arguments.
  assert 'diff '"${expected}"' '"${actual}"  \
         "$message expected '${actual}' to be identical to '${expected}' but was different"
}

# Replace a command with a generated shell function of the same name.
#   $1   - name of the command to fake
#   $2.. - optional code used as the body of the generated function
# With a body: the generated function stores its own arguments in FAKE_PARAMS
# and then runs the body. Without a body: whatever can be read from stdin at
# the time fake is called becomes the fixed output of the generated function.
# The function is marked for export with `export -f`.
fake() {
  local command=$1
  shift
  if [ $# -gt 0 ]
  then
    eval "function $command() { export FAKE_PARAMS=(\"\$@\") ; $@ ; }"
  else
    # $($CAT) is expanded right here, so stdin is consumed at definition time.
    eval "function $command() { echo \"$($CAT)\" ; }"
  fi
  export -f $command
}

# Print one "source:line:function()" entry per stack frame above this
# function, leaving out the entries that start with this script's own path.
stacktrace() {
  local depth
  for (( depth = 1; ; depth++ ))
  do
    # Stop at the first frame that has no source file recorded.
    [ -n "${BASH_SOURCE[$depth]:-}" ] || break
    echo ${BASH_SOURCE[$depth]}:${BASH_LINENO[$((depth-1))]}:${FUNCNAME[$depth]}\(\)
  done | "$GREP" -v "^$BASH_SOURCE"
}

# Run one suite: setup_suite, then the tests, then teardown_suite.
# The tests only run when the suite setup succeeded. The suite teardown runs
# in both cases. Returns the status of the failing step, or 0.
run_test_suite() {
  local failure=0

  if run_setup_suite
  then
    run_tests || failure=$?
  else
    # Still the status of run_setup_suite at this point.
    failure=$?
  fi
  run_teardown_suite

  return $failure
}

# Call setup_suite when a function of that name is defined; otherwise do
# nothing and succeed.
run_setup_suite() {
  if declare -F setup_suite >/dev/null
  then
    setup_suite
  fi
}

# Filter stdin to stdout: shuffled when random order was requested
# ($randomise is non-zero), unchanged otherwise.
# When shuffling is requested but no shuf binary was found, the lines are
# passed through in their original order (with a warning on stderr) instead
# of being dropped, which would silently run no test at all.
maybe_shuffle() {
  if ((randomise)) && [ -n "${SHUF:-}" ]
  then
    "$SHUF"
  else
    if ((randomise))
    then
      echo "shuf not found, running tests in declaration order" >&2
    fi
    "$CAT"
  fi
}

# Run every test of the sourced test file.
# Functions whose name starts with "pending" or "todo" are only reported as
# pending. Functions whose name starts with "test" are each run in their own
# subshell, between setup and teardown when those functions exist. Both lists
# are restricted to the names matching $test_pattern.
# Returns non-zero as soon as one test subshell ended with a non-zero status.
run_tests() {
  local failure=0

  for pending_test in $(set | "$GREP"  -E '^(pending|todo).* \(\)' | "$GREP" -E "$test_pattern" | "$SED" -e 's: .*::')
  do
    notify_test_starting "$pending_test"
    notify_test_pending "$pending_test"
  done


  for test in $(set | "$GREP"  -E '^test.* \(\)' | "$GREP" -E "$test_pattern" | "$SED" -e 's: .*::' | maybe_shuffle)
  do
    (
      local status=0
      declare -F | "$GREP" ' setup$' >/dev/null && setup
      # skip() leaves a non-empty marker in this file.
      __bash_unit_test_skipped__=$(mktemp)
      # Remove the marker file when this subshell exits, so that no temporary
      # file is left behind for each test that was run.
      trap "$RM  -f \"$__bash_unit_test_skipped__\"" EXIT
      if [[ -n "$skip_pattern" && ("$test" =~ $skip_pattern) ]]; then
        skip "$test as specified in skip pattern: $skip_pattern"
      fi
      (__bash_unit_current_test__="$test" run_test) || status=$?
      # A skipped test counts as a success whatever its status was.
      test -s $__bash_unit_test_skipped__ && status=0
      declare -F | "$GREP" ' teardown$' >/dev/null && teardown
      exit $status
    )
    failure=$(( $? || failure))
  done
  return $failure
}

# Run the test function named by $__bash_unit_current_test__.
# errexit is switched on first. Success is only notified when the test
# function returned 0.
run_test() {
  set -e
  notify_test_starting "$__bash_unit_current_test__"
  "$__bash_unit_current_test__" && notify_test_succeeded "$__bash_unit_current_test__"
}

# Call teardown_suite when a function of that name is defined; otherwise do
# nothing and succeed.
run_teardown_suite() {
  if declare -F teardown_suite >/dev/null
  then
    teardown_suite
  fi
}

# Print an optional error message followed by the command line help on
# stderr, then exit with status 1.
#   $1 - error message shown first (optional)
usage() {
  echo "${1:-}" >&2
  echo "$0 [-f <output format>] [-p <pattern1>] [-p <pattern2>] [-s <skip_pattern>] [-t <trace_file>] [-r] ... <test_file1> <test_file2> ..." >&2
  echo >&2
  echo "Runs tests in test files that match <pattern>s" >&2
  echo "Skip tests in test files that match <skip_pattern>s" >&2
  echo "<output format> is optional only supported value is tap" >&2
  echo "-t to write trace information to <trace_file>" >&2
  echo "-r to execute test cases in random order" >&2
  echo "-v to get current version information" >&2
  echo "See https://github.com/pgrange/bash_unit" >&2
  exit 1
}

# Formatting

# Format stdin as a success: green, followed by a check mark.
#   $1 - symbol used instead when the output is not a UTF-8 terminal (optional)
pretty_success() {
  local alt_symbol="${1:-}"
  pretty_format "$GREEN" "\u2713" "$alt_symbol"
}

# Format stdin as a warning: yellow, followed by a cross mark.
#   $1 - symbol used instead when the output is not a UTF-8 terminal (optional)
pretty_warning() {
  # The argument is optional, as for pretty_success and pretty_failure; the
  # default keeps the call working when a test file enables `set -u`.
  pretty_format "$YELLOW" "\u2717" "${1:-}"
}

# Format stdin as a failure: red, followed by a cross mark.
#   $1 - symbol used instead when the output is not a UTF-8 terminal (optional)
pretty_failure() {
  local alt_symbol="${1:-}"
  pretty_format "$RED" "\u2717" "$alt_symbol"
}

# Copy stdin to stdout in a color, followed by a status symbol.
#   $1 - color escape sequence handed to color
#   $2 - symbol appended when writing to a terminal whose $LANG mentions UTF-8
#   $3 - symbol appended otherwise; nothing is appended when it is empty
pretty_format() {
  local color="$1" pretty_symbol="$2" alt_symbol="${3:-}"
  local term_utf8=false

  if is_terminal && [[ "${LANG:-}" == *UTF-8* ]]; then
    term_utf8=true
  fi

  {
    $CAT
    if [[ $term_utf8 == true ]]; then
      echo -en " $pretty_symbol "
    elif [[ -n "$alt_symbol" ]]; then
      echo -en " $alt_symbol "
    fi
  } | color "$color"
}

# Print text wrapped in a color escape sequence.
#   $1   - color escape sequence
#   $2.. - text to print; stdin is copied instead when no text is given
# The escape sequences are only emitted when is_terminal succeeds.
color() {
  local color="$1"
  shift

  _start_color() {
    is_terminal || return 0
    echo -en "$color"
  }
  _stop_color() {
    is_terminal || return 0
    echo -en "$NOCOLOR"
  }

  _start_color
  case $# in
    0) $CAT ;;
    *) echo $* ;;
  esac
  _stop_color
}

# Succeed when stdout is a terminal, or when FORCE_COLOR is set to "true".
is_terminal() {
  if [[ -t 1 ]]; then
    return 0
  fi
  [[ "${FORCE_COLOR:-}" == true ]]
}

# Record in the trace that the tests of a file are about to run.
#   $1 - path of the test file
trace_suite_starting() {
    notify_trace_info "Running tests in $1"
}
# Record in the trace that a test is about to run.
#   $1 - name of the test
trace_test_starting() {
    notify_trace_info "Running $1"
}
# Record in the trace that a test is pending.
#   $1 - name of the test
trace_test_pending() {
    notify_trace_info "Pending $1"
}

# Record in the trace that a test was skipped.
#   $1 - name of the test
#   $2 - reason given for skipping
trace_test_skipped() {
    notify_trace_info "Skip $1 message: $2"
}

# Record in the trace that a test succeeded.
#   $1 - name of the test
trace_test_succeeded() {
    notify_trace_info "Success $1"
}
# Record in the trace the message a test failed with.
#   $1 - name of the test
#   $2 - failure message
trace_test_failed() {
    notify_trace_info "$1 with message: $2"
}
# Record the successful overall result in the trace.
trace_suites_succeded() {
    local overall="Overall result: SUCCESS"
    notify_trace_info "$overall"
}
# Record the failed overall result in the trace.
trace_suites_failed() {
    local overall="Overall result: FAILURE"
    notify_trace_info "$overall"
}

# Install the notify_* reporting functions for the human readable output
# format. Each of them also records the event in the trace through the
# matching trace_* function, when there is one.
text_format() {
  notify_suite_starting() {
    local test_file="$1"
    # Quoted so that a path containing whitespace is traced as a whole.
    trace_suite_starting "$test_file"
    echo "Running tests in $test_file"
  }
  notify_test_starting() {
    local test="$1"
    trace_test_starting "$test"
    echo -e -n "\tRunning $test ... " | color "$BLUE"
  }
  notify_test_pending() {
    local test="$1"
    trace_test_pending "$test"
    echo -n "PENDING" | pretty_warning
    echo
  }
  notify_test_skipped() {
    local test="$1"
    local message="$2"
    trace_test_skipped "$test" "$message"
    echo -n "SKIPPED" | pretty_warning
    # %b still expands the "\n" sequences found in messages, but the message
    # is no longer the format string, so a "%" in it is printed verbatim.
    [[ -z $message  ]] || printf -- '%b\n' "$message"
    echo
  }

  notify_test_succeeded() {
    local test="$1"
    trace_test_succeeded "$test"
    echo -n "SUCCESS" | pretty_success
    echo
  }
  notify_test_failed() {
    local test="$1"
    local message="$2"
    trace_test_failed "$test" "$message"
    echo -n "FAILURE" | pretty_failure
    echo
    # See notify_test_skipped for the reason behind %b.
    [[ -z $message  ]] || printf -- '%b\n' "$message"
  }
  notify_stdout() {
    "$SED" 's:^:out> :' | color "$GREEN"
  }
  notify_stderr() {
    "$SED" 's:^:err> :' | color "$RED"
  }
  notify_stack() {
    color "$YELLOW"
  }
  notify_suites_succeded() {
    trace_suites_succeded
    echo -n "Overall result: SUCCESS" | pretty_success
    echo
  }
  notify_suites_failed() {
    trace_suites_failed
    echo -n "Overall result: FAILURE" | pretty_failure
    echo
  }
}

# Install the notify_* reporting functions for the TAP output format. Each of
# them also records the event in the trace through the matching trace_*
# function, when there is one.
tap_format() {
  notify_suite_starting() {
    local test_file="$1"
    # The file name is handed over so the trace says which suite is starting.
    trace_suite_starting "$test_file"
    echo "# Running tests in $test_file"
  }
  notify_test_starting() {
    trace_test_starting "$1"
  }
  notify_test_pending() {
    local test="$1"
    trace_test_pending "$test"
    echo -n "ok" | pretty_warning -
    echo -n "$test" | color "$BLUE"
    echo " # skip test to be written" | color "$YELLOW"
  }
  notify_test_skipped() {
    local test="$1"
    local message="$2"
    trace_test_skipped "$test" "$message"
    echo -n "ok" | pretty_warning -
    echo -n "$test" | color "$BLUE"
    echo " # skip ${message}" | color "$YELLOW"
  }

  notify_test_succeeded() {
    local test="$1"
    trace_test_succeeded "$test"
    echo -n "ok" | pretty_success -
    echo "$test" | color "$BLUE"
  }
  notify_test_failed() {
    local test="$1"
    local message="$2"
    trace_test_failed "$test" "$message"
    echo -n "not ok" | pretty_failure -
    echo "$test" | color "$BLUE"
    # %b still expands the "\n" sequences found in messages, but the message
    # is no longer the format string, so a "%" in it is printed verbatim.
    [[ -z $message  ]] || printf -- '%b\n' "$message" | "$SED" -u -e 's/^/# /'
  }
  notify_stdout() {
    "$SED" 's:^:# out> :' | color "$GREEN"
  }
  notify_stderr() {
    "$SED" 's:^:# err> :' | color "$RED"
  }
  notify_stack() {
    "$SED" 's:^:# :' | color "$YELLOW"
  }
  notify_suites_succeded() {
    trace_suites_succeded
  }
  notify_suites_failed() {
    trace_suites_failed
  }
}

# Defaults; all of them can be changed from the command line.
output_format=text
test_pattern=""
skip_pattern=""
trace_file=""
# -p and -s each build their own "a|b|c" alternation. One separator per list
# is needed: with a shared one, the first pattern of the second list would
# start with "|", and such an empty alternative matches every test name.
test_separator=""
skip_separator=""
randomise=0
# -r is a plain flag (no colon): it must not swallow the next argument.
while getopts "vp:t:f:rs:" option
do
  case "$option" in
    p)
      test_pattern="${test_pattern}${test_separator}${OPTARG}"
      test_separator="|"
      ;;
    s)
      skip_pattern="${skip_pattern}${skip_separator}${OPTARG}"
      skip_separator="|"
      ;;
    t)
      # The trace file is emptied up front; records are appended afterwards.
      trace_file="$(realpath "${OPTARG}")"
      truncate -s0 "$trace_file"
      ;;
    f)
      output_format="${OPTARG}"
      ;;
    r)
      randomise=1
      ;;
    v)
      echo "bash_unit $VERSION"
      exit
      ;;
    ?|:)
      usage
      ;;
  esac
done
shift $((OPTIND-1))

# Refuse to start when one of the test files is missing or unreadable.
for test_file in "$@"
do
  test -e "$test_file" || usage "file does not exist: $test_file"
  test -r "$test_file" || usage "can not read file: $test_file"
done

# Install the notify_* functions of the requested output format.
case "$output_format" in
  text)
    text_format
    ;;
  tap)
    tap_format
    ;;
  *)
    usage "unsupported output format: $output_format"
    ;;
esac

#run tests received as parameters
failure=0
for test_file in "$@"
do
  notify_suite_starting "$test_file"
  (
    set -e # Ensure bash_unit will exit with failure
           # in case of syntax error.
    # Unless STICK_TO_CWD is "true", the suite runs from the directory that
    # holds the test file.
    if [[ "${STICK_TO_CWD:-}" != true ]]
    then
      cd "$(dirname "$test_file")"
      source "$(basename "$test_file")"
    else
      source "$test_file"
    fi
    set +e
    run_test_suite
  )
  failure=$(( $? || failure))
done

if ((failure))
then
  notify_suites_failed
else
  notify_suites_succeded
fi

exit $failure


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/common.sh
================================================
#!/bin/bash

# Print the EC2 instance type of the machine the tests run on.
# When FORCE_INSTANCE_TYPE is set, that value is printed and the metadata
# service is not contacted. Otherwise the type is read from the instance
# metadata service, with a session token when one could be obtained and
# without a token otherwise.
get_instance_type()
{
    # Return right away: carrying on would print a second value after the
    # forced one.
    if [ -n "${FORCE_INSTANCE_TYPE:-}" ]; then
        echo "$FORCE_INSTANCE_TYPE"
        return 0
    fi

    # Retrieve instance metadata: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#instance-metadata-retrieval-examples
    local token=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" 2>/dev/null)

    if [ -n "$token" ]; then
        curl -H "X-aws-ec2-metadata-token: $token" http://169.254.169.254/latest/meta-data/instance-type
    else
        curl http://169.254.169.254/latest/meta-data/instance-type
    fi
}

# Fail when nvidia-smi lists any compute process: the tests assume nothing
# else is using the GPU.
assert_gpu_unused()
{
    # A local array keeps the arguments apart without clobbering a `cmd`
    # variable the calling test may be using.
    local -a query=(nvidia-smi --query-compute-apps timestamp,gpu_bus_id,gpu_uuid,pid,name,used_memory --format csv,noheader)
    assert_equals "" "$("${query[@]}")" "gpu is busy by other task, system misconfig?"
}

# Run a command and compare its output with an expected data file.
#   $1 - path of the file holding the expected output
#   $2 - shell code whose stdout is compared; it is saved under
#        $ACTUAL_RESULTS with the same base name as the expected file
#   $3 - message put in front of the failure report (optional)
# The test fails when `diff -up` prints a difference.
# NOTE(review): when the expected file does not exist, diff only writes to
# stderr, so the check passes; confirm that this is intended for instance
# types that have no data directory.
_assert_data()
{
    local expected="$1"
    local cmd="$2"
    local message="${3:-}"
    local cmd_out="$ACTUAL_RESULTS/$(basename "$expected")"
    local diff_out
    [[ -z $message ]] || message="$message\n"

    # Paths are quoted throughout so that directories containing whitespace
    # are compared instead of being split into several words.
    eval "$cmd" > "$cmd_out"
    diff_out="$(diff -up "$expected" "$cmd_out")"

    notify_trace_dbg "_assert_data diff -up $expected $cmd_out, out: $diff_out"
    if [ -n "$diff_out" ]
    then
        fail "$message test data value diff:\n$diff_out"
    fi
}

# Compare the output of a command with an expected data file.
#   $1 - path of the expected data file
#   $2 - shell code producing the actual data
#   $3 - message reported on failure
assert_data() {
    local expected="$1" cmd="$2" message="$3"
    _assert_data "$expected" "$cmd" "$message"
}

# Overwrite an expected data file with the current output of a command, then
# run the regular comparison against the file just written.
#   $1 - path of the expected data file to write
#   $2 - shell code producing the data
#   $3 - message handed to the comparison
generate_data()
{
    local expected="$1"
    local cmd="$2"
    local msg="$3"

    # Quoted: an unquoted target is an "ambiguous redirect" as soon as the
    # path contains whitespace.
    eval "$cmd" > "$expected"
    _assert_data "$expected" "$cmd" "$msg"
}

# Succeed when the instance type belongs to the g6f or gr6f family.
# The type comes from EC2_INSTANCE_TYPE when that variable is set and
# non-empty, and from get_instance_type otherwise.
function is_vgpu()
{
  local instance_type=${EC2_INSTANCE_TYPE:-$(get_instance_type)}
  if [[ "$instance_type" == g6f.* || "$instance_type" == gr6f.* ]]; then
    return 0
  fi
  return 1  # Not supported
}


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_basic.sh
================================================
# Trivial CUDA tests to validate that the GPU is functional
# Use demo-suite binaries https://docs.nvidia.com/cuda/demo-suite/index.html
# and DCGM Diagnostics https://docs.nvidia.com/datacenter/dcgm/latest/user-guide/dcgm-diagnostics.html#run-levels-and-tests

# Suite setup: load the shared helpers, require that no compute process is
# using the GPU, and locate the demo-suite binaries. DEMO_SUITE_DIR is only
# computed when it is not already set.
setup_suite()
{
    source common.sh
    assert_gpu_unused
    DEMO_SUITE_DIR=${DEMO_SUITE_DIR:-$(realpath /usr/local/cuda/extras/demo_suite)}
}

# Suite teardown: check again that no compute process is using the GPU.
teardown_suite()
{
    assert_gpu_unused
}

# The deviceQuery binary of the demo suite must exit with status 0.
test_01_device_query()
{
    assert_status_code 0 "$DEMO_SUITE_DIR/deviceQuery"
}

# The vectorAdd binary of the demo suite must exit with status 0.
test_02_vector_add()
{
    assert_status_code 0 "$DEMO_SUITE_DIR/vectorAdd"
}

# The nvbandwidth binary found in DEMO_SUITE_DIR must exit with status 0.
test_03_nvbandwidth()
{
    assert_status_code 0 "$DEMO_SUITE_DIR/nvbandwidth"
}

# `dcgmi diag -r 2` must exit with status 0; skipped on vGPU instance types.
test_04_dcgm_diagnostics()
{
    # This test is not applicable for vGPU instance types.
    if is_vgpu; then
        skip "This test does not apply to vGPU instances (g6f.*, gr6f.*)"
    fi

    # https://docs.nvidia.com/datacenter/dcgm/latest/user-guide/dcgm-diagnostics.html#run-levels-and-tests
    assert_status_code 0 "dcgmi diag -r 2"
}


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh
================================================
# Validate basic system configuration by comparing with expected config
#
# Suite setup: load the shared helpers, select the expected-data directory of
# the current instance type and create a temporary directory for the actual
# results. When GENERATE_DATA is non-empty, assert_data is redefined so that
# the expected data files get (re)written by generate_data.
setup_suite()
{
    source common.sh

    EC2_INSTANCE_TYPE=${EC2_INSTANCE_TYPE:-$(get_instance_type)}
    data=test_sysinfo.sh.data/$EC2_INSTANCE_TYPE
    ACTUAL_RESULTS=$(mktemp -t -d test_sysinfo.sh.actual-data.XXX)
    assert_not_equals "" "$ACTUAL_RESULTS"
    notify_trace_info "ACTUAL_RESULTS: $ACTUAL_RESULTS"

    # Nothing more to prepare for a regular comparison run.
    if [[ -z "$GENERATE_DATA" ]]; then
        return 0
    fi

    echo "GENERATE_DATA is enabled..."
    mkdir -p $data
    function assert_data() {
        generate_data "$@"
    }
}

# Suite teardown: fail when GENERATE_DATA was non-empty, then check that no
# compute process is using the GPU.
teardown_suite()
{
    assert "test -z \"$GENERATE_DATA\"" "GENERATE_DATA was enabled, fail full suite"
    assert_gpu_unused
}


# The cpulist and distance entries of every NUMA node under /sys must match
# the recorded numa_topo.txt.
test_numa_topo_topo()
{
    assert_data $data/numa_topo.txt "grep . /sys/devices/system/node/node*/{cpulist,distance}" "Unexpected cpu topology"
}

# The name, index and PCI bus id reported for each GPU must match the
# recorded gpu_count.txt.
test_nvidia_gpu_count()
{
    # Just for logging purposes
    assert_status_code 0 "nvidia-smi -q"
    assert_data $data/gpu_count.txt "nvidia-smi --query-gpu=name,index,pci.bus_id --format csv" "Unexpected gpu count"
}


# The GPU lines of `nvidia-smi topo -m`, cut down to their first 11 fields,
# must match the recorded nvidia_smi_topo.txt.
test_nvidia_smi_topo()
{
    assert_data $data/nvidia_smi_topo.txt "nvidia-smi topo -m | grep GPU | cut -f 1-11" \
		"Unexpected gpu topology, likely broken nvlinks"
}


# The persistence mode reported for each GPU must match the recorded
# nvidia_persistence_status.txt.
test_nvidia_persistence_status()
{
    assert_data $data/nvidia_persistence_status.txt "nvidia-smi --query-gpu=name,pci.bus_id,persistence_mode --format=csv" \
		  "Unexpected perfistance status, likely system configuration issue"
}

# No compute process may be using the GPU while the suite runs.
test_nvidia_gpu_unused()
{
    assert_gpu_unused
}

# No GPU may report an active clock throttle reason other than the tolerated
# ones. The test passes when the filter below leaves no line, i.e. when the
# pipeline exits with status 1.
test_nvidia_gpu_throttled()
{

    # vGPU instances don't support GPU clock throttling detection.
    # This test is not applicable for vGPU instance types.
    if is_vgpu; then
        skip "This test does not apply to vGPU instances (g6f.*, gr6f.*)"
    fi
    # https://docs.nvidia.com/deploy/nvml-api/group__nvmlClocksEventReasons.html#group__nvmlClocksEventReasons
    # Tolerated values: 0x0 (no reason active), 0x1 (nvmlClocksEventReasonGpuIdle)
    # and 0x4 (presumably nvmlClocksEventReasonSwPowerCap - confirm against the
    # NVML reference above).
    # `grep -E` replaces the obsolescent `egrep`; both variables are local so
    # that they do not leak into other tests.
    local filter="grep -E -v -e '(0x0000000000000000|0x0000000000000001|0x0000000000000004)'"
    local cmd="nvidia-smi --query-gpu index,gpu_bus_id,gpu_uuid,clocks_throttle_reasons.active --format=csv,noheader"
    assert_status_code 1 "$cmd | $filter" "Throttled gpu detected"
}


# On vGPU instance types, the "vGPU Software" section of `nvidia-smi -q`
# (the matching line and the two lines after it) must match the recorded
# nvidia_vgpu_license_status.txt. Skipped on every other instance type.
test_nvidia_vgpu_license_status()
{
    if ! is_vgpu; then
        skip "This test only applies to vGPU instances (g6f.*, gr6f.*)"
    fi

    assert_data $data/nvidia_vgpu_license_status.txt \
          "nvidia-smi -q | grep 'vGPU Software' -A 2" \
          "vGPU license status validation failed"
}

================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.48xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA A10G, 0, 00000000:00:16.0
NVIDIA A10G, 1, 00000000:00:17.0
NVIDIA A10G, 2, 00000000:00:18.0
NVIDIA A10G, 3, 00000000:00:19.0
NVIDIA A10G, 4, 00000000:00:1A.0
NVIDIA A10G, 5, 00000000:00:1B.0
NVIDIA A10G, 6, 00000000:00:1C.0
NVIDIA A10G, 7, 00000000:00:1D.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.48xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-47,96-143
/sys/devices/system/node/node1/cpulist:48-95,144-191
/sys/devices/system/node/node0/distance:10 32
/sys/devices/system/node/node1/distance:32 10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.48xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA A10G, 00000000:00:16.0, Enabled
NVIDIA A10G, 00000000:00:17.0, Enabled
NVIDIA A10G, 00000000:00:18.0, Enabled
NVIDIA A10G, 00000000:00:19.0, Enabled
NVIDIA A10G, 00000000:00:1A.0, Enabled
NVIDIA A10G, 00000000:00:1B.0, Enabled
NVIDIA A10G, 00000000:00:1C.0, Enabled
NVIDIA A10G, 00000000:00:1D.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.48xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	GPU1	GPU2	GPU3	GPU4	GPU5	GPU6	GPU7	CPU Affinity	NUMA Affinity
GPU0	 X 	PHB	PHB	PHB	PHB	PHB	PHB	PHB	0-191	0-1
GPU1	PHB	 X 	PHB	PHB	PHB	PHB	PHB	PHB	0-191	0-1
GPU2	PHB	PHB	 X 	PHB	PHB	PHB	PHB	PHB	0-191	0-1
GPU3	PHB	PHB	PHB	 X 	PHB	PHB	PHB	PHB	0-191	0-1
GPU4	PHB	PHB	PHB	PHB	 X 	PHB	PHB	PHB	0-191	0-1
GPU5	PHB	PHB	PHB	PHB	PHB	 X 	PHB	PHB	0-191	0-1
GPU6	PHB	PHB	PHB	PHB	PHB	PHB	 X 	PHB	0-191	0-1
GPU7	PHB	PHB	PHB	PHB	PHB	PHB	PHB	 X 	0-191	0-1


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA A10G, 0, 00000000:00:1E.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-31
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA A10G, 00000000:00:1E.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-31	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5g.2xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA T4G, 0, 00000000:00:1F.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5g.2xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-7
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5g.2xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA T4G, 00000000:00:1F.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5g.2xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-7	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.2xlarge/efa_count.txt
================================================
0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.2xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA L4-6Q, 0, 00000000:31:00.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.2xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-7
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.2xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA L4-6Q, 00000000:31:00.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.2xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-7	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.2xlarge/nvidia_vgpu_license_status.txt
================================================
    vGPU Software Licensed Product
        Product Name                      : NVIDIA RTX Virtual Workstation
        License Status                    : Licensed (Expiry: N/A)


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.4xlarge/efa_count.txt
================================================
0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.4xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA L4-12Q, 0, 00000000:35:00.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.4xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-15
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.4xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA L4-12Q, 00000000:35:00.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.4xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-15	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.4xlarge/nvidia_vgpu_license_status.txt
================================================
    vGPU Software Licensed Product
        Product Name                      : NVIDIA RTX Virtual Workstation
        License Status                    : Licensed (Expiry: N/A)


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.large/efa_count.txt
================================================
0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.large/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA L4-3Q, 0, 00000000:31:00.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.large/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-1
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.large/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA L4-3Q, 00000000:31:00.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.large/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-1	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.large/nvidia_vgpu_license_status.txt
================================================
    vGPU Software Licensed Product
        Product Name                      : NVIDIA RTX Virtual Workstation
        License Status                    : Licensed (Expiry: N/A)


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.xlarge/efa_count.txt
================================================
0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA L4-3Q, 0, 00000000:31:00.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-3
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA L4-3Q, 00000000:31:00.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-3	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g6f.xlarge/nvidia_vgpu_license_status.txt
================================================
    vGPU Software Licensed Product
        Product Name                      : NVIDIA RTX Virtual Workstation
        License Status                    : Licensed (Expiry: N/A)


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.16xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
Tesla V100-SXM2-16GB, 0, 00000000:00:17.0
Tesla V100-SXM2-16GB, 1, 00000000:00:18.0
Tesla V100-SXM2-16GB, 2, 00000000:00:19.0
Tesla V100-SXM2-16GB, 3, 00000000:00:1A.0
Tesla V100-SXM2-16GB, 4, 00000000:00:1B.0
Tesla V100-SXM2-16GB, 5, 00000000:00:1C.0
Tesla V100-SXM2-16GB, 6, 00000000:00:1D.0
Tesla V100-SXM2-16GB, 7, 00000000:00:1E.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.16xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-15,32-47
/sys/devices/system/node/node1/cpulist:16-31,48-63
/sys/devices/system/node/node0/distance:10 21
/sys/devices/system/node/node1/distance:21 10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.16xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
Tesla V100-SXM2-16GB, 00000000:00:17.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:18.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:19.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:1A.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:1B.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:1C.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:1D.0, Enabled
Tesla V100-SXM2-16GB, 00000000:00:1E.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.16xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	GPU1	GPU2	GPU3	GPU4	GPU5	GPU6	GPU7	CPU Affinity	NUMA Affinity
GPU0	 X 	NV1	NV1	NV2	NV2	PHB	PHB	PHB	0-63	0-1
GPU1	NV1	 X 	NV2	NV1	PHB	NV2	PHB	PHB	0-63	0-1
GPU2	NV1	NV2	 X 	NV2	PHB	PHB	NV1	PHB	0-63	0-1
GPU3	NV2	NV1	NV2	 X 	PHB	PHB	PHB	NV1	0-63	0-1
GPU4	NV2	PHB	PHB	PHB	 X 	NV1	NV1	NV2	0-63	0-1
GPU5	PHB	NV2	PHB	PHB	NV1	 X 	NV2	NV1	0-63	0-1
GPU6	PHB	PHB	NV1	PHB	NV1	NV2	 X 	NV2	0-63	0-1
GPU7	PHB	PHB	PHB	NV1	NV2	NV1	NV2	 X 	0-63	0-1


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.2xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
Tesla V100-SXM2-16GB, 0, 00000000:00:1E.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.2xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-7
/sys/devices/system/node/node0/distance:10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.2xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
Tesla V100-SXM2-16GB, 00000000:00:1E.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p3.2xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID[0m
GPU0	 X 	0-7	0		N/A


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4d.24xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA A100-SXM4-40GB, 0, 00000000:10:1C.0
NVIDIA A100-SXM4-40GB, 1, 00000000:10:1D.0
NVIDIA A100-SXM4-40GB, 2, 00000000:20:1C.0
NVIDIA A100-SXM4-40GB, 3, 00000000:20:1D.0
NVIDIA A100-SXM4-40GB, 4, 00000000:90:1C.0
NVIDIA A100-SXM4-40GB, 5, 00000000:90:1D.0
NVIDIA A100-SXM4-40GB, 6, 00000000:A0:1C.0
NVIDIA A100-SXM4-40GB, 7, 00000000:A0:1D.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4d.24xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-23,48-71
/sys/devices/system/node/node1/cpulist:24-47,72-95
/sys/devices/system/node/node0/distance:10 21
/sys/devices/system/node/node1/distance:21 10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4d.24xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA A100-SXM4-40GB, 00000000:10:1C.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:10:1D.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:20:1C.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:20:1D.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:90:1C.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:90:1D.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:A0:1C.0, Enabled
NVIDIA A100-SXM4-40GB, 00000000:A0:1D.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4d.24xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	GPU1	GPU2	GPU3	GPU4	GPU5	GPU6	GPU7	CPU Affinity	NUMA Affinity
GPU0	 X 	NV12	NV12	NV12	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU1	NV12	 X 	NV12	NV12	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU2	NV12	NV12	 X 	NV12	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU3	NV12	NV12	NV12	 X 	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU4	NV12	NV12	NV12	NV12	 X 	NV12	NV12	NV12	24-47,72-95	1
GPU5	NV12	NV12	NV12	NV12	NV12	 X 	NV12	NV12	24-47,72-95	1
GPU6	NV12	NV12	NV12	NV12	NV12	NV12	 X 	NV12	24-47,72-95	1
GPU7	NV12	NV12	NV12	NV12	NV12	NV12	NV12	 X 	24-47,72-95	1


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4de.24xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA A100-SXM4-80GB, 0, 00000000:10:1C.0
NVIDIA A100-SXM4-80GB, 1, 00000000:10:1D.0
NVIDIA A100-SXM4-80GB, 2, 00000000:20:1C.0
NVIDIA A100-SXM4-80GB, 3, 00000000:20:1D.0
NVIDIA A100-SXM4-80GB, 4, 00000000:90:1C.0
NVIDIA A100-SXM4-80GB, 5, 00000000:90:1D.0
NVIDIA A100-SXM4-80GB, 6, 00000000:A0:1C.0
NVIDIA A100-SXM4-80GB, 7, 00000000:A0:1D.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4de.24xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-23,48-71
/sys/devices/system/node/node1/cpulist:24-47,72-95
/sys/devices/system/node/node0/distance:10 21
/sys/devices/system/node/node1/distance:21 10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4de.24xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA A100-SXM4-80GB, 00000000:10:1C.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:10:1D.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:20:1C.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:20:1D.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:90:1C.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:90:1D.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:A0:1C.0, Enabled
NVIDIA A100-SXM4-80GB, 00000000:A0:1D.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p4de.24xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	GPU1	GPU2	GPU3	GPU4	GPU5	GPU6	GPU7	CPU Affinity	NUMA Affinity
GPU0	 X 	NV12	NV12	NV12	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU1	NV12	 X 	NV12	NV12	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU2	NV12	NV12	 X 	NV12	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU3	NV12	NV12	NV12	 X 	NV12	NV12	NV12	NV12	0-23,48-71	0
GPU4	NV12	NV12	NV12	NV12	 X 	NV12	NV12	NV12	24-47,72-95	1
GPU5	NV12	NV12	NV12	NV12	NV12	 X 	NV12	NV12	24-47,72-95	1
GPU6	NV12	NV12	NV12	NV12	NV12	NV12	 X 	NV12	24-47,72-95	1
GPU7	NV12	NV12	NV12	NV12	NV12	NV12	NV12	 X 	24-47,72-95	1


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p5.48xlarge/gpu_count.txt
================================================
name, index, pci.bus_id
NVIDIA H100 80GB HBM3, 0, 00000000:53:00.0
NVIDIA H100 80GB HBM3, 1, 00000000:64:00.0
NVIDIA H100 80GB HBM3, 2, 00000000:75:00.0
NVIDIA H100 80GB HBM3, 3, 00000000:86:00.0
NVIDIA H100 80GB HBM3, 4, 00000000:97:00.0
NVIDIA H100 80GB HBM3, 5, 00000000:A8:00.0
NVIDIA H100 80GB HBM3, 6, 00000000:B9:00.0
NVIDIA H100 80GB HBM3, 7, 00000000:CA:00.0


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p5.48xlarge/numa_topo.txt
================================================
/sys/devices/system/node/node0/cpulist:0-47,96-143
/sys/devices/system/node/node1/cpulist:48-95,144-191
/sys/devices/system/node/node0/distance:10 32
/sys/devices/system/node/node1/distance:32 10


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p5.48xlarge/nvidia_persistence_status.txt
================================================
name, pci.bus_id, persistence_mode
NVIDIA H100 80GB HBM3, 00000000:53:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:64:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:75:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:86:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:97:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:A8:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:B9:00.0, Enabled
NVIDIA H100 80GB HBM3, 00000000:CA:00.0, Enabled


================================================
FILE: test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/p5.48xlarge/nvidia_smi_topo.txt
================================================
	[4mGPU0	GPU1	GPU2	GPU3	GPU4	GPU5	GPU6	GPU7	CPU Affinity	NUMA Affinity
GPU0	 X 	NV18	NV18	NV18	NV18	NV18	NV18	NV18	0-47,96-143	0
GPU1	NV18	 X 	NV18	NV18	NV18	NV18	NV18	NV18	0-47,96-143	0
GPU2	NV18	NV18	 X 	NV18	NV18	NV18	NV18	NV18	0-47,96-143	0
GPU3	NV18	NV18	NV18	 X 	NV18	NV18	NV18	NV18	0-47,96-143	0
GPU4	NV18	NV18	NV18	NV18	 X 	NV18	NV18	NV18	48-95,144-191	1
GPU5	NV18	NV18	NV18	NV18	NV18	 X 	NV18	NV18	48-95,144-191	1
GPU6	NV18	NV18	NV18	NV18	NV18	NV18	 X 	NV18	48-95,144-191	1
GPU7	NV18	NV18	NV18	NV18	NV18	NV18	NV18	 X 	48-95,144-191	1


================================================
FILE: test/images/nvidia/gpu_unit_tests/unit_test
================================================
#!/usr/bin/env bash
# Launcher for the GPU unit test suite: runs every gpu_unit_tests/tests/*test*.sh
# file through the bundled bash_unit runner under a global timeout.

# Abort on any command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# File name of the trace log, passed to bash_unit via -t below.
TRACE_LOG=trace.log
# Overall time limit (seconds) handed to timeout(1) for the whole run.
TEST_TIMEOUT=3600
# Interpreter used to execute bash_unit.
BASH="/usr/bin/bash"
# NOTE(review): CURRENT_DIR is not referenced anywhere else in this script.
CURRENT_DIR=$(pwd)
# Optional extra arguments for bash_unit supplied by the caller's environment;
# defaults to empty so that `nounset` does not trip when it is not provided.
SKIP_TESTS_SUBCOMMAND=${SKIP_TESTS_SUBCOMMAND:-""}

# `-k 10` makes timeout send KILL 10 seconds after the initial signal if the
# run is still alive. SKIP_TESTS_SUBCOMMAND is left unquoted on purpose so it
# word-splits into zero or more arguments. Paths are relative, so the script
# must be started from the directory that contains gpu_unit_tests/.
timeout -k 10 ${TEST_TIMEOUT} ${BASH} gpu_unit_tests/bash_unit -f tap ${SKIP_TESTS_SUBCOMMAND} -t gpu_unit_tests/${TRACE_LOG} gpu_unit_tests/tests/*test*.sh


================================================
FILE: test/images/nvidia-inference/Dockerfile
================================================
###############################################################################
# Base image, arguments, and environment
###############################################################################
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=8

FROM nvidia/cuda:$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.0-devel-ubuntu22.04

ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION

# Disable interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

###############################################################################
# System packages
###############################################################################
# apt-get is used rather than apt: apt warns that its CLI is not stable for
# scripted use, and the sibling training image already uses apt-get.
# The package index is removed in the same layer to keep the image small.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y --no-install-recommends \
       build-essential \
       ca-certificates \
       cmake \
       curl \
       emacs \
       git \
       jq \
       libopencv-dev \
       software-properties-common \
       wget \
       unzip \
       vim \
       pkg-config \
       gdb \
       lcov \
       libbz2-dev \
       zlib1g-dev \
       openssl \
       libssl-dev \
       libsqlite3-dev \
       libgdbm-dev \
       libc6-dev \
       libncurses-dev \
       tk-dev \
       libffi-dev \
       libcap-dev \
       gnupg2 \
       gpg-agent \
 && rm -rf /var/lib/apt/lists/*

###############################################################################
# Build and install Python from source
###############################################################################
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12

RUN curl -sL https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz | tar xvz -C /tmp \
 && cd /tmp/Python-$PYTHON_VERSION \
 && ./configure --enable-shared --prefix=/usr/local \
 && make -j$(nproc) \
 && make install \
 && cd && rm -rf /tmp/Python-$PYTHON_VERSION

RUN ln -s /usr/local/bin/pip3 /usr/bin/pip \
 && ln -s /usr/local/bin/$PYTHON /usr/local/bin/python \
 && pip3 --no-cache-dir install --upgrade pip setuptools

###############################################################################
# Install Pytorch from Source
###############################################################################
ARG PYTORCH_BRANCH=v2.6.0
ARG PYTORCH_BUILD_ENV="MAX_JOBS=8 BUILD_TEST=0"

# envs needed to make the path of NVCC known to the compilation
ENV CUDA_HOME=/usr/local/cuda
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
ENV PATH=$PATH:$CUDA_HOME/bin
ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.7;8.9;9.0;10.0;12.0"

RUN pip3 install typing-extensions sympy pyyaml
RUN git clone https://github.com/pytorch/pytorch.git /tmp/pytorch \
      --recursive \
      --branch $PYTORCH_BRANCH \
 && cd /tmp/pytorch \
 && eval "$PYTORCH_BUILD_ENV python3 setup.py install" \
 && cd && rm -rf /tmp/pytorch

###############################################################################
# Application files and Python dependencies
###############################################################################
WORKDIR /app
COPY infer.py /app/
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt


================================================
FILE: test/images/nvidia-inference/infer.py
================================================
import logging
import os
import sys
import time
import random

import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertForPreTraining, BertTokenizer

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger("BERTInference")


def create_dummy_data(tokenizer, batch_size, num_samples=100, max_length=128, seed=42):
    """
    Build a synthetic next-sentence-prediction (NSP) dataset.

    For every sample a first sentence is drawn at random from a small fixed
    corpus. With probability 0.5 the second sentence is the one that follows
    it in the corpus (label 1); otherwise the second sentence is drawn at
    random (label 0).

    'num_samples' is rounded down to a multiple of 'batch_size' so that every
    batch is full. The module-level 'random' generator is re-seeded with
    'seed' on each call.

    Returns a TensorDataset of (input_ids, attention_mask, nsp_labels).
    """
    random.seed(seed)

    leftover = num_samples % batch_size
    if leftover != 0:
        adjusted = num_samples - leftover
        logger.info(
            f"[INFO] Adjusting num_samples from {num_samples} to {adjusted} "
            f"to ensure full batches."
        )
        num_samples = adjusted

    sample_sentences = [
        "The dog loves playing fetch in the park.",
        "Artificial intelligence is reshaping the future.",
        "Movies with complex storylines can be very engaging.",
        "This restaurant serves an amazing brunch on weekends.",
        "Many researchers are exploring neural network architectures.",
        "A day at the beach can reduce stress and improve well-being.",
        "ChatGPT is a popular large language model by OpenAI.",
        "The annual developer conference showcased innovative technologies.",
        "Hiking in the mountains offers both challenge and relaxation.",
        "Robotics and automation are revolutionizing many industries.",
    ]
    last_index = len(sample_sentences) - 1

    # Each entry is (first sentence, second sentence, NSP label).
    pairs = []
    for _ in range(num_samples):
        first = random.randint(0, last_index)
        is_true_next = random.random() < 0.5
        if is_true_next:
            second = (first + 1) % len(sample_sentences)
        else:
            second = random.randint(0, last_index)
        pairs.append(
            (sample_sentences[first], sample_sentences[second], 1 if is_true_next else 0)
        )

    sentences_a = [pair[0] for pair in pairs]
    sentences_b = [pair[1] for pair in pairs]
    nsp_labels = [pair[2] for pair in pairs]

    encoded = tokenizer(
        sentences_a,
        sentences_b,
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    label_tensor = torch.tensor(nsp_labels, dtype=torch.long)
    return TensorDataset(encoded.input_ids, encoded.attention_mask, label_tensor)


def run_inference(model, tokenizer, batch_size, mode, device):
    """
    Runs a dummy BERT inference workload using the given model and tokenizer
    and logs the average time per batch and the throughput.

    Args:
        model: Model to evaluate; it is moved to 'device' and put in eval mode.
        tokenizer: Tokenizer forwarded to create_dummy_data().
        batch_size: Number of samples per batch.
        mode: Label written into the metrics log line (not otherwise used here).
        device: Target device. main() only ever passes a CUDA device.

    Raises:
        Exception: Any error from data creation or from a forward pass is
            logged and re-raised.
    """
    model.to(device)
    model.eval()

    try:
        dataset = create_dummy_data(tokenizer, batch_size=batch_size, num_samples=100, max_length=128)
    except Exception:
        logger.exception("[ERROR] Failed to create dummy data.")
        raise

    dataloader = DataLoader(dataset, batch_size=batch_size)
    total_time = 0.0
    total_batches = len(dataloader)

    # CUDA kernels are launched asynchronously: without an explicit
    # synchronization the host clock would only measure how long it takes to
    # enqueue the forward pass, not how long the GPU takes to execute it.
    sync_cuda = torch.device(device).type == "cuda"

    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            try:
                inputs, masks, next_sentence_labels = batch
                inputs, masks, next_sentence_labels = (
                    inputs.to(device),
                    masks.to(device),
                    next_sentence_labels.to(device),
                )

                # Drain pending work (e.g. the host-to-device copies above) so
                # it is not billed to the forward pass.
                if sync_cuda:
                    torch.cuda.synchronize(device)
                # perf_counter is monotonic, unlike the wall clock.
                start_time = time.perf_counter()
                _ = model(
                    input_ids=inputs,
                    attention_mask=masks,
                    next_sentence_label=next_sentence_labels
                )
                # Wait for the forward pass to actually finish on the GPU.
                if sync_cuda:
                    torch.cuda.synchronize(device)
                end_time = time.perf_counter()

            except Exception:
                logger.exception(f"[ERROR] Inference failed on batch {batch_idx}.")
                raise

            total_time += (end_time - start_time)

    # No measured time (e.g. an empty dataset): report sentinel values instead
    # of dividing by zero.
    if total_time == 0.0:
        avg_time_per_batch = float('inf')
        throughput = 0.0
    else:
        avg_time_per_batch = total_time / total_batches
        throughput = (total_batches * batch_size) / total_time

    logger.info(
        "[BERT_INFERENCE_METRICS] "
        f"mode={mode} "
        f"avg_time_per_batch={avg_time_per_batch:.6f} "
        f"throughput_samples_per_sec={throughput:.6f}"
    )


def main():
    """
    Script entry point.

    Exits with status 1 when no GPU is available or when the model/tokenizer
    cannot be loaded. Otherwise reads INFERENCE_MODE from the environment
    ('throughput' by default, case-insensitive; unknown values fall back to
    'throughput'), picks the batch size for that mode (1 for 'latency',
    8 for 'throughput') and runs the inference benchmark.
    """
    if not torch.cuda.is_available():
        logger.error("[ERROR] GPU is not available. Exiting.")
        sys.exit(1)

    device = torch.device("cuda")
    num_gpus = torch.cuda.device_count()
    logger.info(f"[INFO] Found {num_gpus} GPU(s). GPU is available.")

    batch_size_by_mode = {"throughput": 8, "latency": 1}

    mode = os.environ.get("INFERENCE_MODE", "throughput").lower()
    if mode not in batch_size_by_mode:
        logger.warning(
            f"[WARNING] Unrecognized INFERENCE_MODE '{mode}'. "
            "Falling back to 'throughput'."
        )
        mode = "throughput"

    batch_size = batch_size_by_mode[mode]
    logger.info(f"[INFO] Running inference in {mode} mode with batch size {batch_size}.")

    try:
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        model = BertForPreTraining.from_pretrained("bert-base-uncased")
    except Exception:
        logger.exception("[ERROR] Failed to load model/tokenizer. Exiting.")
        sys.exit(1)

    run_inference(model, tokenizer, batch_size, mode, device)


if __name__ == "__main__":
    main()


================================================
FILE: test/images/nvidia-inference/requirements.txt
================================================
transformers==4.53.0
numpy==1.26


================================================
FILE: test/images/nvidia-training/Dockerfile
================================================
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=8

# Use the NVIDIA CUDA devel image as the parent image
FROM nvidia/cuda:$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.0-devel-ubuntu22.04

# Redeclare build arguments
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION

# Set environment variable to disable interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# Set default values for MASTER_ADDR and MASTER_PORT
ENV MASTER_ADDR=127.0.0.1
ENV MASTER_PORT=12355

RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        emacs \
        git \
        jq \
        libopencv-dev \
        software-properties-common \
        wget \
        unzip \
        vim \
        pkg-config \
        gdb \
        lcov \
        libbz2-dev \
        zlib1g-dev \
        openssl \
        libssl-dev \
        libsqlite3-dev \
        libgdbm-dev \
        libc6-dev \
        libbz2-dev \
        libncurses-dev \
        tk-dev \
        libffi-dev \
        libcap-dev \
        gnupg2 \
        gpg-agent \
 && rm -rf /var/lib/apt/lists/*

# Install Python
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12

RUN curl -sL https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz | tar xvz -C /tmp \
 && cd /tmp/Python-$PYTHON_VERSION \
 && ./configure --enable-shared --prefix=/usr/local \
 && make -j $(nproc) \
 && make install \
 && cd && rm -rf /tmp/Python-$PYTHON_VERSION

RUN ln -s /usr/local/bin/pip3 /usr/bin/pip \
 && ln -s /usr/local/bin/$PYTHON /usr/local/bin/python \
 && pip --no-cache-dir install --upgrade pip setuptools

# Install Pytorch from Source
ARG PYTORCH_BRANCH=v2.6.0
ARG PYTORCH_BUILD_ENV="MAX_JOBS=8 BUILD_TEST=0"

ENV CUDA_HOME=/usr/local/cuda
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
ENV PATH=$PATH:$CUDA_HOME/bin
ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.7;8.9;9.0;10.0;12.0"

RUN pip install typing-extensions sympy pyyaml
RUN git clone https://github.com/pytorch/pytorch.git /tmp/pytorch \
        --recursive \
        --branch $PYTORCH_BRANCH \
 && cd /tmp/pytorch \
 && eval "$PYTORCH_BUILD_ENV python3 setup.py install" \
 && cd && rm -rf /tmp/pytorch

RUN apt-get update -y && \
    apt-get remove -y --allow-change-held-packages \
    libmlx5-1 ibverbs-utils libibverbs-dev libibverbs1 libnccl2 libnccl-dev && \
    rm -rf /opt/hpcx /usr/local/mpi /usr/local/ucx /etc/ld.so.conf.d/hpcx.conf

RUN apt-get install -y --allow-unauthenticated \
    sudo git gcc vim kmod openssh-client openssh-server build-essential \
    wget curl autoconf libtool gdb automake python3-distutils cmake \
    apt-utils devscripts debhelper libsubunit-dev check pkg-config libhwloc-dev

RUN ldconfig

# SSH configuration
RUN mkdir -p /var/run/sshd && \
    sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
    echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config

# Set environment variables for OpenMPI, CUDA, EFA, and NCCL.
# Written in the key=value form used by the other ENV lines in this file; the
# space-separated "ENV key value" form is legacy Dockerfile syntax.
ENV LD_LIBRARY_PATH=/opt/amazon/openmpi/lib64:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib64:/opt/aws-ofi-nccl/install/lib:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib/:/usr/lib64:/usr/lib/x86_64-linux-gnu/:/usr/lib/aarch64-linux-gnu/:$LD_LIBRARY_PATH
ENV PATH=/usr/local/cuda/bin:/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/sbin:/usr/bin:/usr/local/bin:$PATH

# Install EFA
ARG EFA_INSTALLER_VERSION=latest
RUN curl -sL https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_INSTALLER_VERSION.tar.gz | tar xvz -C /tmp \
 && cd /tmp/aws-efa-installer \
 && ./efa_installer.sh -y -g -d --skip-kmod --skip-limit-conf --no-verify \
 && cd && rm -rf /tmp/aws-efa-installer

# Install NCCL
ARG LIBNCCL_VERSION=2.28.7-1
RUN git clone https://github.com/NVIDIA/nccl.git --branch v$LIBNCCL_VERSION /tmp/nccl \
 && cd /tmp/nccl \
 && make -j $(nproc) \
 && make install \
 && cd && rm -rf /tmp/nccl

# Install AWS-OFI-NCCL plugin
ARG AWS_OFI_NCCL_VERSION=1.17.2
RUN curl -sL https://github.com/aws/aws-ofi-nccl/releases/download/v$AWS_OFI_NCCL_VERSION/aws-ofi-nccl-$AWS_OFI_NCCL_VERSION.tar.gz | tar xvz -C /tmp \
 && cd /tmp/aws-ofi-nccl-$AWS_OFI_NCCL_VERSION \
 && ./configure \
        --prefix=/opt/aws-ofi-nccl/install \
        --with-mpi=/opt/amazon/openmpi \
        --with-libfabric=/opt/amazon/efa \
        --with-cuda=/usr/local/cuda \
        --enable-platform-aws \
        --disable-tests \
 && make -j $(nproc) \
 && make install \
 && cd && rm -rf /tmp/aws-ofi-nccl-$AWS_OFI_NCCL_VERSION

# NCCL runtime settings (key=value form; "ENV key value" is legacy syntax).
ENV NCCL_PROTO=simple
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

RUN rm -rf /var/lib/apt/lists/*

# Set the working directory in the container
WORKDIR /app

# Copy the training script and install requirements
COPY train.py /app/
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt


================================================
FILE: test/images/nvidia-training/requirements.txt
================================================
transformers==4.53.0
numpy==1.26


================================================
FILE: test/images/nvidia-training/train.py
================================================
import os
import time
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from transformers import BertForPreTraining, BertTokenizer
from torch.utils.data import DataLoader, TensorDataset
import numpy as np


def create_dummy_data(tokenizer, num_samples=100, max_length=128):
    """Build a synthetic BERT pre-training dataset.

    Generates `num_samples` dummy sentences, tokenizes them padded/truncated
    to `max_length`, applies masked-language-model masking via mask_tokens(),
    and attaches random next-sentence labels.

    Returns:
        TensorDataset of (input_ids, attention_mask, mlm_labels,
        next_sentence_labels).
    """
    sentences = [f"This is a dummy sentence number {i}" for i in range(num_samples)]
    tokenized_inputs = tokenizer(
        sentences,
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # MLM task: randomly mask some tokens. mask_tokens() produces the labels
    # itself, so no separate copy of input_ids is needed beforehand.
    mlm_probability = 0.15
    input_ids, labels = mask_tokens(tokenized_inputs.input_ids, tokenizer, mlm_probability)

    # NSP task: random 0/1 label per sample (the data is synthetic).
    next_sentence_labels = torch.randint(0, 2, (num_samples,))

    return TensorDataset(input_ids, tokenized_inputs.attention_mask, labels, next_sentence_labels)


def mask_tokens(inputs, tokenizer, mlm_probability):
    """Apply masked-language-model masking to a batch of token ids.

    Every position the tokenizer does not report as a special token is
    selected independently with probability `mlm_probability`. Selected
    positions are overwritten with the tokenizer's mask token id.

    Note: `inputs` is modified in place and also returned.

    Returns:
        (inputs, labels): `labels` holds the original token id at selected
        positions and -100 everywhere else.
    """
    labels = inputs.clone()

    # Per-row 0/1 flags from the tokenizer marking special tokens.
    special_flags = []
    for row in labels.tolist():
        special_flags.append(
            tokenizer.get_special_tokens_mask(row, already_has_special_tokens=True)
        )
    is_special = torch.tensor(special_flags, dtype=torch.bool)

    # Special tokens get probability zero so they are never selected.
    selection_probs = torch.full(labels.shape, mlm_probability)
    selection_probs.masked_fill_(is_special, value=0.0)
    selected = torch.bernoulli(selection_probs).bool()

    labels[~selected] = -100
    mask_token_id = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)
    inputs[selected] = mask_token_id
    return inputs, labels


def setup(rank, world_size, local_rank):
    """Join the NCCL process group and bind this process to its GPU.

    The rendezvous endpoint is read from the MASTER_ADDR and MASTER_PORT
    environment variables; a KeyError is raised if either is missing.
    """
    rendezvous = "tcp://{}:{}".format(
        os.environ["MASTER_ADDR"], os.environ["MASTER_PORT"]
    )
    dist.init_process_group(
        "nccl",
        init_method=rendezvous,
        rank=rank,
        world_size=world_size,
    )
    torch.cuda.set_device(local_rank)
    print(f"Process {rank} initialized, using GPU {local_rank}")


def cleanup():
    """Tear down the process group created in setup()."""
    dist.destroy_process_group()


def train_bert(rank, world_size, local_rank, model, tokenizer):
    """Run one epoch of dummy BERT pre-training under DistributedDataParallel.

    Args:
        rank: Global rank of this process.
        world_size: Total number of processes in the job.
        local_rank: GPU index used by this process on its node.
        model: Model to train; it is moved to the `local_rank` GPU.
        tokenizer: Tokenizer used to build the dummy dataset.

    Returns:
        Throughput of this process in samples per second
        (len(dataset) / elapsed training time).

    Note: the DataLoader is created without a distributed sampler, so every
    rank iterates over the same dummy dataset.
    """
    setup(rank, world_size, local_rank)

    model = model.to(local_rank)
    ddp_model = DDP(model, device_ids=[local_rank])

    dataset = create_dummy_data(tokenizer)
    train_dataloader = DataLoader(dataset, batch_size=8)

    optimizer = torch.optim.AdamW(ddp_model.parameters(), lr=0.001)

    start_time = time.time()

    # Simple single-epoch training loop
    for _ in range(1):
        ddp_model.train()
        for batch in train_dataloader:
            optimizer.zero_grad()
            inputs, masks, labels, next_sentence_labels = batch
            inputs = inputs.to(local_rank)
            masks = masks.to(local_rank)
            labels = labels.to(local_rank)
            next_sentence_labels = next_sentence_labels.to(local_rank)

            outputs = ddp_model(
                input_ids=inputs,
                attention_mask=masks,
                labels=labels,
                next_sentence_label=next_sentence_labels,
            )
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # CUDA work is queued asynchronously; wait for the GPU to finish the last
    # steps so the measured time covers the work that was actually executed.
    torch.cuda.synchronize(local_rank)
    end_time = time.time()
    training_time = end_time - start_time
    throughput = len(dataset) / training_time

    print(f"Process {rank} - Training time: {training_time:.2f} seconds")
    print(f"Process {rank} - Throughput: {throughput:.2f} samples/second")

    cleanup()

    return throughput


def main():
    """Entry point for one training process.

    Rank information is read from the OMPI_COMM_WORLD_* environment variables,
    defaulting to a single-process run (rank 0, world size 1, local rank 0)
    when they are not set.
    """
    env = os.environ
    rank = int(env.get("OMPI_COMM_WORLD_RANK", "0"))
    world_size = int(env.get("OMPI_COMM_WORLD_SIZE", "1"))
    local_rank = int(env.get("OMPI_COMM_WORLD_LOCAL_RANK", "0"))

    print(f"Process started for rank {rank} with local rank {local_rank}")

    # Load the tokenizer and model before the process group is created.
    pretrained_name = "bert-base-uncased"
    tokenizer = BertTokenizer.from_pretrained(pretrained_name)
    model = BertForPreTraining.from_pretrained(pretrained_name)

    print(f"successfully downloaded model and tokenizer for rank: {rank}")

    throughput = train_bert(rank, world_size, local_rank, model, tokenizer)

    # Only rank 0 emits the "Average Throughput" line; the value is rank 0's
    # own throughput as returned by train_bert().
    if rank != 0:
        return
    print(f"Average Throughput: {throughput:.2f} samples/second")


if __name__ == "__main__":
    main()


================================================
FILE: test/manifests/assets/cloudwatch-agent.yaml
================================================
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-cwagentconfig
  namespace: amazon-cloudwatch
data:
  cwagentconfig.json: |
    {
      "agent": {
        "debug": true
      },
      "logs": {
        "metrics_collected": {
          "prometheus": {
            "prometheus_config_path": "/etc/prometheusconfig/prometheus.yaml",
            "emf_processor": {
              "metric_declaration": [
                {
                  "source_labels": ["job"],
                  "label_matcher": "dcgm-exporter",
                  "dimensions": [[{{.DimensionKeys}}]],
                  "metric_selectors": [
                    "^DCGM_FI_DEV_GPU_UTIL$",
                    "^DCGM_FI_DEV_MEM_COPY_UTIL$",
                    "^DCGM_FI_DEV_FB_USED$",
                    "^DCGM_FI_DEV_FB_FREE$",
                    "^DCGM_FI_DEV_POWER_USAGE$"
                  ]
                }
              ]
            }
          }
        },
        "force_flush_interval": 5
      }
    }

---
# Prometheus scrape configuration mounted into the CloudWatch agent
# (at /etc/prometheusconfig by the cwagent DaemonSet in this file).
# It scrapes the dcgm-exporter service in kube-system on port 9400 every
# second. This file is a Go template: one metric_relabel_configs entry is
# rendered per key/value pair in .MetricDimensions, setting label <key> to
# the fixed value <value> on every scraped metric.
# NOTE: comments must stay outside the `|` block scalar below, otherwise they
# become part of prometheus.yaml itself.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: amazon-cloudwatch
data:
  prometheus.yaml: |
    global:
      scrape_interval: 1s
      scrape_timeout: 1s
    scrape_configs:
      - job_name: dcgm-exporter
        static_configs:
          - targets:
            - dcgm-exporter.kube-system.svc.cluster.local:9400
        metrics_path: /metrics
        metric_relabel_configs:
{{- range $key, $value := .MetricDimensions}}
          - {action: replace, target_label: {{$key}}, replacement: '{{$value}}'}
{{- end}}
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cwagent
  namespace: amazon-cloudwatch
spec:
  selector:
    matchLabels:
      app: cwagent
  template:
    metadata:
      labels:
        app: cwagent
    spec:
      serviceAccountName: cwagent
      dnsPolicy: ClusterFirst 
      containers:
        - name: cloudwatch-agent
          image: public.ecr.aws/cloudwatch-agent/cloudwatch-agent:latest
          imagePullPolicy: Always
          resources:
            limits:
              cpu: 1000m
              memory: 1000Mi
            requests:
              cpu: 200m
              memory: 200Mi
          volumeMounts:
            - name: prometheus-cwagentconfig
              mountPath: /etc/cwagentconfig
            - name: prometheus-config
              mountPath: /etc/prometheusconfig
      volumes:
        - name: prometheus-cwagentconfig
          configMap:
            name: prometheus-cwagentconfig
        - name: prometheus-config
          configMap:
            name: prometheus-config
      terminationGracePeriodSeconds: 60
---

================================================
FILE: test/manifests/assets/dcgm-exporter.yaml
================================================
# Derived from: Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: "dcgm-exporter"
  namespace: "kube-system"
  labels:
    app.kubernetes.io/name: "dcgm-exporter"
    app.kubernetes.io/version: "4.1.3"
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      app.kubernetes.io/name: "dcgm-exporter"
      app.kubernetes.io/version: "4.1.3"
  template:
    metadata:
      labels:
        app.kubernetes.io/name: "dcgm-exporter"
        app.kubernetes.io/version: "4.1.3"
      name: "dcgm-exporter"
    spec:
      containers:
      - image: "nvcr.io/nvidia/k8s/dcgm-exporter:4.2.3-4.1.3-ubuntu22.04"
        env:
        - name: "DCGM_EXPORTER_LISTEN"
          value: ":9400"
        - name: "DCGM_EXPORTER_INTERVAL"
          value: "100"
        - name: "DCGM_EXPORTER_KUBERNETES"
          value: "true"
        name: "dcgm-exporter"
        ports:
        - name: "metrics"
          containerPort: 9400
        securityContext:
          runAsNonRoot: false
          runAsUser: 0
          capabilities:
            add: ["SYS_ADMIN"]
        volumeMounts:
        - name: "pod-gpu-resources"
          readOnly: true
          mountPath: "/var/lib/kubelet/pod-resources"
      volumes:
      - name: "pod-gpu-resources"
        hostPath:
          path: "/var/lib/kubelet/pod-resources"

---

kind: Service
apiVersion: v1
metadata:
  name: "dcgm-exporter"
  namespace: "kube-system"
  labels:
    app.kubernetes.io/name: "dcgm-exporter"
    app.kubernetes.io/version: "4.1.3"
spec:
  clusterIP: "None"
  selector:
    app.kubernetes.io/name: "dcgm-exporter"
    app.kubernetes.io/version: "4.1.3"
  ports:
  - name: "metrics"
    port: 9400

================================================
FILE: test/manifests/assets/dranet.yaml
================================================
---
# Source: aws-dranet/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dranet-aws-dranet
  namespace: kube-system
  labels:
    helm.sh/chart: aws-dranet-1.0.0
    app.kubernetes.io/name: aws-dranet
    app.kubernetes.io/instance: dranet
    app.kubernetes.io/version: "v1.2.0-eksbuild.2"
    app.kubernetes.io/managed-by: Helm
---
# Source: aws-dranet/templates/clusterrole.yaml
# Cluster-wide permissions for dranet: read access to Nodes plus access to
# the resource.k8s.io (Dynamic Resource Allocation) objects listed below.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: dranet-aws-dranet
  labels:
    helm.sh/chart: aws-dranet-1.0.0
    app.kubernetes.io/name: aws-dranet
    app.kubernetes.io/instance: dranet
    app.kubernetes.io/version: "v1.2.0-eksbuild.2"
    app.kubernetes.io/managed-by: Helm
rules:
  # Core API group: fetch individual Node objects (get only).
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
  # ResourceSlices: list, watch, create, update and delete
  # ("get" and "patch" are not granted).
  - apiGroups:
      - "resource.k8s.io"
    resources:
      - resourceslices
    verbs:
      - list
      - watch
      - create
      - update
      - delete
  # ResourceClaims and DeviceClasses: read a single object by name (get only).
  - apiGroups:
      - "resource.k8s.io"
    resources:
      - resourceclaims
      - deviceclasses
    verbs:
      - get
  # ResourceClaim status subresource: patch and update.
  - apiGroups:
      - "resource.k8s.io"
    resources:
      - resourceclaims/status
    verbs:
      - patch
      - update
---
# Source: aws-dranet/templates/clusterrolebinding.yaml
# Grants the dranet-aws-dranet ClusterRole to the dranet-aws-dranet
# ServiceAccount in kube-system.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: dranet-aws-dranet
  labels:
    helm.sh/chart: aws-dranet-1.0.0
    app.kubernetes.io/name: aws-dranet
    app.kubernetes.io/instance: dranet
    app.kubernetes.io/version: "v1.2.0-eksbuild.2"
    app.kubernetes.io/managed-by: Helm
# The role being granted (must match the ClusterRole's metadata.name).
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: dranet-aws-dranet
# The identity receiving the role.
subjects:
- kind: ServiceAccount
  name: dranet-aws-dranet
  namespace: kube-system
---
# Source: aws-dranet/templates/daemonset.yaml
# DaemonSet that runs a single "dranet" container per node on the host
# network, under the dranet-aws-dranet ServiceAccount, with several host
# directories mounted into the container.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: dranet-aws-dranet
  namespace: kube-system
  labels:
    helm.sh/chart: aws-dranet-1.0.0
    app.kubernetes.io/name: aws-dranet
    app.kubernetes.io/instance: dranet
    app.kubernetes.io/version: "v1.2.0-eksbuild.2"
    app.kubernetes.io/managed-by: Helm
    tier: node
    app: dranet-aws-dranet
    k8s-app: dranet-aws-dranet
spec:
  selector:
    # Only the "app" label is used for selection; the pod template below
    # carries it along with its other labels.
    matchLabels:
      app: dranet-aws-dranet
  template:
    metadata:
      labels:
        app.kubernetes.io/name: aws-dranet
        app.kubernetes.io/instance: dranet
        tier: node
        app: dranet-aws-dranet
        k8s-app: dranet-aws-dranet
    spec:
      priorityClassName: "system-node-critical"
      # The pod shares the node's network namespace but not its PID namespace.
      hostNetwork: true
      hostPID: false
      # "Exists" with no effect tolerates every taint whose key is
      # CriticalAddonsOnly; no other taints are tolerated by this manifest.
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
      serviceAccountName: dranet-aws-dranet
      containers:
      - name: dranet
        args:
        - /dranet
        - --v=4
        # $(NODE_NAME) is expanded by Kubernetes from the NODE_NAME env var
        # declared under "env" below.
        - --hostname-override=$(NODE_NAME)
        # Port 9177 is the same port the readinessProbe queries.
        - "--bind-address=:9177"
        - --cloud-provider-hint=AWS
        # This is a YAML plain scalar, so the double quotes are passed to the
        # process literally as part of the filter expression. The expression
        # requires the "dra.net/pciDevice" attribute to be present and its
        # StringValue to equal "Elastic Fabric Adapter (EFA)".
        - --filter="dra.net/pciDevice" in attributes && attributes["dra.net/pciDevice"].StringValue == "Elastic Fabric Adapter (EFA)"
        # Go-template-style placeholder: this file is not valid to apply until
        # the value is substituted.
        # NOTE(review): presumably rendered by the test harness - confirm
        # against the code that loads this manifest.
        image: {{.RdmaDeviceDraDriverImage}}
        imagePullPolicy: IfNotPresent
        env:
        # Name of the node the pod is scheduled on (downward API).
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        resources:
          limits:
            cpu: 500m
            memory: 256Mi
          requests:
            cpu: 100m
            memory: 50Mi
        # Runs as UID/GID 0, but unprivileged: all capabilities dropped, no
        # privilege escalation, read-only root filesystem, default seccomp.
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
          runAsGroup: 0
          runAsUser: 0
          seccompProfile:
            type: RuntimeDefault
        # HTTP GET /healthz on the --bind-address port.
        readinessProbe:
          httpGet:
            path: /healthz
            port: 9177
        # Each mount maps the identically named volume defined under
        # "volumes" to the same path inside the container.
        volumeMounts:
        - name: device-plugin
          mountPath: /var/lib/kubelet/plugins
        - name: plugin-registry
          mountPath: /var/lib/kubelet/plugins_registry
        - name: nri-plugin
          mountPath: /var/run/nri
        # HostToContainer: mounts created on the host under this path after
        # the container starts become visible inside the container.
        - name: netns
          mountPath: /var/run/netns
          mountPropagation: HostToContainer
        - name: infiniband
          mountPath: /dev/infiniband
          mountPropagation: HostToContainer
        # Writable scratch space; the root filesystem itself is read-only.
        - name: tmp
          mountPath: /tmp
        - name: dranet-run
          mountPath: /var/run/dranet
      # All hostPath volumes use DirectoryOrCreate, so a missing directory is
      # created on the node instead of failing the pod.
      volumes:
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/plugins
          type: DirectoryOrCreate
      - name: plugin-registry
        hostPath:
          path: /var/lib/kubelet/plugins_registry
          type: DirectoryOrCreate
      - name: nri-plugin
        hostPath:
          path: /var/run/nri
          type: DirectoryOrCreate
      - name: netns
        hostPath:
          path: /var/run/netns
          type: DirectoryOrCreate
      - name: infiniband
        hostPath:
          path: /dev/infiniband
          type: DirectoryOrCreate
      # Memory-backed emptyDir capped at 10Mi.
      - name: tmp
        emptyDir:
          medium: Memory
          sizeLimit: 10Mi
      - name: dranet-run
        hostPath:
          path: /var/run/dranet
          type: DirectoryOrCreate
---
# Source: aws-dranet/templates/deviceclass.yaml
# DeviceClass "efa.networking.k8s.aws": selects devices published by the
# "dra.net" driver whose dra.net pciDevice attribute is
# 'Elastic Fabric Adapter (EFA)'.
apiVersion: resource.k8s.io/v1
kind: DeviceClass
metadata:
  name: efa.networking.k8s.aws
  labels:
    helm.sh/chart: aws-dranet-1.0.0
    app.kubernetes.io/name: aws-dranet
    app.kubernetes.io/instance: dranet
    app.kubernetes.io/version: "v1.2.0-eksbuild.2"
    app.kubernetes.io/managed-by: Helm
spec:
  selectors:
  # Single CEL selector. The expression is a literal block scalar ("|"), so
  # every line under it - including any "#" - is part of the expression text;
  # keep comments out of it.
  - cel:
      expression: |
        device.driver == "dra.net" &&
        device.attributes["dra.net"].pciDevice == 'Elastic Fabric Adapter (EFA)'


================================================
FILE: test/manifests/assets/efa-device-plugin.yaml
================================================
# Source: https://raw.githubusercontent.com/aws-samples/aws-efa-eks/main/manifest/efa-k8s-device-plugin.yml
# DaemonSet that runs the aws-efa-k8s-device-plugin container on the host
# network, with the kubelet device-plugins directory and /dev/infiniband
# mounted from the node.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: aws-efa-k8s-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name:  aws-efa-k8s-device-plugin
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: aws-efa-k8s-device-plugin
    spec:
      # "serviceAccount" is a deprecated alias in the PodSpec API;
      # "serviceAccountName" is the supported field and matches the other
      # DaemonSets in this repository's manifests.
      serviceAccountName: default
      # Tolerates every CriticalAddonsOnly taint, plus NoSchedule taints keyed
      # aws.amazon.com/efa.
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - key: aws.amazon.com/efa
          operator: Exists
          effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      hostNetwork: true
      containers:
        # The image is pinned to the us-west-2 regional ECR registry and a
        # fixed tag.
        - image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-efa-k8s-device-plugin:v0.5.8
          name: aws-efa-k8s-device-plugin
          # All capabilities dropped and no privilege escalation; running as
          # root is explicitly permitted (runAsNonRoot: false).
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
            runAsNonRoot: false
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: infiniband-volume
              mountPath: /dev/infiniband
          # Requests only; no limits are set.
          resources:
            requests:
              cpu:    10m
              memory: 20Mi
      # Both volumes are hostPath mounts of the same path on the node.
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: infiniband-volume
          hostPath:
            path: /dev/infiniband


================================================
FILE: test/manifests/assets/k8s-neuron-device-plugin-rbac.yml
================================================
# Source: https://github.com/aws-neuron/aws-neuron-sdk/blob/master/src/k8/k8s-neuron-device-plugin-rbac.yml
# Cluster-wide permissions for the neuron-device-plugin ServiceAccount.
# Every rule targets the core ("") API group.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: neuron-device-plugin
rules:
# Nodes: read-only (get, list, watch).
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
# Events: create and patch.
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
# Pods: read (get, list, watch) and modify (update, patch); no create/delete.
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - update
  - patch
  - get
  - list
  - watch
# Node status subresource: patch and update.
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
  - update
---
# ServiceAccount in kube-system that the neuron-device-plugin
# ClusterRoleBinding in this file names as its subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: neuron-device-plugin
  namespace: kube-system
---
# Grants the neuron-device-plugin ClusterRole to the neuron-device-plugin
# ServiceAccount in kube-system.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  # ClusterRoleBinding is a cluster-scoped resource, so no metadata.namespace
  # is set here; the namespace that matters is the subject's, below.
  name: neuron-device-plugin
# The role being granted (must match the ClusterRole's metadata.name).
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: neuron-device-plugin
# The identity receiving the role.
subjects:
- kind: ServiceAccount
  name: neuron-device-plugin
  namespace: kube-system


================================================
FILE: test/manifests/assets/k8s-neuron-device-plugin.yml
================================================
# Source: https://github.com/aws-neuron/aws-neuron-sdk/blob/master/src/k8/k8s-neuron-device-plugin.yml
# DaemonSet that runs the neuron-device-plugin container on nodes whose
# instance type is in the list under "affinity", with the kubelet
# device-plugins directory and /run mounted from the node.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: neuron-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name:  neuron-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      # Uncomment the annotation below if k8s version is 1.13 or lower
      # annotations:
      #  scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        name: neuron-device-plugin-ds
    spec:
      # "serviceAccount" is a deprecated alias in the PodSpec API;
      # "serviceAccountName" is the supported field. The account and its RBAC
      # are defined in k8s-neuron-device-plugin-rbac.yml.
      serviceAccountName: neuron-device-plugin
      # Tolerates every CriticalAddonsOnly taint, plus NoSchedule taints keyed
      # aws.amazon.com/neuron.
      tolerations:
      - key: CriticalAddonsOnly
        operator: Exists
      - key: aws.amazon.com/neuron
        operator: Exists
        effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      # Hard scheduling requirement: the pod is only placed on nodes whose
      # node.kubernetes.io/instance-type label is one of the values below.
      # A new instance type must be added here before the plugin can run on it.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "node.kubernetes.io/instance-type"
                    operator: In
                    values:
                      - inf1.xlarge
                      - inf1.2xlarge
                      - inf1.6xlarge
                      - inf1.24xlarge
                      - inf2.xlarge
                      - inf2.8xlarge
                      - inf2.24xlarge
                      - inf2.48xlarge
                      - trn1.2xlarge
                      - trn1.32xlarge
                      - trn1n.32xlarge
                      - trn2.48xlarge
                      - trn2u.48xlarge
      containers:
        # Find all neuron-device-plugin images at https://gallery.ecr.aws/neuron/neuron-device-plugin
      - image: public.ecr.aws/neuron/neuron-device-plugin:2.26.26.0
        imagePullPolicy: Always
        name: neuron-device-plugin
        env:
        - name: KUBECONFIG
          value: /etc/kubernetes/kubelet.conf
        # Name of the node the pod is scheduled on (downward API).
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        # All capabilities dropped and no privilege escalation.
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop: ["ALL"]
        volumeMounts:
          - name: device-plugin
            mountPath: /var/lib/kubelet/device-plugins
          - name: infa-map
            mountPath: /run
      # Both volumes are hostPath mounts of the same path on the node.
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: infa-map
          hostPath:
            path: /run


================================================
FILE: test/manifests/assets/mpi-operator.yaml
================================================
# --------------------------------------------------
# - Single configuration deployment YAML for MPI-Operator
# - Includes:
#      CRD
#      Namespace
#      RBAC
#      Controller deployment
# --------------------------------------------------
---
# Namespace "mpi-operator", labelled with the same app/component labels as
# the MPIJob CustomResourceDefinition that follows it in this file.
# NOTE(review): presumably the namespace the RBAC objects and controller
# deployment listed in the file header are created in - confirm further down.
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
  name: mpi-operator
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.20.1
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
  name: mpijobs.kubeflow.org
spec:
  group: kubeflow.org
  names:
    kind: MPIJob
    listKind: MPIJobList
    plural: mpijobs
    singular: mpijob
  scope: Namespaced
  versions:
  - name: v2beta1
    schema:
      openAPIV3Schema:
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            properties:
              launcherCreationPolicy:
                default: AtStartup
                description: launcherCreationPolicy if WaitForWorkersReady, the launcher
                  is created only after all workers are in Ready state. Defaults to
                  AtStartup.
                type: string
              mpiImplementation:
                default: OpenMPI
                description: |-
                  MPIImplementation is the MPI implementation.
                  Options are "OpenMPI" (default), "Intel" and "MPICH".
                enum:
                - OpenMPI
                - Intel
                - MPICH
                type: string
              mpiReplicaSpecs:
                additionalProperties:
                  description: ReplicaSpec is a description of the replica
                  properties:
                    replicas:
                      description: |-
                        Replicas is the desired number of replicas of the given template.
                        If unspecified, defaults to 1.
                      format: int32
                      type: integer
                    restartPolicy:
                      description: |-
                        Restart policy for all replicas within the job.
                        One of Always, OnFailure, Never and ExitCode.
                        Default to Never.
                      type: string
                    template:
                      description: |-
                        Template is the object that describes the pod that
                        will be created for this replica. RestartPolicy in PodTemplateSpec
                        will be overide by RestartPolicy in ReplicaSpec
                      properties:
                        metadata:
                          description: |-
                            Standard object's metadata.
                            More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
                          properties:
                            annotations:
                              additionalProperties:
                                type: string
                              type: object
                            finalizers:
                              items:
                                type: string
                              type: array
                            labels:
                              additionalProperties:
                                type: string
                              type: object
                            name:
                              type: string
                            namespace:
                              type: string
                          type: object
                        spec:
                          description: |-
                            Specification of the desired behavior of the pod.
                            More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status
                          properties:
                            activeDeadlineSeconds:
                              description: |-
                                Optional duration in seconds the pod may be active on the node relative to
                                StartTime before the system will actively try to mark it failed and kill associated containers.
                                Value must be a positive integer.
                              format: int64
                              type: integer
                            affinity:
                              description: If specified, the pod's scheduling constraints
                              properties:
                                nodeAffinity:
                                  description: Describes node affinity scheduling
                                    rules for the pod.
                                  properties:
                                    preferredDuringSchedulingIgnoredDuringExecution:
                                      description: |-
                                        The scheduler will prefer to schedule pods to nodes that satisfy
                                        the affinity expressions specified by this field, but it may choose
                                        a node that violates one or more of the expressions. The node that is
                                        most preferred is the one with the greatest sum of weights, i.e.
                                        for each node that meets all of the scheduling requirements (resource
                                        request, requiredDuringScheduling affinity expressions, etc.),
                                        compute a sum by iterating through the elements of this field and adding
                                        "weight" to the sum if the node matches the corresponding matchExpressions; the
                                        node(s) with the highest sum are the most preferred.
                                      items:
                                        description: |-
                                          An empty preferred scheduling term matches all objects with implicit weight 0
                                          (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
                                        properties:
                                          preference:
                                            description: A node selector term, associated
                                              with the corresponding weight.
                                            properties:
                                              matchExpressions:
                                                description: A list of node selector
                                                  requirements by node's labels.
                                                items:
                                                  description: |-
                                                    A node selector requirement is a selector that contains values, a key, and an operator
                                                    that relates the key and values.
                                                  properties:
                                                    key:
                                                      description: The label key that
                                                        the selector applies to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        Represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        An array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. If the operator is Gt or Lt, the values
                                                        array must have a single element, which will be interpreted as an integer.
                                                        This array is replaced during a strategic merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              matchFields:
                                                description: A list of node selector
                                                  requirements by node's fields.
                                                items:
                                                  description: |-
                                                    A node selector requirement is a selector that contains values, a key, and an operator
                                                    that relates the key and values.
                                                  properties:
                                                    key:
                                                      description: The label key that
                                                        the selector applies to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        Represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        An array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. If the operator is Gt or Lt, the values
                                                        array must have a single element, which will be interpreted as an integer.
                                                        This array is replaced during a strategic merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                            x-kubernetes-map-type: atomic
                                          weight:
                                            description: Weight associated with matching
                                              the corresponding nodeSelectorTerm,
                                              in the range 1-100.
                                            format: int32
                                            type: integer
                                        required:
                                        - preference
                                        - weight
                                        type: object
                                      type: array
                                      x-kubernetes-list-type: atomic
                                    requiredDuringSchedulingIgnoredDuringExecution:
                                      description: |-
                                        If the affinity requirements specified by this field are not met at
                                        scheduling time, the pod will not be scheduled onto the node.
                                        If the affinity requirements specified by this field cease to be met
                                        at some point during pod execution (e.g. due to an update), the system
                                        may or may not try to eventually evict the pod from its node.
                                      properties:
                                        nodeSelectorTerms:
                                          description: Required. A list of node selector
                                            terms. The terms are ORed.
                                          items:
                                            description: |-
                                              A null or empty node selector term matches no objects. The requirements of
                                              them are ANDed.
                                              The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
                                            properties:
                                              matchExpressions:
                                                description: A list of node selector
                                                  requirements by node's labels.
                                                items:
                                                  description: |-
                                                    A node selector requirement is a selector that contains values, a key, and an operator
                                                    that relates the key and values.
                                                  properties:
                                                    key:
                                                      description: The label key that
                                                        the selector applies to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        Represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        An array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. If the operator is Gt or Lt, the values
                                                        array must have a single element, which will be interpreted as an integer.
                                                        This array is replaced during a strategic merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              matchFields:
                                                description: A list of node selector
                                                  requirements by node's fields.
                                                items:
                                                  description: |-
                                                    A node selector requirement is a selector that contains values, a key, and an operator
                                                    that relates the key and values.
                                                  properties:
                                                    key:
                                                      description: The label key that
                                                        the selector applies to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        Represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        An array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. If the operator is Gt or Lt, the values
                                                        array must have a single element, which will be interpreted as an integer.
                                                        This array is replaced during a strategic merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                            x-kubernetes-map-type: atomic
                                          type: array
                                          x-kubernetes-list-type: atomic
                                      required:
                                      - nodeSelectorTerms
                                      type: object
                                      x-kubernetes-map-type: atomic
                                  type: object
                                podAffinity:
                                  description: Describes pod affinity scheduling rules
                                    (e.g. co-locate this pod in the same node, zone,
                                    etc. as some other pod(s)).
                                  properties:
                                    preferredDuringSchedulingIgnoredDuringExecution:
                                      description: |-
                                        The scheduler will prefer to schedule pods to nodes that satisfy
                                        the affinity expressions specified by this field, but it may choose
                                        a node that violates one or more of the expressions. The node that is
                                        most preferred is the one with the greatest sum of weights, i.e.
                                        for each node that meets all of the scheduling requirements (resource
                                        request, requiredDuringScheduling affinity expressions, etc.),
                                        compute a sum by iterating through the elements of this field and adding
                                        "weight" to the sum if the node has pods which match the corresponding podAffinityTerm; the
                                        node(s) with the highest sum are the most preferred.
                                      items:
                                        description: The weights of all of the matched
                                          WeightedPodAffinityTerm fields are added
                                          per-node to find the most preferred node(s)
                                        properties:
                                          podAffinityTerm:
                                            description: Required. A pod affinity
                                              term, associated with the corresponding
                                              weight.
                                            properties:
                                              labelSelector:
                                                description: |-
                                                  A label query over a set of resources, in this case pods.
                                                  If it's null, this PodAffinityTerm matches with no Pods.
                                                properties:
                                                  matchExpressions:
                                                    description: matchExpressions
                                                      is a list of label selector
                                                      requirements. The requirements
                                                      are ANDed.
                                                    items:
                                                      description: |-
                                                        A label selector requirement is a selector that contains values, a key, and an operator that
                                                        relates the key and values.
                                                      properties:
                                                        key:
                                                          description: key is the
                                                            label key that the selector
                                                            applies to.
                                                          type: string
                                                        operator:
                                                          description: |-
                                                            operator represents a key's relationship to a set of values.
                                                            Valid operators are In, NotIn, Exists and DoesNotExist.
                                                          type: string
                                                        values:
                                                          description: |-
                                                            values is an array of string values. If the operator is In or NotIn,
                                                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                            the values array must be empty. This array is replaced during a strategic
                                                            merge patch.
                                                          items:
                                                            type: string
                                                          type: array
                                                          x-kubernetes-list-type: atomic
                                                      required:
                                                      - key
                                                      - operator
                                                      type: object
                                                    type: array
                                                    x-kubernetes-list-type: atomic
                                                  matchLabels:
                                                    additionalProperties:
                                                      type: string
                                                    description: |-
                                                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                      map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                      operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                    type: object
                                                type: object
                                                x-kubernetes-map-type: atomic
                                              matchLabelKeys:
                                                description: |-
                                                  MatchLabelKeys is a set of pod label keys to select which pods will
                                                  be taken into consideration. The keys are used to lookup values from the
                                                  incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)`
                                                  to select the group of existing pods which will be taken into consideration
                                                  for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                                  pod labels will be ignored. The default value is empty.
                                                  The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                                  Also, matchLabelKeys cannot be set when labelSelector isn't set.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              mismatchLabelKeys:
                                                description: |-
                                                  MismatchLabelKeys is a set of pod label keys to select which pods will
                                                  be taken into consideration. The keys are used to lookup values from the
                                                  incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)`
                                                  to select the group of existing pods which will be taken into consideration
                                                  for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                                  pod labels will be ignored. The default value is empty.
                                                  The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                                  Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              namespaceSelector:
                                                description: |-
                                                  A label query over the set of namespaces that the term applies to.
                                                  The term is applied to the union of the namespaces selected by this field
                                                  and the ones listed in the namespaces field.
                                                  null selector and null or empty namespaces list means "this pod's namespace".
                                                  An empty selector ({}) matches all namespaces.
                                                properties:
                                                  matchExpressions:
                                                    description: matchExpressions
                                                      is a list of label selector
                                                      requirements. The requirements
                                                      are ANDed.
                                                    items:
                                                      description: |-
                                                        A label selector requirement is a selector that contains values, a key, and an operator that
                                                        relates the key and values.
                                                      properties:
                                                        key:
                                                          description: key is the
                                                            label key that the selector
                                                            applies to.
                                                          type: string
                                                        operator:
                                                          description: |-
                                                            operator represents a key's relationship to a set of values.
                                                            Valid operators are In, NotIn, Exists and DoesNotExist.
                                                          type: string
                                                        values:
                                                          description: |-
                                                            values is an array of string values. If the operator is In or NotIn,
                                                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                            the values array must be empty. This array is replaced during a strategic
                                                            merge patch.
                                                          items:
                                                            type: string
                                                          type: array
                                                          x-kubernetes-list-type: atomic
                                                      required:
                                                      - key
                                                      - operator
                                                      type: object
                                                    type: array
                                                    x-kubernetes-list-type: atomic
                                                  matchLabels:
                                                    additionalProperties:
                                                      type: string
                                                    description: |-
                                                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                      map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                      operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                    type: object
                                                type: object
                                                x-kubernetes-map-type: atomic
                                              namespaces:
                                                description: |-
                                                  namespaces specifies a static list of namespace names that the term applies to.
                                                  The term is applied to the union of the namespaces listed in this field
                                                  and the ones selected by namespaceSelector.
                                                  null or empty namespaces list and null namespaceSelector means "this pod's namespace".
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              topologyKey:
                                                description: |-
                                                  This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching
                                                  the labelSelector in the specified namespaces, where co-located is defined as running on a node
                                                  whose value of the label with key topologyKey matches that of any node on which any of the
                                                  selected pods is running.
                                                  Empty topologyKey is not allowed.
                                                type: string
                                            required:
                                            - topologyKey
                                            type: object
                                          weight:
                                            description: |-
                                              weight associated with matching the corresponding podAffinityTerm,
                                              in the range 1-100.
                                            format: int32
                                            type: integer
                                        required:
                                        - podAffinityTerm
                                        - weight
                                        type: object
                                      type: array
                                      x-kubernetes-list-type: atomic
                                    requiredDuringSchedulingIgnoredDuringExecution:
                                      description: |-
                                        If the affinity requirements specified by this field are not met at
                                        scheduling time, the pod will not be scheduled onto the node.
                                        If the affinity requirements specified by this field cease to be met
                                        at some point during pod execution (e.g. due to a pod label update), the
                                        system may or may not try to eventually evict the pod from its node.
                                        When there are multiple elements, the lists of nodes corresponding to each
                                        podAffinityTerm are intersected, i.e. all terms must be satisfied.
                                      items:
                                        description: |-
                                          Defines a set of pods (namely those matching the labelSelector
                                          relative to the given namespace(s)) that this pod should be
                                          co-located (affinity) or not co-located (anti-affinity) with,
                                          where co-located is defined as running on a node whose value of
                                          the label with key <topologyKey> matches that of any node on which
                                          a pod of the set of pods is running
                                        properties:
                                          labelSelector:
                                            description: |-
                                              A label query over a set of resources, in this case pods.
                                              If it's null, this PodAffinityTerm matches with no Pods.
                                            properties:
                                              matchExpressions:
                                                description: matchExpressions is a
                                                  list of label selector requirements.
                                                  The requirements are ANDed.
                                                items:
                                                  description: |-
                                                    A label selector requirement is a selector that contains values, a key, and an operator that
                                                    relates the key and values.
                                                  properties:
                                                    key:
                                                      description: key is the label
                                                        key that the selector applies
                                                        to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        operator represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        values is an array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. This array is replaced during a strategic
                                                        merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              matchLabels:
                                                additionalProperties:
                                                  type: string
                                                description: |-
                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                type: object
                                            type: object
                                            x-kubernetes-map-type: atomic
                                          matchLabelKeys:
                                            description: |-
                                              MatchLabelKeys is a set of pod label keys to select which pods will
                                              be taken into consideration. The keys are used to lookup values from the
                                              incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)`
                                              to select the group of existing pods which will be taken into consideration
                                              for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                              pod labels will be ignored. The default value is empty.
                                              The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                              Also, matchLabelKeys cannot be set when labelSelector isn't set.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          mismatchLabelKeys:
                                            description: |-
                                              MismatchLabelKeys is a set of pod label keys to select which pods will
                                              be taken into consideration. The keys are used to lookup values from the
                                              incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)`
                                              to select the group of existing pods which will be taken into consideration
                                              for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                              pod labels will be ignored. The default value is empty.
                                              The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                              Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          namespaceSelector:
                                            description: |-
                                              A label query over the set of namespaces that the term applies to.
                                              The term is applied to the union of the namespaces selected by this field
                                              and the ones listed in the namespaces field.
                                              null selector and null or empty namespaces list means "this pod's namespace".
                                              An empty selector ({}) matches all namespaces.
                                            properties:
                                              matchExpressions:
                                                description: matchExpressions is a
                                                  list of label selector requirements.
                                                  The requirements are ANDed.
                                                items:
                                                  description: |-
                                                    A label selector requirement is a selector that contains values, a key, and an operator that
                                                    relates the key and values.
                                                  properties:
                                                    key:
                                                      description: key is the label
                                                        key that the selector applies
                                                        to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        operator represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        values is an array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. This array is replaced during a strategic
                                                        merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              matchLabels:
                                                additionalProperties:
                                                  type: string
                                                description: |-
                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                type: object
                                            type: object
                                            x-kubernetes-map-type: atomic
                                          namespaces:
                                            description: |-
                                              namespaces specifies a static list of namespace names that the term applies to.
                                              The term is applied to the union of the namespaces listed in this field
                                              and the ones selected by namespaceSelector.
                                              null or empty namespaces list and null namespaceSelector means "this pod's namespace".
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          topologyKey:
                                            description: |-
                                              This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching
                                              the labelSelector in the specified namespaces, where co-located is defined as running on a node
                                              whose value of the label with key topologyKey matches that of any node on which any of the
                                              selected pods is running.
                                              Empty topologyKey is not allowed.
                                            type: string
                                        required:
                                        - topologyKey
                                        type: object
                                      type: array
                                      x-kubernetes-list-type: atomic
                                  type: object
                                podAntiAffinity:
                                  description: Describes pod anti-affinity scheduling
                                    rules (e.g. avoid putting this pod in the same
                                    node, zone, etc. as some other pod(s)).
                                  properties:
                                    preferredDuringSchedulingIgnoredDuringExecution:
                                      description: |-
                                        The scheduler will prefer to schedule pods to nodes that satisfy
                                        the anti-affinity expressions specified by this field, but it may choose
                                        a node that violates one or more of the expressions. The node that is
                                        most preferred is the one with the greatest sum of weights, i.e.
                                        for each node that meets all of the scheduling requirements (resource
                                        request, requiredDuringScheduling anti-affinity expressions, etc.),
                                        compute a sum by iterating through the elements of this field and subtracting
                                        "weight" from the sum if the node has pods which match the corresponding podAffinityTerm; the
                                        node(s) with the highest sum are the most preferred.
                                      items:
                                        description: The weights of all of the matched
                                          WeightedPodAffinityTerm fields are added
                                          per-node to find the most preferred node(s)
                                        properties:
                                          podAffinityTerm:
                                            description: Required. A pod affinity
                                              term, associated with the corresponding
                                              weight.
                                            properties:
                                              labelSelector:
                                                description: |-
                                                  A label query over a set of resources, in this case pods.
                                                  If it's null, this PodAffinityTerm matches with no Pods.
                                                properties:
                                                  matchExpressions:
                                                    description: matchExpressions
                                                      is a list of label selector
                                                      requirements. The requirements
                                                      are ANDed.
                                                    items:
                                                      description: |-
                                                        A label selector requirement is a selector that contains values, a key, and an operator that
                                                        relates the key and values.
                                                      properties:
                                                        key:
                                                          description: key is the
                                                            label key that the selector
                                                            applies to.
                                                          type: string
                                                        operator:
                                                          description: |-
                                                            operator represents a key's relationship to a set of values.
                                                            Valid operators are In, NotIn, Exists and DoesNotExist.
                                                          type: string
                                                        values:
                                                          description: |-
                                                            values is an array of string values. If the operator is In or NotIn,
                                                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                            the values array must be empty. This array is replaced during a strategic
                                                            merge patch.
                                                          items:
                                                            type: string
                                                          type: array
                                                          x-kubernetes-list-type: atomic
                                                      required:
                                                      - key
                                                      - operator
                                                      type: object
                                                    type: array
                                                    x-kubernetes-list-type: atomic
                                                  matchLabels:
                                                    additionalProperties:
                                                      type: string
                                                    description: |-
                                                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                      map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                      operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                    type: object
                                                type: object
                                                x-kubernetes-map-type: atomic
                                              matchLabelKeys:
                                                description: |-
                                                  MatchLabelKeys is a set of pod label keys to select which pods will
                                                  be taken into consideration. The keys are used to look up values from the
                                                  incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)`
                                                  to select the group of existing pods which will be taken into consideration
                                                  for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                                  pod labels will be ignored. The default value is empty.
                                                  The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                                  Also, matchLabelKeys cannot be set when labelSelector isn't set.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              mismatchLabelKeys:
                                                description: |-
                                                  MismatchLabelKeys is a set of pod label keys to select which pods will
                                                  be taken into consideration. The keys are used to look up values from the
                                                  incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)`
                                                  to select the group of existing pods which will be taken into consideration
                                                  for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                                  pod labels will be ignored. The default value is empty.
                                                  The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                                  Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              namespaceSelector:
                                                description: |-
                                                  A label query over the set of namespaces that the term applies to.
                                                  The term is applied to the union of the namespaces selected by this field
                                                  and the ones listed in the namespaces field.
                                                  null selector and null or empty namespaces list means "this pod's namespace".
                                                  An empty selector ({}) matches all namespaces.
                                                properties:
                                                  matchExpressions:
                                                    description: matchExpressions
                                                      is a list of label selector
                                                      requirements. The requirements
                                                      are ANDed.
                                                    items:
                                                      description: |-
                                                        A label selector requirement is a selector that contains values, a key, and an operator that
                                                        relates the key and values.
                                                      properties:
                                                        key:
                                                          description: key is the
                                                            label key that the selector
                                                            applies to.
                                                          type: string
                                                        operator:
                                                          description: |-
                                                            operator represents a key's relationship to a set of values.
                                                            Valid operators are In, NotIn, Exists and DoesNotExist.
                                                          type: string
                                                        values:
                                                          description: |-
                                                            values is an array of string values. If the operator is In or NotIn,
                                                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                            the values array must be empty. This array is replaced during a strategic
                                                            merge patch.
                                                          items:
                                                            type: string
                                                          type: array
                                                          x-kubernetes-list-type: atomic
                                                      required:
                                                      - key
                                                      - operator
                                                      type: object
                                                    type: array
                                                    x-kubernetes-list-type: atomic
                                                  matchLabels:
                                                    additionalProperties:
                                                      type: string
                                                    description: |-
                                                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                      map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                      operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                    type: object
                                                type: object
                                                x-kubernetes-map-type: atomic
                                              namespaces:
                                                description: |-
                                                  namespaces specifies a static list of namespace names that the term applies to.
                                                  The term is applied to the union of the namespaces listed in this field
                                                  and the ones selected by namespaceSelector.
                                                  null or empty namespaces list and null namespaceSelector means "this pod's namespace".
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              topologyKey:
                                                description: |-
                                                  This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching
                                                  the labelSelector in the specified namespaces, where co-located is defined as running on a node
                                                  whose value of the label with key topologyKey matches that of any node on which any of the
                                                  selected pods is running.
                                                  Empty topologyKey is not allowed.
                                                type: string
                                            required:
                                            - topologyKey
                                            type: object
                                          weight:
                                            description: |-
                                              weight associated with matching the corresponding podAffinityTerm,
                                              in the range 1-100.
                                            format: int32
                                            type: integer
                                        required:
                                        - podAffinityTerm
                                        - weight
                                        type: object
                                      type: array
                                      x-kubernetes-list-type: atomic
                                    requiredDuringSchedulingIgnoredDuringExecution:
                                      description: |-
                                        If the anti-affinity requirements specified by this field are not met at
                                        scheduling time, the pod will not be scheduled onto the node.
                                        If the anti-affinity requirements specified by this field cease to be met
                                        at some point during pod execution (e.g. due to a pod label update), the
                                        system may or may not try to eventually evict the pod from its node.
                                        When there are multiple elements, the lists of nodes corresponding to each
                                        podAffinityTerm are intersected, i.e. all terms must be satisfied.
                                      items:
                                        description: |-
                                          Defines a set of pods (namely those matching the labelSelector
                                          relative to the given namespace(s)) that this pod should be
                                          co-located (affinity) or not co-located (anti-affinity) with,
                                          where co-located is defined as running on a node whose value of
                                          the label with key <topologyKey> matches that of any node on which
                                          a pod of the set of pods is running.
                                        properties:
                                          labelSelector:
                                            description: |-
                                              A label query over a set of resources, in this case pods.
                                              If it's null, this PodAffinityTerm matches with no Pods.
                                            properties:
                                              matchExpressions:
                                                description: matchExpressions is a
                                                  list of label selector requirements.
                                                  The requirements are ANDed.
                                                items:
                                                  description: |-
                                                    A label selector requirement is a selector that contains values, a key, and an operator that
                                                    relates the key and values.
                                                  properties:
                                                    key:
                                                      description: key is the label
                                                        key that the selector applies
                                                        to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        operator represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        values is an array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. This array is replaced during a strategic
                                                        merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              matchLabels:
                                                additionalProperties:
                                                  type: string
                                                description: |-
                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                type: object
                                            type: object
                                            x-kubernetes-map-type: atomic
                                          matchLabelKeys:
                                            description: |-
                                              MatchLabelKeys is a set of pod label keys to select which pods will
                                              be taken into consideration. The keys are used to look up values from the
                                              incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)`
                                              to select the group of existing pods which will be taken into consideration
                                              for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                              pod labels will be ignored. The default value is empty.
                                              The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                              Also, matchLabelKeys cannot be set when labelSelector isn't set.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          mismatchLabelKeys:
                                            description: |-
                                              MismatchLabelKeys is a set of pod label keys to select which pods will
                                              be taken into consideration. The keys are used to look up values from the
                                              incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)`
                                              to select the group of existing pods which will be taken into consideration
                                              for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming
                                              pod labels will be ignored. The default value is empty.
                                              The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                              Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          namespaceSelector:
                                            description: |-
                                              A label query over the set of namespaces that the term applies to.
                                              The term is applied to the union of the namespaces selected by this field
                                              and the ones listed in the namespaces field.
                                              null selector and null or empty namespaces list means "this pod's namespace".
                                              An empty selector ({}) matches all namespaces.
                                            properties:
                                              matchExpressions:
                                                description: matchExpressions is a
                                                  list of label selector requirements.
                                                  The requirements are ANDed.
                                                items:
                                                  description: |-
                                                    A label selector requirement is a selector that contains values, a key, and an operator that
                                                    relates the key and values.
                                                  properties:
                                                    key:
                                                      description: key is the label
                                                        key that the selector applies
                                                        to.
                                                      type: string
                                                    operator:
                                                      description: |-
                                                        operator represents a key's relationship to a set of values.
                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
                                                      type: string
                                                    values:
                                                      description: |-
                                                        values is an array of string values. If the operator is In or NotIn,
                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                        the values array must be empty. This array is replaced during a strategic
                                                        merge patch.
                                                      items:
                                                        type: string
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                  required:
                                                  - key
                                                  - operator
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              matchLabels:
                                                additionalProperties:
                                                  type: string
                                                description: |-
                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                type: object
                                            type: object
                                            x-kubernetes-map-type: atomic
                                          namespaces:
                                            description: |-
                                              namespaces specifies a static list of namespace names that the term applies to.
                                              The term is applied to the union of the namespaces listed in this field
                                              and the ones selected by namespaceSelector.
                                              null or empty namespaces list and null namespaceSelector means "this pod's namespace".
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          topologyKey:
                                            description: |-
                                              This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching
                                              the labelSelector in the specified namespaces, where co-located is defined as running on a node
                                              whose value of the label with key topologyKey matches that of any node on which any of the
                                              selected pods is running.
                                              Empty topologyKey is not allowed.
                                            type: string
                                        required:
                                        - topologyKey
                                        type: object
                                      type: array
                                      x-kubernetes-list-type: atomic
                                  type: object
                              type: object
                            automountServiceAccountToken:
                              description: AutomountServiceAccountToken indicates
                                whether a service account token should be automatically
                                mounted.
                              type: boolean
                            containers:
                              description: |-
                                List of containers belonging to the pod.
                                Containers cannot currently be added or removed.
                                There must be at least one container in a Pod.
                                Cannot be updated.
                              items:
                                description: A single application container that you
                                  want to run within a pod.
                                properties:
                                  args:
                                    description: |-
                                      Arguments to the entrypoint.
                                      The container image's CMD is used if this is not provided.
                                      Variable references $(VAR_NAME) are expanded using the container's environment. If a variable
                                      cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced
                                      to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will
                                      produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless
                                      of whether the variable exists or not. Cannot be updated.
                                      More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  command:
                                    description: |-
                                      Entrypoint array. Not executed within a shell.
                                      The container image's ENTRYPOINT is used if this is not provided.
                                      Variable references $(VAR_NAME) are expanded using the container's environment. If a variable
                                      cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced
                                      to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will
                                      produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless
                                      of whether the variable exists or not. Cannot be updated.
                                      More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  env:
                                    description: |-
                                      List of environment variables to set in the container.
                                      Cannot be updated.
                                    items:
                                      description: EnvVar represents an environment
                                        variable present in a Container.
                                      properties:
                                        name:
                                          description: |-
                                            Name of the environment variable.
                                            May consist of any printable ASCII characters except '='.
                                          type: string
                                        value:
                                          description: |-
                                            Variable references $(VAR_NAME) are expanded
                                            using the previously defined environment variables in the container and
                                            any service environment variables. If a variable cannot be resolved,
                                            the reference in the input string will be unchanged. Double $$ are reduced
                                            to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
                                            "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
                                            Escaped references will never be expanded, regardless of whether the variable
                                            exists or not.
                                            Defaults to "".
                                          type: string
                                        valueFrom:
                                          description: Source for the environment
                                            variable's value. Cannot be used if value
                                            is not empty.
                                          properties:
                                            configMapKeyRef:
                                              description: Selects a key of a ConfigMap.
                                              properties:
                                                key:
                                                  description: The key to select.
                                                  type: string
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: Specify whether the
                                                    ConfigMap or its key must be defined
                                                  type: boolean
                                              required:
                                              - key
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            fieldRef:
                                              description: |-
                                                Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
                                                spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
                                              properties:
                                                apiVersion:
                                                  description: Version of the schema
                                                    the FieldPath is written in terms
                                                    of, defaults to "v1".
                                                  type: string
                                                fieldPath:
                                                  description: Path of the field to
                                                    select in the specified API version.
                                                  type: string
                                              required:
                                              - fieldPath
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            fileKeyRef:
                                              description: |-
                                                FileKeyRef selects a key of the env file.
                                                Requires the EnvFiles feature gate to be enabled.
                                              properties:
                                                key:
                                                  description: |-
                                                    The key within the env file. An invalid key will prevent the pod from starting.
                                                    The keys defined within a source may consist of any printable ASCII characters except '='.
                                                    During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
                                                  type: string
                                                optional:
                                                  default: false
                                                  description: |-
                                                    Specify whether the file or its key must be defined. If the file or key
                                                    does not exist, then the env var is not published.
                                                    If optional is set to true and the specified key does not exist,
                                                    the environment variable will not be set in the Pod's containers.

                                                    If optional is set to false and the specified key does not exist,
                                                    an error will be returned during Pod creation.
                                                  type: boolean
                                                path:
                                                  description: |-
                                                    The path within the volume from which to select the file.
                                                    Must be relative and may not contain the '..' path or start with '..'.
                                                  type: string
                                                volumeName:
                                                  description: The name of the volume
                                                    mount containing the env file.
                                                  type: string
                                              required:
                                              - key
                                              - path
                                              - volumeName
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            resourceFieldRef:
                                              description: |-
                                                Selects a resource of the container: only resource limits and requests
                                                (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
                                              properties:
                                                containerName:
                                                  description: 'Container name: required
                                                    for volumes, optional for env
                                                    vars'
                                                  type: string
                                                divisor:
                                                  anyOf:
                                                  - type: integer
                                                  - type: string
                                                  description: Specifies the output
                                                    format of the exposed resources,
                                                    defaults to "1"
                                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                  x-kubernetes-int-or-string: true
                                                resource:
                                                  description: 'Required: resource
                                                    to select'
                                                  type: string
                                              required:
                                              - resource
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            secretKeyRef:
                                              description: Selects a key of a secret
                                                in the pod's namespace
                                              properties:
                                                key:
                                                  description: The key of the secret
                                                    to select from. Must be a valid
                                                    secret key.
                                                  type: string
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: Specify whether the
                                                    Secret or its key must be defined
                                                  type: boolean
                                              required:
                                              - key
                                              type: object
                                              x-kubernetes-map-type: atomic
                                          type: object
                                      required:
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - name
                                    x-kubernetes-list-type: map
                                  envFrom:
                                    description: |-
                                      List of sources to populate environment variables in the container.
                                      The keys defined within a source may consist of any printable ASCII characters except '='.
                                      When a key exists in multiple
                                      sources, the value associated with the last source will take precedence.
                                      Values defined by an Env with a duplicate key will take precedence.
                                      Cannot be updated.
                                    items:
                                      description: EnvFromSource represents the source
                                        of a set of ConfigMaps or Secrets
                                      properties:
                                        configMapRef:
                                          description: The ConfigMap to select from
                                          properties:
                                            name:
                                              default: ""
                                              description: |-
                                                Name of the referent.
                                                This field is effectively required, but due to backwards compatibility is
                                                allowed to be empty. Instances of this type with an empty value here are
                                                almost certainly wrong.
                                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                              type: string
                                            optional:
                                              description: Specify whether the ConfigMap
                                                must be defined
                                              type: boolean
                                          type: object
                                          x-kubernetes-map-type: atomic
                                        prefix:
                                          description: |-
                                            Optional text to prepend to the name of each environment variable.
                                            May consist of any printable ASCII characters except '='.
                                          type: string
                                        secretRef:
                                          description: The Secret to select from
                                          properties:
                                            name:
                                              default: ""
                                              description: |-
                                                Name of the referent.
                                                This field is effectively required, but due to backwards compatibility is
                                                allowed to be empty. Instances of this type with an empty value here are
                                                almost certainly wrong.
                                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                              type: string
                                            optional:
                                              description: Specify whether the Secret
                                                must be defined
                                              type: boolean
                                          type: object
                                          x-kubernetes-map-type: atomic
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  image:
                                    description: |-
                                      Container image name.
                                      More info: https://kubernetes.io/docs/concepts/containers/images
                                      This field is optional to allow higher level config management to default or override
                                      container images in workload controllers like Deployments and StatefulSets.
                                    type: string
                                  imagePullPolicy:
                                    description: |-
                                      Image pull policy.
                                      One of Always, Never, IfNotPresent.
                                      Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
                                    type: string
                                  lifecycle:
                                    description: |-
                                      Actions that the management system should take in response to container lifecycle events.
                                      Cannot be updated.
                                    properties:
                                      postStart:
                                        description: |-
                                          PostStart is called immediately after a container is created. If the handler fails,
                                          the container is terminated and restarted according to its restart policy.
                                          Other management of the container blocks until the hook completes.
                                          More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
                                        properties:
                                          exec:
                                            description: Exec specifies a command
                                              to execute in the container.
                                            properties:
                                              command:
                                                description: |-
                                                  Command is the command line to execute inside the container, the working directory for the
                                                  command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                                  not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                                  a shell, you need to explicitly call out to that shell.
                                                  Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                          httpGet:
                                            description: HTTPGet specifies an HTTP
                                              GET request to perform.
                                            properties:
                                              host:
                                                description: |-
                                                  Host name to connect to, defaults to the pod IP. You probably want to set
                                                  "Host" in httpHeaders instead.
                                                type: string
                                              httpHeaders:
                                                description: Custom headers to set
                                                  in the request. HTTP allows repeated
                                                  headers.
                                                items:
                                                  description: HTTPHeader describes
                                                    a custom header to be used in
                                                    HTTP probes
                                                  properties:
                                                    name:
                                                      description: |-
                                                        The header field name.
                                                        This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                      type: string
                                                    value:
                                                      description: The header field
                                                        value
                                                      type: string
                                                  required:
                                                  - name
                                                  - value
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              path:
                                                description: Path to access on the
                                                  HTTP server.
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Name or number of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                              scheme:
                                                description: |-
                                                  Scheme to use for connecting to the host.
                                                  Defaults to HTTP.
                                                type: string
                                            required:
                                            - port
                                            type: object
                                          sleep:
                                            description: Sleep represents a duration
                                              that the container should sleep.
                                            properties:
                                              seconds:
                                                description: Seconds is the number
                                                  of seconds to sleep.
                                                format: int64
                                                type: integer
                                            required:
                                            - seconds
                                            type: object
                                          tcpSocket:
                                            description: |-
                                              Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
                                              for backward compatibility. There is no validation of this field and
                                              lifecycle hooks will fail at runtime when it is specified.
                                            properties:
                                              host:
                                                description: 'Optional: Host name
                                                  to connect to, defaults to the pod
                                                  IP.'
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Number or name of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                            required:
                                            - port
                                            type: object
                                        type: object
                                      preStop:
                                        description: |-
                                          PreStop is called immediately before a container is terminated due to an
                                          API request or management event such as liveness/startup probe failure,
                                          preemption, resource contention, etc. The handler is not called if the
                                          container crashes or exits. The Pod's termination grace period countdown begins before the
                                          PreStop hook is executed. Regardless of the outcome of the handler, the
                                          container will eventually terminate within the Pod's termination grace
                                          period (unless delayed by finalizers). Other management of the container blocks until the hook completes
                                          or until the termination grace period is reached.
                                          More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
                                        properties:
                                          exec:
                                            description: Exec specifies a command
                                              to execute in the container.
                                            properties:
                                              command:
                                                description: |-
                                                  Command is the command line to execute inside the container, the working directory for the
                                                  command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                                  not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                                  a shell, you need to explicitly call out to that shell.
                                                  Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                          httpGet:
                                            description: HTTPGet specifies an HTTP
                                              GET request to perform.
                                            properties:
                                              host:
                                                description: |-
                                                  Host name to connect to, defaults to the pod IP. You probably want to set
                                                  "Host" in httpHeaders instead.
                                                type: string
                                              httpHeaders:
                                                description: Custom headers to set
                                                  in the request. HTTP allows repeated
                                                  headers.
                                                items:
                                                  description: HTTPHeader describes
                                                    a custom header to be used in
                                                    HTTP probes
                                                  properties:
                                                    name:
                                                      description: |-
                                                        The header field name.
                                                        This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                      type: string
                                                    value:
                                                      description: The header field
                                                        value
                                                      type: string
                                                  required:
                                                  - name
                                                  - value
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              path:
                                                description: Path to access on the
                                                  HTTP server.
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Name or number of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                              scheme:
                                                description: |-
                                                  Scheme to use for connecting to the host.
                                                  Defaults to HTTP.
                                                type: string
                                            required:
                                            - port
                                            type: object
                                          sleep:
                                            description: Sleep represents a duration
                                              that the container should sleep.
                                            properties:
                                              seconds:
                                                description: Seconds is the number
                                                  of seconds to sleep.
                                                format: int64
                                                type: integer
                                            required:
                                            - seconds
                                            type: object
                                          tcpSocket:
                                            description: |-
                                              Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
                                              for backward compatibility. There is no validation of this field and
                                              lifecycle hooks will fail at runtime when it is specified.
                                            properties:
                                              host:
                                                description: 'Optional: Host name
                                                  to connect to, defaults to the pod
                                                  IP.'
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Number or name of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                            required:
                                            - port
                                            type: object
                                        type: object
                                      stopSignal:
                                        description: |-
                                          StopSignal defines which signal will be sent to a container when it is being stopped.
                                          If not specified, the default is defined by the container runtime in use.
                                          StopSignal can only be set for Pods with a non-empty .spec.os.name
                                        type: string
                                    type: object
                                  livenessProbe:
                                    description: |-
                                      Periodic probe of container liveness.
                                      Container will be restarted if the probe fails.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before liveness probes are initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  name:
                                    description: |-
                                      Name of the container specified as a DNS_LABEL.
                                      Each container in a pod must have a unique name (DNS_LABEL).
                                      Cannot be updated.
                                    type: string
                                  ports:
                                    description: |-
                                      List of ports to expose from the container. Not specifying a port here
                                      DOES NOT prevent that port from being exposed. Any port which is
                                      listening on the default "0.0.0.0" address inside a container will be
                                      accessible from the network.
                                      Modifying this array with strategic merge patch may corrupt the data.
                                      For more information, see https://github.com/kubernetes/kubernetes/issues/108255.
                                      Cannot be updated.
                                    items:
                                      description: ContainerPort represents a network
                                        port in a single container.
                                      properties:
                                        containerPort:
                                          description: |-
                                            Number of port to expose on the pod's IP address.
                                            This must be a valid port number, 0 < x < 65536.
                                          format: int32
                                          type: integer
                                        hostIP:
                                          description: What host IP to bind the external
                                            port to.
                                          type: string
                                        hostPort:
                                          description: |-
                                            Number of port to expose on the host.
                                            If specified, this must be a valid port number, 0 < x < 65536.
                                            If HostNetwork is specified, this must match ContainerPort.
                                            Most containers do not need this.
                                          format: int32
                                          type: integer
                                        name:
                                          description: |-
                                            If specified, this must be an IANA_SVC_NAME and unique within the pod. Each
                                            named port in a pod must have a unique name. Name for the port that can be
                                            referred to by services.
                                          type: string
                                        protocol:
                                          default: TCP
                                          description: |-
                                            Protocol for port. Must be UDP, TCP, or SCTP.
                                            Defaults to "TCP".
                                          type: string
                                      required:
                                      - containerPort
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - containerPort
                                    - protocol
                                    x-kubernetes-list-type: map
                                  readinessProbe:
                                    description: |-
                                      Periodic probe of container service readiness.
                                      Container will be removed from service endpoints if the probe fails.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before readiness probes are initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  resizePolicy:
                                    description: |-
                                      Resources resize policy for the container.
                                      This field cannot be set on ephemeral containers.
                                    items:
                                      description: ContainerResizePolicy represents
                                        resource resize policy for the container.
                                      properties:
                                        resourceName:
                                          description: |-
                                            Name of the resource to which this resource resize policy applies.
                                            Supported values: cpu, memory.
                                          type: string
                                        restartPolicy:
                                          description: |-
                                            Restart policy to apply when specified resource is resized.
                                            If not specified, it defaults to NotRequired.
                                          type: string
                                      required:
                                      - resourceName
                                      - restartPolicy
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  resources:
                                    description: |-
                                      Compute Resources required by this container.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                    properties:
                                      claims:
                                        description: |-
                                          Claims lists the names of resources, defined in spec.resourceClaims,
                                          that are used by this container.

                                          This field depends on the
                                          DynamicResourceAllocation feature gate.

                                          This field is immutable. It can only be set for containers.
                                        items:
                                          description: ResourceClaim references one
                                            entry in PodSpec.ResourceClaims.
                                          properties:
                                            name:
                                              description: |-
                                                Name must match the name of one entry in pod.spec.resourceClaims of
                                                the Pod where this field is used. It makes that resource available
                                                inside a container.
                                              type: string
                                            request:
                                              description: |-
                                                Request is the name chosen for a request in the referenced claim.
                                                If empty, everything from the claim is made available, otherwise
                                                only the result of this request.
                                              type: string
                                          required:
                                          - name
                                          type: object
                                        type: array
                                        x-kubernetes-list-map-keys:
                                        - name
                                        x-kubernetes-list-type: map
                                      limits:
                                        additionalProperties:
                                          anyOf:
                                          - type: integer
                                          - type: string
                                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                          x-kubernetes-int-or-string: true
                                        description: |-
                                          Limits describes the maximum amount of compute resources allowed.
                                          More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                        type: object
                                      requests:
                                        additionalProperties:
                                          anyOf:
                                          - type: integer
                                          - type: string
                                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                          x-kubernetes-int-or-string: true
                                        description: |-
                                          Requests describes the minimum amount of compute resources required.
                                          If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
                                          otherwise to an implementation-defined value. Requests cannot exceed Limits.
                                          More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                        type: object
                                    type: object
                                  restartPolicy:
                                    description: |-
                                      RestartPolicy defines the restart behavior of individual containers in a pod.
                                      This overrides the pod-level restart policy. When this field is not specified,
                                      the restart behavior is defined by the Pod's restart policy and the container type.
                                      Additionally, setting the RestartPolicy as "Always" for the init container will
                                      have the following effect:
                                      this init container will be continually restarted on
                                      exit until all regular containers have terminated. Once all regular
                                      containers have completed, all init containers with restartPolicy "Always"
                                      will be shut down. This lifecycle differs from normal init containers and
                                      is often referred to as a "sidecar" container. Although this init
                                      container still starts in the init container sequence, it does not wait
                                      for the container to complete before proceeding to the next init
                                      container. Instead, the next init container starts immediately after this
                                      init container is started, or after any startupProbe has successfully
                                      completed.
                                    type: string
                                  restartPolicyRules:
                                    description: |-
                                      Represents a list of rules to be checked to determine if the
                                      container should be restarted on exit. The rules are evaluated in
                                      order. Once a rule matches a container exit condition, the remaining
                                      rules are ignored. If no rule matches the container exit condition,
                                      the Container-level restart policy determines whether the container
                                      is restarted or not. Constraints on the rules:
                                      - At most 20 rules are allowed.
                                      - Rules can have the same action.
                                      - Identical rules are not forbidden in validations.
                                      When rules are specified, container MUST set RestartPolicy explicitly
                                      even if it matches the Pod's RestartPolicy.
                                    items:
                                      description: ContainerRestartRule describes
                                        how a container exit is handled.
                                      properties:
                                        action:
                                          description: |-
                                            Specifies the action taken on a container exit if the requirements
                                            are satisfied. The only possible value is "Restart" to restart the
                                            container.
                                          type: string
                                        exitCodes:
                                          description: Represents the exit codes to
                                            check on container exits.
                                          properties:
                                            operator:
                                              description: |-
                                                Represents the relationship between the container exit code(s) and the
                                                specified values. Possible values are:
                                                - In: the requirement is satisfied if the container exit code is in the
                                                  set of specified values.
                                                - NotIn: the requirement is satisfied if the container exit code is
                                                  not in the set of specified values.
                                              type: string
                                            values:
                                              description: |-
                                                Specifies the set of values to check for container exit codes.
                                                At most 255 elements are allowed.
                                              items:
                                                format: int32
                                                type: integer
                                              type: array
                                              x-kubernetes-list-type: set
                                          required:
                                          - operator
                                          type: object
                                      required:
                                      - action
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  securityContext:
                                    description: |-
                                      SecurityContext defines the security options the container should be run with.
                                      If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
                                      More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
                                    properties:
                                      allowPrivilegeEscalation:
                                        description: |-
                                          AllowPrivilegeEscalation controls whether a process can gain more
                                          privileges than its parent process. This bool directly controls if
                                          the no_new_privs flag will be set on the container process.
                                          AllowPrivilegeEscalation is always true when the container is:
                                          1) run as Privileged
                                          2) has CAP_SYS_ADMIN
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      appArmorProfile:
                                        description: |-
                                          appArmorProfile is the AppArmor options to use by this container. If set, this profile
                                          overrides the pod's appArmorProfile.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          localhostProfile:
                                            description: |-
                                              localhostProfile indicates a profile loaded on the node that should be used.
                                              The profile must be preconfigured on the node to work.
                                              Must match the loaded name of the profile.
                                              Must be set if and only if type is "Localhost".
                                            type: string
                                          type:
                                            description: |-
                                              type indicates which kind of AppArmor profile will be applied.
                                              Valid options are:
                                                Localhost - a profile pre-loaded on the node.
                                                RuntimeDefault - the container runtime's default profile.
                                                Unconfined - no AppArmor enforcement.
                                            type: string
                                        required:
                                        - type
                                        type: object
                                      capabilities:
                                        description: |-
                                          The capabilities to add/drop when running containers.
                                          Defaults to the default set of capabilities granted by the container runtime.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          add:
                                            description: Added capabilities
                                            items:
                                              description: Capability represents a POSIX
                                                capability type
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          drop:
                                            description: Removed capabilities
                                            items:
                                              description: Capability represents a POSIX
                                                capability type
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      privileged:
                                        description: |-
                                          Run container in privileged mode.
                                          Processes in privileged containers are essentially equivalent to root on the host.
                                          Defaults to false.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      procMount:
                                        description: |-
                                          procMount denotes the type of proc mount to use for the containers.
                                          The default value is Default which uses the container runtime defaults for
                                          readonly paths and masked paths.
                                          This requires the ProcMountType feature flag to be enabled.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: string
                                      readOnlyRootFilesystem:
                                        description: |-
                                          Whether this container has a read-only root filesystem.
                                          Default is false.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      runAsGroup:
                                        description: |-
                                          The GID to run the entrypoint of the container process.
                                          Uses runtime default if unset.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        format: int64
                                        type: integer
                                      runAsNonRoot:
                                        description: |-
                                          Indicates that the container must run as a non-root user.
                                          If true, the Kubelet will validate the image at runtime to ensure that it
                                          does not run as UID 0 (root) and fail to start the container if it does.
                                          If unset or false, no such validation will be performed.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                        type: boolean
                                      runAsUser:
                                        description: |-
                                          The UID to run the entrypoint of the container process.
                                          Defaults to user specified in image metadata if unspecified.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        format: int64
                                        type: integer
                                      seLinuxOptions:
                                        description: |-
                                          The SELinux context to be applied to the container.
                                          If unspecified, the container runtime will allocate a random SELinux context for each
                                          container.  May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          level:
                                            description: Level is SELinux level label
                                              that applies to the container.
                                            type: string
                                          role:
                                            description: Role is a SELinux role label
                                              that applies to the container.
                                            type: string
                                          type:
                                            description: Type is a SELinux type label
                                              that applies to the container.
                                            type: string
                                          user:
                                            description: User is a SELinux user label
                                              that applies to the container.
                                            type: string
                                        type: object
                                      seccompProfile:
                                        description: |-
                                          The seccomp options to use by this container. If seccomp options are
                                          provided at both the pod & container level, the container options
                                          override the pod options.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          localhostProfile:
                                            description: |-
                                              localhostProfile indicates a profile defined in a file on the node should be used.
                                              The profile must be preconfigured on the node to work.
                                              Must be a descending path, relative to the kubelet's configured seccomp profile location.
                                              Must be set if type is "Localhost". Must NOT be set for any other type.
                                            type: string
                                          type:
                                            description: |-
                                              type indicates which kind of seccomp profile will be applied.
                                              Valid options are:

                                              Localhost - a profile defined in a file on the node should be used.
                                              RuntimeDefault - the container runtime default profile should be used.
                                              Unconfined - no profile should be applied.
                                            type: string
                                        required:
                                        - type
                                        type: object
                                      windowsOptions:
                                        description: |-
                                          The Windows specific settings applied to all containers.
                                          If unspecified, the options from the PodSecurityContext will be used.
                                          If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is linux.
                                        properties:
                                          gmsaCredentialSpec:
                                            description: |-
                                              GMSACredentialSpec is where the GMSA admission webhook
                                              (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the
                                              GMSA credential spec named by the GMSACredentialSpecName field.
                                            type: string
                                          gmsaCredentialSpecName:
                                            description: GMSACredentialSpecName is
                                              the name of the GMSA credential spec
                                              to use.
                                            type: string
                                          hostProcess:
                                            description: |-
                                              HostProcess determines if a container should be run as a 'Host Process' container.
                                              All of a Pod's containers must have the same effective HostProcess value
                                              (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers).
                                              In addition, if HostProcess is true then HostNetwork must also be set to true.
                                            type: boolean
                                          runAsUserName:
                                            description: |-
                                              The UserName in Windows to run the entrypoint of the container process.
                                              Defaults to the user specified in image metadata if unspecified.
                                              May also be set in PodSecurityContext. If set in both SecurityContext and
                                              PodSecurityContext, the value specified in SecurityContext takes precedence.
                                            type: string
                                        type: object
                                    type: object
                                  startupProbe:
                                    description: |-
                                      StartupProbe indicates that the Pod has successfully initialized.
                                      If specified, no other probes are executed until this completes successfully.
                                      If this probe fails, the Pod will be restarted, just as if the livenessProbe failed.
                                      This can be used to provide different probe parameters at the beginning of a Pod's lifecycle,
                                      when it might take a long time to load data or warm a cache, than during steady-state operation.
                                      This cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before liveness probes are initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  stdin:
                                    description: |-
                                      Whether this container should allocate a buffer for stdin in the container runtime. If this
                                      is not set, reads from stdin in the container will always result in EOF.
                                      Default is false.
                                    type: boolean
                                  stdinOnce:
                                    description: |-
                                      Whether the container runtime should close the stdin channel after it has been opened by
                                      a single attach. When stdin is true the stdin stream will remain open across multiple attach
                                      sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the
                                      first client attaches to stdin, and then remains open and accepts data until the client disconnects,
                                      at which time stdin is closed and remains closed until the container is restarted. If this
                                      flag is false, a container process that reads from stdin will never receive an EOF.
                                      Default is false.
                                    type: boolean
                                  terminationMessagePath:
                                    description: |-
                                      Optional: Path at which the file to which the container's termination message
                                      will be written is mounted into the container's filesystem.
                                      Message written is intended to be brief final status, such as an assertion failure message.
                                      Will be truncated by the node if greater than 4096 bytes. The total message length across
                                      all containers will be limited to 12kb.
                                      Defaults to /dev/termination-log.
                                      Cannot be updated.
                                    type: string
                                  terminationMessagePolicy:
                                    description: |-
                                      Indicate how the termination message should be populated. File will use the contents of
                                      terminationMessagePath to populate the container status message on both success and failure.
                                      FallbackToLogsOnError will use the last chunk of container log output if the termination
                                      message file is empty and the container exited with an error.
                                      The log output is limited to 2048 bytes or 80 lines, whichever is smaller.
                                      Defaults to File.
                                      Cannot be updated.
                                    type: string
                                  tty:
                                    description: |-
                                      Whether this container should allocate a TTY for itself, also requires 'stdin' to be true.
                                      Default is false.
                                    type: boolean
                                  volumeDevices:
                                    description: volumeDevices is the list of block
                                      devices to be used by the container.
                                    items:
                                      description: volumeDevice describes a mapping
                                        of a raw block device within a container.
                                      properties:
                                        devicePath:
                                          description: devicePath is the path inside
                                            of the container that the device will
                                            be mapped to.
                                          type: string
                                        name:
                                          description: name must match the name of
                                            a persistentVolumeClaim in the pod
                                          type: string
                                      required:
                                      - devicePath
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - devicePath
                                    x-kubernetes-list-type: map
                                  volumeMounts:
                                    description: |-
                                      Pod volumes to mount into the container's filesystem.
                                      Cannot be updated.
                                    items:
                                      description: VolumeMount describes a mounting
                                        of a Volume within a container.
                                      properties:
                                        mountPath:
                                          description: |-
                                            Path within the container at which the volume should be mounted.  Must
                                            not contain ':'.
                                          type: string
                                        mountPropagation:
                                          description: |-
                                            mountPropagation determines how mounts are propagated from the host
                                            to container and the other way around.
                                            When not set, MountPropagationNone is used.
                                            This field is beta in 1.10.
                                            When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified
                                            (which defaults to None).
                                          type: string
                                        name:
                                          description: This must match the Name of
                                            a Volume.
                                          type: string
                                        readOnly:
                                          description: |-
                                            Mounted read-only if true, read-write otherwise (false or unspecified).
                                            Defaults to false.
                                          type: boolean
                                        recursiveReadOnly:
                                          description: |-
                                            RecursiveReadOnly specifies whether read-only mounts should be handled
                                            recursively.

                                            If ReadOnly is false, this field has no meaning and must be unspecified.

                                            If ReadOnly is true, and this field is set to Disabled, the mount is not made
                                            recursively read-only.  If this field is set to IfPossible, the mount is made
                                            recursively read-only, if it is supported by the container runtime.  If this
                                            field is set to Enabled, the mount is made recursively read-only if it is
                                            supported by the container runtime, otherwise the pod will not be started and
                                            an error will be generated to indicate the reason.

                                            If this field is set to IfPossible or Enabled, MountPropagation must be set to
                                            None (or be unspecified, which defaults to None).

                                            If this field is not specified, it is treated as an equivalent of Disabled.
                                          type: string
                                        subPath:
                                          description: |-
                                            Path within the volume from which the container's volume should be mounted.
                                            Defaults to "" (volume's root).
                                          type: string
                                        subPathExpr:
                                          description: |-
                                            Expanded path within the volume from which the container's volume should be mounted.
                                            Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment.
                                            Defaults to "" (volume's root).
                                            SubPathExpr and SubPath are mutually exclusive.
                                          type: string
                                      required:
                                      - mountPath
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - mountPath
                                    x-kubernetes-list-type: map
                                  workingDir:
                                    description: |-
                                      Container's working directory.
                                      If not specified, the container runtime's default will be used, which
                                      might be configured in the container image.
                                      Cannot be updated.
                                    type: string
                                required:
                                - name
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            dnsConfig:
                              description: |-
                                Specifies the DNS parameters of a pod.
                                Parameters specified here will be merged to the generated DNS
                                configuration based on DNSPolicy.
                              properties:
                                nameservers:
                                  description: |-
                                    A list of DNS name server IP addresses.
                                    This will be appended to the base nameservers generated from DNSPolicy.
                                    Duplicated nameservers will be removed.
                                  items:
                                    type: string
                                  type: array
                                  x-kubernetes-list-type: atomic
                                options:
                                  description: |-
                                    A list of DNS resolver options.
                                    This will be merged with the base options generated from DNSPolicy.
                                    Duplicated entries will be removed. Resolution options given in Options
                                    will override those that appear in the base DNSPolicy.
                                  items:
                                    description: PodDNSConfigOption defines DNS resolver
                                      options of a pod.
                                    properties:
                                      name:
                                        description: |-
                                          Name is this DNS resolver option's name.
                                          Required.
                                        type: string
                                      value:
                                        description: Value is this DNS resolver option's
                                          value.
                                        type: string
                                    type: object
                                  type: array
                                  x-kubernetes-list-type: atomic
                                searches:
                                  description: |-
                                    A list of DNS search domains for host-name lookup.
                                    This will be appended to the base search paths generated from DNSPolicy.
                                    Duplicated search paths will be removed.
                                  items:
                                    type: string
                                  type: array
                                  x-kubernetes-list-type: atomic
                              type: object
                            dnsPolicy:
                              description: |-
                                Set DNS policy for the pod.
                                Defaults to "ClusterFirst".
                                Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'.
                                DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy.
                                To have DNS options set along with hostNetwork, you have to explicitly set the DNS policy
                                to 'ClusterFirstWithHostNet'.
                              type: string
                            enableServiceLinks:
                              description: |-
                                EnableServiceLinks indicates whether information about services should be injected into pod's
                                environment variables, matching the syntax of Docker links.
                                Optional: Defaults to true.
                              type: boolean
                            ephemeralContainers:
                              description: |-
                                List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing
                                pod to perform user-initiated actions such as debugging. This list cannot be specified when
                                creating a pod, and it cannot be modified by updating the pod spec. In order to add an
                                ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource.
                              items:
                                description: |-
                                  An EphemeralContainer is a temporary container that you may add to an existing Pod for
                                  user-initiated activities such as debugging. Ephemeral containers have no resource or
                                  scheduling guarantees, and they will not be restarted when they exit or when a Pod is
                                  removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the
                                  Pod to exceed its resource allocation.

                                  To add an ephemeral container, use the ephemeralcontainers subresource of an existing
                                  Pod. Ephemeral containers may not be removed or restarted.
                                properties:
                                  args:
                                    description: |-
                                      Arguments to the entrypoint.
                                      The image's CMD is used if this is not provided.
                                      Variable references $(VAR_NAME) are expanded using the container's environment. If a variable
                                      cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced
                                      to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will
                                      produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless
                                      of whether the variable exists or not. Cannot be updated.
                                      More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  command:
                                    description: |-
                                      Entrypoint array. Not executed within a shell.
                                      The image's ENTRYPOINT is used if this is not provided.
                                      Variable references $(VAR_NAME) are expanded using the container's environment. If a variable
                                      cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced
                                      to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will
                                      produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless
                                      of whether the variable exists or not. Cannot be updated.
                                      More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  env:
                                    description: |-
                                      List of environment variables to set in the container.
                                      Cannot be updated.
                                    items:
                                      description: EnvVar represents an environment
                                        variable present in a Container.
                                      properties:
                                        name:
                                          description: |-
                                            Name of the environment variable.
                                            May consist of any printable ASCII characters except '='.
                                          type: string
                                        value:
                                          description: |-
                                            Variable references $(VAR_NAME) are expanded
                                            using the previously defined environment variables in the container and
                                            any service environment variables. If a variable cannot be resolved,
                                            the reference in the input string will be unchanged. Double $$ are reduced
                                            to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
                                            "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
                                            Escaped references will never be expanded, regardless of whether the variable
                                            exists or not.
                                            Defaults to "".
                                          type: string
                                        valueFrom:
                                          description: Source for the environment
                                            variable's value. Cannot be used if value
                                            is not empty.
                                          properties:
                                            configMapKeyRef:
                                              description: Selects a key of a ConfigMap.
                                              properties:
                                                key:
                                                  description: The key to select.
                                                  type: string
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: Specify whether the
                                                    ConfigMap or its key must be defined
                                                  type: boolean
                                              required:
                                              - key
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            fieldRef:
                                              description: |-
                                                Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
                                                spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
                                              properties:
                                                apiVersion:
                                                  description: Version of the schema
                                                    the FieldPath is written in terms
                                                    of, defaults to "v1".
                                                  type: string
                                                fieldPath:
                                                  description: Path of the field to
                                                    select in the specified API version.
                                                  type: string
                                              required:
                                              - fieldPath
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            # fileKeyRef: value source that reads one key from an env file
                                            # located in a volume. key, path and volumeName are all required;
                                            # optional is a boolean with an explicit schema default of false.
                                            fileKeyRef:
                                              description: |-
                                                FileKeyRef selects a key of the env file.
                                                Requires the EnvFiles feature gate to be enabled.
                                              properties:
                                                key:
                                                  description: |-
                                                    The key within the env file. An invalid key will prevent the pod from starting.
                                                    The keys defined within a source may consist of any printable ASCII characters except '='.
                                                    During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
                                                  type: string
                                                # Unlike the other *Ref sources in this schema, optional here
                                                # declares a default value (false).
                                                optional:
                                                  default: false
                                                  description: |-
                                                    Specify whether the file or its key must be defined. If the file or key
                                                    does not exist, then the env var is not published.
                                                    If optional is set to true and the specified key does not exist,
                                                    the environment variable will not be set in the Pod's containers.

                                                    If optional is set to false and the specified key does not exist,
                                                    an error will be returned during Pod creation.
                                                  type: boolean
                                                path:
                                                  description: |-
                                                    The path within the volume from which to select the file.
                                                    Must be relative and may not contain the '..' path or start with '..'.
                                                  type: string
                                                volumeName:
                                                  description: The name of the volume
                                                    mount containing the env file.
                                                  type: string
                                              # All three locator fields are mandatory.
                                              required:
                                              - key
                                              - path
                                              - volumeName
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            # resourceFieldRef: value source that reads a container resource
                                            # limit or request. Only resource is required; containerName and
                                            # divisor are optional at the schema level.
                                            resourceFieldRef:
                                              description: |-
                                                Selects a resource of the container: only resources limits and requests
                                                (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
                                              properties:
                                                containerName:
                                                  description: 'Container name: required
                                                    for volumes, optional for env
                                                    vars'
                                                  type: string
                                                # divisor accepts either an integer or a string (int-or-string).
                                                # The pattern admits an optionally signed decimal number followed
                                                # by an optional suffix: binary (Ki..Ei), single-letter
                                                # (n, u, m, k, M, G, T, P, E) or an e/E exponent.
                                                divisor:
                                                  anyOf:
                                                  - type: integer
                                                  - type: string
                                                  description: Specifies the output
                                                    format of the exposed resources,
                                                    defaults to "1"
                                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                  x-kubernetes-int-or-string: true
                                                resource:
                                                  description: 'Required: resource
                                                    to select'
                                                  type: string
                                              required:
                                              - resource
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            # secretKeyRef: value source that reads one key of a Secret.
                                            # Only key is required by the schema; name carries a default of
                                            # the empty string and optional is a boolean with no default.
                                            secretKeyRef:
                                              description: Selects a key of a secret
                                                in the pod's namespace
                                              properties:
                                                key:
                                                  description: The key of the secret
                                                    to select from.  Must be a valid
                                                    secret key.
                                                  type: string
                                                # name is not listed under required below; the schema defaults
                                                # it to "" instead.
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: Specify whether the
                                                    Secret or its key must be defined
                                                  type: boolean
                                              required:
                                              - key
                                              type: object
                                              x-kubernetes-map-type: atomic
                                          type: object
                                      required:
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - name
                                    x-kubernetes-list-type: map
                                  # envFrom: list of bulk environment sources. Each item is an object
                                  # with three optional properties (configMapRef, prefix, secretRef);
                                  # no property is marked required at the item level. The list is
                                  # declared atomic (x-kubernetes-list-type: atomic).
                                  envFrom:
                                    description: |-
                                      List of sources to populate environment variables in the container.
                                      The keys defined within a source may consist of any printable ASCII characters except '='.
                                      When a key exists in multiple
                                      sources, the value associated with the last source will take precedence.
                                      Values defined by an Env with a duplicate key will take precedence.
                                      Cannot be updated.
                                    items:
                                      description: EnvFromSource represents the source
                                        of a set of ConfigMaps or Secrets
                                      properties:
                                        # configMapRef and secretRef share the same shape: a name string
                                        # defaulting to "" plus an optional boolean, as an atomic map.
                                        configMapRef:
                                          description: The ConfigMap to select from
                                          properties:
                                            name:
                                              default: ""
                                              description: |-
                                                Name of the referent.
                                                This field is effectively required, but due to backwards compatibility is
                                                allowed to be empty. Instances of this type with an empty value here are
                                                almost certainly wrong.
                                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                              type: string
                                            optional:
                                              description: Specify whether the ConfigMap
                                                must be defined
                                              type: boolean
                                          type: object
                                          x-kubernetes-map-type: atomic
                                        # prefix is a free-form string; the character restriction is
                                        # stated only in the description, no pattern is declared.
                                        prefix:
                                          description: |-
                                            Optional text to prepend to the name of each environment variable.
                                            May consist of any printable ASCII characters except '='.
                                          type: string
                                        secretRef:
                                          description: The Secret to select from
                                          properties:
                                            name:
                                              default: ""
                                              description: |-
                                                Name of the referent.
                                                This field is effectively required, but due to backwards compatibility is
                                                allowed to be empty. Instances of this type with an empty value here are
                                                almost certainly wrong.
                                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                              type: string
                                            optional:
                                              description: Specify whether the Secret
                                                must be defined
                                              type: boolean
                                          type: object
                                          x-kubernetes-map-type: atomic
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  # image: plain string; no pattern or default is declared for it here.
                                  image:
                                    description: |-
                                      Container image name.
                                      More info: https://kubernetes.io/docs/concepts/containers/images
                                    type: string
                                  # imagePullPolicy: plain string. The permitted values are listed
                                  # only in the description; this schema declares no enum for them.
                                  imagePullPolicy:
                                    description: |-
                                      Image pull policy.
                                      One of Always, Never, IfNotPresent.
                                      Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
                                    type: string
                                  lifecycle:
                                    description: Lifecycle is not allowed for ephemeral
                                      containers.
                                    properties:
                                      # postStart: lifecycle handler object. It declares four handler
                                      # properties (exec, httpGet, sleep, tcpSocket); none of them is
                                      # marked required at this level.
                                      postStart:
                                        description: |-
                                          PostStart is called immediately after a container is created. If the handler fails,
                                          the container is terminated and restarted according to its restart policy.
                                          Other management of the container blocks until the hook completes.
                                          More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
                                        properties:
                                          # exec: command is an atomic list of strings; no field is required.
                                          exec:
                                            description: Exec specifies a command
                                              to execute in the container.
                                            properties:
                                              command:
                                                description: |-
                                                  Command is the command line to execute inside the container, the working directory for the
                                                  command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                                  not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                                  a shell, you need to explicitly call out to that shell.
                                                  Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                          # httpGet: port is the only required field and is int-or-string;
                                          # each httpHeaders entry requires both name and value.
                                          httpGet:
                                            description: HTTPGet specifies an HTTP
                                              GET request to perform.
                                            properties:
                                              host:
                                                description: |-
                                                  Host name to connect to, defaults to the pod IP. You probably want to set
                                                  "Host" in httpHeaders instead.
                                                type: string
                                              httpHeaders:
                                                description: Custom headers to set
                                                  in the request. HTTP allows repeated
                                                  headers.
                                                items:
                                                  description: HTTPHeader describes
                                                    a custom header to be used in
                                                    HTTP probes
                                                  properties:
                                                    name:
                                                      description: |-
                                                        The header field name.
                                                        This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                      type: string
                                                    value:
                                                      description: The header field
                                                        value
                                                      type: string
                                                  required:
                                                  - name
                                                  - value
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              path:
                                                description: Path to access on the
                                                  HTTP server.
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Name or number of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                              scheme:
                                                description: |-
                                                  Scheme to use for connecting to the host.
                                                  Defaults to HTTP.
                                                type: string
                                            required:
                                            - port
                                            type: object
                                          # sleep: a single required int64 field, seconds.
                                          sleep:
                                            description: Sleep represents a duration
                                              that the container should sleep.
                                            properties:
                                              seconds:
                                                description: Seconds is the number
                                                  of seconds to sleep.
                                                format: int64
                                                type: integer
                                            required:
                                            - seconds
                                            type: object
                                          # tcpSocket: still present in the schema (port required) although
                                          # its own description marks it deprecated for lifecycle handlers.
                                          tcpSocket:
                                            description: |-
                                              Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
                                              for backward compatibility. There is no validation of this field and
                                              lifecycle hooks will fail at runtime when it is specified.
                                            properties:
                                              host:
                                                description: 'Optional: Host name
                                                  to connect to, defaults to the pod
                                                  IP.'
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Number or name of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                            required:
                                            - port
                                            type: object
                                        type: object
                                      # preStop: lifecycle handler object. It declares four handler
                                      # properties (exec, httpGet, sleep, tcpSocket); none of them is
                                      # marked required at this level.
                                      preStop:
                                        description: |-
                                          PreStop is called immediately before a container is terminated due to an
                                          API request or management event such as liveness/startup probe failure,
                                          preemption, resource contention, etc. The handler is not called if the
                                          container crashes or exits. The Pod's termination grace period countdown begins before the
                                          PreStop hook is executed. Regardless of the outcome of the handler, the
                                          container will eventually terminate within the Pod's termination grace
                                          period (unless delayed by finalizers). Other management of the container blocks until the hook completes
                                          or until the termination grace period is reached.
                                          More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
                                        properties:
                                          # exec: command is an atomic list of strings; no field is required.
                                          exec:
                                            description: Exec specifies a command
                                              to execute in the container.
                                            properties:
                                              command:
                                                description: |-
                                                  Command is the command line to execute inside the container, the working directory for the
                                                  command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                                  not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                                  a shell, you need to explicitly call out to that shell.
                                                  Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                          # httpGet: port is the only required field and is int-or-string;
                                          # each httpHeaders entry requires both name and value.
                                          httpGet:
                                            description: HTTPGet specifies an HTTP
                                              GET request to perform.
                                            properties:
                                              host:
                                                description: |-
                                                  Host name to connect to, defaults to the pod IP. You probably want to set
                                                  "Host" in httpHeaders instead.
                                                type: string
                                              httpHeaders:
                                                description: Custom headers to set
                                                  in the request. HTTP allows repeated
                                                  headers.
                                                items:
                                                  description: HTTPHeader describes
                                                    a custom header to be used in
                                                    HTTP probes
                                                  properties:
                                                    name:
                                                      description: |-
                                                        The header field name.
                                                        This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                      type: string
                                                    value:
                                                      description: The header field
                                                        value
                                                      type: string
                                                  required:
                                                  - name
                                                  - value
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              path:
                                                description: Path to access on the
                                                  HTTP server.
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Name or number of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                              scheme:
                                                description: |-
                                                  Scheme to use for connecting to the host.
                                                  Defaults to HTTP.
                                                type: string
                                            required:
                                            - port
                                            type: object
                                          # sleep: a single required int64 field, seconds.
                                          sleep:
                                            description: Sleep represents a duration
                                              that the container should sleep.
                                            properties:
                                              seconds:
                                                description: Seconds is the number
                                                  of seconds to sleep.
                                                format: int64
                                                type: integer
                                            required:
                                            - seconds
                                            type: object
                                          # tcpSocket: still present in the schema (port required) although
                                          # its own description marks it deprecated for lifecycle handlers.
                                          tcpSocket:
                                            description: |-
                                              Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
                                              for backward compatibility. There is no validation of this field and
                                              lifecycle hooks will fail at runtime when it is specified.
                                            properties:
                                              host:
                                                description: 'Optional: Host name
                                                  to connect to, defaults to the pod
                                                  IP.'
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Number or name of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                            required:
                                            - port
                                            type: object
                                        type: object
                                      # stopSignal: plain string; this schema declares no enum of
                                      # accepted signal names and no default.
                                      stopSignal:
                                        description: |-
                                          StopSignal defines which signal will be sent to a container when it is being stopped.
                                          If not specified, the default is defined by the container runtime in use.
                                          StopSignal can only be set for Pods with a non-empty .spec.os.name
                                        type: string
                                    type: object
                                  livenessProbe:
                                    description: Probes are not allowed for ephemeral
                                      containers.
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before liveness probes are initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be a non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  name:
                                    description: |-
                                      Name of the ephemeral container specified as a DNS_LABEL.
                                      This name must be unique among all containers, init containers and ephemeral containers.
                                    type: string
                                  ports:
                                    description: Ports are not allowed for ephemeral
                                      containers.
                                    items:
                                      description: ContainerPort represents a network
                                        port in a single container.
                                      properties:
                                        containerPort:
                                          description: |-
                                            Number of port to expose on the pod's IP address.
                                            This must be a valid port number, 0 < x < 65536.
                                          format: int32
                                          type: integer
                                        hostIP:
                                          description: What host IP to bind the external
                                            port to.
                                          type: string
                                        hostPort:
                                          description: |-
                                            Number of port to expose on the host.
                                            If specified, this must be a valid port number, 0 < x < 65536.
                                            If HostNetwork is specified, this must match ContainerPort.
                                            Most containers do not need this.
                                          format: int32
                                          type: integer
                                        name:
                                          description: |-
                                            If specified, this must be an IANA_SVC_NAME and unique within the pod. Each
                                            named port in a pod must have a unique name. Name for the port that can be
                                            referred to by services.
                                          type: string
                                        protocol:
                                          default: TCP
                                          description: |-
                                            Protocol for port. Must be UDP, TCP, or SCTP.
                                            Defaults to "TCP".
                                          type: string
                                      required:
                                      - containerPort
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - containerPort
                                    - protocol
                                    x-kubernetes-list-type: map
                                  readinessProbe:
                                    description: Probes are not allowed for ephemeral
                                      containers.
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before the probe is initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be a non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  resizePolicy:
                                    description: Resources resize policy for the container.
                                    items:
                                      description: ContainerResizePolicy represents
                                        resource resize policy for the container.
                                      properties:
                                        resourceName:
                                          description: |-
                                            Name of the resource to which this resource resize policy applies.
                                            Supported values: cpu, memory.
                                          type: string
                                        restartPolicy:
                                          description: |-
                                            Restart policy to apply when specified resource is resized.
                                            If not specified, it defaults to NotRequired.
                                          type: string
                                      required:
                                      - resourceName
                                      - restartPolicy
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  resources:
                                    description: |-
                                      Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources
                                      already allocated to the pod.
                                    properties:
                                      claims:
                                        description: |-
                                          Claims lists the names of resources, defined in spec.resourceClaims,
                                          that are used by this container.

                                          This field depends on the
                                          DynamicResourceAllocation feature gate.

                                          This field is immutable. It can only be set for containers.
                                        items:
                                          description: ResourceClaim references one
                                            entry in PodSpec.ResourceClaims.
                                          properties:
                                            name:
                                              description: |-
                                                Name must match the name of one entry in pod.spec.resourceClaims of
                                                the Pod where this field is used. It makes that resource available
                                                inside a container.
                                              type: string
                                            request:
                                              description: |-
                                                Request is the name chosen for a request in the referenced claim.
                                                If empty, everything from the claim is made available, otherwise
                                                only the result of this request.
                                              type: string
                                          required:
                                          - name
                                          type: object
                                        type: array
                                        x-kubernetes-list-map-keys:
                                        - name
                                        x-kubernetes-list-type: map
                                      limits:
                                        additionalProperties:
                                          anyOf:
                                          - type: integer
                                          - type: string
                                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                          x-kubernetes-int-or-string: true
                                        description: |-
                                          Limits describes the maximum amount of compute resources allowed.
                                          More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                        type: object
                                      requests:
                                        additionalProperties:
                                          anyOf:
                                          - type: integer
                                          - type: string
                                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                          x-kubernetes-int-or-string: true
                                        description: |-
                                          Requests describes the minimum amount of compute resources required.
                                          If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
                                          otherwise to an implementation-defined value. Requests cannot exceed Limits.
                                          More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                        type: object
                                    type: object
                                  restartPolicy:
                                    description: |-
                                      Restart policy for the container to manage the restart behavior of each
                                      container within a pod.
                                      You cannot set this field on ephemeral containers.
                                    type: string
                                  restartPolicyRules:
                                    description: |-
                                      Represents a list of rules to be checked to determine if the
                                      container should be restarted on exit. You cannot set this field on
                                      ephemeral containers.
                                    items:
                                      description: ContainerRestartRule describes
                                        how a container exit is handled.
                                      properties:
                                        action:
                                          description: |-
                                            Specifies the action taken on a container exit if the requirements
                                            are satisfied. The only possible value is "Restart" to restart the
                                            container.
                                          type: string
                                        exitCodes:
                                          description: Represents the exit codes to
                                            check on container exits.
                                          properties:
                                            operator:
                                              description: |-
                                                Represents the relationship between the container exit code(s) and the
                                                specified values. Possible values are:
                                                - In: the requirement is satisfied if the container exit code is in the
                                                  set of specified values.
                                                - NotIn: the requirement is satisfied if the container exit code is
                                                  not in the set of specified values.
                                              type: string
                                            values:
                                              description: |-
                                                Specifies the set of values to check for container exit codes.
                                                At most 255 elements are allowed.
                                              items:
                                                format: int32
                                                type: integer
                                              type: array
                                              x-kubernetes-list-type: set
                                          required:
                                          - operator
                                          type: object
                                      required:
                                      - action
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  securityContext:
                                    description: |-
                                      Optional: SecurityContext defines the security options the ephemeral container should be run with.
                                      If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
                                    properties:
                                      allowPrivilegeEscalation:
                                        description: |-
                                          AllowPrivilegeEscalation controls whether a process can gain more
                                          privileges than its parent process. This bool directly controls if
                                          the no_new_privs flag will be set on the container process.
                                          AllowPrivilegeEscalation is true always when the container is:
                                          1) run as Privileged
                                          2) has CAP_SYS_ADMIN
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      appArmorProfile:
                                        description: |-
                                          appArmorProfile is the AppArmor options to use by this container. If set, this profile
                                          overrides the pod's appArmorProfile.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          localhostProfile:
                                            description: |-
                                              localhostProfile indicates a profile loaded on the node that should be used.
                                              The profile must be preconfigured on the node to work.
                                              Must match the loaded name of the profile.
                                              Must be set if and only if type is "Localhost".
                                            type: string
                                          type:
                                            description: |-
                                              type indicates which kind of AppArmor profile will be applied.
                                              Valid options are:
                                                Localhost - a profile pre-loaded on the node.
                                                RuntimeDefault - the container runtime's default profile.
                                                Unconfined - no AppArmor enforcement.
                                            type: string
                                        required:
                                        - type
                                        type: object
                                      capabilities:
                                        description: |-
                                          The capabilities to add/drop when running containers.
                                          Defaults to the default set of capabilities granted by the container runtime.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          add:
                                            description: Added capabilities
                                            items:
                                              description: Capability represents a POSIX
                                                capabilities type
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          drop:
                                            description: Removed capabilities
                                            items:
                                              description: Capability represents a POSIX
                                                capabilities type
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      privileged:
                                        description: |-
                                          Run container in privileged mode.
                                          Processes in privileged containers are essentially equivalent to root on the host.
                                          Defaults to false.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      procMount:
                                        description: |-
                                          procMount denotes the type of proc mount to use for the containers.
                                          The default value is Default which uses the container runtime defaults for
                                          readonly paths and masked paths.
                                          This requires the ProcMountType feature flag to be enabled.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: string
                                      readOnlyRootFilesystem:
                                        description: |-
                                          Whether this container has a read-only root filesystem.
                                          Default is false.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      runAsGroup:
                                        description: |-
                                          The GID to run the entrypoint of the container process.
                                          Uses runtime default if unset.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        format: int64
                                        type: integer
                                      runAsNonRoot:
                                        description: |-
                                          Indicates that the container must run as a non-root user.
                                          If true, the Kubelet will validate the image at runtime to ensure that it
                                          does not run as UID 0 (root) and fail to start the container if it does.
                                          If unset or false, no such validation will be performed.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                        type: boolean
                                      runAsUser:
                                        description: |-
                                          The UID to run the entrypoint of the container process.
                                          Defaults to user specified in image metadata if unspecified.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        format: int64
                                        type: integer
                                      seLinuxOptions:
                                        description: |-
                                          The SELinux context to be applied to the container.
                                          If unspecified, the container runtime will allocate a random SELinux context for each
                                          container.  May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          level:
                                            description: Level is SELinux level label
                                              that applies to the container.
                                            type: string
                                          role:
                                            description: Role is a SELinux role label
                                              that applies to the container.
                                            type: string
                                          type:
                                            description: Type is a SELinux type label
                                              that applies to the container.
                                            type: string
                                          user:
                                            description: User is a SELinux user label
                                              that applies to the container.
                                            type: string
                                        type: object
                                      seccompProfile:
                                        description: |-
                                          The seccomp options to use by this container. If seccomp options are
                                          provided at both the pod & container level, the container options
                                          override the pod options.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          localhostProfile:
                                            description: |-
                                              localhostProfile indicates a profile defined in a file on the node should be used.
                                              The profile must be preconfigured on the node to work.
                                              Must be a descending path, relative to the kubelet's configured seccomp profile location.
                                              Must be set if type is "Localhost". Must NOT be set for any other type.
                                            type: string
                                          type:
                                            description: |-
                                              type indicates which kind of seccomp profile will be applied.
                                              Valid options are:

                                              Localhost - a profile defined in a file on the node should be used.
                                              RuntimeDefault - the container runtime default profile should be used.
                                              Unconfined - no profile should be applied.
                                            type: string
                                        required:
                                        - type
                                        type: object
                                      windowsOptions:
                                        description: |-
                                          The Windows specific settings applied to all containers.
                                          If unspecified, the options from the PodSecurityContext will be used.
                                          If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is linux.
                                        properties:
                                          gmsaCredentialSpec:
                                            description: |-
                                              GMSACredentialSpec is where the GMSA admission webhook
                                              (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the
                                              GMSA credential spec named by the GMSACredentialSpecName field.
                                            type: string
                                          gmsaCredentialSpecName:
                                            description: GMSACredentialSpecName is
                                              the name of the GMSA credential spec
                                              to use.
                                            type: string
                                          hostProcess:
                                            description: |-
                                              HostProcess determines if a container should be run as a 'Host Process' container.
                                              All of a Pod's containers must have the same effective HostProcess value
                                              (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers).
                                              In addition, if HostProcess is true then HostNetwork must also be set to true.
                                            type: boolean
                                          runAsUserName:
                                            description: |-
                                              The UserName in Windows to run the entrypoint of the container process.
                                              Defaults to the user specified in image metadata if unspecified.
                                              May also be set in PodSecurityContext. If set in both SecurityContext and
                                              PodSecurityContext, the value specified in SecurityContext takes precedence.
                                            type: string
                                        type: object
                                    type: object
                                  startupProbe:
                                    description: Probes are not allowed for ephemeral
                                      containers.
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before the probe is initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  stdin:
                                    description: |-
                                      Whether this container should allocate a buffer for stdin in the container runtime. If this
                                      is not set, reads from stdin in the container will always result in EOF.
                                      Default is false.
                                    type: boolean
                                  stdinOnce:
                                    description: |-
                                      Whether the container runtime should close the stdin channel after it has been opened by
                                      a single attach. When stdin is true the stdin stream will remain open across multiple attach
                                      sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the
                                      first client attaches to stdin, and then remains open and accepts data until the client disconnects,
                                      at which time stdin is closed and remains closed until the container is restarted. If this
                                      flag is false, a container process that reads from stdin will never receive an EOF.
                                      Default is false.
                                    type: boolean
                                  targetContainerName:
                                    description: |-
                                      If set, the name of the container from PodSpec that this ephemeral container targets.
                                      The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container.
                                      If not set then the ephemeral container uses the namespaces configured in the Pod spec.

                                      The container runtime must implement support for this feature. If the runtime does not
                                      support namespace targeting then the result of setting this field is undefined.
                                    type: string
                                  terminationMessagePath:
                                    description: |-
                                      Optional: Path at which the file to which the container's termination message
                                      will be written is mounted into the container's filesystem.
                                      Message written is intended to be brief final status, such as an assertion failure message.
                                      Will be truncated by the node if greater than 4096 bytes. The total message length across
                                      all containers will be limited to 12kb.
                                      Defaults to /dev/termination-log.
                                      Cannot be updated.
                                    type: string
                                  terminationMessagePolicy:
                                    description: |-
                                      Indicate how the termination message should be populated. File will use the contents of
                                      terminationMessagePath to populate the container status message on both success and failure.
                                      FallbackToLogsOnError will use the last chunk of container log output if the termination
                                      message file is empty and the container exited with an error.
                                      The log output is limited to 2048 bytes or 80 lines, whichever is smaller.
                                      Defaults to File.
                                      Cannot be updated.
                                    type: string
                                  tty:
                                    description: |-
                                      Whether this container should allocate a TTY for itself, also requires 'stdin' to be true.
                                      Default is false.
                                    type: boolean
                                  volumeDevices:
                                    description: volumeDevices is the list of block
                                      devices to be used by the container.
                                    items:
                                      description: volumeDevice describes a mapping
                                        of a raw block device within a container.
                                      properties:
                                        devicePath:
                                          description: devicePath is the path inside
                                            of the container that the device will
                                            be mapped to.
                                          type: string
                                        name:
                                          description: name must match the name of
                                            a persistentVolumeClaim in the pod
                                          type: string
                                      required:
                                      - devicePath
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - devicePath
                                    x-kubernetes-list-type: map
                                  volumeMounts:
                                    description: |-
                                      Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers.
                                      Cannot be updated.
                                    items:
                                      description: VolumeMount describes a mounting
                                        of a Volume within a container.
                                      properties:
                                        mountPath:
                                          description: |-
                                            Path within the container at which the volume should be mounted.  Must
                                            not contain ':'.
                                          type: string
                                        mountPropagation:
                                          description: |-
                                            mountPropagation determines how mounts are propagated from the host
                                            to container and the other way around.
                                            When not set, MountPropagationNone is used.
                                            This field is beta in 1.10.
                                            When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified
                                            (which defaults to None).
                                          type: string
                                        name:
                                          description: This must match the Name of
                                            a Volume.
                                          type: string
                                        readOnly:
                                          description: |-
                                            Mounted read-only if true, read-write otherwise (false or unspecified).
                                            Defaults to false.
                                          type: boolean
                                        recursiveReadOnly:
                                          description: |-
                                            RecursiveReadOnly specifies whether read-only mounts should be handled
                                            recursively.

                                            If ReadOnly is false, this field has no meaning and must be unspecified.

                                            If ReadOnly is true, and this field is set to Disabled, the mount is not made
                                            recursively read-only.  If this field is set to IfPossible, the mount is made
                                            recursively read-only, if it is supported by the container runtime.  If this
                                            field is set to Enabled, the mount is made recursively read-only if it is
                                            supported by the container runtime, otherwise the pod will not be started and
                                            an error will be generated to indicate the reason.

                                            If this field is set to IfPossible or Enabled, MountPropagation must be set to
                                            None (or be unspecified, which defaults to None).

                                            If this field is not specified, it is treated as an equivalent of Disabled.
                                          type: string
                                        subPath:
                                          description: |-
                                            Path within the volume from which the container's volume should be mounted.
                                            Defaults to "" (volume's root).
                                          type: string
                                        subPathExpr:
                                          description: |-
                                            Expanded path within the volume from which the container's volume should be mounted.
                                            Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment.
                                            Defaults to "" (volume's root).
                                            SubPathExpr and SubPath are mutually exclusive.
                                          type: string
                                      required:
                                      - mountPath
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - mountPath
                                    x-kubernetes-list-type: map
                                  workingDir:
                                    description: |-
                                      Container's working directory.
                                      If not specified, the container runtime's default will be used, which
                                      might be configured in the container image.
                                      Cannot be updated.
                                    type: string
                                required:
                                - name
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            # hostAliases: optional array of hosts-file entries for the pod.
                            # Each item is an object with a required `ip` string and an atomic
                            # list of `hostnames` strings. The array itself is declared as a
                            # map-type list keyed by `ip`.
                            hostAliases:
                              description: |-
                                HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts
                                file if specified.
                              items:
                                description: |-
                                  HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the
                                  pod's hosts file.
                                properties:
                                  hostnames:
                                    description: Hostnames for the above IP address.
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  ip:
                                    description: IP address of the host file entry.
                                    type: string
                                required:
                                - ip
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - ip
                              x-kubernetes-list-type: map
                            # hostIPC: boolean toggle for using the host's IPC namespace.
                            # The default of false is stated in the description only; no
                            # `default:` is declared on this schema node.
                            hostIPC:
                              description: |-
                                Use the host's ipc namespace.
                                Optional: Default to false.
                              type: boolean
                            # hostNetwork: boolean toggle for running the pod in the host's
                            # network namespace. The hostPort/containerPort matching rule and
                            # the default of false are described in prose only; this node
                            # declares just `type: boolean`.
                            hostNetwork:
                              description: |-
                                Host networking requested for this pod. Use the host's network namespace.
                                When using HostNetwork you should specify ports so the scheduler is aware.
                                When `hostNetwork` is true, specified `hostPort` fields in port definitions must match `containerPort`,
                                and unspecified `hostPort` fields in port definitions are defaulted to match `containerPort`.
                                Default to false.
                              type: boolean
                            # hostPID: boolean toggle for using the host's PID namespace.
                            # The default of false is stated in the description only; no
                            # `default:` is declared on this schema node.
                            hostPID:
                              description: |-
                                Use the host's pid namespace.
                                Optional: Default to false.
                              type: boolean
                            # hostUsers: boolean selecting the user namespace for the pod.
                            # Per the description, true or unset means the host user namespace
                            # and false requests a new user namespace. The description marks
                            # the field as alpha-level, honored only when the
                            # UserNamespacesSupport feature is enabled on the server.
                            hostUsers:
                              description: |-
                                Use the host's user namespace.
                                Optional: Default to true.
                                If set to true or not present, the pod will be run in the host user namespace, useful
                                for when the pod needs a feature only available to the host user namespace, such as
                                loading a kernel module with CAP_SYS_MODULE.
                                When set to false, a new userns is created for the pod. Setting false is useful for
                                mitigating container breakout vulnerabilities even allowing users to run their
                                containers as root without actually having root privileges on the host.
                                This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature.
                              type: boolean
                            # hostname: free-form string for the pod's hostname. When it is
                            # unset, the description says a system-defined value is used.
                            # No format or length validation is declared on this node.
                            hostname:
                              description: |-
                                Specifies the hostname of the Pod
                                If not specified, the pod's hostname will be set to a system-defined value.
                              type: string
                            # hostnameOverride: string overriding the hostname as seen by the
                            # pod; per the description it does not affect DNS records.
                            # NOTE(review): the constraints listed in the description (RFC 1123
                            # subdomain, at most 64 characters, incompatibility with
                            # `setHostnameAsFQDN` and `hostNetwork`) are not expressed here as
                            # `maxLength`/`pattern`/CEL rules; this node declares only
                            # `type: string`, so any enforcement must happen elsewhere.
                            hostnameOverride:
                              description: |-
                                HostnameOverride specifies an explicit override for the pod's hostname as perceived by the pod.
                                This field only specifies the pod's hostname and does not affect its DNS records.
                                When this field is set to a non-empty string:
                                - It takes precedence over the values set in `hostname` and `subdomain`.
                                - The Pod's hostname will be set to this value.
                                - `setHostnameAsFQDN` must be nil or set to false.
                                - `hostNetwork` must be set to false.

                                This field must be a valid DNS subdomain as defined in RFC 1123 and contain at most 64 characters.
                                Requires the HostnameOverride feature gate to be enabled.
                              type: string
                            # imagePullSecrets: optional array of same-namespace secret
                            # references used when pulling images for this PodSpec.
                            # Each item is an object with a single `name` string that defaults
                            # to "" and is not listed as required. Items are atomic maps, and
                            # the array is declared as a map-type list keyed by `name`.
                            imagePullSecrets:
                              description: |-
                                ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec.
                                If specified, these secrets will be passed to individual puller implementations for them to use.
                                More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod
                              items:
                                description: |-
                                  LocalObjectReference contains enough information to let you locate the
                                  referenced object inside the same namespace.
                                properties:
                                  name:
                                    default: ""
                                    description: |-
                                      Name of the referent.
                                      This field is effectively required, but due to backwards compatibility is
                                      allowed to be empty. Instances of this type with an empty value here are
                                      almost certainly wrong.
                                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                    type: string
                                type: object
                                x-kubernetes-map-type: atomic
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            initContainers:
                              description: |-
                                List of initialization containers belonging to the pod.
                                Init containers are executed in order prior to containers being started. If any
                                init container fails, the pod is considered to have failed and is handled according
                                to its restartPolicy. The name for an init container or normal container must be
                                unique among all containers.
                                Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes.
                                The resourceRequirements of an init container are taken into account during scheduling
                                by finding the highest request/limit for each resource type, and then using the max of
                                that value or the sum of the normal containers. Limits are applied to init containers
                                in a similar fashion.
                                Init containers cannot currently be added or removed.
                                Cannot be updated.
                                More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
                              items:
                                description: A single application container that you
                                  want to run within a pod.
                                properties:
                                  args:
                                    description: |-
                                      Arguments to the entrypoint.
                                      The container image's CMD is used if this is not provided.
                                      Variable references $(VAR_NAME) are expanded using the container's environment. If a variable
                                      cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced
                                      to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will
                                      produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless
                                      of whether the variable exists or not. Cannot be updated.
                                      More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  command:
                                    description: |-
                                      Entrypoint array. Not executed within a shell.
                                      The container image's ENTRYPOINT is used if this is not provided.
                                      Variable references $(VAR_NAME) are expanded using the container's environment. If a variable
                                      cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced
                                      to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will
                                      produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless
                                      of whether the variable exists or not. Cannot be updated.
                                      More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  env:
                                    description: |-
                                      List of environment variables to set in the container.
                                      Cannot be updated.
                                    items:
                                      description: EnvVar represents an environment
                                        variable present in a Container.
                                      properties:
                                        name:
                                          description: |-
                                            Name of the environment variable.
                                            May consist of any printable ASCII characters except '='.
                                          type: string
                                        value:
                                          description: |-
                                            Variable references $(VAR_NAME) are expanded
                                            using the previously defined environment variables in the container and
                                            any service environment variables. If a variable cannot be resolved,
                                            the reference in the input string will be unchanged. Double $$ are reduced
                                            to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
                                            "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
                                            Escaped references will never be expanded, regardless of whether the variable
                                            exists or not.
                                            Defaults to "".
                                          type: string
                                        valueFrom:
                                          description: Source for the environment
                                            variable's value. Cannot be used if value
                                            is not empty.
                                          properties:
                                            configMapKeyRef:
                                              description: Selects a key of a ConfigMap.
                                              properties:
                                                key:
                                                  description: The key to select.
                                                  type: string
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: Specify whether the
                                                    ConfigMap or its key must be defined
                                                  type: boolean
                                              required:
                                              - key
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            fieldRef:
                                              description: |-
                                                Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
                                                spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
                                              properties:
                                                apiVersion:
                                                  description: Version of the schema
                                                    the FieldPath is written in terms
                                                    of, defaults to "v1".
                                                  type: string
                                                fieldPath:
                                                  description: Path of the field to
                                                    select in the specified API version.
                                                  type: string
                                              required:
                                              - fieldPath
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            fileKeyRef:
                                              description: |-
                                                FileKeyRef selects a key of the env file.
                                                Requires the EnvFiles feature gate to be enabled.
                                              properties:
                                                key:
                                                  description: |-
                                                    The key within the env file. An invalid key will prevent the pod from starting.
                                                    The keys defined within a source may consist of any printable ASCII characters except '='.
                                                    During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
                                                  type: string
                                                optional:
                                                  default: false
                                                  description: |-
                                                    Specify whether the file or its key must be defined. If the file or key
                                                    does not exist, then the env var is not published.
                                                    If optional is set to true and the specified key does not exist,
                                                    the environment variable will not be set in the Pod's containers.

                                                    If optional is set to false and the specified key does not exist,
                                                    an error will be returned during Pod creation.
                                                  type: boolean
                                                path:
                                                  description: |-
                                                    The path within the volume from which to select the file.
                                                    Must be relative and may not contain the '..' path or start with '..'.
                                                  type: string
                                                volumeName:
                                                  description: The name of the volume
                                                    mount containing the env file.
                                                  type: string
                                              required:
                                              - key
                                              - path
                                              - volumeName
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            resourceFieldRef:
                                              description: |-
                                                Selects a resource of the container: only resources limits and requests
                                                (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
                                              properties:
                                                containerName:
                                                  description: 'Container name: required
                                                    for volumes, optional for env
                                                    vars'
                                                  type: string
                                                divisor:
                                                  anyOf:
                                                  - type: integer
                                                  - type: string
                                                  description: Specifies the output
                                                    format of the exposed resources,
                                                    defaults to "1"
                                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                  x-kubernetes-int-or-string: true
                                                resource:
                                                  description: 'Required: resource
                                                    to select'
                                                  type: string
                                              required:
                                              - resource
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            secretKeyRef:
                                              description: Selects a key of a secret
                                                in the pod's namespace
                                              properties:
                                                key:
                                                  description: The key of the secret
                                                    to select from.  Must be a valid
                                                    secret key.
                                                  type: string
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: Specify whether the
                                                    Secret or its key must be defined
                                                  type: boolean
                                              required:
                                              - key
                                              type: object
                                              x-kubernetes-map-type: atomic
                                          type: object
                                      required:
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - name
                                    x-kubernetes-list-type: map
                                  envFrom:
                                    description: |-
                                      List of sources to populate environment variables in the container.
                                      The keys defined within a source may consist of any printable ASCII characters except '='.
                                      When a key exists in multiple
                                      sources, the value associated with the last source will take precedence.
                                      Values defined by an Env with a duplicate key will take precedence.
                                      Cannot be updated.
                                    items:
                                      description: EnvFromSource represents the source
                                        of a set of ConfigMaps or Secrets
                                      properties:
                                        configMapRef:
                                          description: The ConfigMap to select from
                                          properties:
                                            name:
                                              default: ""
                                              description: |-
                                                Name of the referent.
                                                This field is effectively required, but due to backwards compatibility is
                                                allowed to be empty. Instances of this type with an empty value here are
                                                almost certainly wrong.
                                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                              type: string
                                            optional:
                                              description: Specify whether the ConfigMap
                                                must be defined
                                              type: boolean
                                          type: object
                                          x-kubernetes-map-type: atomic
                                        prefix:
                                          description: |-
                                            Optional text to prepend to the name of each environment variable.
                                            May consist of any printable ASCII characters except '='.
                                          type: string
                                        secretRef:
                                          description: The Secret to select from
                                          properties:
                                            name:
                                              default: ""
                                              description: |-
                                                Name of the referent.
                                                This field is effectively required, but due to backwards compatibility is
                                                allowed to be empty. Instances of this type with an empty value here are
                                                almost certainly wrong.
                                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                              type: string
                                            optional:
                                              description: Specify whether the Secret
                                                must be defined
                                              type: boolean
                                          type: object
                                          x-kubernetes-map-type: atomic
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  image:
                                    description: |-
                                      Container image name.
                                      More info: https://kubernetes.io/docs/concepts/containers/images
                                      This field is optional to allow higher level config management to default or override
                                      container images in workload controllers like Deployments and StatefulSets.
                                    type: string
                                  imagePullPolicy:
                                    description: |-
                                      Image pull policy.
                                      One of Always, Never, IfNotPresent.
                                      Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
                                    type: string
                                  lifecycle:
                                    description: |-
                                      Actions that the management system should take in response to container lifecycle events.
                                      Cannot be updated.
                                    properties:
                                      postStart:
                                        description: |-
                                          PostStart is called immediately after a container is created. If the handler fails,
                                          the container is terminated and restarted according to its restart policy.
                                          Other management of the container blocks until the hook completes.
                                          More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
                                        properties:
                                          exec:
                                            description: Exec specifies a command
                                              to execute in the container.
                                            properties:
                                              command:
                                                description: |-
                                                  Command is the command line to execute inside the container, the working directory for the
                                                  command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                                  not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                                  a shell, you need to explicitly call out to that shell.
                                                  Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                          httpGet:
                                            description: HTTPGet specifies an HTTP
                                              GET request to perform.
                                            properties:
                                              host:
                                                description: |-
                                                  Host name to connect to, defaults to the pod IP. You probably want to set
                                                  "Host" in httpHeaders instead.
                                                type: string
                                              httpHeaders:
                                                description: Custom headers to set
                                                  in the request. HTTP allows repeated
                                                  headers.
                                                items:
                                                  description: HTTPHeader describes
                                                    a custom header to be used in
                                                    HTTP probes
                                                  properties:
                                                    name:
                                                      description: |-
                                                        The header field name.
                                                        This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                      type: string
                                                    value:
                                                      description: The header field
                                                        value
                                                      type: string
                                                  required:
                                                  - name
                                                  - value
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              path:
                                                description: Path to access on the
                                                  HTTP server.
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Name or number of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                              scheme:
                                                description: |-
                                                  Scheme to use for connecting to the host.
                                                  Defaults to HTTP.
                                                type: string
                                            required:
                                            - port
                                            type: object
                                          sleep:
                                            description: Sleep represents a duration
                                              that the container should sleep.
                                            properties:
                                              seconds:
                                                description: Seconds is the number
                                                  of seconds to sleep.
                                                format: int64
                                                type: integer
                                            required:
                                            - seconds
                                            type: object
                                          tcpSocket:
                                            description: |-
                                              Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
                                              for backward compatibility. There is no validation of this field and
                                              lifecycle hooks will fail at runtime when it is specified.
                                            properties:
                                              host:
                                                description: 'Optional: Host name
                                                  to connect to, defaults to the pod
                                                  IP.'
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Number or name of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                            required:
                                            - port
                                            type: object
                                        type: object
                                      preStop:
                                        description: |-
                                          PreStop is called immediately before a container is terminated due to an
                                          API request or management event such as liveness/startup probe failure,
                                          preemption, resource contention, etc. The handler is not called if the
                                          container crashes or exits. The Pod's termination grace period countdown begins before the
                                          PreStop hook is executed. Regardless of the outcome of the handler, the
                                          container will eventually terminate within the Pod's termination grace
                                          period (unless delayed by finalizers). Other management of the container blocks until the hook completes
                                          or until the termination grace period is reached.
                                          More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
                                        properties:
                                          exec:
                                            description: Exec specifies a command
                                              to execute in the container.
                                            properties:
                                              command:
                                                description: |-
                                                  Command is the command line to execute inside the container, the working directory for the
                                                  command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                                  not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                                  a shell, you need to explicitly call out to that shell.
                                                  Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                            type: object
                                          httpGet:
                                            description: HTTPGet specifies an HTTP
                                              GET request to perform.
                                            properties:
                                              host:
                                                description: |-
                                                  Host name to connect to, defaults to the pod IP. You probably want to set
                                                  "Host" in httpHeaders instead.
                                                type: string
                                              httpHeaders:
                                                description: Custom headers to set
                                                  in the request. HTTP allows repeated
                                                  headers.
                                                items:
                                                  description: HTTPHeader describes
                                                    a custom header to be used in
                                                    HTTP probes
                                                  properties:
                                                    name:
                                                      description: |-
                                                        The header field name.
                                                        This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                      type: string
                                                    value:
                                                      description: The header field
                                                        value
                                                      type: string
                                                  required:
                                                  - name
                                                  - value
                                                  type: object
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              path:
                                                description: Path to access on the
                                                  HTTP server.
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Name or number of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                              scheme:
                                                description: |-
                                                  Scheme to use for connecting to the host.
                                                  Defaults to HTTP.
                                                type: string
                                            required:
                                            - port
                                            type: object
                                          sleep:
                                            description: Sleep represents a duration
                                              that the container should sleep.
                                            properties:
                                              seconds:
                                                description: Seconds is the number
                                                  of seconds to sleep.
                                                format: int64
                                                type: integer
                                            required:
                                            - seconds
                                            type: object
                                          tcpSocket:
                                            description: |-
                                              Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
                                              for backward compatibility. There is no validation of this field and
                                              lifecycle hooks will fail at runtime when it is specified.
                                            properties:
                                              host:
                                                description: 'Optional: Host name
                                                  to connect to, defaults to the pod
                                                  IP.'
                                                type: string
                                              port:
                                                anyOf:
                                                - type: integer
                                                - type: string
                                                description: |-
                                                  Number or name of the port to access on the container.
                                                  Number must be in the range 1 to 65535.
                                                  Name must be an IANA_SVC_NAME.
                                                x-kubernetes-int-or-string: true
                                            required:
                                            - port
                                            type: object
                                        type: object
                                      stopSignal:
                                        description: |-
                                          StopSignal defines which signal will be sent to a container when it is being stopped.
                                          If not specified, the default is defined by the container runtime in use.
                                          StopSignal can only be set for Pods with a non-empty .spec.os.name
                                        type: string
                                    type: object
                                  livenessProbe:
                                    description: |-
                                      Periodic probe of container liveness.
                                      Container will be restarted if the probe fails.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before liveness probes are initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  name:
                                    description: |-
                                      Name of the container specified as a DNS_LABEL.
                                      Each container in a pod must have a unique name (DNS_LABEL).
                                      Cannot be updated.
                                    type: string
                                  ports:
                                    description: |-
                                      List of ports to expose from the container. Not specifying a port here
                                      DOES NOT prevent that port from being exposed. Any port which is
                                      listening on the default "0.0.0.0" address inside a container will be
                                      accessible from the network.
                                      Modifying this array with strategic merge patch may corrupt the data.
                                      For more information, see https://github.com/kubernetes/kubernetes/issues/108255.
                                      Cannot be updated.
                                    items:
                                      description: ContainerPort represents a network
                                        port in a single container.
                                      properties:
                                        containerPort:
                                          description: |-
                                            Number of port to expose on the pod's IP address.
                                            This must be a valid port number, 0 < x < 65536.
                                          format: int32
                                          type: integer
                                        hostIP:
                                          description: What host IP to bind the external
                                            port to.
                                          type: string
                                        hostPort:
                                          description: |-
                                            Number of port to expose on the host.
                                            If specified, this must be a valid port number, 0 < x < 65536.
                                            If HostNetwork is specified, this must match ContainerPort.
                                            Most containers do not need this.
                                          format: int32
                                          type: integer
                                        name:
                                          description: |-
                                            If specified, this must be an IANA_SVC_NAME and unique within the pod. Each
                                            named port in a pod must have a unique name. Name for the port that can be
                                            referred to by services.
                                          type: string
                                        protocol:
                                          default: TCP
                                          description: |-
                                            Protocol for port. Must be UDP, TCP, or SCTP.
                                            Defaults to "TCP".
                                          type: string
                                      required:
                                      - containerPort
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - containerPort
                                    - protocol
                                    x-kubernetes-list-type: map
                                  readinessProbe:
                                    description: |-
                                      Periodic probe of container service readiness.
                                      Container will be removed from service endpoints if the probe fails.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before the probe is initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be non-negative integer. The value zero indicates stop immediately via
                                          the kill signal (no opportunity to shut down).
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  resizePolicy:
                                    description: |-
                                      Resources resize policy for the container.
                                      This field cannot be set on ephemeral containers.
                                    items:
                                      description: ContainerResizePolicy represents
                                        resource resize policy for the container.
                                      properties:
                                        resourceName:
                                          description: |-
                                            Name of the resource to which this resource resize policy applies.
                                            Supported values: cpu, memory.
                                          type: string
                                        restartPolicy:
                                          description: |-
                                            Restart policy to apply when specified resource is resized.
                                            If not specified, it defaults to NotRequired.
                                          type: string
                                      required:
                                      - resourceName
                                      - restartPolicy
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  resources:
                                    description: |-
                                      Compute Resources required by this container.
                                      Cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                    properties:
                                      claims:
                                        description: |-
                                          Claims lists the names of resources, defined in spec.resourceClaims,
                                          that are used by this container.

                                          This field depends on the
                                          DynamicResourceAllocation feature gate.

                                          This field is immutable. It can only be set for containers.
                                        items:
                                          description: ResourceClaim references one
                                            entry in PodSpec.ResourceClaims.
                                          properties:
                                            name:
                                              description: |-
                                                Name must match the name of one entry in pod.spec.resourceClaims of
                                                the Pod where this field is used. It makes that resource available
                                                inside a container.
                                              type: string
                                            request:
                                              description: |-
                                                Request is the name chosen for a request in the referenced claim.
                                                If empty, everything from the claim is made available, otherwise
                                                only the result of this request.
                                              type: string
                                          required:
                                          - name
                                          type: object
                                        type: array
                                        x-kubernetes-list-map-keys:
                                        - name
                                        x-kubernetes-list-type: map
                                      limits:
                                        additionalProperties:
                                          anyOf:
                                          - type: integer
                                          - type: string
                                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                          x-kubernetes-int-or-string: true
                                        description: |-
                                          Limits describes the maximum amount of compute resources allowed.
                                          More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                        type: object
                                      requests:
                                        additionalProperties:
                                          anyOf:
                                          - type: integer
                                          - type: string
                                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                          x-kubernetes-int-or-string: true
                                        description: |-
                                          Requests describes the minimum amount of compute resources required.
                                          If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
                                          otherwise to an implementation-defined value. Requests cannot exceed Limits.
                                          More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                        type: object
                                    type: object
                                  restartPolicy:
                                    description: |-
                                      RestartPolicy defines the restart behavior of individual containers in a pod.
                                      This overrides the pod-level restart policy. When this field is not specified,
                                      the restart behavior is defined by the Pod's restart policy and the container type.
                                      Additionally, setting the RestartPolicy as "Always" for the init container will
                                      have the following effect:
                                      this init container will be continually restarted on
                                      exit until all regular containers have terminated. Once all regular
                                      containers have completed, all init containers with restartPolicy "Always"
                                      will be shut down. This lifecycle differs from normal init containers and
                                      is often referred to as a "sidecar" container. Although this init
                                      container still starts in the init container sequence, it does not wait
                                      for the container to complete before proceeding to the next init
                                      container. Instead, the next init container starts immediately after this
                                      init container is started, or after any startupProbe has successfully
                                      completed.
                                    type: string
                                  restartPolicyRules:
                                    description: |-
                                      Represents a list of rules to be checked to determine if the
                                      container should be restarted on exit. The rules are evaluated in
                                      order. Once a rule matches a container exit condition, the remaining
                                      rules are ignored. If no rule matches the container exit condition,
                                      the Container-level restart policy determines whether the container
                                      is restarted or not. Constraints on the rules:
                                      - At most 20 rules are allowed.
                                      - Rules can have the same action.
                                      - Identical rules are not forbidden in validations.
                                      When rules are specified, container MUST set RestartPolicy explicitly
                                      even if it matches the Pod's RestartPolicy.
                                    items:
                                      description: ContainerRestartRule describes
                                        how a container exit is handled.
                                      properties:
                                        action:
                                          description: |-
                                            Specifies the action taken on a container exit if the requirements
                                            are satisfied. The only possible value is "Restart" to restart the
                                            container.
                                          type: string
                                        exitCodes:
                                          description: Represents the exit codes to
                                            check on container exits.
                                          properties:
                                            operator:
                                              description: |-
                                                Represents the relationship between the container exit code(s) and the
                                                specified values. Possible values are:
                                                - In: the requirement is satisfied if the container exit code is in the
                                                  set of specified values.
                                                - NotIn: the requirement is satisfied if the container exit code is
                                                  not in the set of specified values.
                                              type: string
                                            values:
                                              description: |-
                                                Specifies the set of values to check for container exit codes.
                                                At most 255 elements are allowed.
                                              items:
                                                format: int32
                                                type: integer
                                              type: array
                                              x-kubernetes-list-type: set
                                          required:
                                          - operator
                                          type: object
                                      required:
                                      - action
                                      type: object
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  securityContext:
                                    description: |-
                                      SecurityContext defines the security options the container should be run with.
                                      If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
                                      More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
                                    properties:
                                      allowPrivilegeEscalation:
                                        description: |-
                                          AllowPrivilegeEscalation controls whether a process can gain more
                                          privileges than its parent process. This bool directly controls if
                                          the no_new_privs flag will be set on the container process.
                                          AllowPrivilegeEscalation is true always when the container is:
                                          1) run as Privileged
                                          2) has CAP_SYS_ADMIN
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      appArmorProfile:
                                        description: |-
                                          appArmorProfile is the AppArmor options to use by this container. If set, this profile
                                          overrides the pod's appArmorProfile.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          localhostProfile:
                                            description: |-
                                              localhostProfile indicates a profile loaded on the node that should be used.
                                              The profile must be preconfigured on the node to work.
                                              Must match the loaded name of the profile.
                                              Must be set if and only if type is "Localhost".
                                            type: string
                                          type:
                                            description: |-
                                              type indicates which kind of AppArmor profile will be applied.
                                              Valid options are:
                                                Localhost - a profile pre-loaded on the node.
                                                RuntimeDefault - the container runtime's default profile.
                                                Unconfined - no AppArmor enforcement.
                                            type: string
                                        required:
                                        - type
                                        type: object
                                      capabilities:
                                        description: |-
                                          The capabilities to add/drop when running containers.
                                          Defaults to the default set of capabilities granted by the container runtime.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          add:
                                            description: Added capabilities
                                            items:
                                              description: Capability represents the POSIX
                                                capabilities type
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          drop:
                                            description: Removed capabilities
                                            items:
                                              description: Capability represents the POSIX
                                                capabilities type
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      privileged:
                                        description: |-
                                          Run container in privileged mode.
                                          Processes in privileged containers are essentially equivalent to root on the host.
                                          Defaults to false.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      procMount:
                                        description: |-
                                          procMount denotes the type of proc mount to use for the containers.
                                          The default value is Default which uses the container runtime defaults for
                                          readonly paths and masked paths.
                                          This requires the ProcMountType feature flag to be enabled.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: string
                                      readOnlyRootFilesystem:
                                        description: |-
                                          Whether this container has a read-only root filesystem.
                                          Default is false.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        type: boolean
                                      runAsGroup:
                                        description: |-
                                          The GID to run the entrypoint of the container process.
                                          Uses runtime default if unset.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        format: int64
                                        type: integer
                                      runAsNonRoot:
                                        description: |-
                                          Indicates that the container must run as a non-root user.
                                          If true, the Kubelet will validate the image at runtime to ensure that it
                                          does not run as UID 0 (root) and fail to start the container if it does.
                                          If unset or false, no such validation will be performed.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                        type: boolean
                                      runAsUser:
                                        description: |-
                                          The UID to run the entrypoint of the container process.
                                          Defaults to user specified in image metadata if unspecified.
                                          May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        format: int64
                                        type: integer
                                      seLinuxOptions:
                                        description: |-
                                          The SELinux context to be applied to the container.
                                          If unspecified, the container runtime will allocate a random SELinux context for each
                                          container.  May also be set in PodSecurityContext.  If set in both SecurityContext and
                                          PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          level:
                                            description: Level is SELinux level label
                                              that applies to the container.
                                            type: string
                                          role:
                                            description: Role is a SELinux role label
                                              that applies to the container.
                                            type: string
                                          type:
                                            description: Type is a SELinux type label
                                              that applies to the container.
                                            type: string
                                          user:
                                            description: User is a SELinux user label
                                              that applies to the container.
                                            type: string
                                        type: object
                                      seccompProfile:
                                        description: |-
                                          The seccomp options to use by this container. If seccomp options are
                                          provided at both the pod & container level, the container options
                                          override the pod options.
                                          Note that this field cannot be set when spec.os.name is windows.
                                        properties:
                                          localhostProfile:
                                            description: |-
                                              localhostProfile indicates a profile defined in a file on the node should be used.
                                              The profile must be preconfigured on the node to work.
                                              Must be a descending path, relative to the kubelet's configured seccomp profile location.
                                              Must be set if type is "Localhost". Must NOT be set for any other type.
                                            type: string
                                          type:
                                            description: |-
                                              type indicates which kind of seccomp profile will be applied.
                                              Valid options are:

                                              Localhost - a profile defined in a file on the node should be used.
                                              RuntimeDefault - the container runtime default profile should be used.
                                              Unconfined - no profile should be applied.
                                            type: string
                                        required:
                                        - type
                                        type: object
                                      windowsOptions:
                                        description: |-
                                          The Windows specific settings applied to all containers.
                                          If unspecified, the options from the PodSecurityContext will be used.
                                          If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.
                                          Note that this field cannot be set when spec.os.name is linux.
                                        properties:
                                          gmsaCredentialSpec:
                                            description: |-
                                              GMSACredentialSpec is where the GMSA admission webhook
                                              (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the
                                              GMSA credential spec named by the GMSACredentialSpecName field.
                                            type: string
                                          gmsaCredentialSpecName:
                                            description: GMSACredentialSpecName is
                                              the name of the GMSA credential spec
                                              to use.
                                            type: string
                                          hostProcess:
                                            description: |-
                                              HostProcess determines if a container should be run as a 'Host Process' container.
                                              All of a Pod's containers must have the same effective HostProcess value
                                              (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers).
                                              In addition, if HostProcess is true then HostNetwork must also be set to true.
                                            type: boolean
                                          runAsUserName:
                                            description: |-
                                              The UserName in Windows to run the entrypoint of the container process.
                                              Defaults to the user specified in image metadata if unspecified.
                                              May also be set in PodSecurityContext. If set in both SecurityContext and
                                              PodSecurityContext, the value specified in SecurityContext takes precedence.
                                            type: string
                                        type: object
                                    type: object
                                  startupProbe:
                                    description: |-
                                      StartupProbe indicates that the Pod has successfully initialized.
                                      If specified, no other probes are executed until this completes successfully.
                                      If this probe fails, the Pod will be restarted, just as if the livenessProbe failed.
                                      This can be used to provide different probe parameters at the beginning of a Pod's lifecycle,
                                      when it might take a long time to load data or warm a cache, than during steady-state operation.
                                      This cannot be updated.
                                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                    properties:
                                      exec:
                                        description: Exec specifies a command to execute
                                          in the container.
                                        properties:
                                          command:
                                            description: |-
                                              Command is the command line to execute inside the container, the working directory for the
                                              command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
                                              not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
                                              a shell, you need to explicitly call out to that shell.
                                              Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
                                            items:
                                              type: string
                                            type: array
                                            x-kubernetes-list-type: atomic
                                        type: object
                                      failureThreshold:
                                        description: |-
                                          Minimum consecutive failures for the probe to be considered failed after having succeeded.
                                          Defaults to 3. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      grpc:
                                        description: GRPC specifies a GRPC HealthCheckRequest.
                                        properties:
                                          port:
                                            description: Port number of the gRPC service.
                                              Number must be in the range 1 to 65535.
                                            format: int32
                                            type: integer
                                          service:
                                            default: ""
                                            description: |-
                                              Service is the name of the service to place in the gRPC HealthCheckRequest
                                              (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).

                                              If this is not specified, the default behavior is defined by gRPC.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      httpGet:
                                        description: HTTPGet specifies an HTTP GET
                                          request to perform.
                                        properties:
                                          host:
                                            description: |-
                                              Host name to connect to, defaults to the pod IP. You probably want to set
                                              "Host" in httpHeaders instead.
                                            type: string
                                          httpHeaders:
                                            description: Custom headers to set in
                                              the request. HTTP allows repeated headers.
                                            items:
                                              description: HTTPHeader describes a
                                                custom header to be used in HTTP probes
                                              properties:
                                                name:
                                                  description: |-
                                                    The header field name.
                                                    This will be canonicalized upon output, so case-variant names will be understood as the same header.
                                                  type: string
                                                value:
                                                  description: The header field value
                                                  type: string
                                              required:
                                              - name
                                              - value
                                              type: object
                                            type: array
                                            x-kubernetes-list-type: atomic
                                          path:
                                            description: Path to access on the HTTP
                                              server.
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Name or number of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                          scheme:
                                            description: |-
                                              Scheme to use for connecting to the host.
                                              Defaults to HTTP.
                                            type: string
                                        required:
                                        - port
                                        type: object
                                      initialDelaySeconds:
                                        description: |-
                                          Number of seconds after the container has started before liveness probes are initiated.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                      periodSeconds:
                                        description: |-
                                          How often (in seconds) to perform the probe.
                                          Defaults to 10 seconds. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      successThreshold:
                                        description: |-
                                          Minimum consecutive successes for the probe to be considered successful after having failed.
                                          Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
                                        format: int32
                                        type: integer
                                      tcpSocket:
                                        description: TCPSocket specifies a connection
                                          to a TCP port.
                                        properties:
                                          host:
                                            description: 'Optional: Host name to connect
                                              to, defaults to the pod IP.'
                                            type: string
                                          port:
                                            anyOf:
                                            - type: integer
                                            - type: string
                                            description: |-
                                              Number or name of the port to access on the container.
                                              Number must be in the range 1 to 65535.
                                              Name must be an IANA_SVC_NAME.
                                            x-kubernetes-int-or-string: true
                                        required:
                                        - port
                                        type: object
                                      terminationGracePeriodSeconds:
                                        description: |-
                                          Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
                                          The grace period is the duration in seconds after the processes running in the pod are sent
                                          a termination signal and the time when the processes are forcibly halted with a kill signal.
                                          Set this value longer than the expected cleanup time for your process.
                                          If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
                                          value overrides the value provided by the pod spec.
                                          Value must be a positive integer. Unlike the pod-level field, a value of zero
                                          (stop immediately via the kill signal) is not accepted at the probe level.
                                          This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
                                          Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
                                        format: int64
                                        type: integer
                                      timeoutSeconds:
                                        description: |-
                                          Number of seconds after which the probe times out.
                                          Defaults to 1 second. Minimum value is 1.
                                          More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
                                        format: int32
                                        type: integer
                                    type: object
                                  stdin:
                                    description: |-
                                      Whether this container should allocate a buffer for stdin in the container runtime. If this
                                      is not set, reads from stdin in the container will always result in EOF.
                                      Default is false.
                                    type: boolean
                                  stdinOnce:
                                    description: |-
                                      Whether the container runtime should close the stdin channel after it has been opened by
                                      a single attach. When stdin is true the stdin stream will remain open across multiple attach
                                      sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the
                                      first client attaches to stdin, and then remains open and accepts data until the client disconnects,
                                      at which time stdin is closed and remains closed until the container is restarted. If this
                                      flag is false, a container process that reads from stdin will never receive an EOF.
                                      Default is false.
                                    type: boolean
                                  terminationMessagePath:
                                    description: |-
                                      Optional: Path at which the file to which the container's termination message
                                      will be written is mounted into the container's filesystem.
                                      Message written is intended to be brief final status, such as an assertion failure message.
                                      Will be truncated by the node if greater than 4096 bytes. The total message length across
                                      all containers will be limited to 12kb.
                                      Defaults to /dev/termination-log.
                                      Cannot be updated.
                                    type: string
                                  terminationMessagePolicy:
                                    description: |-
                                      Indicate how the termination message should be populated. File will use the contents of
                                      terminationMessagePath to populate the container status message on both success and failure.
                                      FallbackToLogsOnError will use the last chunk of container log output if the termination
                                      message file is empty and the container exited with an error.
                                      The log output is limited to 2048 bytes or 80 lines, whichever is smaller.
                                      Defaults to File.
                                      Cannot be updated.
                                    type: string
                                  tty:
                                    description: |-
                                      Whether this container should allocate a TTY for itself, also requires 'stdin' to be true.
                                      Default is false.
                                    type: boolean
                                  volumeDevices:
                                    description: volumeDevices is the list of block
                                      devices to be used by the container.
                                    items:
                                      description: volumeDevice describes a mapping
                                        of a raw block device within a container.
                                      properties:
                                        devicePath:
                                          description: devicePath is the path inside
                                            of the container that the device will
                                            be mapped to.
                                          type: string
                                        name:
                                          description: name must match the name of
                                            a persistentVolumeClaim in the pod
                                          type: string
                                      required:
                                      - devicePath
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - devicePath
                                    x-kubernetes-list-type: map
                                  volumeMounts:
                                    description: |-
                                      Pod volumes to mount into the container's filesystem.
                                      Cannot be updated.
                                    items:
                                      description: VolumeMount describes a mounting
                                        of a Volume within a container.
                                      properties:
                                        mountPath:
                                          description: |-
                                            Path within the container at which the volume should be mounted.  Must
                                            not contain ':'.
                                          type: string
                                        mountPropagation:
                                          description: |-
                                            mountPropagation determines how mounts are propagated from the host
                                            to container and the other way around.
                                            When not set, MountPropagationNone is used.
                                            This field is beta in 1.10.
                                            When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified
                                            (which defaults to None).
                                          type: string
                                        name:
                                          description: This must match the Name of
                                            a Volume.
                                          type: string
                                        readOnly:
                                          description: |-
                                            Mounted read-only if true, read-write otherwise (false or unspecified).
                                            Defaults to false.
                                          type: boolean
                                        recursiveReadOnly:
                                          description: |-
                                            RecursiveReadOnly specifies whether read-only mounts should be handled
                                            recursively.

                                            If ReadOnly is false, this field has no meaning and must be unspecified.

                                            If ReadOnly is true, and this field is set to Disabled, the mount is not made
                                            recursively read-only.  If this field is set to IfPossible, the mount is made
                                            recursively read-only, if it is supported by the container runtime.  If this
                                            field is set to Enabled, the mount is made recursively read-only if it is
                                            supported by the container runtime, otherwise the pod will not be started and
                                            an error will be generated to indicate the reason.

                                            If this field is set to IfPossible or Enabled, MountPropagation must be set to
                                            None (or be unspecified, which defaults to None).

                                            If this field is not specified, it is treated as an equivalent of Disabled.
                                          type: string
                                        subPath:
                                          description: |-
                                            Path within the volume from which the container's volume should be mounted.
                                            Defaults to "" (volume's root).
                                          type: string
                                        subPathExpr:
                                          description: |-
                                            Expanded path within the volume from which the container's volume should be mounted.
                                            Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment.
                                            Defaults to "" (volume's root).
                                            SubPathExpr and SubPath are mutually exclusive.
                                          type: string
                                      required:
                                      - mountPath
                                      - name
                                      type: object
                                    type: array
                                    x-kubernetes-list-map-keys:
                                    - mountPath
                                    x-kubernetes-list-type: map
                                  workingDir:
                                    description: |-
                                      Container's working directory.
                                      If not specified, the container runtime's default will be used, which
                                      might be configured in the container image.
                                      Cannot be updated.
                                    type: string
                                required:
                                - name
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            nodeName:
                              description: |-
                                NodeName indicates in which node this pod is scheduled.
                                If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName.
                                Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod.
                                This field should not be used to express a desire for the pod to be scheduled on a specific node.
                                https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodename
                              type: string
                            nodeSelector:
                              additionalProperties:
                                type: string
                              description: |-
                                NodeSelector is a selector which must be true for the pod to fit on a node.
                                Selector which must match a node's labels for the pod to be scheduled on that node.
                                More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
                              type: object
                              x-kubernetes-map-type: atomic
                            os:
                              description: |-
                                Specifies the OS of the containers in the pod.
                                Some pod and container fields are restricted if this is set.

                                If the OS field is set to linux, the following fields must be unset:
                                - securityContext.windowsOptions

                                If the OS field is set to windows, the following fields must be unset:
                                - spec.hostPID
                                - spec.hostIPC
                                - spec.hostUsers
                                - spec.resources
                                - spec.securityContext.appArmorProfile
                                - spec.securityContext.seLinuxOptions
                                - spec.securityContext.seccompProfile
                                - spec.securityContext.fsGroup
                                - spec.securityContext.fsGroupChangePolicy
                                - spec.securityContext.sysctls
                                - spec.shareProcessNamespace
                                - spec.securityContext.runAsUser
                                - spec.securityContext.runAsGroup
                                - spec.securityContext.supplementalGroups
                                - spec.securityContext.supplementalGroupsPolicy
                                - spec.containers[*].securityContext.appArmorProfile
                                - spec.containers[*].securityContext.seLinuxOptions
                                - spec.containers[*].securityContext.seccompProfile
                                - spec.containers[*].securityContext.capabilities
                                - spec.containers[*].securityContext.readOnlyRootFilesystem
                                - spec.containers[*].securityContext.privileged
                                - spec.containers[*].securityContext.allowPrivilegeEscalation
                                - spec.containers[*].securityContext.procMount
                                - spec.containers[*].securityContext.runAsUser
                                - spec.containers[*].securityContext.runAsGroup
                              properties:
                                name:
                                  description: |-
                                    Name is the name of the operating system. The currently supported values are linux and windows.
                                    Additional values may be defined in the future and can be one of:
                                    https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration
                                    Clients should expect to handle additional values and treat unrecognized values in this field as os: null
                                  type: string
                              required:
                              - name
                              type: object
                            overhead:
                              additionalProperties:
                                anyOf:
                                - type: integer
                                - type: string
                                pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                x-kubernetes-int-or-string: true
                              description: |-
                                Overhead represents the resource overhead associated with running a pod for a given RuntimeClass.
                                This field will be autopopulated at admission time by the RuntimeClass admission controller. If
                                the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests.
                                The RuntimeClass admission controller will reject Pod create requests which have the overhead already
                                set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value
                                defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero.
                                More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md
                              type: object
                            preemptionPolicy:
                              description: |-
                                PreemptionPolicy is the Policy for preempting pods with lower priority.
                                One of Never, PreemptLowerPriority.
                                Defaults to PreemptLowerPriority if unset.
                              type: string
                            priority:
                              description: |-
                                The priority value. Various system components use this field to find the
                                priority of the pod. When Priority Admission Controller is enabled, it
                                prevents users from setting this field. The admission controller populates
                                this field from PriorityClassName.
                                The higher the value, the higher the priority.
                              format: int32
                              type: integer
                            priorityClassName:
                              description: |-
                                If specified, indicates the pod's priority. "system-node-critical" and
                                "system-cluster-critical" are two special keywords which indicate the
                                highest priorities with the former being the highest priority. Any other
                                name must be defined by creating a PriorityClass object with that name.
                                If not specified, the pod priority will be default or zero if there is no
                                default.
                              type: string
                            readinessGates:
                              description: |-
                                If specified, all readiness gates will be evaluated for pod readiness.
                                A pod is ready when all its containers are ready AND
                                all conditions specified in the readiness gates have status equal to "True".
                                More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates
                              items:
                                description: PodReadinessGate contains the reference
                                  to a pod condition
                                properties:
                                  conditionType:
                                    description: ConditionType refers to a condition
                                      in the pod's condition list with matching type.
                                    type: string
                                required:
                                - conditionType
                                type: object
                              type: array
                              x-kubernetes-list-type: atomic
                            resourceClaims:
                              description: |-
                                ResourceClaims defines which ResourceClaims must be allocated
                                and reserved before the Pod is allowed to start. The resources
                                will be made available to those containers which consume them
                                by name.

                                This is a stable field but requires that the
                                DynamicResourceAllocation feature gate is enabled.

                                This field is immutable.
                              items:
                                description: |-
                                  PodResourceClaim references exactly one ResourceClaim, either directly
                                  or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim
                                  for the pod.

                                  It adds a name to it that uniquely identifies the ResourceClaim inside the Pod.
                                  Containers that need access to the ResourceClaim reference it with this name.
                                properties:
                                  name:
                                    description: |-
                                      Name uniquely identifies this resource claim inside the pod.
                                      This must be a DNS_LABEL.
                                    type: string
                                  resourceClaimName:
                                    description: |-
                                      ResourceClaimName is the name of a ResourceClaim object in the same
                                      namespace as this pod.

                                      Exactly one of ResourceClaimName and ResourceClaimTemplateName must
                                      be set.
                                    type: string
                                  resourceClaimTemplateName:
                                    description: |-
                                      ResourceClaimTemplateName is the name of a ResourceClaimTemplate
                                      object in the same namespace as this pod.

                                      The template will be used to create a new ResourceClaim, which will
                                      be bound to this pod. When this pod is deleted, the ResourceClaim
                                      will also be deleted. The pod name and resource name, along with a
                                      generated component, will be used to form a unique name for the
                                      ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses.

                                      This field is immutable and no changes will be made to the
                                      corresponding ResourceClaim by the control plane after creating the
                                      ResourceClaim.

                                      Exactly one of ResourceClaimName and ResourceClaimTemplateName must
                                      be set.
                                    type: string
                                required:
                                - name
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            resources:
                              description: |-
                                Resources is the total amount of CPU and Memory resources required by all
                                containers in the pod. It supports specifying Requests and Limits for
                                "cpu", "memory" and "hugepages-" resource names only. ResourceClaims are not supported.

                                This field enables fine-grained control over resource allocation for the
                                entire pod, allowing resource sharing among containers in a pod.

                                This is an alpha field and requires enabling the PodLevelResources feature
                                gate.
                              properties:
                                claims:
                                  description: |-
                                    Claims lists the names of resources, defined in spec.resourceClaims,
                                    that are used by this container.

                                    This field depends on the
                                    DynamicResourceAllocation feature gate.

                                    This field is immutable. It can only be set for containers.
                                  items:
                                    description: ResourceClaim references one entry
                                      in PodSpec.ResourceClaims.
                                    properties:
                                      name:
                                        description: |-
                                          Name must match the name of one entry in pod.spec.resourceClaims of
                                          the Pod where this field is used. It makes that resource available
                                          inside a container.
                                        type: string
                                      request:
                                        description: |-
                                          Request is the name chosen for a request in the referenced claim.
                                          If empty, everything from the claim is made available, otherwise
                                          only the result of this request.
                                        type: string
                                    required:
                                    - name
                                    type: object
                                  type: array
                                  x-kubernetes-list-map-keys:
                                  - name
                                  x-kubernetes-list-type: map
                                limits:
                                  additionalProperties:
                                    anyOf:
                                    - type: integer
                                    - type: string
                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                    x-kubernetes-int-or-string: true
                                  description: |-
                                    Limits describes the maximum amount of compute resources allowed.
                                    More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                  type: object
                                requests:
                                  additionalProperties:
                                    anyOf:
                                    - type: integer
                                    - type: string
                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                    x-kubernetes-int-or-string: true
                                  description: |-
                                    Requests describes the minimum amount of compute resources required.
                                    If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
                                    otherwise to an implementation-defined value. Requests cannot exceed Limits.
                                    More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                  type: object
                              type: object
                            restartPolicy:
                              description: |-
                                Restart policy for all containers within the pod.
                                One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted.
                                Defaults to Always.
                                More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy
                              type: string
                            runtimeClassName:
                              description: |-
                                RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used
                                to run this pod.  If no RuntimeClass resource matches the named class, the pod will not be run.
                                If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an
                                empty definition that uses the default runtime handler.
                                More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class
                              type: string
                            schedulerName:
                              description: |-
                                If specified, the pod will be dispatched by the specified scheduler.
                                If not specified, the pod will be dispatched by the default scheduler.
                              type: string
                            schedulingGates:
                              description: |-
                                SchedulingGates is an opaque list of values that if specified will block scheduling the pod.
                                If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the
                                scheduler will not attempt to schedule the pod.

                                SchedulingGates can only be set at pod creation time, and be removed only afterwards.
                              items:
                                description: PodSchedulingGate is associated to a
                                  Pod to guard its scheduling.
                                properties:
                                  name:
                                    description: |-
                                      Name of the scheduling gate.
                                      Each scheduling gate must have a unique name field.
                                    type: string
                                required:
                                - name
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            securityContext:
                              description: |-
                                SecurityContext holds pod-level security attributes and common container settings.
                                Optional: Defaults to empty.  See type description for default values of each field.
                              properties:
                                appArmorProfile:
                                  description: |-
                                    appArmorProfile is the AppArmor options to use by the containers in this pod.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  properties:
                                    localhostProfile:
                                      description: |-
                                        localhostProfile indicates a profile loaded on the node that should be used.
                                        The profile must be preconfigured on the node to work.
                                        Must match the loaded name of the profile.
                                        Must be set if and only if type is "Localhost".
                                      type: string
                                    type:
                                      description: |-
                                        type indicates which kind of AppArmor profile will be applied.
                                        Valid options are:
                                          Localhost - a profile pre-loaded on the node.
                                          RuntimeDefault - the container runtime's default profile.
                                          Unconfined - no AppArmor enforcement.
                                      type: string
                                  required:
                                  - type
                                  type: object
                                fsGroup:
                                  description: |-
                                    A special supplemental group that applies to all containers in a pod.
                                    Some volume types allow the Kubelet to change the ownership of that volume
                                    to be owned by the pod:

                                    1. The owning GID will be the FSGroup
                                    2. The setgid bit is set (new files created in the volume will be owned by FSGroup)
                                    3. The permission bits are OR'd with rw-rw----

                                    If unset, the Kubelet will not modify the ownership and permissions of any volume.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  format: int64
                                  type: integer
                                fsGroupChangePolicy:
                                  description: |-
                                    fsGroupChangePolicy defines behavior of changing ownership and permission of the volume
                                    before being exposed inside Pod. This field will only apply to
                                    volume types which support fsGroup based ownership (and permissions).
                                    It will have no effect on ephemeral volume types such as: secret, configmaps
                                    and emptydir.
                                    Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  type: string
                                runAsGroup:
                                  description: |-
                                    The GID to run the entrypoint of the container process.
                                    Uses runtime default if unset.
                                    May also be set in SecurityContext.  If set in both SecurityContext and
                                    PodSecurityContext, the value specified in SecurityContext takes precedence
                                    for that container.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  format: int64
                                  type: integer
                                runAsNonRoot:
                                  description: |-
                                    Indicates that the container must run as a non-root user.
                                    If true, the Kubelet will validate the image at runtime to ensure that it
                                    does not run as UID 0 (root) and fail to start the container if it does.
                                    If unset or false, no such validation will be performed.
                                    May also be set in SecurityContext.  If set in both SecurityContext and
                                    PodSecurityContext, the value specified in SecurityContext takes precedence.
                                  type: boolean
                                runAsUser:
                                  description: |-
                                    The UID to run the entrypoint of the container process.
                                    Defaults to user specified in image metadata if unspecified.
                                    May also be set in SecurityContext.  If set in both SecurityContext and
                                    PodSecurityContext, the value specified in SecurityContext takes precedence
                                    for that container.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  format: int64
                                  type: integer
                                seLinuxChangePolicy:
                                  description: |-
                                    seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod.
                                    It has no effect on nodes that do not support SELinux or on volumes that do not support SELinux.
                                    Valid values are "MountOption" and "Recursive".

                                    "Recursive" means relabeling of all files on all Pod volumes by the container runtime.
                                    This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node.

                                    "MountOption" mounts all eligible Pod volumes with `-o context` mount option.
                                    This requires all Pods that share the same volume to use the same SELinux label.
                                    It is not possible to share the same volume among privileged and unprivileged Pods.
                                    Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes
                                    whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their
                                    CSIDriver instance. Other volumes are always re-labelled recursively.
                                    "MountOption" value is allowed only when SELinuxMount feature gate is enabled.

                                    If not specified and SELinuxMount feature gate is enabled, "MountOption" is used.
                                    If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes
                                    and "Recursive" for all other volumes.

                                    This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers.

                                    All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  type: string
                                seLinuxOptions:
                                  description: |-
                                    The SELinux context to be applied to all containers.
                                    If unspecified, the container runtime will allocate a random SELinux context for each
                                    container.  May also be set in SecurityContext.  If set in
                                    both SecurityContext and PodSecurityContext, the value specified in SecurityContext
                                    takes precedence for that container.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  properties:
                                    level:
                                      description: Level is a SELinux level label that
                                        applies to the container.
                                      type: string
                                    role:
                                      description: Role is a SELinux role label that
                                        applies to the container.
                                      type: string
                                    type:
                                      description: Type is a SELinux type label that
                                        applies to the container.
                                      type: string
                                    user:
                                      description: User is a SELinux user label that
                                        applies to the container.
                                      type: string
                                  type: object
                                seccompProfile:
                                  description: |-
                                    The seccomp options to use by the containers in this pod.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  properties:
                                    localhostProfile:
                                      description: |-
                                        localhostProfile indicates a profile defined in a file on the node should be used.
                                        The profile must be preconfigured on the node to work.
                                        Must be a descending path, relative to the kubelet's configured seccomp profile location.
                                        Must be set if type is "Localhost". Must NOT be set for any other type.
                                      type: string
                                    type:
                                      description: |-
                                        type indicates which kind of seccomp profile will be applied.
                                        Valid options are:

                                        Localhost - a profile defined in a file on the node should be used.
                                        RuntimeDefault - the container runtime default profile should be used.
                                        Unconfined - no profile should be applied.
                                      type: string
                                  required:
                                  - type
                                  type: object
                                supplementalGroups:
                                  description: |-
                                    A list of groups applied to the first process run in each container, in
                                    addition to the container's primary GID and fsGroup (if specified).  If
                                    the SupplementalGroupsPolicy feature is enabled, the
                                    supplementalGroupsPolicy field determines whether these are in addition
                                    to or instead of any group memberships defined in the container image.
                                    If unspecified, no additional groups are added, though group memberships
                                    defined in the container image may still be used, depending on the
                                    supplementalGroupsPolicy field.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  items:
                                    format: int64
                                    type: integer
                                  type: array
                                  x-kubernetes-list-type: atomic
                                supplementalGroupsPolicy:
                                  description: |-
                                    Defines how supplemental groups of the first container processes are calculated.
                                    Valid values are "Merge" and "Strict". If not specified, "Merge" is used.
                                    (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled
                                    and the container runtime must implement support for this feature.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  type: string
                                sysctls:
                                  description: |-
                                    Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported
                                    sysctls (by the container runtime) might fail to launch.
                                    Note that this field cannot be set when spec.os.name is windows.
                                  items:
                                    description: Sysctl defines a kernel parameter
                                      to be set
                                    properties:
                                      name:
                                        description: Name of a property to set
                                        type: string
                                      value:
                                        description: Value of a property to set
                                        type: string
                                    required:
                                    - name
                                    - value
                                    type: object
                                  type: array
                                  x-kubernetes-list-type: atomic
                                windowsOptions:
                                  description: |-
                                    The Windows specific settings applied to all containers.
                                    If unspecified, the options within a container's SecurityContext will be used.
                                    If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.
                                    Note that this field cannot be set when spec.os.name is linux.
                                  properties:
                                    gmsaCredentialSpec:
                                      description: |-
                                        GMSACredentialSpec is where the GMSA admission webhook
                                        (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the
                                        GMSA credential spec named by the GMSACredentialSpecName field.
                                      type: string
                                    gmsaCredentialSpecName:
                                      description: GMSACredentialSpecName is the name
                                        of the GMSA credential spec to use.
                                      type: string
                                    hostProcess:
                                      description: |-
                                        HostProcess determines if a container should be run as a 'Host Process' container.
                                        All of a Pod's containers must have the same effective HostProcess value
                                        (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers).
                                        In addition, if HostProcess is true then HostNetwork must also be set to true.
                                      type: boolean
                                    runAsUserName:
                                      description: |-
                                        The UserName in Windows to run the entrypoint of the container process.
                                        Defaults to the user specified in image metadata if unspecified.
                                        May also be set in PodSecurityContext. If set in both SecurityContext and
                                        PodSecurityContext, the value specified in SecurityContext takes precedence.
                                      type: string
                                  type: object
                              type: object
                            serviceAccount:
                              description: |-
                                DeprecatedServiceAccount is a deprecated alias for ServiceAccountName.
                                Deprecated: Use serviceAccountName instead.
                              type: string
                            serviceAccountName:
                              description: |-
                                ServiceAccountName is the name of the ServiceAccount to use to run this pod.
                                More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
                              type: string
                            setHostnameAsFQDN:
                              description: |-
                                If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default).
                                In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname).
                                In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters to FQDN.
                                If a pod does not have FQDN, this has no effect.
                                Defaults to false.
                              type: boolean
                            shareProcessNamespace:
                              description: |-
                                Share a single process namespace between all of the containers in a pod.
                                When this is set containers will be able to view and signal processes from other containers
                                in the same pod, and the first process in each container will not be assigned PID 1.
                                HostPID and ShareProcessNamespace cannot both be set.
                                Optional: Defaults to false.
                              type: boolean
                            subdomain:
                              description: |-
                                If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>".
                                If not specified, the pod will not have a domainname at all.
                              type: string
                            terminationGracePeriodSeconds:
                              description: |-
                                Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request.
                                Value must be non-negative integer. The value zero indicates stop immediately via
                                the kill signal (no opportunity to shut down).
                                If this value is nil, the default grace period will be used instead.
                                The grace period is the duration in seconds after the processes running in the pod are sent
                                a termination signal and the time when the processes are forcibly halted with a kill signal.
                                Set this value longer than the expected cleanup time for your process.
                                Defaults to 30 seconds.
                              format: int64
                              type: integer
                            tolerations:
                              description: If specified, the pod's tolerations.
                              items:
                                description: |-
                                  The pod this Toleration is attached to tolerates any taint that matches
                                  the triple <key,value,effect> using the matching operator <operator>.
                                properties:
                                  effect:
                                    description: |-
                                      Effect indicates the taint effect to match. Empty means match all taint effects.
                                      When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
                                    type: string
                                  key:
                                    description: |-
                                      Key is the taint key that the toleration applies to. Empty means match all taint keys.
                                      If the key is empty, operator must be Exists; this combination means to match all values and all keys.
                                    type: string
                                  operator:
                                    description: |-
                                      Operator represents a key's relationship to the value.
                                      Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal.
                                      Exists is equivalent to wildcard for value, so that a pod can
                                      tolerate all taints of a particular category.
                                      Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators).
                                    type: string
                                  tolerationSeconds:
                                    description: |-
                                      TolerationSeconds represents the period of time the toleration (which must be
                                      of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
                                      it is not set, which means tolerate the taint forever (do not evict). Zero and
                                      negative values will be treated as 0 (evict immediately) by the system.
                                    format: int64
                                    type: integer
                                  value:
                                    description: |-
                                      Value is the taint value the toleration matches to.
                                      If the operator is Exists, the value should be empty, otherwise just a regular string.
                                    type: string
                                type: object
                              type: array
                              x-kubernetes-list-type: atomic
                            topologySpreadConstraints:
                              description: |-
                                TopologySpreadConstraints describes how a group of pods ought to spread across topology
                                domains. Scheduler will schedule pods in a way which abides by the constraints.
                                All topologySpreadConstraints are ANDed.
                              items:
                                description: TopologySpreadConstraint specifies how
                                  to spread matching pods among the given topology.
                                properties:
                                  labelSelector:
                                    description: |-
                                      LabelSelector is used to find matching pods.
                                      Pods that match this label selector are counted to determine the number of pods
                                      in their corresponding topology domain.
                                    properties:
                                      matchExpressions:
                                        description: matchExpressions is a list of
                                          label selector requirements. The requirements
                                          are ANDed.
                                        items:
                                          description: |-
                                            A label selector requirement is a selector that contains values, a key, and an operator that
                                            relates the key and values.
                                          properties:
                                            key:
                                              description: key is the label key that
                                                the selector applies to.
                                              type: string
                                            operator:
                                              description: |-
                                                operator represents a key's relationship to a set of values.
                                                Valid operators are In, NotIn, Exists and DoesNotExist.
                                              type: string
                                            values:
                                              description: |-
                                                values is an array of string values. If the operator is In or NotIn,
                                                the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                the values array must be empty. This array is replaced during a strategic
                                                merge patch.
                                              items:
                                                type: string
                                              type: array
                                              x-kubernetes-list-type: atomic
                                          required:
                                          - key
                                          - operator
                                          type: object
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      matchLabels:
                                        additionalProperties:
                                          type: string
                                        description: |-
                                          matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                          map is equivalent to an element of matchExpressions, whose key field is "key", the
                                          operator is "In", and the values array contains only "value". The requirements are ANDed.
                                        type: object
                                    type: object
                                    x-kubernetes-map-type: atomic
                                  matchLabelKeys:
                                    description: |-
                                      MatchLabelKeys is a set of pod label keys to select the pods over which
                                      spreading will be calculated. The keys are used to lookup values from the
                                      incoming pod labels, those key-value labels are ANDed with labelSelector
                                      to select the group of existing pods over which spreading will be calculated
                                      for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector.
                                      MatchLabelKeys cannot be set when LabelSelector isn't set.
                                      Keys that don't exist in the incoming pod labels will
                                      be ignored. A null or empty list means only match against labelSelector.

                                      This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default).
                                    items:
                                      type: string
                                    type: array
                                    x-kubernetes-list-type: atomic
                                  maxSkew:
                                    description: |-
                                      MaxSkew describes the degree to which pods may be unevenly distributed.
                                      When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
                                      between the number of matching pods in the target topology and the global minimum.
                                      The global minimum is the minimum number of matching pods in an eligible domain
                                      or zero if the number of eligible domains is less than MinDomains.
                                      For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
                                      labelSelector spread as 2/2/1:
                                      In this case, the global minimum is 1.
                                      | zone1 | zone2 | zone3 |
                                      |  P P  |  P P  |   P   |
                                      - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
                                      scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
                                      violate MaxSkew(1).
                                      - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
                                      When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
                                      to topologies that satisfy it.
                                      It's a required field. Default value is 1 and 0 is not allowed.
                                    format: int32
                                    type: integer
                                  minDomains:
                                    description: |-
                                      MinDomains indicates a minimum number of eligible domains.
                                      When the number of eligible domains with matching topology keys is less than minDomains,
                                      Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
                                      And when the number of eligible domains with matching topology keys is equal to or greater than minDomains,
                                      this value has no effect on scheduling.
                                      As a result, when the number of eligible domains is less than minDomains,
                                      scheduler won't schedule more than maxSkew Pods to those domains.
                                      If value is nil, the constraint behaves as if MinDomains is equal to 1.
                                      Valid values are integers greater than 0.
                                      When value is not nil, WhenUnsatisfiable must be DoNotSchedule.

                                      For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
                                      labelSelector spread as 2/2/2:
                                      | zone1 | zone2 | zone3 |
                                      |  P P  |  P P  |  P P  |
                                      The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0.
                                      In this situation, a new pod with the same labelSelector cannot be scheduled,
                                      because the computed skew would be 3(3 - 0) if the new Pod were scheduled to any of the three zones,
                                      which would violate MaxSkew.
                                    format: int32
                                    type: integer
                                  nodeAffinityPolicy:
                                    description: |-
                                      NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector
                                      when calculating pod topology spread skew. Options are:
                                      - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations.
                                      - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.

                                      If this value is nil, the behavior is equivalent to the Honor policy.
                                    type: string
                                  nodeTaintsPolicy:
                                    description: |-
                                      NodeTaintsPolicy indicates how we will treat node taints when calculating
                                      pod topology spread skew. Options are:
                                      - Honor: nodes without taints, along with tainted nodes for which the incoming pod
                                      has a toleration, are included.
                                      - Ignore: node taints are ignored. All nodes are included.

                                      If this value is nil, the behavior is equivalent to the Ignore policy.
                                    type: string
                                  topologyKey:
                                    description: |-
                                      TopologyKey is the key of node labels. Nodes that have a label with this key
                                      and identical values are considered to be in the same topology.
                                      We consider each <key, value> as a "bucket", and try to put balanced number
                                      of pods into each bucket.
                                      We define a domain as a particular instance of a topology.
                                      Also, we define an eligible domain as a domain whose nodes meet the requirements of
                                      nodeAffinityPolicy and nodeTaintsPolicy.
                                      e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
                                      And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
                                      It's a required field.
                                    type: string
                                  whenUnsatisfiable:
                                    description: |-
                                      WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
                                      the spread constraint.
                                      - DoNotSchedule (default) tells the scheduler not to schedule it.
                                      - ScheduleAnyway tells the scheduler to schedule the pod in any location,
                                        while giving higher precedence to topologies that would help reduce the
                                        skew.
                                      A constraint is considered "Unsatisfiable" for an incoming pod
                                      if and only if every possible node assignment for that pod would violate
                                      "MaxSkew" on some topology.
                                      For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
                                      labelSelector spread as 3/1/1:
                                      | zone1 | zone2 | zone3 |
                                      | P P P |   P   |   P   |
                                      If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
                                      to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
                                      MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
                                      won't make it *more* imbalanced.
                                      It's a required field.
                                    type: string
                                required:
                                - maxSkew
                                - topologyKey
                                - whenUnsatisfiable
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - topologyKey
                              - whenUnsatisfiable
                              x-kubernetes-list-type: map
                            volumes:
                              description: |-
                                List of volumes that can be mounted by containers belonging to the pod.
                                More info: https://kubernetes.io/docs/concepts/storage/volumes
                              items:
                                description: Volume represents a named volume in a
                                  pod that may be accessed by any container in the
                                  pod.
                                properties:
                                  awsElasticBlockStore:
                                    description: |-
                                      awsElasticBlockStore represents an AWS Disk resource that is attached to a
                                      kubelet's host machine and then exposed to the pod.
                                      Deprecated: AWSElasticBlockStore is deprecated. All operations for the in-tree
                                      awsElasticBlockStore type are redirected to the ebs.csi.aws.com CSI driver.
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type of the volume that you want to mount.
                                          Tip: Ensure that the filesystem type is supported by the host operating system.
                                          Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore
                                        type: string
                                      partition:
                                        description: |-
                                          partition is the partition in the volume that you want to mount.
                                          If omitted, the default is to mount by volume name.
                                          Examples: For volume /dev/sda1, you specify the partition as "1".
                                          Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty).
                                        format: int32
                                        type: integer
                                      readOnly:
                                        description: |-
                                          readOnly value true will force the readOnly setting in VolumeMounts.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore
                                        type: boolean
                                      volumeID:
                                        description: |-
                                          volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume).
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore
                                        type: string
                                    required:
                                    - volumeID
                                    type: object
                                  azureDisk:
                                    description: |-
                                      azureDisk represents an Azure Data Disk mount on the host and bind mount to the pod.
                                      Deprecated: AzureDisk is deprecated. All operations for the in-tree azureDisk type
                                      are redirected to the disk.csi.azure.com CSI driver.
                                    properties:
                                      cachingMode:
                                        description: 'cachingMode is the Host Caching
                                          mode: None, Read Only, Read Write.'
                                        type: string
                                      diskName:
                                        description: diskName is the Name of the data
                                          disk in the blob storage
                                        type: string
                                      diskURI:
                                        description: diskURI is the URI of data disk
                                          in the blob storage
                                        type: string
                                      fsType:
                                        default: ext4
                                        description: |-
                                          fsType is Filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                        type: string
                                      kind:
                                        description: 'kind expected values are Shared:
                                          multiple blob disks per storage account; Dedicated:
                                          single blob disk per storage account; Managed:
                                          azure managed data disk (only in managed
                                          availability set). Defaults to Shared.'
                                        type: string
                                      readOnly:
                                        default: false
                                        description: |-
                                          readOnly Defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                    required:
                                    - diskName
                                    - diskURI
                                    type: object
                                  azureFile:
                                    description: |-
                                      azureFile represents an Azure File Service mount on the host and bind mount to the pod.
                                      Deprecated: AzureFile is deprecated. All operations for the in-tree azureFile type
                                      are redirected to the file.csi.azure.com CSI driver.
                                    properties:
                                      readOnly:
                                        description: |-
                                          readOnly defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                      secretName:
                                        description: secretName is the name of secret
                                          that contains Azure Storage Account Name
                                          and Key
                                        type: string
                                      shareName:
                                        description: shareName is the azure share
                                          Name
                                        type: string
                                    required:
                                    - secretName
                                    - shareName
                                    type: object
                                  cephfs:
                                    description: |-
                                      cephFS represents a Ceph FS mount on the host that shares a pod's lifetime.
                                      Deprecated: CephFS is deprecated and the in-tree cephfs type is no longer supported.
                                    properties:
                                      monitors:
                                        description: |-
                                          monitors is Required: Monitors is a collection of Ceph monitors
                                          More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it
                                        items:
                                          type: string
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      path:
                                        description: 'path is Optional: Used as the
                                          mounted root, rather than the full Ceph
                                          tree, default is /'
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly is Optional: Defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                          More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it
                                        type: boolean
                                      secretFile:
                                        description: |-
                                          secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret
                                          More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it
                                        type: string
                                      secretRef:
                                        description: |-
                                          secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty.
                                          More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      user:
                                        description: |-
                                          user is optional: User is the rados user name, default is admin
                                          More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it
                                        type: string
                                    required:
                                    - monitors
                                    type: object
                                  cinder:
                                    description: |-
                                      cinder represents a cinder volume attached and mounted on kubelets host machine.
                                      Deprecated: Cinder is deprecated. All operations for the in-tree cinder type
                                      are redirected to the cinder.csi.openstack.org CSI driver.
                                      More info: https://examples.k8s.io/mysql-cinder-pd/README.md
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                          More info: https://examples.k8s.io/mysql-cinder-pd/README.md
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                          More info: https://examples.k8s.io/mysql-cinder-pd/README.md
                                        type: boolean
                                      secretRef:
                                        description: |-
                                          secretRef is optional: points to a secret object containing parameters used to connect
                                          to OpenStack.
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      volumeID:
                                        description: |-
                                          volumeID used to identify the volume in cinder.
                                          More info: https://examples.k8s.io/mysql-cinder-pd/README.md
                                        type: string
                                    required:
                                    - volumeID
                                    type: object
                                  configMap:
                                    description: configMap represents a configMap
                                      that should populate this volume
                                    properties:
                                      defaultMode:
                                        description: |-
                                          defaultMode is optional: mode bits used to set permissions on created files by default.
                                          Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                          YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                          Defaults to 0644.
                                          Directories within the path are not affected by this setting.
                                          This might be in conflict with other options that affect the file
                                          mode, like fsGroup, and the result can be other mode bits set.
                                        format: int32
                                        type: integer
                                      items:
                                        description: |-
                                          items if unspecified, each key-value pair in the Data field of the referenced
                                          ConfigMap will be projected into the volume as a file whose name is the
                                          key and content is the value. If specified, the listed keys will be
                                          projected into the specified paths, and unlisted keys will not be
                                          present. If a key is specified which is not present in the ConfigMap,
                                          the volume setup will error unless it is marked optional. Paths must be
                                          relative and may not contain the '..' path or start with '..'.
                                        items:
                                          description: Maps a string key to a path
                                            within a volume.
                                          properties:
                                            key:
                                              description: key is the key to project.
                                              type: string
                                            mode:
                                              description: |-
                                                mode is Optional: mode bits used to set permissions on this file.
                                                Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                                YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                                If not specified, the volume defaultMode will be used.
                                                This might be in conflict with other options that affect the file
                                                mode, like fsGroup, and the result can be other mode bits set.
                                              format: int32
                                              type: integer
                                            path:
                                              description: |-
                                                path is the relative path of the file to map the key to.
                                                May not be an absolute path.
                                                May not contain the path element '..'.
                                                May not start with the string '..'.
                                              type: string
                                          required:
                                          - key
                                          - path
                                          type: object
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      name:
                                        default: ""
                                        description: |-
                                          Name of the referent.
                                          This field is effectively required, but due to backwards compatibility is
                                          allowed to be empty. Instances of this type with an empty value here are
                                          almost certainly wrong.
                                          More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                        type: string
                                      optional:
                                        description: optional specify whether the
                                          ConfigMap or its keys must be defined
                                        type: boolean
                                    type: object
                                    x-kubernetes-map-type: atomic
                                  csi:
                                    description: csi (Container Storage Interface)
                                      represents ephemeral storage that is handled
                                      by certain external CSI drivers.
                                    properties:
                                      driver:
                                        description: |-
                                          driver is the name of the CSI driver that handles this volume.
                                          Consult with your admin for the correct name as registered in the cluster.
                                        type: string
                                      fsType:
                                        description: |-
                                          fsType to mount. Ex. "ext4", "xfs", "ntfs".
                                          If not provided, the empty value is passed to the associated CSI driver
                                          which will determine the default filesystem to apply.
                                        type: string
                                      nodePublishSecretRef:
                                        description: |-
                                          nodePublishSecretRef is a reference to the secret object containing
                                          sensitive information to pass to the CSI driver to complete the CSI
                                          NodePublishVolume and NodeUnpublishVolume calls.
                                          This field is optional, and may be empty if no secret is required. If the
                                          secret object contains more than one secret, all secret references are passed.
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      readOnly:
                                        description: |-
                                          readOnly specifies a read-only configuration for the volume.
                                          Defaults to false (read/write).
                                        type: boolean
                                      volumeAttributes:
                                        additionalProperties:
                                          type: string
                                        description: |-
                                          volumeAttributes stores driver-specific properties that are passed to the CSI
                                          driver. Consult your driver's documentation for supported values.
                                        type: object
                                    required:
                                    - driver
                                    type: object
                                  downwardAPI:
                                    description: downwardAPI represents downward API
                                      about the pod that should populate this volume
                                    properties:
                                      defaultMode:
                                        description: |-
                                          Optional: mode bits used to set permissions on created files by default.
                                          Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                          YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                          Defaults to 0644.
                                          Directories within the path are not affected by this setting.
                                          This might be in conflict with other options that affect the file
                                          mode, like fsGroup, and the result can be other mode bits set.
                                        format: int32
                                        type: integer
                                      items:
                                        description: Items is a list of downward API
                                          volume file
                                        items:
                                          description: DownwardAPIVolumeFile represents
                                            information to create the file containing
                                            the pod field
                                          properties:
                                            fieldRef:
                                              description: 'Required: Selects a field
                                                of the pod: only annotations, labels,
                                                name, namespace and uid are supported.'
                                              properties:
                                                apiVersion:
                                                  description: Version of the schema
                                                    the FieldPath is written in terms
                                                    of, defaults to "v1".
                                                  type: string
                                                fieldPath:
                                                  description: Path of the field to
                                                    select in the specified API version.
                                                  type: string
                                              required:
                                              - fieldPath
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            mode:
                                              description: |-
                                                Optional: mode bits used to set permissions on this file, must be an octal value
                                                between 0000 and 0777 or a decimal value between 0 and 511.
                                                YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                                If not specified, the volume defaultMode will be used.
                                                This might be in conflict with other options that affect the file
                                                mode, like fsGroup, and the result can be other mode bits set.
                                              format: int32
                                              type: integer
                                            path:
                                              description: 'Required: Path is the
                                                relative path name of the file to
                                                be created. Must not be absolute or
                                                contain the ''..'' path. Must be utf-8
                                                encoded. The first item of the relative
                                                path must not start with ''..'''
                                              type: string
                                            resourceFieldRef:
                                              description: |-
                                                Selects a resource of the container: only resources limits and requests
                                                (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported.
                                              properties:
                                                containerName:
                                                  description: 'Container name: required
                                                    for volumes, optional for env
                                                    vars'
                                                  type: string
                                                divisor:
                                                  anyOf:
                                                  - type: integer
                                                  - type: string
                                                  description: Specifies the output
                                                    format of the exposed resources,
                                                    defaults to "1"
                                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                  x-kubernetes-int-or-string: true
                                                resource:
                                                  description: 'Required: resource
                                                    to select'
                                                  type: string
                                              required:
                                              - resource
                                              type: object
                                              x-kubernetes-map-type: atomic
                                          required:
                                          - path
                                          type: object
                                        type: array
                                        x-kubernetes-list-type: atomic
                                    type: object
                                  emptyDir:
                                    description: |-
                                      emptyDir represents a temporary directory that shares a pod's lifetime.
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir
                                    properties:
                                      medium:
                                        description: |-
                                          medium represents what type of storage medium should back this directory.
                                          The default is "" which means to use the node's default medium.
                                          Must be an empty string (default) or Memory.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir
                                        type: string
                                      sizeLimit:
                                        anyOf:
                                        - type: integer
                                        - type: string
                                        description: |-
                                          sizeLimit is the total amount of local storage required for this EmptyDir volume.
                                          The size limit is also applicable for memory medium.
                                          The maximum usage on memory medium EmptyDir would be the minimum value between
                                          the SizeLimit specified here and the sum of memory limits of all containers in a pod.
                                          The default is nil which means that the limit is undefined.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir
                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                        x-kubernetes-int-or-string: true
                                    type: object
                                  ephemeral:
                                    description: |-
                                      ephemeral represents a volume that is handled by a cluster storage driver.
                                      The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts,
                                      and deleted when the pod is removed.

                                      Use this if:
                                      a) the volume is only needed while the pod runs,
                                      b) features of normal volumes like restoring from snapshot or capacity
                                         tracking are needed,
                                      c) the storage driver is specified through a storage class, and
                                      d) the storage driver supports dynamic volume provisioning through
                                         a PersistentVolumeClaim (see EphemeralVolumeSource for more
                                         information on the connection between this volume type
                                         and PersistentVolumeClaim).

                                      Use PersistentVolumeClaim or one of the vendor-specific
                                      APIs for volumes that persist for longer than the lifecycle
                                      of an individual pod.

                                      Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to
                                      be used that way - see the documentation of the driver for
                                      more information.

                                      A pod can use both types of ephemeral volumes and
                                      persistent volumes at the same time.
                                    properties:
                                      volumeClaimTemplate:
                                        description: |-
                                          Will be used to create a stand-alone PVC to provision the volume.
                                          The pod in which this EphemeralVolumeSource is embedded will be the
                                          owner of the PVC, i.e. the PVC will be deleted together with the
                                          pod.  The name of the PVC will be `<pod name>-<volume name>` where
                                          `<volume name>` is the name from the `PodSpec.Volumes` array
                                          entry. Pod validation will reject the pod if the concatenated name
                                          is not valid for a PVC (for example, too long).

                                          An existing PVC with that name that is not owned by the pod
                                          will *not* be used for the pod to avoid using an unrelated
                                          volume by mistake. Starting the pod is then blocked until
                                          the unrelated PVC is removed. If such a pre-created PVC is
                                          meant to be used by the pod, the PVC has to be updated with an
                                          owner reference to the pod once the pod exists. Normally
                                          this should not be necessary, but it may be useful when
                                          manually reconstructing a broken cluster.

                                          This field is read-only and no changes will be made by Kubernetes
                                          to the PVC after it has been created.

                                          Required, must not be nil.
                                        properties:
                                          metadata:
                                            description: |-
                                              May contain labels and annotations that will be copied into the PVC
                                              when creating it. No other fields are allowed and will be rejected during
                                              validation.
                                            properties:
                                              annotations:
                                                additionalProperties:
                                                  type: string
                                                type: object
                                              finalizers:
                                                items:
                                                  type: string
                                                type: array
                                              labels:
                                                additionalProperties:
                                                  type: string
                                                type: object
                                              name:
                                                type: string
                                              namespace:
                                                type: string
                                            type: object
                                          spec:
                                            description: |-
                                              The specification for the PersistentVolumeClaim. The entire content is
                                              copied unchanged into the PVC that gets created from this
                                              template. The same fields as in a PersistentVolumeClaim
                                              are also valid here.
                                            properties:
                                              accessModes:
                                                description: |-
                                                  accessModes contains the desired access modes the volume should have.
                                                  More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1
                                                items:
                                                  type: string
                                                type: array
                                                x-kubernetes-list-type: atomic
                                              dataSource:
                                                description: |-
                                                  dataSource field can be used to specify either:
                                                  * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot)
                                                  * An existing PVC (PersistentVolumeClaim)
                                                  If the provisioner or an external controller can support the specified data source,
                                                  it will create a new volume based on the contents of the specified data source.
                                                  When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef,
                                                  and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified.
                                                  If the namespace is specified, then dataSourceRef will not be copied to dataSource.
                                                properties:
                                                  apiGroup:
                                                    description: |-
                                                      APIGroup is the group for the resource being referenced.
                                                      If APIGroup is not specified, the specified Kind must be in the core API group.
                                                      For any other third-party types, APIGroup is required.
                                                    type: string
                                                  kind:
                                                    description: Kind is the type
                                                      of resource being referenced
                                                    type: string
                                                  name:
                                                    description: Name is the name
                                                      of resource being referenced
                                                    type: string
                                                required:
                                                - kind
                                                - name
                                                type: object
                                                x-kubernetes-map-type: atomic
                                              dataSourceRef:
                                                description: |-
                                                  dataSourceRef specifies the object from which to populate the volume with data, if a non-empty
                                                  volume is desired. This may be any object from a non-empty API group (non
                                                  core object) or a PersistentVolumeClaim object.
                                                  When this field is specified, volume binding will only succeed if the type of
                                                  the specified object matches some installed volume populator or dynamic
                                                  provisioner.
                                                  This field will replace the functionality of the dataSource field and as such
                                                  if both fields are non-empty, they must have the same value. For backwards
                                                  compatibility, when namespace isn't specified in dataSourceRef,
                                                  both fields (dataSource and dataSourceRef) will be set to the same
                                                  value automatically if one of them is empty and the other is non-empty.
                                                  When namespace is specified in dataSourceRef,
                                                  dataSource isn't set to the same value and must be empty.
                                                  There are three important differences between dataSource and dataSourceRef:
                                                  * While dataSource only allows two specific types of objects, dataSourceRef
                                                    allows any non-core object, as well as PersistentVolumeClaim objects.
                                                  * While dataSource ignores disallowed values (dropping them), dataSourceRef
                                                    preserves all values, and generates an error if a disallowed value is
                                                    specified.
                                                  * While dataSource only allows local objects, dataSourceRef allows objects
                                                    in any namespaces.
                                                  (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled.
                                                  (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled.
                                                properties:
                                                  apiGroup:
                                                    description: |-
                                                      APIGroup is the group for the resource being referenced.
                                                      If APIGroup is not specified, the specified Kind must be in the core API group.
                                                      For any other third-party types, APIGroup is required.
                                                    type: string
                                                  kind:
                                                    description: Kind is the type
                                                      of resource being referenced
                                                    type: string
                                                  name:
                                                    description: Name is the name
                                                      of resource being referenced
                                                    type: string
                                                  namespace:
                                                    description: |-
                                                      Namespace is the namespace of resource being referenced
                                                      Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details.
                                                      (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled.
                                                    type: string
                                                required:
                                                - kind
                                                - name
                                                type: object
                                              resources:
                                                description: |-
                                                  resources represents the minimum resources the volume should have.
                                                  Users are allowed to specify resource requirements
                                                  that are lower than previous value but must still be higher than capacity recorded in the
                                                  status field of the claim.
                                                  More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources
                                                properties:
                                                  limits:
                                                    additionalProperties:
                                                      anyOf:
                                                      - type: integer
                                                      - type: string
                                                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                      x-kubernetes-int-or-string: true
                                                    description: |-
                                                      Limits describes the maximum amount of compute resources allowed.
                                                      More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                                    type: object
                                                  requests:
                                                    additionalProperties:
                                                      anyOf:
                                                      - type: integer
                                                      - type: string
                                                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                      x-kubernetes-int-or-string: true
                                                    description: |-
                                                      Requests describes the minimum amount of compute resources required.
                                                      If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
                                                      otherwise to an implementation-defined value. Requests cannot exceed Limits.
                                                      More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                                                    type: object
                                                type: object
                                              selector:
                                                description: selector is a label query
                                                  over volumes to consider for binding.
                                                properties:
                                                  matchExpressions:
                                                    description: matchExpressions
                                                      is a list of label selector
                                                      requirements. The requirements
                                                      are ANDed.
                                                    items:
                                                      description: |-
                                                        A label selector requirement is a selector that contains values, a key, and an operator that
                                                        relates the key and values.
                                                      properties:
                                                        key:
                                                          description: key is the
                                                            label key that the selector
                                                            applies to.
                                                          type: string
                                                        operator:
                                                          description: |-
                                                            operator represents a key's relationship to a set of values.
                                                            Valid operators are In, NotIn, Exists and DoesNotExist.
                                                          type: string
                                                        values:
                                                          description: |-
                                                            values is an array of string values. If the operator is In or NotIn,
                                                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                            the values array must be empty. This array is replaced during a strategic
                                                            merge patch.
                                                          items:
                                                            type: string
                                                          type: array
                                                          x-kubernetes-list-type: atomic
                                                      required:
                                                      - key
                                                      - operator
                                                      type: object
                                                    type: array
                                                    x-kubernetes-list-type: atomic
                                                  matchLabels:
                                                    additionalProperties:
                                                      type: string
                                                    description: |-
                                                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                      map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                      operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                    type: object
                                                type: object
                                                x-kubernetes-map-type: atomic
                                              storageClassName:
                                                description: |-
                                                  storageClassName is the name of the StorageClass required by the claim.
                                                  More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1
                                                type: string
                                              volumeAttributesClassName:
                                                description: |-
                                                  volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
                                                  If specified, the CSI driver will create or update the volume with the attributes defined
                                                  in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
                                                  it can be changed after the claim is created. An empty string or nil value indicates that no
                                                  VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state,
                                                  this field can be reset to its previous value (including nil) to cancel the modification.
                                                  If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be
                                                  set to a Pending state, as reflected by the modifyVolumeStatus field, until such a resource
                                                  exists.
                                                  More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/
                                                type: string
                                              volumeMode:
                                                description: |-
                                                  volumeMode defines what type of volume is required by the claim.
                                                  Value of Filesystem is implied when not included in claim spec.
                                                type: string
                                              volumeName:
                                                description: volumeName is the binding
                                                  reference to the PersistentVolume
                                                  backing this claim.
                                                type: string
                                            type: object
                                        required:
                                        - spec
                                        type: object
                                    type: object
                                  fc:
                                    description: fc represents a Fibre Channel resource
                                      that is attached to a kubelet's host machine
                                      and then exposed to the pod.
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                        type: string
                                      lun:
                                        description: 'lun is Optional: FC target lun
                                          number'
                                        format: int32
                                        type: integer
                                      readOnly:
                                        description: |-
                                          readOnly is Optional: Defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                      targetWWNs:
                                        description: 'targetWWNs is Optional: FC target
                                          worldwide names (WWNs)'
                                        items:
                                          type: string
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      wwids:
                                        description: |-
                                          wwids is Optional: FC volume world wide identifiers (wwids)
                                          Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously.
                                        items:
                                          type: string
                                        type: array
                                        x-kubernetes-list-type: atomic
                                    type: object
                                  flexVolume:
                                    description: |-
                                      flexVolume represents a generic volume resource that is
                                      provisioned/attached using an exec based plugin.
                                      Deprecated: FlexVolume is deprecated. Consider using a CSIDriver instead.
                                    properties:
                                      driver:
                                        description: driver is the name of the driver
                                          to use for this volume.
                                        type: string
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script.
                                        type: string
                                      options:
                                        additionalProperties:
                                          type: string
                                        description: 'options is Optional: this field
                                          holds extra command options if any.'
                                        type: object
                                      readOnly:
                                        description: |-
                                          readOnly is Optional: defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                      secretRef:
                                        description: |-
                                          secretRef is Optional: secretRef is reference to the secret object containing
                                          sensitive information to pass to the plugin scripts. This may be
                                          empty if no secret object is specified. If the secret object
                                          contains more than one secret, all secrets are passed to the plugin
                                          scripts.
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                    required:
                                    - driver
                                    type: object
                                  flocker:
                                    description: |-
                                      flocker represents a Flocker volume attached to a kubelet's host machine. This depends on the Flocker control service running.
                                      Deprecated: Flocker is deprecated and the in-tree flocker type is no longer supported.
                                    properties:
                                      datasetName:
                                        description: |-
                                          datasetName is the name of the dataset, stored as metadata -> name on the dataset for Flocker.
                                          It should be considered deprecated.
                                        type: string
                                      datasetUUID:
                                        description: datasetUUID is the UUID of the
                                          dataset. This is unique identifier of a
                                          Flocker dataset
                                        type: string
                                    type: object
                                  gcePersistentDisk:
                                    description: |-
                                      gcePersistentDisk represents a GCE Disk resource that is attached to a
                                      kubelet's host machine and then exposed to the pod.
                                      Deprecated: GCEPersistentDisk is deprecated. All operations for the in-tree
                                      gcePersistentDisk type are redirected to the pd.csi.storage.gke.io CSI driver.
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is filesystem type of the volume that you want to mount.
                                          Tip: Ensure that the filesystem type is supported by the host operating system.
                                          Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk
                                        type: string
                                      partition:
                                        description: |-
                                          partition is the partition in the volume that you want to mount.
                                          If omitted, the default is to mount by volume name.
                                          Examples: For volume /dev/sda1, you specify the partition as "1".
                                          Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty).
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk
                                        format: int32
                                        type: integer
                                      pdName:
                                        description: |-
                                          pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly here will force the ReadOnly setting in VolumeMounts.
                                          Defaults to false.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk
                                        type: boolean
                                    required:
                                    - pdName
                                    type: object
                                  gitRepo:
                                    description: |-
                                      gitRepo represents a git repository at a particular revision.
                                      Deprecated: GitRepo is deprecated. To provision a container with a git repo, mount an
                                      EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir
                                      into the Pod's container.
                                    properties:
                                      directory:
                                        description: |-
                                          directory is the target directory name.
                                          Must not contain or start with '..'.  If '.' is supplied, the volume directory will be the
                                          git repository.  Otherwise, if specified, the volume will contain the git repository in
                                          the subdirectory with the given name.
                                        type: string
                                      repository:
                                        description: repository is the URL
                                        type: string
                                      revision:
                                        description: revision is the commit hash for
                                          the specified revision.
                                        type: string
                                    required:
                                    - repository
                                    type: object
                                  glusterfs:
                                    description: |-
                                      glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime.
                                      Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported.
                                    properties:
                                      endpoints:
                                        description: endpoints is the endpoint name
                                          that details Glusterfs topology.
                                        type: string
                                      path:
                                        description: |-
                                          path is the Glusterfs volume path.
                                          More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly here will force the Glusterfs volume to be mounted with read-only permissions.
                                          Defaults to false.
                                          More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod
                                        type: boolean
                                    required:
                                    - endpoints
                                    - path
                                    type: object
                                  hostPath:
                                    description: |-
                                      hostPath represents a pre-existing file or directory on the host
                                      machine that is directly exposed to the container. This is generally
                                      used for system agents or other privileged things that are allowed
                                      to see the host machine. Most containers will NOT need this.
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath
                                    properties:
                                      path:
                                        description: |-
                                          path of the directory on the host.
                                          If the path is a symlink, it will follow the link to the real path.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath
                                        type: string
                                      type:
                                        description: |-
                                          type for HostPath Volume
                                          Defaults to ""
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath
                                        type: string
                                    required:
                                    - path
                                    type: object
                                  image:
                                    description: |-
                                      image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine.
                                      The volume is resolved at pod startup depending on which PullPolicy value is provided:

                                      - Always: the kubelet always attempts to pull the reference. Container creation will fail if the pull fails.
                                      - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present.
                                      - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails.

                                      The volume gets re-resolved if the pod gets deleted and recreated, which means that new remote content will become available on pod recreation.
                                      A failure to resolve or pull the image during pod startup will block containers from starting and may add significant latency. Failures will be retried using normal volume backoff and will be reported on the pod reason and message.
                                      The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field.
                                      The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images.
                                      The volume will be mounted read-only (ro) and with non-executable files (noexec).
                                      Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath) before 1.33.
                                      The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type.
                                    properties:
                                      pullPolicy:
                                        description: |-
                                          Policy for pulling OCI objects. Possible values are:
                                          Always: the kubelet always attempts to pull the reference. Container creation will fail if the pull fails.
                                          Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present.
                                          IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails.
                                          Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
                                        type: string
                                      reference:
                                        description: |-
                                          Required: Image or artifact reference to be used.
                                          Behaves in the same way as pod.spec.containers[*].image.
                                          Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets.
                                          More info: https://kubernetes.io/docs/concepts/containers/images
                                          This field is optional to allow higher level config management to default or override
                                          container images in workload controllers like Deployments and StatefulSets.
                                        type: string
                                    type: object
                                  iscsi:
                                    description: |-
                                      iscsi represents an ISCSI Disk resource that is attached to a
                                      kubelet's host machine and then exposed to the pod.
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi
                                    properties:
                                      chapAuthDiscovery:
                                        description: chapAuthDiscovery defines whether
                                          to support iSCSI Discovery CHAP authentication
                                        type: boolean
                                      chapAuthSession:
                                        description: chapAuthSession defines whether
                                          to support iSCSI Session CHAP authentication
                                        type: boolean
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type of the volume that you want to mount.
                                          Tip: Ensure that the filesystem type is supported by the host operating system.
                                          Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi
                                        type: string
                                      initiatorName:
                                        description: |-
                                          initiatorName is the custom iSCSI Initiator Name.
                                          If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface
                                          <target portal>:<volume name> will be created for the connection.
                                        type: string
                                      iqn:
                                        description: iqn is the target iSCSI Qualified
                                          Name.
                                        type: string
                                      iscsiInterface:
                                        default: default
                                        description: |-
                                          iscsiInterface is the interface Name that uses an iSCSI transport.
                                          Defaults to 'default' (tcp).
                                        type: string
                                      lun:
                                        description: lun represents iSCSI Target Lun
                                          number.
                                        format: int32
                                        type: integer
                                      portals:
                                        description: |-
                                          portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port
                                          is other than default (typically TCP ports 860 and 3260).
                                        items:
                                          type: string
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      readOnly:
                                        description: |-
                                          readOnly here will force the ReadOnly setting in VolumeMounts.
                                          Defaults to false.
                                        type: boolean
                                      secretRef:
                                        description: secretRef is the CHAP Secret
                                          for iSCSI target and initiator authentication
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      targetPortal:
                                        description: |-
                                          targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port
                                          is other than default (typically TCP ports 860 and 3260).
                                        type: string
                                    required:
                                    - iqn
                                    - lun
                                    - targetPortal
                                    type: object
                                  name:
                                    description: |-
                                      name of the volume.
                                      Must be a DNS_LABEL and unique within the pod.
                                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                    type: string
                                  nfs:
                                    description: |-
                                      nfs represents an NFS mount on the host that shares a pod's lifetime
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs
                                    properties:
                                      path:
                                        description: |-
                                          path that is exported by the NFS server.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly here will force the NFS export to be mounted with read-only permissions.
                                          Defaults to false.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs
                                        type: boolean
                                      server:
                                        description: |-
                                          server is the hostname or IP address of the NFS server.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs
                                        type: string
                                    required:
                                    - path
                                    - server
                                    type: object
                                  persistentVolumeClaim:
                                    description: |-
                                      persistentVolumeClaimVolumeSource represents a reference to a
                                      PersistentVolumeClaim in the same namespace.
                                      More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims
                                    properties:
                                      claimName:
                                        description: |-
                                          claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume.
                                          More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly will force the ReadOnly setting in VolumeMounts.
                                          Default false.
                                        type: boolean
                                    required:
                                    - claimName
                                    type: object
                                  photonPersistentDisk:
                                    description: |-
                                      photonPersistentDisk represents a PhotonController persistent disk attached and mounted on kubelet's host machine.
                                      Deprecated: PhotonPersistentDisk is deprecated and the in-tree photonPersistentDisk type is no longer supported.
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                        type: string
                                      pdID:
                                        description: pdID is the ID that identifies
                                          Photon Controller persistent disk
                                        type: string
                                    required:
                                    - pdID
                                    type: object
                                  portworxVolume:
                                    description: |-
                                      portworxVolume represents a portworx volume attached and mounted on kubelet's host machine.
                                      Deprecated: PortworxVolume is deprecated. All operations for the in-tree portworxVolume type
                                      are redirected to the pxd.portworx.com CSI driver when the CSIMigrationPortworx feature-gate
                                      is on.
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType represents the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified.
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                      volumeID:
                                        description: volumeID uniquely identifies
                                          a Portworx volume
                                        type: string
                                    required:
                                    - volumeID
                                    type: object
                                  projected:
                                    description: projected items for all-in-one resources
                                      (secrets, configmaps, and downward API)
                                    properties:
                                      defaultMode:
                                        description: |-
                                          defaultMode are the mode bits used to set permissions on created files by default.
                                          Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                          YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                          Directories within the path are not affected by this setting.
                                          This might be in conflict with other options that affect the file
                                          mode, like fsGroup, and the result can be other mode bits set.
                                        format: int32
                                        type: integer
                                      sources:
                                        description: |-
                                          sources is the list of volume projections. Each entry in this list
                                          handles one source.
                                        items:
                                          description: |-
                                            Projection that may be projected along with other supported volume types.
                                            Exactly one of these fields must be set.
                                          properties:
                                            clusterTrustBundle:
                                              description: |-
                                                ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field
                                                of ClusterTrustBundle objects in an auto-updating file.

                                                Alpha, gated by the ClusterTrustBundleProjection feature gate.

                                                ClusterTrustBundle objects can either be selected by name, or by the
                                                combination of signer name and a label selector.

                                                Kubelet performs aggressive normalization of the PEM contents written
                                                into the pod filesystem.  Esoteric PEM features such as inter-block
                                                comments and block headers are stripped.  Certificates are deduplicated.
                                                The ordering of certificates within the file is arbitrary, and Kubelet
                                                may change the order over time.
                                              properties:
                                                labelSelector:
                                                  description: |-
                                                    Select all ClusterTrustBundles that match this label selector.  Only has
                                                    effect if signerName is set.  Mutually-exclusive with name.  If unset,
                                                    interpreted as "match nothing".  If set but empty, interpreted as "match
                                                    everything".
                                                  properties:
                                                    matchExpressions:
                                                      description: matchExpressions
                                                        is a list of label selector
                                                        requirements. The requirements
                                                        are ANDed.
                                                      items:
                                                        description: |-
                                                          A label selector requirement is a selector that contains values, a key, and an operator that
                                                          relates the key and values.
                                                        properties:
                                                          key:
                                                            description: key is the
                                                              label key that the selector
                                                              applies to.
                                                            type: string
                                                          operator:
                                                            description: |-
                                                              operator represents a key's relationship to a set of values.
                                                              Valid operators are In, NotIn, Exists and DoesNotExist.
                                                            type: string
                                                          values:
                                                            description: |-
                                                              values is an array of string values. If the operator is In or NotIn,
                                                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                                              the values array must be empty. This array is replaced during a strategic
                                                              merge patch.
                                                            items:
                                                              type: string
                                                            type: array
                                                            x-kubernetes-list-type: atomic
                                                        required:
                                                        - key
                                                        - operator
                                                        type: object
                                                      type: array
                                                      x-kubernetes-list-type: atomic
                                                    matchLabels:
                                                      additionalProperties:
                                                        type: string
                                                      description: |-
                                                        matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                                                        map is equivalent to an element of matchExpressions, whose key field is "key", the
                                                        operator is "In", and the values array contains only "value". The requirements are ANDed.
                                                      type: object
                                                  type: object
                                                  x-kubernetes-map-type: atomic
                                                name:
                                                  description: |-
                                                    Select a single ClusterTrustBundle by object name.  Mutually-exclusive
                                                    with signerName and labelSelector.
                                                  type: string
                                                optional:
                                                  description: |-
                                                    If true, don't block pod startup if the referenced ClusterTrustBundle(s)
                                                    aren't available.  If using name, then the named ClusterTrustBundle is
                                                    allowed not to exist.  If using signerName, then the combination of
                                                    signerName and labelSelector is allowed to match zero
                                                    ClusterTrustBundles.
                                                  type: boolean
                                                path:
                                                  description: Relative path from
                                                    the volume root to write the bundle.
                                                  type: string
                                                signerName:
                                                  description: |-
                                                    Select all ClusterTrustBundles that match this signer name.
                                                    Mutually-exclusive with name.  The contents of all selected
                                                    ClusterTrustBundles will be unified and deduplicated.
                                                  type: string
                                              required:
                                              - path
                                              type: object
                                            configMap:
                                              description: configMap information about
                                                the configMap data to project
                                              properties:
                                                items:
                                                  description: |-
                                                    items if unspecified, each key-value pair in the Data field of the referenced
                                                    ConfigMap will be projected into the volume as a file whose name is the
                                                    key and content is the value. If specified, the listed keys will be
                                                    projected into the specified paths, and unlisted keys will not be
                                                    present. If a key is specified which is not present in the ConfigMap,
                                                    the volume setup will error unless it is marked optional. Paths must be
                                                    relative and may not contain the '..' path or start with '..'.
                                                  items:
                                                    description: Maps a string key
                                                      to a path within a volume.
                                                    properties:
                                                      key:
                                                        description: key is the key
                                                          to project.
                                                        type: string
                                                      mode:
                                                        description: |-
                                                          mode is Optional: mode bits used to set permissions on this file.
                                                          Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                                          YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                                          If not specified, the volume defaultMode will be used.
                                                          This might be in conflict with other options that affect the file
                                                          mode, like fsGroup, and the result can be other mode bits set.
                                                        format: int32
                                                        type: integer
                                                      path:
                                                        description: |-
                                                          path is the relative path of the file to map the key to.
                                                          May not be an absolute path.
                                                          May not contain the path element '..'.
                                                          May not start with the string '..'.
                                                        type: string
                                                    required:
                                                    - key
                                                    - path
                                                    type: object
                                                  type: array
                                                  x-kubernetes-list-type: atomic
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: optional specify whether
                                                    the ConfigMap or its keys must
                                                    be defined
                                                  type: boolean
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            downwardAPI:
                                              description: downwardAPI information
                                                about the downwardAPI data to project
                                              properties:
                                                items:
                                                  description: Items is a list of
                                                    DownwardAPIVolume file
                                                  items:
                                                    description: DownwardAPIVolumeFile
                                                      represents information to create
                                                      the file containing the pod
                                                      field
                                                    properties:
                                                      fieldRef:
                                                        description: 'Required: Selects
                                                          a field of the pod: only
                                                          annotations, labels, name,
                                                          namespace and uid are supported.'
                                                        properties:
                                                          apiVersion:
                                                            description: Version of
                                                              the schema the FieldPath
                                                              is written in terms
                                                              of, defaults to "v1".
                                                            type: string
                                                          fieldPath:
                                                            description: Path of the
                                                              field to select in the
                                                              specified API version.
                                                            type: string
                                                        required:
                                                        - fieldPath
                                                        type: object
                                                        x-kubernetes-map-type: atomic
                                                      mode:
                                                        description: |-
                                                          Optional: mode bits used to set permissions on this file, must be an octal value
                                                          between 0000 and 0777 or a decimal value between 0 and 511.
                                                          YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                                          If not specified, the volume defaultMode will be used.
                                                          This might be in conflict with other options that affect the file
                                                          mode, like fsGroup, and the result can be other mode bits set.
                                                        format: int32
                                                        type: integer
                                                      path:
                                                        description: 'Required: Path
                                                          is the relative path name
                                                          of the file to be created.
                                                          Must not be absolute or
                                                          contain the ''..'' path.
                                                          Must be utf-8 encoded. The
                                                          first item of the relative
                                                          path must not start with
                                                          ''..'''
                                                        type: string
                                                      resourceFieldRef:
                                                        description: |-
                                                          Selects a resource of the container: only resources limits and requests
                                                          (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported.
                                                        properties:
                                                          containerName:
                                                            description: 'Container
                                                              name: required for volumes,
                                                              optional for env vars'
                                                            type: string
                                                          divisor:
                                                            anyOf:
                                                            - type: integer
                                                            - type: string
                                                            description: Specifies
                                                              the output format of
                                                              the exposed resources,
                                                              defaults to "1"
                                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                                                            x-kubernetes-int-or-string: true
                                                          resource:
                                                            description: 'Required:
                                                              resource to select'
                                                            type: string
                                                        required:
                                                        - resource
                                                        type: object
                                                        x-kubernetes-map-type: atomic
                                                    required:
                                                    - path
                                                    type: object
                                                  type: array
                                                  x-kubernetes-list-type: atomic
                                              type: object
                                            podCertificate:
                                              description: |-
                                                Projects an auto-rotating credential bundle (private key and certificate
                                                chain) that the pod can use either as a TLS client or server.

                                                Kubelet generates a private key and uses it to send a
                                                PodCertificateRequest to the named signer.  Once the signer approves the
                                                request and issues a certificate chain, Kubelet writes the key and
                                                certificate chain to the pod filesystem.  The pod does not start until
                                                certificates have been issued for each podCertificate projected volume
                                                source in its spec.

                                                Kubelet will begin trying to rotate the certificate at the time indicated
                                                by the signer using the PodCertificateRequest.Status.BeginRefreshAt
                                                timestamp.

                                                Kubelet can write a single file, indicated by the credentialBundlePath
                                                field, or separate files, indicated by the keyPath and
                                                certificateChainPath fields.

                                                The credential bundle is a single file in PEM format.  The first PEM
                                                entry is the private key (in PKCS#8 format), and the remaining PEM
                                                entries are the certificate chain issued by the signer (typically,
                                                signers will return their certificate chain in leaf-to-root order).

                                                Prefer using the credential bundle format, since your application code
                                                can read it atomically.  If you use keyPath and certificateChainPath,
                                                your application must make two separate file reads. If these coincide
                                                with a certificate rotation, it is possible that the private key and leaf
                                                certificate you read may not correspond to each other.  Your application
                                                will need to check for this condition, and re-read until they are
                                                consistent.

                                                The named signer chooses the format of the certificate it
                                                issues; consult the signer implementation's documentation to learn how to
                                                use the certificates it issues.
                                              properties:
                                                certificateChainPath:
                                                  description: |-
                                                    Write the certificate chain at this path in the projected volume.

                                                    Most applications should use credentialBundlePath.  When using keyPath
                                                    and certificateChainPath, your application needs to check that the key
                                                    and leaf certificate are consistent, because it is possible to read the
                                                    files mid-rotation.
                                                  type: string
                                                credentialBundlePath:
                                                  description: |-
                                                    Write the credential bundle at this path in the projected volume.

                                                    The credential bundle is a single file that contains multiple PEM blocks.
                                                    The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private
                                                    key.

                                                    The remaining blocks are CERTIFICATE blocks, containing the issued
                                                    certificate chain from the signer (leaf and any intermediates).

                                                    Using credentialBundlePath lets your Pod's application code make a single
                                                    atomic read that retrieves a consistent key and certificate chain.  If you
                                                    project them to separate files, your application code will need to
                                                    additionally check that the leaf certificate was issued to the key.
                                                  type: string
                                                keyPath:
                                                  description: |-
                                                    Write the key at this path in the projected volume.

                                                    Most applications should use credentialBundlePath.  When using keyPath
                                                    and certificateChainPath, your application needs to check that the key
                                                    and leaf certificate are consistent, because it is possible to read the
                                                    files mid-rotation.
                                                  type: string
                                                keyType:
                                                  description: |-
                                                    The type of keypair Kubelet will generate for the pod.

                                                    Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384",
                                                    "ECDSAP521", and "ED25519".
                                                  type: string
                                                maxExpirationSeconds:
                                                  description: |-
                                                    maxExpirationSeconds is the maximum lifetime permitted for the
                                                    certificate.

                                                    Kubelet copies this value verbatim into the PodCertificateRequests it
                                                    generates for this projection.

                                                    If omitted, kube-apiserver will set it to 86400 (24 hours). kube-apiserver
                                                    will reject values shorter than 3600 (1 hour).  The maximum allowable
                                                    value is 7862400 (91 days).

                                                    The signer implementation is then free to issue a certificate with any
                                                    lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600
                                                    seconds (1 hour).  This constraint is enforced by kube-apiserver.
                                                    `kubernetes.io` signers will never issue certificates with a lifetime
                                                    longer than 24 hours.
                                                  format: int32
                                                  type: integer
                                                signerName:
                                                  description: Kubelet's generated
                                                    CSRs will be addressed to this
                                                    signer.
                                                  type: string
                                                userAnnotations:
                                                  additionalProperties:
                                                    type: string
                                                  description: |-
                                                    userAnnotations allow pod authors to pass additional information to
                                                    the signer implementation.  Kubernetes does not restrict or validate this
                                                    metadata in any way.

                                                    These values are copied verbatim into the `spec.unverifiedUserAnnotations` field of
                                                    the PodCertificateRequest objects that Kubelet creates.

                                                    Entries are subject to the same validation as object metadata annotations,
                                                    with the addition that all keys must be domain-prefixed. No restrictions
                                                    are placed on values, except an overall size limitation on the entire field.

                                                    Signers should document the keys and values they support. Signers should
                                                    deny requests that contain keys they do not recognize.
                                                  type: object
                                              required:
                                              - keyType
                                              - signerName
                                              type: object
                                            secret:
                                              description: secret information about
                                                the secret data to project
                                              properties:
                                                items:
                                                  description: |-
                                                    items if unspecified, each key-value pair in the Data field of the referenced
                                                    Secret will be projected into the volume as a file whose name is the
                                                    key and content is the value. If specified, the listed keys will be
                                                    projected into the specified paths, and unlisted keys will not be
                                                    present. If a key is specified which is not present in the Secret,
                                                    the volume setup will error unless it is marked optional. Paths must be
                                                    relative and may not contain the '..' path or start with '..'.
                                                  items:
                                                    description: Maps a string key
                                                      to a path within a volume.
                                                    properties:
                                                      key:
                                                        description: key is the key
                                                          to project.
                                                        type: string
                                                      mode:
                                                        description: |-
                                                          mode is Optional: mode bits used to set permissions on this file.
                                                          Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                                          YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                                          If not specified, the volume defaultMode will be used.
                                                          This might be in conflict with other options that affect the file
                                                          mode, like fsGroup, and the result can be other mode bits set.
                                                        format: int32
                                                        type: integer
                                                      path:
                                                        description: |-
                                                          path is the relative path of the file to map the key to.
                                                          May not be an absolute path.
                                                          May not contain the path element '..'.
                                                          May not start with the string '..'.
                                                        type: string
                                                    required:
                                                    - key
                                                    - path
                                                    type: object
                                                  type: array
                                                  x-kubernetes-list-type: atomic
                                                name:
                                                  default: ""
                                                  description: |-
                                                    Name of the referent.
                                                    This field is effectively required, but due to backwards compatibility is
                                                    allowed to be empty. Instances of this type with an empty value here are
                                                    almost certainly wrong.
                                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                                  type: string
                                                optional:
                                                  description: optional field specify
                                                    whether the Secret or its key
                                                    must be defined
                                                  type: boolean
                                              type: object
                                              x-kubernetes-map-type: atomic
                                            serviceAccountToken:
                                              description: serviceAccountToken is
                                                information about the serviceAccountToken
                                                data to project
                                              properties:
                                                audience:
                                                  description: |-
                                                    audience is the intended audience of the token. A recipient of a token
                                                    must identify itself with an identifier specified in the audience of the
                                                    token, and otherwise should reject the token. The audience defaults to the
                                                    identifier of the apiserver.
                                                  type: string
                                                expirationSeconds:
                                                  description: |-
                                                    expirationSeconds is the requested duration of validity of the service
                                                    account token. As the token approaches expiration, the kubelet volume
                                                    plugin will proactively rotate the service account token. The kubelet will
                                                    start trying to rotate the token if the token is older than 80 percent of
                                                    its time to live or if the token is older than 24 hours. Defaults to 1 hour
                                                    and must be at least 10 minutes.
                                                  format: int64
                                                  type: integer
                                                path:
                                                  description: |-
                                                    path is the path relative to the mount point of the file to project the
                                                    token into.
                                                  type: string
                                              required:
                                              - path
                                              type: object
                                          type: object
                                        type: array
                                        x-kubernetes-list-type: atomic
                                    type: object
                                  quobyte:
                                    description: |-
                                      quobyte represents a Quobyte mount on the host that shares a pod's lifetime.
                                      Deprecated: Quobyte is deprecated and the in-tree quobyte type is no longer supported.
                                    properties:
                                      group:
                                        description: |-
                                          group to map volume access to
                                          Default is no group
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly here will force the Quobyte volume to be mounted with read-only permissions.
                                          Defaults to false.
                                        type: boolean
                                      registry:
                                        description: |-
                                          registry represents a single or multiple Quobyte Registry services
                                          specified as a string as host:port pair (multiple entries are separated with commas)
                                          which acts as the central registry for volumes
                                        type: string
                                      tenant:
                                        description: |-
                                          tenant owning the given Quobyte volume in the Backend
                                          Used with dynamically provisioned Quobyte volumes, value is set by the plugin
                                        type: string
                                      user:
                                        description: |-
                                          user to map volume access to
                                          Defaults to serviceaccount user
                                        type: string
                                      volume:
                                        description: volume is a string that references
                                          an already created Quobyte volume by name.
                                        type: string
                                    required:
                                    - registry
                                    - volume
                                    type: object
                                  rbd:
                                    description: |-
                                      rbd represents a Rados Block Device mount on the host that shares a pod's lifetime.
                                      Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported.
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type of the volume that you want to mount.
                                          Tip: Ensure that the filesystem type is supported by the host operating system.
                                          Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd
                                        type: string
                                      image:
                                        description: |-
                                          image is the rados image name.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        type: string
                                      keyring:
                                        default: /etc/ceph/keyring
                                        description: |-
                                          keyring is the path to key ring for RBDUser.
                                          Default is /etc/ceph/keyring.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        type: string
                                      monitors:
                                        description: |-
                                          monitors is a collection of Ceph monitors.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        items:
                                          type: string
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      pool:
                                        default: rbd
                                        description: |-
                                          pool is the rados pool name.
                                          Default is rbd.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly here will force the ReadOnly setting in VolumeMounts.
                                          Defaults to false.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        type: boolean
                                      secretRef:
                                        description: |-
                                          secretRef is name of the authentication secret for RBDUser. If provided
                                          overrides keyring.
                                          Default is nil.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      user:
                                        default: admin
                                        description: |-
                                          user is the rados user name.
                                          Default is admin.
                                          More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it
                                        type: string
                                    required:
                                    - image
                                    - monitors
                                    type: object
                                  scaleIO:
                                    description: |-
                                      scaleIO represents a ScaleIO persistent volume attached and mounted on Kubernetes nodes.
                                      Deprecated: ScaleIO is deprecated and the in-tree scaleIO type is no longer supported.
                                    properties:
                                      fsType:
                                        default: xfs
                                        description: |-
                                          fsType is the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs".
                                          Default is "xfs".
                                        type: string
                                      gateway:
                                        description: gateway is the host address of
                                          the ScaleIO API Gateway.
                                        type: string
                                      protectionDomain:
                                        description: protectionDomain is the name
                                          of the ScaleIO Protection Domain for the
                                          configured storage.
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly Defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                      secretRef:
                                        description: |-
                                          secretRef references to the secret for ScaleIO user and other
                                          sensitive information. If this is not provided, Login operation will fail.
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      sslEnabled:
                                        description: sslEnabled Flag enable/disable
                                          SSL communication with Gateway, default
                                          false
                                        type: boolean
                                      storageMode:
                                        default: ThinProvisioned
                                        description: |-
                                          storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned.
                                          Default is ThinProvisioned.
                                        type: string
                                      storagePool:
                                        description: storagePool is the ScaleIO Storage
                                          Pool associated with the protection domain.
                                        type: string
                                      system:
                                        description: system is the name of the storage
                                          system as configured in ScaleIO.
                                        type: string
                                      volumeName:
                                        description: |-
                                          volumeName is the name of a volume already created in the ScaleIO system
                                          that is associated with this volume source.
                                        type: string
                                    required:
                                    - gateway
                                    - secretRef
                                    - system
                                    type: object
                                  secret:
                                    description: |-
                                      secret represents a secret that should populate this volume.
                                      More info: https://kubernetes.io/docs/concepts/storage/volumes#secret
                                    properties:
                                      defaultMode:
                                        description: |-
                                          defaultMode is Optional: mode bits used to set permissions on created files by default.
                                          Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                          YAML accepts both octal and decimal values, JSON requires decimal values
                                          for mode bits. Defaults to 0644.
                                          Directories within the path are not affected by this setting.
                                          This might be in conflict with other options that affect the file
                                          mode, like fsGroup, and the result can be other mode bits set.
                                        format: int32
                                        type: integer
                                      items:
                                        description: |-
                                          items If unspecified, each key-value pair in the Data field of the referenced
                                          Secret will be projected into the volume as a file whose name is the
                                          key and content is the value. If specified, the listed keys will be
                                          projected into the specified paths, and unlisted keys will not be
                                          present. If a key is specified which is not present in the Secret,
                                          the volume setup will error unless it is marked optional. Paths must be
                                          relative and may not contain the '..' path or start with '..'.
                                        items:
                                          description: Maps a string key to a path
                                            within a volume.
                                          properties:
                                            key:
                                              description: key is the key to project.
                                              type: string
                                            mode:
                                              description: |-
                                                mode is Optional: mode bits used to set permissions on this file.
                                                Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511.
                                                YAML accepts both octal and decimal values, JSON requires decimal values for mode bits.
                                                If not specified, the volume defaultMode will be used.
                                                This might be in conflict with other options that affect the file
                                                mode, like fsGroup, and the result can be other mode bits set.
                                              format: int32
                                              type: integer
                                            path:
                                              description: |-
                                                path is the relative path of the file to map the key to.
                                                May not be an absolute path.
                                                May not contain the path element '..'.
                                                May not start with the string '..'.
                                              type: string
                                          required:
                                          - key
                                          - path
                                          type: object
                                        type: array
                                        x-kubernetes-list-type: atomic
                                      optional:
                                        description: optional field specifies whether
                                          the Secret or its keys must be defined
                                        type: boolean
                                      secretName:
                                        description: |-
                                          secretName is the name of the secret in the pod's namespace to use.
                                          More info: https://kubernetes.io/docs/concepts/storage/volumes#secret
                                        type: string
                                    type: object
                                  storageos:
                                    description: |-
                                      storageOS represents a StorageOS volume attached and mounted on Kubernetes nodes.
                                      Deprecated: StorageOS is deprecated and the in-tree storageos type is no longer supported.
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is the filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                        type: string
                                      readOnly:
                                        description: |-
                                          readOnly defaults to false (read/write). ReadOnly here will force
                                          the ReadOnly setting in VolumeMounts.
                                        type: boolean
                                      secretRef:
                                        description: |-
                                          secretRef specifies the secret to use for obtaining the StorageOS API
                                          credentials.  If not specified, default values will be attempted.
                                        properties:
                                          name:
                                            default: ""
                                            description: |-
                                              Name of the referent.
                                              This field is effectively required, but due to backwards compatibility is
                                              allowed to be empty. Instances of this type with an empty value here are
                                              almost certainly wrong.
                                              More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
                                            type: string
                                        type: object
                                        x-kubernetes-map-type: atomic
                                      volumeName:
                                        description: |-
                                          volumeName is the human-readable name of the StorageOS volume.  Volume
                                          names are only unique within a namespace.
                                        type: string
                                      volumeNamespace:
                                        description: |-
                                          volumeNamespace specifies the scope of the volume within StorageOS.  If no
                                          namespace is specified then the Pod's namespace will be used.  This allows the
                                          Kubernetes name scoping to be mirrored within StorageOS for tighter integration.
                                          Set VolumeName to any name to override the default behaviour.
                                          Set to "default" if you are not using namespaces within StorageOS.
                                          Namespaces that do not pre-exist within StorageOS will be created.
                                        type: string
                                    type: object
                                  vsphereVolume:
                                    description: |-
                                      vsphereVolume represents a vSphere volume attached and mounted on kubelets host machine.
                                      Deprecated: VsphereVolume is deprecated. All operations for the in-tree vsphereVolume type
                                      are redirected to the csi.vsphere.vmware.com CSI driver.
                                    properties:
                                      fsType:
                                        description: |-
                                          fsType is filesystem type to mount.
                                          Must be a filesystem type supported by the host operating system.
                                          Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
                                        type: string
                                      storagePolicyID:
                                        description: storagePolicyID is the storage
                                          Policy Based Management (SPBM) profile ID
                                          associated with the StoragePolicyName.
                                        type: string
                                      storagePolicyName:
                                        description: storagePolicyName is the storage
                                          Policy Based Management (SPBM) profile name.
                                        type: string
                                      volumePath:
                                        description: volumePath is the path that identifies
                                          vSphere volume vmdk
                                        type: string
                                    required:
                                    - volumePath
                                    type: object
                                required:
                                - name
                                type: object
                              type: array
                              x-kubernetes-list-map-keys:
                              - name
                              x-kubernetes-list-type: map
                            workloadRef:
                              description: |-
                                WorkloadRef provides a reference to the Workload object that this Pod belongs to.
                                This field is used by the scheduler to identify the PodGroup and apply the
                                correct group scheduling policies. The Workload object referenced
                                by this field may not exist at the time the Pod is created.
                                This field is immutable, but a Workload object with the same name
                                may be recreated with different policies. Doing this during pod scheduling
                                may result in the placement not conforming to the expected policies.
                              properties:
                                name:
                                  description: |-
                                    Name defines the name of the Workload object this Pod belongs to.
                                    Workload must be in the same namespace as the Pod.
                                    If it doesn't match any existing Workload, the Pod will remain unschedulable
                                    until a Workload object is created and observed by the kube-scheduler.
                                    It must be a DNS subdomain.
                                  type: string
                                podGroup:
                                  description: |-
                                    PodGroup is the name of the PodGroup within the Workload that this Pod
                                    belongs to. If it doesn't match any existing PodGroup within the Workload,
                                    the Pod will remain unschedulable until the Workload object is recreated
                                    and observed by the kube-scheduler. It must be a DNS label.
                                  type: string
                                podGroupReplicaKey:
                                  description: |-
                                    PodGroupReplicaKey specifies the replica key of the PodGroup to which this
                                    Pod belongs. It is used to distinguish pods belonging to different replicas
                                    of the same pod group. The pod group policy is applied separately to each replica.
                                    When set, it must be a DNS label.
                                  type: string
                              required:
                              - name
                              - podGroup
                              type: object
                          required:
                          - containers
                          type: object
                      type: object
                  type: object
                description: |-
                  MPIReplicaSpecs contains maps from `MPIReplicaType` to `ReplicaSpec` that
                  specify the MPI replicas to run.
                type: object
              runLauncherAsWorker:
                default: false
                description: |-
                  RunLauncherAsWorker indicates whether to run the worker process in the launcher.
                  Defaults to false.
                type: boolean
              runPolicy:
                description: RunPolicy encapsulates various runtime policies of the
                  job.
                properties:
                  activeDeadlineSeconds:
                    description: |-
                      Specifies the duration in seconds relative to the startTime that the job may be active
                      before the system tries to terminate it; value must be positive integer.
                    format: int64
                    type: integer
                  backoffLimit:
                    description: Optional number of retries before marking this job
                      failed.
                    format: int32
                    type: integer
                  cleanPodPolicy:
                    description: |-
                      CleanPodPolicy defines the policy to kill pods after the job completes.
                      Defaults to Running.
                    type: string
                  managedBy:
                    description: |-
                      ManagedBy is used to indicate the controller or entity that manages a MPIJob.
                      The value must be either empty, 'kubeflow.org/mpi-operator' or
                      'kueue.x-k8s.io/multikueue'.
                      The mpi-operator reconciles a MPIJob which doesn't have this
                      field at all or the field value is the reserved string
                      'kubeflow.org/mpi-operator', but delegates reconciling the MPIJob
                      with 'kueue.x-k8s.io/multikueue' to the Kueue.
                      The field is immutable.
                    type: string
                  schedulingPolicy:
                    description: SchedulingPolicy defines the policy related to scheduling,
                      e.g. gang-scheduling
                    properties:
                      minAvailable:
                        description: |-
                          MinAvailable defines the minimal number of members to run the PodGroup.
                          If the gang-scheduling isn't empty, input is passed to `.spec.minMember` in PodGroup.
                          Note that, when using this field,
                          you need to make sure the application supports resizing (e.g., Elastic Horovod).

                          If not set, it defaults to the number of workers.
                        format: int32
                        type: integer
                      minResources:
                        additionalProperties:
                          anyOf:
                          - type: integer
                          - type: string
                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                          x-kubernetes-int-or-string: true
                        description: |-
                          MinResources defines the minimal resources of members to run the PodGroup.
                          If the gang-scheduling isn't empty,
                          input is passed to `.spec.minResources` in PodGroup for scheduler-plugins.
                        type: object
                      priorityClass:
                        description: |-
                          PriorityClass defines the PodGroup's PriorityClass.
                          If the gang-scheduling is set to the volcano,
                          input is passed to `.spec.priorityClassName` in PodGroup for volcano,
                          and if it is set to the scheduler-plugins,
                          input isn't passed to PodGroup for scheduler-plugins.
                        type: string
                      queue:
                        description: |-
                          Queue defines the queue name to allocate resource for PodGroup.
                          If the gang-scheduling is set to the volcano,
                          input is passed to `.spec.queue` in PodGroup for the volcano,
                          and if it is set to the scheduler-plugins,
                          input isn't passed to PodGroup.
                        type: string
                      scheduleTimeoutSeconds:
                        description: |-
                          ScheduleTimeoutSeconds defines the maximal time for members to wait before running the PodGroup.
                          If the gang-scheduling is set to the scheduler-plugins,
                          input is passed to `.spec.scheduleTimeoutSeconds` in PodGroup for the scheduler-plugins,
                          and if it is set to the volcano, input isn't passed to PodGroup.
                        format: int32
                        type: integer
                    type: object
                  suspend:
                    default: false
                    description: |-
                      suspend specifies whether the MPIJob controller should create Pods or not.
                      If a MPIJob is created with suspend set to true, no Pods are created by
                      the MPIJob controller. If a MPIJob is suspended after creation (i.e. the
                      flag goes from false to true), the MPIJob controller will delete all
                      active Pods and PodGroups associated with this MPIJob. Also, it will suspend the
                      Launcher Job. Users must design their workload to gracefully handle this.
                      Suspending a Job will reset the StartTime field of the MPIJob.

                      Defaults to false.
                    type: boolean
                  ttlSecondsAfterFinished:
                    description: |-
                      TTLSecondsAfterFinished is the TTL to clean up jobs.
                      It may take extra ReconcilePeriod seconds for the cleanup, since
                      reconcile gets called periodically.
                      Default to infinite.
                    format: int32
                    type: integer
                type: object
              slotsPerWorker:
                default: 1
                description: |-
                  Specifies the number of slots per worker used in hostfile.
                  Defaults to 1.
                format: int32
                type: integer
              sshAuthMountPath:
                default: /root/.ssh
                description: |-
                  SSHAuthMountPath is the directory where SSH keys are mounted.
                  Defaults to "/root/.ssh".
                type: string
            required:
            - mpiReplicaSpecs
            type: object
          status:
            description: JobStatus represents the current observed state of the training
              Job.
            properties:
              completionTime:
                description: |-
                  Represents time when the job was completed. It is not guaranteed to
                  be set in happens-before order across separate operations.
                  It is represented in RFC3339 form and is in UTC.
                format: date-time
                type: string
              conditions:
                description: conditions is a list of current observed job conditions.
                items:
                  description: JobCondition describes the state of the job at a certain
                    point.
                  properties:
                    lastTransitionTime:
                      description: Last time the condition transitioned from one status
                        to another.
                      format: date-time
                      type: string
                    lastUpdateTime:
                      description: The last time this condition was updated.
                      format: date-time
                      type: string
                    message:
                      description: A human-readable message indicating details about
                        the transition.
                      type: string
                    reason:
                      description: The reason for the condition's last transition.
                      type: string
                    status:
                      description: status of the condition, one of True, False, Unknown.
                      enum:
                      - "True"
                      - "False"
                      - Unknown
                      type: string
                    type:
                      description: type of job condition.
                      type: string
                  required:
                  - status
                  - type
                  type: object
                type: array
                x-kubernetes-list-map-keys:
                - type
                x-kubernetes-list-type: map
              lastReconcileTime:
                description: |-
                  Represents last time when the job was reconciled. It is not guaranteed to
                  be set in happens-before order across separate operations.
                  It is represented in RFC3339 form and is in UTC.
                format: date-time
                type: string
              replicaStatuses:
                additionalProperties:
                  description: ReplicaStatus represents the current observed state
                    of the replica.
                  properties:
                    active:
                      description: The number of actively running pods.
                      format: int32
                      type: integer
                    failed:
                      description: The number of pods which reached phase failed.
                      format: int32
                      type: integer
                    labelSelector:
                      description: 'Deprecated: Use selector instead'
                      properties:
                        matchExpressions:
                          description: matchExpressions is a list of label selector
                            requirements. The requirements are ANDed.
                          items:
                            description: |-
                              A label selector requirement is a selector that contains values, a key, and an operator that
                              relates the key and values.
                            properties:
                              key:
                                description: key is the label key that the selector
                                  applies to.
                                type: string
                              operator:
                                description: |-
                                  operator represents a key's relationship to a set of values.
                                  Valid operators are In, NotIn, Exists and DoesNotExist.
                                type: string
                              values:
                                description: |-
                                  values is an array of string values. If the operator is In or NotIn,
                                  the values array must be non-empty. If the operator is Exists or DoesNotExist,
                                  the values array must be empty. This array is replaced during a strategic
                                  merge patch.
                                items:
                                  type: string
                                type: array
                                x-kubernetes-list-type: atomic
                            required:
                            - key
                            - operator
                            type: object
                          type: array
                          x-kubernetes-list-type: atomic
                        matchLabels:
                          additionalProperties:
                            type: string
                          description: |-
                            matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
                            map is equivalent to an element of matchExpressions, whose key field is "key", the
                            operator is "In", and the values array contains only "value". The requirements are ANDed.
                          type: object
                      type: object
                      x-kubernetes-map-type: atomic
                    selector:
                      description: |-
                        A selector is a label query over a set of resources. The result of matchLabels and
                        matchExpressions are ANDed. An empty selector matches all objects. A null
                        selector matches no objects.
                      type: string
                    succeeded:
                      description: The number of pods which reached phase succeeded.
                      format: int32
                      type: integer
                  type: object
                description: |-
                  replicaStatuses is map of ReplicaType and ReplicaStatus,
                  specifies the status of each replica.
                type: object
              startTime:
                description: |-
                  Represents time when the job was acknowledged by the job controller.
                  It is not guaranteed to be set in happens-before order across separate operations.
                  It is represented in RFC3339 form and is in UTC.
                format: date-time
                type: string
            type: object
        type: object
    served: true
    storage: true
    subresources:
      status: {}
---
# ServiceAccount "mpi-operator" in the "mpi-operator" namespace.
# It is the subject of the "mpi-operator" ClusterRoleBinding and is referenced
# by the "mpi-operator" Deployment via serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
  name: mpi-operator
  namespace: mpi-operator
---
# Aggregated ClusterRole "kubeflow-mpijobs-admin".
# It declares no rules of its own: its permissions are the union of every
# ClusterRole carrying the label
# rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true".
aggregationRule:
  clusterRoleSelectors:
  - matchLabels:
      rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true"
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
    # Presumably lets a broader Kubeflow admin role aggregate this one;
    # that role is not defined in this part of the manifest - confirm.
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
  name: kubeflow-mpijobs-admin
---
# ClusterRole "kubeflow-mpijobs-edit": read and write access to MPIJobs
# (and their status subresource) in the kubeflow.org API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
    # Matches the aggregationRule selector of "kubeflow-mpijobs-admin",
    # so these rules are folded into that role.
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true"
  name: kubeflow-mpijobs-edit
rules:
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs
  - mpijobs/status
  verbs:
  - get
  - list
  - watch
  - create
  - delete
  - deletecollection
  - patch
  - update
---
# ClusterRole "kubeflow-mpijobs-view": read-only (get/list/watch) access to
# MPIJobs and their status subresource in the kubeflow.org API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
  name: kubeflow-mpijobs-view
rules:
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs
  - mpijobs/status
  verbs:
  - get
  - list
  - watch
---
# ClusterRole "mpi-operator": the cluster-wide permissions granted to the
# "mpi-operator" ServiceAccount through the ClusterRoleBinding of the same name.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
  name: mpi-operator
rules:
# Core API: create/list/watch/update on ConfigMaps, Secrets and Services.
# Note that neither "get" nor "delete" is granted for these resources.
- apiGroups:
  - ""
  resources:
  - configmaps
  - secrets
  - services
  verbs:
  - create
  - list
  - watch
  - update
# Core API: full lifecycle management of Pods.
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - create
  - get
  - list
  - watch
  - delete
  - update
  - patch
# Core API: permission to open exec sessions in Pods.
- apiGroups:
  - ""
  resources:
  - pods/exec
  verbs:
  - create
# Core API: create/get/update on Endpoints.
- apiGroups:
  - ""
  resources:
  - endpoints
  verbs:
  - create
  - get
  - update
# Core API: emit Events.
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
# batch: create/list/update/watch on Jobs.
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - create
  - list
  - update
  - watch
# apiextensions: create and read CustomResourceDefinitions.
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - create
  - get
# kubeflow.org: unrestricted access to MPIJobs, their finalizers and status.
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs
  - mpijobs/finalizers
  - mpijobs/status
  verbs:
  - '*'
# coordination.k8s.io: unrestricted access to Leases.
# Presumably used for leader election - confirm against the operator.
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - '*'
# Gang-scheduling API groups (including volcano): unrestricted access to
# Queues and PodGroups.
- apiGroups:
  - scheduling.incubator.k8s.io
  - scheduling.sigs.dev
  - scheduling.volcano.sh
  resources:
  - queues
  - podgroups
  verbs:
  - '*'
# scheduling.x-k8s.io: unrestricted access to PodGroups.
- apiGroups:
  - scheduling.x-k8s.io
  resources:
  - podgroups
  verbs:
  - '*'
# scheduling.k8s.io: read-only access to PriorityClasses.
- apiGroups:
  - scheduling.k8s.io
  resources:
  - priorityclasses
  verbs:
  - get
  - list
  - watch
---
# ClusterRoleBinding "mpi-operator": grants the "mpi-operator" ClusterRole to
# the "mpi-operator" ServiceAccount in the "mpi-operator" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
  name: mpi-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: mpi-operator
subjects:
- kind: ServiceAccount
  name: mpi-operator
  namespace: mpi-operator
---
# Deployment "mpi-operator": runs a single replica of the MPI operator
# container in the "mpi-operator" namespace under the "mpi-operator"
# ServiceAccount.
# NOTE(review): the container declares no resource requests/limits, probes or
# securityContext - confirm that is acceptable for the clusters under test.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: mpi-operator
    app.kubernetes.io/component: mpijob
    app.kubernetes.io/name: mpi-operator
    kustomize.component: mpi-operator
  name: mpi-operator
  namespace: mpi-operator
spec:
  replicas: 1
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: mpi-operator
      app.kubernetes.io/component: mpijob
      app.kubernetes.io/name: mpi-operator
      kustomize.component: mpi-operator
  template:
    metadata:
      annotations:
        # Asks Istio not to inject a sidecar into the operator pod.
        sidecar.istio.io/inject: "false"
      labels:
        app: mpi-operator
        app.kubernetes.io/component: mpijob
        app.kubernetes.io/name: mpi-operator
        kustomize.component: mpi-operator
    spec:
      containers:
      - args:
        - -alsologtostderr
        # Presumably the namespace holding the operator's leader-election
        # lock; it matches this Deployment's namespace - confirm.
        - --lock-namespace=mpi-operator
        image: mpioperator/mpi-operator:0.8.0
        name: mpi-operator
      serviceAccountName: mpi-operator


================================================
FILE: test/manifests/assets/nvidia-device-plugin.yaml
================================================
# Source: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/main/deployments/static/nvidia-device-plugin.yml

# Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DaemonSet "nvidia-device-plugin-daemonset": runs the NVIDIA device plugin
# container in kube-system on every node the pod can be scheduled to.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      # Allow scheduling onto nodes tainted with nvidia.com/gpu (NoSchedule).
      tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      containers:
      - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.2
        name: nvidia-device-plugin-ctr
        env:
          # Presumably keeps the plugin running instead of exiting when
          # initialization fails (e.g. on nodes without GPUs) - confirm
          # against the device plugin documentation.
          - name: FAIL_ON_INIT_ERROR
            value: "false"
        # Run without privilege escalation and with every capability dropped.
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop: ["ALL"]
        volumeMounts:
        - name: device-plugin
          mountPath: /var/lib/kubelet/device-plugins
      volumes:
      # Host directory shared with the container at the same path.
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/device-plugins


================================================
FILE: test/manifests/raw.go
================================================
package manifests

import (
	_ "embed"
)

// Manifests embedded into the binary at build time via go:embed.
// Each variable holds the raw bytes of the file named in its directive,
// resolved relative to this package's directory.
var (
	// NvidiaDevicePluginManifest holds the contents of assets/nvidia-device-plugin.yaml.
	//go:embed assets/nvidia-device-plugin.yaml
	NvidiaDevicePluginManifest []byte
	// MpiOperatorManifest holds the contents of assets/mpi-operator.yaml.
	//go:embed assets/mpi-operator.yaml
	MpiOperatorManifest []byte

	// EfaDevicePluginManifest holds the contents of assets/efa-device-plugin.yaml.
	//go:embed assets/efa-device-plugin.yaml
	EfaDevicePluginManifest []byte

	// NeuronDevicePluginRbacManifest holds the contents of assets/k8s-neuron-device-plugin-rbac.yml.
	//go:embed assets/k8s-neuron-device-plugin-rbac.yml
	NeuronDevicePluginRbacManifest []byte
	// NeuronDevicePluginManifest holds the contents of assets/k8s-neuron-device-plugin.yml.
	//go:embed assets/k8s-neuron-device-plugin.yml
	NeuronDevicePluginManifest []byte

	// DranetManifest holds the contents of assets/dranet.yaml.
	//go:embed assets/dranet.yaml
	DranetManifest []byte

	// DCGMExporterManifest holds the contents of assets/dcgm-exporter.yaml.
	//go:embed assets/dcgm-exporter.yaml
	DCGMExporterManifest []byte

	// cloudWatchAgentManifestTemplate holds the contents of assets/cloudwatch-agent.yaml.
	// It is unexported because it is a template rather than a ready-to-apply
	// manifest; RenderCloudWatchAgentManifest renders it.
	//go:embed assets/cloudwatch-agent.yaml
	cloudWatchAgentManifestTemplate []byte
)


================================================
FILE: test/manifests/rendered.go
================================================
package manifests

import (
	"html/template"
	"sort"
	"strings"

	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
)

// RenderCloudWatchAgentManifest renders the embedded CloudWatch Agent manifest
// template for the given metric dimensions.
//
// The template receives two values:
//   - "MetricDimensions": the metricDimensions map, passed through unchanged.
//   - "DimensionKeys": the map's keys, each wrapped in double quotes and joined
//     with ", " (for example `"a", "b"`), in ascending key order.
//
// Keys are sorted because Go randomizes map iteration order; without sorting,
// the rendered manifest could differ between calls with identical input.
// A nil or empty map yields an empty DimensionKeys string.
//
// The result and error are those returned by fwext.RenderManifests.
func RenderCloudWatchAgentManifest(metricDimensions map[string]string) ([]byte, error) {
	names := make([]string, 0, len(metricDimensions))
	for name := range metricDimensions {
		names = append(names, name)
	}
	// Sort the raw keys (not the quoted form) so ordering is plain lexical.
	sort.Strings(names)

	quoted := make([]string, len(names))
	for i, name := range names {
		// NOTE(review): keys are quoted verbatim; a key containing `"` is not escaped.
		quoted[i] = `"` + name + `"`
	}
	dimensionsStr := strings.Join(quoted, ", ")

	// template.HTML presumably prevents the quotes from being escaped if the
	// renderer uses html/template - confirm against fwext.RenderManifests.
	return fwext.RenderManifests(cloudWatchAgentManifestTemplate, map[string]interface{}{
		"MetricDimensions": metricDimensions,
		"DimensionKeys":    template.HTML(dimensionsStr),
	})
}