Repository: aws-samples/cdk-eks-blueprints-patterns Branch: main Commit: ebb07d658b6d Files: 231 Total size: 650.4 KB Directory structure: gitextract_2o9c5389/ ├── .devcontainer/ │ ├── devcontainer.json │ └── postCreateCommand.sh ├── .eslintignore ├── .eslintrc.js ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── ci.yaml │ ├── docbuild.yml │ ├── e2e.yaml │ ├── linkcheck.json │ ├── markdown-link-check.yaml │ └── stale_issue_pr.yaml ├── .gitignore ├── .nvmrc ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── aws-quickstart-eks-blueprints-1.13.1.tgz ├── bin/ │ ├── asg.ts │ ├── backstage.ts │ ├── batch.ts │ ├── bottlerocket.ts │ ├── crossplane-argocd-gitops.ts │ ├── custom-networking-ipv4.ts │ ├── data-at-rest-encryption.ts │ ├── datadog.ts │ ├── dynatrace-operator.ts │ ├── ecr-image-scanning.ts │ ├── eks-config-rules.ts │ ├── emr.ts │ ├── fargate.ts │ ├── generative-ai-showcase.ts │ ├── generic-cluster-provider.ts │ ├── gmaestro.ts │ ├── gpu.ts │ ├── graviton.ts │ ├── guardduty.ts │ ├── import-cluster.ts │ ├── instana-operator.ts │ ├── ipv6.ts │ ├── jupyterhub.ts │ ├── karpenter.ts │ ├── kasten.ts │ ├── keptn-control-plane.ts │ ├── komodor.ts │ ├── konveyor.ts │ ├── kubecost.ts │ ├── kubeflow.ts │ ├── kubeshark.ts │ ├── multi-cluster-conformitron.ts │ ├── multi-region.ts │ ├── multi-team.ts │ ├── newrelic.ts │ ├── nginx.ts │ ├── paralus.ts │ ├── pipeline-multienv-gitops.ts │ ├── pipeline-multienv-monitoring.ts │ ├── pipeline.ts │ ├── rafay.ts │ ├── secure-ingress-cognito.ts │ ├── securityhub.ts │ ├── snyk.ts │ ├── starter.ts │ ├── unionai.ts │ ├── windows.ts │ └── workloads-codecommit.ts ├── ci/ │ └── buildspec.yml ├── docs/ │ ├── index.md │ └── patterns/ │ ├── backstage.md │ ├── batch.md │ ├── crossplane-argocd-gitops.md │ ├── custom-networking-with-ipv4.md │ ├── generative-ai/ │ │ └── showcase.md │ ├── gmaestro.md │ ├── graviton.md │ ├── 
instana.md │ ├── jupyterhub.md │ ├── karpenter.md │ ├── konveyor.md │ ├── kubeflow.md │ ├── kubeshark.md │ ├── multi-cluster-conformitron.md │ ├── nginx.md │ ├── observability/ │ │ ├── existing-eks-apiserver-observability.md │ │ ├── existing-eks-awsnative-observability.md │ │ ├── existing-eks-mixed-observability.md │ │ ├── existing-eks-nginx-observability.md │ │ ├── existing-eks-opensource-observability.md │ │ ├── multi-acc-new-eks-mixed-observability.md │ │ ├── multi-account-monitoring.md │ │ ├── single-new-eks-apiserver-opensource-observability.md │ │ ├── single-new-eks-awsnative-fargate-observability.md │ │ ├── single-new-eks-gpu-opensource-observability.md │ │ ├── single-new-eks-graviton-opensource-observability.md │ │ ├── single-new-eks-java-opensource-observability.md │ │ ├── single-new-eks-mixed-observability.md │ │ ├── single-new-eks-native.md │ │ ├── single-new-eks-nginx-opensource-observability.md │ │ └── single-new-eks-opensource.md │ ├── paralus.md │ ├── pipeline-multi-env-gitops.md │ ├── secureingresscognito.md │ ├── security/ │ │ ├── eks-config-rules.md │ │ ├── encryption-at-rest.md │ │ ├── guardduty.md │ │ ├── image-scanning.md │ │ └── securityhub.md │ ├── union.md │ ├── windows.md │ └── workloads-codecommit.md ├── jest.config.js ├── lib/ │ ├── amp-monitoring/ │ │ └── index.ts │ ├── argo-config-managent/ │ │ └── index.ts │ ├── aws-batch-on-eks-construct/ │ │ └── index.ts │ ├── backstage-construct/ │ │ ├── backstage-secret-addon.ts │ │ ├── database-credentials.ts │ │ ├── index.ts │ │ └── rds-database-instance.ts │ ├── bottlerocket-construct/ │ │ └── index.ts │ ├── cloudwatch-monitoring/ │ │ └── index.ts │ ├── common/ │ │ ├── construct-utils.ts │ │ └── default-main.ts │ ├── crossplane-argocd-gitops/ │ │ ├── custom-addons/ │ │ │ ├── crossplane-helm-provider-addon.ts │ │ │ ├── crossplane-k8s-provider-addon.ts │ │ │ ├── custom-iam-role-creator.ts │ │ │ ├── upbound-crossplane-addon.ts │ │ │ └── upbound-crossplane-eks-provider-addon.ts │ │ ├── 
management-cluster-builder.ts │ │ ├── multi-cluster-options.ts │ │ └── multi-cluster-pipeline.ts │ ├── custom-networking-ipv4-construct/ │ │ └── index.ts │ ├── datadog-construct/ │ │ └── index.ts │ ├── dynatrace-construct/ │ │ └── index.ts │ ├── emr-eks/ │ │ └── index.ts │ ├── fargate-construct/ │ │ └── index.ts │ ├── generative-ai-showcase/ │ │ ├── deployment/ │ │ │ └── showcase-deployment.ytpl │ │ ├── index.ts │ │ └── python/ │ │ ├── Dockerfile │ │ ├── requirements.txt │ │ ├── showcase_app.py │ │ ├── showcase_examples.py │ │ └── showcase_lib.py │ ├── generic-cluster-construct/ │ │ └── index.ts │ ├── gmaestro-construct/ │ │ └── index.ts │ ├── gpu-construct/ │ │ └── index.ts │ ├── graviton-construct/ │ │ └── index.ts │ ├── import-cluster/ │ │ └── index.ts │ ├── instana-construct/ │ │ └── index.ts │ ├── ipv6-construct/ │ │ └── index.ts │ ├── jupyterhub-construct/ │ │ └── index.ts │ ├── karpenter-construct/ │ │ └── index.ts │ ├── kasten-k10-construct/ │ │ └── index.ts │ ├── keptn-construct/ │ │ └── index.ts │ ├── komodor-construct/ │ │ └── index.ts │ ├── konveyor-construct/ │ │ └── index.ts │ ├── kubecost-construct/ │ │ └── index.ts │ ├── kubeflow-construct/ │ │ └── index.ts │ ├── kubeshark-construct/ │ │ └── index.ts │ ├── multi-account-monitoring/ │ │ ├── amg-iam-setup.ts │ │ ├── amp-iam-setup.ts │ │ ├── cloudwatch-iam-setup.ts │ │ ├── index.ts │ │ └── pipeline.ts │ ├── multi-cluster-construct/ │ │ ├── cluster-secret-store-addon.ts │ │ ├── clusterMapping.ts │ │ ├── grafana-monitor-builder.ts │ │ ├── grafana-operator-secret-addon.ts │ │ ├── multi-cluster-builder.ts │ │ ├── pipeline.ts │ │ └── resources/ │ │ ├── amp-config/ │ │ │ ├── alerting-rules.yml │ │ │ ├── apiserver/ │ │ │ │ └── recording-rules.yml │ │ │ ├── istio/ │ │ │ │ ├── alerting-rules.yml │ │ │ │ └── recording-rules.yml │ │ │ ├── java/ │ │ │ │ ├── alerting-rules.yml │ │ │ │ └── recording-rules.yml │ │ │ ├── nginx/ │ │ │ │ └── alerting-rules.yml │ │ │ └── recording-rules.yml │ │ ├── cost-optimization/ │ │ 
│ ├── scaleDownEksToZero.yml │ │ │ └── scaleUpEksToOne.yml │ │ └── otel-collector-config.yml │ ├── multi-region-construct/ │ │ └── index.ts │ ├── multi-team-construct/ │ │ └── index.ts │ ├── newrelic-construct/ │ │ └── index.ts │ ├── nginx-ingress-construct/ │ │ └── index.ts │ ├── paralus-construct/ │ │ └── index.ts │ ├── pipeline-multi-env-gitops/ │ │ └── index.ts │ ├── pipeline-stack/ │ │ └── index.ts │ ├── rafay-construct/ │ │ └── index.ts │ ├── secure-ingress-auth-cognito/ │ │ ├── index.ts │ │ └── lambda/ │ │ └── lambda_function.py │ ├── security/ │ │ ├── data-at-rest-encryption/ │ │ │ └── index.ts │ │ ├── eks-config-rules/ │ │ │ ├── config-setup.ts │ │ │ └── index.ts │ │ ├── guardduty-construct/ │ │ │ ├── guardduty-setup.ts │ │ │ └── index.ts │ │ ├── image-vulnerability-scanning/ │ │ │ ├── image-scanning-setup.ts │ │ │ └── index.ts │ │ └── securityhub-construct/ │ │ └── index.ts │ ├── snyk-construct/ │ │ └── index.ts │ ├── starter-construct/ │ │ └── index.ts │ ├── teams/ │ │ ├── index.ts │ │ ├── multi-account-monitoring/ │ │ │ ├── index.ts │ │ │ ├── team-geordi.ts │ │ │ └── team-platform.ts │ │ ├── pipeline-multi-env-gitops/ │ │ │ ├── index.ts │ │ │ ├── team-backend-crystal.ts │ │ │ ├── team-backend-frontend.ts │ │ │ ├── team-backend-nodejs.ts │ │ │ └── team-platform.ts │ │ ├── team-batch/ │ │ │ └── index.ts │ │ ├── team-burnham/ │ │ │ ├── index.ts │ │ │ └── restrict-ingress-egress-burnham.yaml │ │ ├── team-emr-on-eks/ │ │ │ └── index.ts │ │ ├── team-platform/ │ │ │ └── index.ts │ │ ├── team-riker/ │ │ │ ├── index.ts │ │ │ └── restrict-ingress-egress-riker.yaml │ │ ├── team-scan/ │ │ │ └── index.ts │ │ └── team-troi/ │ │ └── index.ts │ ├── union-dataplane-construct/ │ │ └── index.ts │ ├── windows-construct/ │ │ ├── index.ts │ │ └── vpc-cni/ │ │ └── index.ts │ └── workloads-codecommit-construct/ │ ├── codecommit-credentials.ts │ ├── index.ts │ ├── lambda/ │ │ └── index.js │ └── workloads-codecommit-repo-stack.ts ├── mkdocs.yml ├── package.json └── tsconfig.json 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .devcontainer/devcontainer.json ================================================ { "image": "mcr.microsoft.com/vscode/devcontainers/base:ubuntu", "features": { "ghcr.io/devcontainers/features/node:1": {}, "ghcr.io/devcontainers/features/aws-cli:1": {}, "ghcr.io/devcontainers-contrib/features/aws-cdk:2": { "version": "2.133.0" } }, "postCreateCommand": ".devcontainer/postCreateCommand.sh", "workspaceFolder": "/home/vscode/cdk-eks-blueprints-patterns", "workspaceMount": "source=${localWorkspaceFolder},target=/home/vscode/cdk-eks-blueprints-patterns,type=bind", "hostRequirements": { "cpus": 2 }, "remoteEnv": { "PATH": "${containerEnv:PATH}:/home/vscode/cdk-eks-blueprints-patterns" } } ================================================ FILE: .devcontainer/postCreateCommand.sh ================================================ #!/usr/bin/env bash # For Kubectl AMD64 / x86_64 [ $(uname -m) = x86_64 ] && curl -sLO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" # For Kubectl ARM64 [ $(uname -m) = aarch64 ] && curl -sLO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/arm64/kubectl" chmod +x ./kubectl sudo mv ./kubectl /usr/local/bin/kubectl # For Helm curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 chmod 700 get_helm.sh echo "Installing 'helm' utility ..." 
./get_helm.sh rm -rf get_helm.sh # setup autocomplete for kubectl and alias k mkdir $HOME/.kube echo "source <(kubectl completion bash)" >> $HOME/.bashrc echo "alias k=kubectl" >> $HOME/.bashrc echo "complete -F __start_kubectl k" >> $HOME/.bashrc ================================================ FILE: .eslintignore ================================================ node_modules dist coverage cdk.out .eslintrc.js jest.config.js ================================================ FILE: .eslintrc.js ================================================ module.exports = { root: true, parser: '@typescript-eslint/parser', plugins: [ '@typescript-eslint', ], extends: [ 'eslint:recommended', 'plugin:@typescript-eslint/recommended' ], rules: { "@typescript-eslint/no-explicit-any": "off", "@typescript-eslint/explicit-module-boundary-types": "off", "@typescript-eslint/no-non-null-assertion": "off", "@typescript-eslint/no-unused-vars": [1, {"argsIgnorePattern": "^_"}], indent: ['error', 4], "prefer-const": "off", "semi": ['error',"always"], }, }; ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - Version [e.g. 22] **Smartphone (please complete the following information):** - Device: [e.g. iPhone6] - OS: [e.g. iOS8.1] - Browser [e.g. stock browser, safari] - Version [e.g. 22] **Additional context** Add any other context about the problem here. 
================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ *Issue #, if available:* *Description of changes:* By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
================================================ FILE: .github/workflows/ci.yaml ================================================ name: Node.js CI on: push: branches: [ main ] pull_request: branches: [ main ] jobs: build: runs-on: macos-14 strategy: matrix: node-version: [22] steps: - uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v3 with: node-version: ${{ matrix.node-version }} - name: Cache node modules uses: actions/cache@v5 env: cache-name: cache-node-modules with: # npm cache files are stored in `~/.npm` on Linux/macOS path: ~/.npm key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }} restore-keys: | ${{ runner.os }}-build-${{ env.cache-name }}- ${{ runner.os }}-build- ${{ runner.os }}- - name: Install Deps run: make deps - name: Run Linter run: make lint - name: Build TSC run: make build - name: Run CDK List run: make list - name: Run CDK Synth run: make test-all ================================================ FILE: .github/workflows/docbuild.yml ================================================ name: ci on: push: branches: - master - main permissions: contents: write jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: 3.x - run: pip install mkdocs-material - run: pip install mkdocs-embed-external-markdown - run: mkdocs gh-deploy --force ================================================ FILE: .github/workflows/e2e.yaml ================================================ name: "E2E Pipeline for CDK Observability Accelerator" on: issue_comment: types: [created] jobs: checkPermissions: runs-on: ubuntu-latest if: github.event.issue.pull_request && contains(github.event.comment.body, '/do-e2e-test') # check the comment if it contains the keywords steps: - id: checkUserPermissions uses: actions-cool/check-user-permission@main with: require: 'admin' outputs: run_test: ${{ 
steps.checkUserPermissions.outputs.require-result }} e2ePipeline: runs-on: ubuntu-latest if: github.event.issue.pull_request && contains(github.event.comment.body, '/do-e2e-test') && needs.checkPermissions.outputs.run_test == 'true' needs: - checkPermissions permissions: id-token: write # This is required for requesting the JWT contents: read steps: - name: Dump GitHub context env: GITHUB_CONTEXT: ${{ toJson(github) }} run: | echo "$GITHUB_CONTEXT" - name: Get PR branch uses: xt0rted/pull-request-comment-branch@v1 id: comment-branch - name: Set latest commit status as pending uses: myrotvorets/set-commit-status-action@master with: sha: ${{ steps.comment-branch.outputs.head_sha }} token: ${{ secrets.CI_TOKEN }} status: pending - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: arn:aws:iam::867286930927:role/BlueprintsCodeBuildRole role-session-name: codebuildsession aws-region: ${{ vars.AWS_REGION }} - name: Run CodeBuild uses: aws-actions/aws-codebuild-run-build@v1 with: project-name: cdk-pattern-test # buildspec-override: path/to/buildspec.yaml or inline buildspec definition # compute-type-override: compute-type # environment-type-override: environment-type # image-override: ecr-image-uri env-vars-for-codebuild: | PR_NUMBER, COMMIT_ID, PATTERN_NAME env: PR_NUMBER: ${{ github.event.issue.number }} COMMIT_ID: ${{ steps.comment-branch.outputs.head_sha }} PATTERN_NAME: ${{ github.event.comment.body }} - name: Set latest commit status as ${{ job.status }} uses: myrotvorets/set-commit-status-action@master if: always() with: sha: ${{ steps.comment-branch.outputs.head_sha }} token: ${{ secrets.CI_TOKEN }} status: ${{ job.status }} ================================================ FILE: .github/workflows/linkcheck.json ================================================ { "timeout": "5s", "retryOn429": true, "retryCount": 5, "fallbackRetryDelay": "30s", "aliveStatusCodes": [200, 206], "httpHeaders": [ { "urls": 
["https://help.github.com/"], "headers": { "Accept-Encoding": "zstd, br, gzip, deflate" } } ], "ignorePatterns": [ { "pattern": [ "localhost" ] }, { "pattern": [ "127.0.0.1" ] } ] } ================================================ FILE: .github/workflows/markdown-link-check.yaml ================================================ name: Check Markdown links on: push: branches: - main paths: - "**/*.md" pull_request: branches: - main paths: - "**/*.md" jobs: markdown-link-check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-node@v3 with: node-version: '20.x' - name: install markdown-link-check run: npm install -g markdown-link-check@3.10.2 - name: markdown-link-check version run: npm list -g markdown-link-check - name: Run markdown-link-check on MD files run: find docs -name "*.md" | xargs -n 1 markdown-link-check -q -c .github/workflows/linkcheck.json ================================================ FILE: .github/workflows/stale_issue_pr.yaml ================================================ name: 'Stale issue & PR handler' on: workflow_dispatch: schedule: - cron: '0 0 * * *' jobs: stale: runs-on: ubuntu-latest permissions: issues: write pull-requests: write steps: - uses: actions/stale@main id: stale with: ascending: true close-issue-message: 'Issue closed due to inactivity.' close-pr-message: 'Pull request closed due to inactivity.' days-before-close: 60 days-before-stale: 90 stale-issue-label: stale stale-pr-label: stale # Not stale if have this labels exempt-issue-labels: 'bug,enhancement,"feature request"' exempt-pr-labels: 'bug,enhancement' operations-per-run: 100 stale-issue-message: | This issue has been automatically marked as stale because it has been open 60 days with no activity. Remove stale label or comment or this issue will be closed in 10 days stale-pr-message: | This PR has been automatically marked as stale because it has been open 60 days with no activity. 
Remove stale label or comment or this PR will be closed in 10 days ================================================ FILE: .gitignore ================================================ !jest.config.js *.d.ts node_modules .vscode .classpath.txt .idea .settings .vscode *.iml # CDK asset staging directory .cdk.staging cdk.out cdk.json dist *.swp cdk.context.json package-lock.json yarn.lock # mkdocs artifact site # macOS extraneous file .DS_STORE # Python virtual env directory *.venv* *otel-collector-config-new.yml ================================================ FILE: .nvmrc ================================================ 12.18.2 ================================================ FILE: CODE_OF_CONDUCT.md ================================================ ## Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing Guidelines Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. Please read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution. ## Reporting Bugs/Feature Requests We welcome you to use the GitHub issue tracker to report bugs or suggest features. When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful: * A reproducible test case or series of steps * The version of our code being used * Any modifications you've made relevant to the bug * Anything unusual about your environment or deployment ## Contributing via Pull Requests Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 1. You are working against the latest source on the *main* branch. 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. To send us a pull request, please: 1. Fork the repository. 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 3. Ensure local tests pass. 4. Ensure the following commands pass: ``` make build make lint # no errors / warnings, also you can use make lint-fix to fix issues make pattern <pattern-name> list make pattern <pattern-name> deploy ``` The above should produce no errors. 5. Commit to your fork using clear commit messages. 6. Send us a pull request, answering any default questions in the pull request interface. 7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 
## Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. ## Security issue notifications If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. ## Licensing See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. ================================================ FILE: LICENSE ================================================ Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: Makefile ================================================ #!/bin/bash # Libraries TSC := node node_modules/.bin/tsc ESLINT := node node_modules/.bin/eslint CDK := node node_modules/.bin/cdk pattern: pattern_name := $(firstword $(filter-out pattern, $(MAKECMDGOALS))) pattern: pattern_command := $(subst pattern $(pattern_name), , $(MAKECMDGOALS)) pattern_files := $(notdir $(wildcard bin/*.ts)) formatted_pattern_names := $(patsubst %.ts,%,$(pattern_files)) # Dependecies HOMEBREW_LIBS := nvm typescript argocd list: @$ echo "To work with patterns use: \n\t$$ make pattern " @$ echo "Example:\n\t$$ make pattern fargate deploy \n\nPatterns: \n" @$ $(foreach pattern, $(formatted_pattern_names), echo "\t$(pattern)";) deps: bootstrap npm install lint: $(ESLINT) . --ext .js,.jsx,.ts,.tsx lint-fix: $(ESLINT) . --ext .js,.jsx,.ts,.tsx --fix build: rm -rf dist && $(TSC) --skipLibCheck compile: $(TSC) --build --incremental mkdocs: mkdocs serve pattern: @echo $(pattern_name) performing $(pattern_command) $(CDK) --app "npx ts-node bin/$(pattern_name).ts" $(if $(pattern_command),$(pattern_command), list) @: %: @: test-all: @for pattern in $(formatted_pattern_names) ; do \ echo "Building pattern $$pattern"; \ $(CDK) --app "npx ts-node bin/$$pattern.ts" list || exit 1 ;\ done bootstrap: @for LIB in $(HOMEBREW_LIBS) ; do \ LIB=$$LIB make check-lib ; \ done check-lib: ifeq ($(shell brew ls --versions $(LIB)),) @echo Installing $(LIB) via Homebrew @brew install $(LIB) else @echo $(LIB) is already installed, skipping. endif ================================================ FILE: README.md ================================================ # EKS Blueprints Patterns Welcome to the `EKS Blueprints Patterns` repository. This repository contains a number of samples for how you can leverage the [Amazon EKS Blueprints](https://github.com/aws-quickstart/cdk-eks-blueprints). 
You can think of the patterns as "codified" reference architectures, which can be explained and executed as code in the customer environment. ## Patterns The individual patterns can be found in the `lib` directory. Most of the patterns are self-explanatory; for some more complex examples, please use this guide and the `docs/patterns` directory for more information. ## Documentation Please refer to the Amazon EKS Blueprints Patterns [documentation site](https://aws-samples.github.io/cdk-eks-blueprints-patterns/) for the complete list of Amazon EKS Blueprints patterns documentation. Please refer to the Amazon EKS Blueprints Quick Start [documentation site](https://aws-quickstart.github.io/cdk-eks-blueprints/) for complete project documentation. ## Usage Before proceeding, make sure [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) is installed on your machine. To use the eks-blueprints and patterns module, you must have [Node.js](https://nodejs.org/en/) and [npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) installed. You will also use `make` and `brew` to simplify build and other common actions. ## Workstation Setup Options ### DevContainer Setup Choose this option if you don't want to run this solution directly on a Mac or Ubuntu machine. Please use the dev container configuration in the `.devcontainer` folder with [devpod](https://devpod.sh) or any other dev container environment to create a development environment with dependencies such as Node, NPM, aws-cli, aws-cdk, kubectl, helm dependencies for your local development with `cdk-eks-blueprints-patterns` solution. ### RHEL Setup Follow the below steps to setup and leverage `eks-blueprints` and `eks-blueprints-patterns` in your Amazon Linux/CentOS/RHEL Linux machine. 1. **Update the package list** Update the package list to ensure you're installing the latest versions. ```bash sudo yum update ``` 1. **Install `make`** ```bash sudo yum install make ``` 1. 
**Install `brew`** by following instructions as detailed in [docs.brew.sh](https://docs.brew.sh/Homebrew-on-Linux) ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" ``` Add Homebrew to your PATH ```bash test -d ~/.linuxbrew && eval "$(~/.linuxbrew/bin/brew shellenv)" test -d /home/linuxbrew/.linuxbrew && eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)" test -r ~/.bash_profile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.bash_profile echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.profile ``` Verify brew installation ```bash brew -v ``` 1. **Install `Node.js` and `npm`** Install Node.js v18 and npm using brew. ```bash brew install node@18 ``` Note: Node.js package includes npm Set PATH for node@18 ```bash test -r ~/.bash_profile && echo 'export PATH="/home/linuxbrew/.linuxbrew/opt/node@18/bin:$PATH"' >> ~/.bash_profile echo 'export PATH="/home/linuxbrew/.linuxbrew/opt/node@18/bin:$PATH"' >> ~/.profile export PATH="/home/linuxbrew/.linuxbrew/opt/node@18/bin:$PATH" ``` After completing the above, continue from [Verify Node.js and npm Installation](#verify-nodejs-and-npm-installation) ### Ubuntu Setup Follow the below steps to setup and leverage `eks-blueprints` and `eks-blueprints-patterns` in your Ubuntu Linux machine. 1. **Update the package list** Update the package list to ensure you're installing the latest versions. ```bash sudo apt update ``` 1. **Install `make`** ```bash sudo apt install make ``` 1. 
**Install `brew`** by following instructions as detailed in [docs.brew.sh](https://docs.brew.sh/Homebrew-on-Linux) ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" ``` Add Homebrew to your PATH ```bash test -d ~/.linuxbrew && eval "$(~/.linuxbrew/bin/brew shellenv)" test -d /home/linuxbrew/.linuxbrew && eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)" test -r ~/.bash_profile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.bash_profile echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.profile ``` Verify brew installation ```bash brew -v ``` 1. **Install `Node.js` and `npm`** Install Node.js v18 and npm using brew. ```bash brew install node@18 ``` Note: Node.js package includes npm Set PATH for node@18 ```bash test -r ~/.bash_profile && echo 'export PATH="/home/linuxbrew/.linuxbrew/opt/node@18/bin:$PATH"' >> ~/.bash_profile echo 'export PATH="/home/linuxbrew/.linuxbrew/opt/node@18/bin:$PATH"' >> ~/.profile export PATH="/home/linuxbrew/.linuxbrew/opt/node@18/bin:$PATH" ``` Post completing the above, continue from [Verify Node.js and npm Installation](#verify-nodejs-and-npm-installation) ### Mac Setup Follow the below steps to setup and leverage `eks-blueprints` and `eks-blueprints-patterns` in your local Mac laptop. 1. **Install `make`, `node` and `npm` using brew** ```bash brew install make brew install node@18 ``` Note: Node.js package includes npm Set PATH for node@18 ```bash echo 'export PATH="/opt/homebrew/opt/node@18/bin:$PATH"' >> ~/.zshrc export PATH="/opt/homebrew/opt/node@18/bin:$PATH" ``` ### Verify `Node.js` and `npm` Installation 1. Check the installed version of Node.js: ```bash node -v ``` The output should be `v18.x.x`. 1. Check the installed version of npm ```bash npm -v ``` The output should be a version greater than `9.x.x`. 
If your npm version is not `9.x.x` or above, update npm with the following command: ```bash sudo npm install -g npm@latest ``` Verify the installed version by running `npm -v`. ### Repo setup 1. Clone `cdk-eks-blueprints-patterns` repository ```bash git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` PS: If you are contributing to this repo, please make sure to fork the repo, add your changes and create a PR against it. 1. Once you have cloned the repo, you can open it using your favourite IDE and run the below commands to install the dependencies and build the existing patterns. - Install project dependencies. ```bash make deps ``` - To view patterns that are available to be deployed, execute the following: ```bash npm i make build ``` - To list the existing CDK EKS Blueprints patterns ```bash make list ``` Note: Some patterns have a hard dependency on AWS Secrets (for example GitHub access tokens). Initially you will see errors complaining about lack of the required secrets. It is normal. At the bottom, it will show the list of patterns which can be deployed, in case the pattern you are looking for is not available, it is due to the hard dependency which can be fixed by following the docs specific to those patterns. ```bash To work with patterns use: $ make pattern Example: $ make pattern fargate deploy Patterns: bottlerocket data-at-rest datadog dynatrace-operator ecr-image-scanning emr fargate generative-ai-showcase generic-cluster-provider guardduty jupyterhub kasten keptn-control-plane konveyor kubecost kubeflow kubeshark multi-region multi-team newrelic nginx pipeline-multienv-gitops pipeline-multienv-monitoring pipeline rafay secure-ingress-cognito snyk starter gmaestro workloads-codecommit ``` - Bootstrap your CDK environment. 
```bash npx cdk bootstrap ``` - You can then deploy a specific pattern with the following: ```bash make pattern multi-team deploy ``` # Developer Flow ## Modifications All files are compiled to the dist folder including `lib` and `bin` directories. For iterative development (e.g. if you make a change to any of the patterns) make sure to run compile: ```bash make compile ``` The `compile` command is optimized to build only modified files and is fast. ## New Patterns To create a new pattern, please follow these steps: 1. Under lib create a folder for your pattern, such as `<pattern-name>-construct`. If you plan to create a set of patterns that represent a particular subdomain, e.g. `security` or `hardening`, please create an issue to discuss it first. If approved, you will be able to create a folder with your subdomain name and group your pattern constructs under it. 2. Blueprints generally don't require a specific class, however we use a convention of wrapping each pattern in a plain class like `<PatternName>Construct`. This class is generally placed in `index.ts` under your pattern folder. 3. Once the pattern implementation is ready, you need to include it in the list of the patterns by creating a file `bin/<pattern-name>.ts`. The implementation of this file is very light, and it is done to allow patterns to run independently. Example simple synchronous pattern: ```typescript import { configureApp } from "../lib/common/construct-utils"; import FargateConstruct from "../lib/fargate-construct"; new FargateConstruct(configureApp(), "fargate"); // configureApp() will create app and configure loggers and perform other prep steps ``` 4. In some cases, patterns need to use async APIs. For example, they may rely on external secrets that you want to validate ahead of the pattern deployment.
Example async pattern: ```typescript import { configureApp, errorHandler } from "../lib/common/construct-utils"; import NginxIngressConstruct from "../lib/nginx-ingress-construct"; const app = configureApp(); new NginxIngressConstruct().buildAsync(app, "nginx").catch((e) => { errorHandler( app, "NGINX Ingress pattern is not setup. This maybe due to missing secrets for ArgoCD admin pwd.", e ); }); ``` 5. There are a few utility functions that can be used in the pattern implementation such as secret prevalidation. This function will fail if the corresponding secret is not defined, thus preventing the pattern from deploying. ```typescript await prevalidateSecrets( NginxIngressConstruct.name, undefined, SECRET_ARGO_ADMIN_PWD ); await prevalidateSecrets("my-pattern-name", "us-east-1", "my-secret-name"); // ``` ## Contributing See [Contributing](CONTRIBUTING.md) guide for requirements on contribution. # Deploying Blueprints with External Dependency on AWS Resources There are cases when the blueprints defined in the patterns have dependencies on existing AWS Resources such as Secrets defined in the account/region. For such cases, you may see errors if such resources are not defined. For [`PipelineMultiEnvGitops`](./lib/pipeline-multi-env-gitops/index.ts) please see instructions in this [README](./docs/patterns/pipeline-multi-env-gitops.md). For `MultiRegionConstruct` the pattern relies on the following secrets defined: 1. `github-ssh-key` - must contain GitHub SSH private key as a JSON structure containing fields `sshPrivateKey` and `url`. The secret is expected to be defined in `us-east-1` and replicated to `us-east-2` and `us-west-2` regions. For more information on SSH credentials setup see [ArgoCD Secrets Support](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/#secrets-support).
Example Structure: ``` { "sshPrivateKey": "-----BEGIN THIS IS NOT A REAL PRIVATE KEY-----\nb3BlbnNzaC1rtdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn\nNhAAAAAwEAAQAAAgEAy82zTTDStK+s0dnaYzE7vLSAcwsiHM8gN\nhq2p5TfcjCcYUWetyu6e/xx5Rh+AwbVvDV5h9QyMw4NJobwuj5PBnhkc3QfwJAO5wOnl7R\nGbehIleWWZLs9qq`DufViQsa0fDwP6JCrqD14aIozg6sJ0Oqi7vQkV+jR0ht/\nuFO1ANXBn2ih0ZpXeHSbPDLeZQjlOBrbGytnCbdvLtfGEsV0WO2oIieWVXJj/zzpKuMmrr\nebPsfwr36nLprOQV6IhDDo\n-----END NOT A REAL PRIVATE KEY-----\n", "url": "git@github" } ``` Note: The `sshPrivateKey` value contains explicit `\n` characters. 2. `argo-admin-secret` - must contain ArgoCD admin password in Plain Text. The secret is expected to be defined in `us-east-1` and replicated to `us-east-2` and `us-west-2` regions. For `Dynatrace One Agent` - `dynatrace-tokens` - must contain [API_URL](https://github.com/dynatrace-oss/dynatrace-ssp-addon#aws-secret-manager-secrets), [API_TOKEN](https://github.com/dynatrace-oss/dynatrace-eks-blueprints-addon#aws-secret-manager-secrets) and [PAAS_TOKEN](https://github.com/dynatrace-oss/dynatrace-eks-blueprints-addon#aws-secret-manager-secrets) in Plain Text. The secret is expected to be defined in the target region (either directly or through AWS Secrets Manager Replication). For `keptn-control-plane` the pattern relies on the following secrets defined: - `keptn-secrets` - must contain API_TOKEN and BRIDGE_PASSWORD in Plain Text. The secret is expected to be defined in `us-east-1` region. For `newrelic` the pattern relies on the following secrets defined: - `newrelic-pixie-keys` - must contain New Relic (required) and Pixie keys (optional). The secret is expected to be defined in the target region (either directly or through AWS Secrets Manager Replication).
For more information on defining secrets for ArgoCD, please refer to [Blueprints Documentation](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/#secrets-support) as well as [known issues](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/#known-issues). For `nginx` please see [NGINX Blueprint documentation](docs/patterns/nginx.md). For `datadog` the pattern relies on the following secret defined: - `apiKeyAWSSecret` - must contain the Datadog API key in Plain Text named `datadog-api-key`. The secret is expected to be defined in the target region. For `kubeflow` please see [Kubeflow documentation](docs/patterns/kubeflow.md). For `secure-ingress-cognito` please see [Secure Ingress using Cognito Blueprint documentation](docs/patterns/secureingresscognito.md). For `GmaestroConstruct` the pattern relies on the following secret defined: - `granulate-client-id` - must contain the client_id in Plain Text. The secret is expected to be defined in the target region (either directly or through AWS Secrets Manager Replication). ## Security See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. ## License This library is licensed under the MIT-0 License. See the LICENSE file.
================================================ FILE: bin/asg.ts ================================================ import 'source-map-support/register'; import * as cdk from 'aws-cdk-lib'; import * as blueprints from '@aws-quickstart/eks-blueprints'; import { configureApp } from "../lib/common/construct-utils"; const app = configureApp(); const spotInterruptHandlerAddOn = new blueprints.addons.AwsNodeTerminationHandlerAddOn({ version: "0.25.1", repository: 'oci://public.ecr.aws/aws-ec2/helm/aws-node-termination-handler' }); const clusterProvider = new blueprints.AsgClusterProvider({ version: cdk.aws_eks.KubernetesVersion.V1_30, minSize: 1, maxSize: 1, spotPrice: "0.10", machineImageType: cdk.aws_eks.MachineImageType.BOTTLEROCKET, id: "asg-spot", name: "asg-spot", spotInterruptHandler: false }); const blueprint = blueprints.EksBlueprint.builder() .region("us-west-2") .version("auto") .clusterProvider(clusterProvider) .addOns(spotInterruptHandlerAddOn) .build(app, 'asg-test'); ================================================ FILE: bin/backstage.ts ================================================ import { BackstageConstruct } from '../lib/backstage-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new BackstageConstruct(app, 'backstage-stack'); ================================================ FILE: bin/batch.ts ================================================ import BatchConstruct from '../lib/aws-batch-on-eks-construct'; import { batchTeam } from '../lib/teams/team-batch'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); //------------------------------------------- // Single cluster with Batch on EKS deployed //------------------------------------------- new BatchConstruct().build(app, 'batch', [batchTeam]); ================================================ FILE: bin/bottlerocket.ts ================================================ import BottleRocketConstruct from 
'../lib/bottlerocket-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); //------------------------------------------- // Single cluster with Bottlerocket nodes. //------------------------------------------- new BottleRocketConstruct().build(app, 'bottlerocket'); ================================================ FILE: bin/crossplane-argocd-gitops.ts ================================================ #!/usr/bin/env node import * as cdk from 'aws-cdk-lib'; import { errorHandler } from '../lib/common/construct-utils'; import MultiClusterPipelineConstruct from "../lib/crossplane-argocd-gitops/multi-cluster-pipeline"; const app = new cdk.App(); new MultiClusterPipelineConstruct().buildAsync(app, "crossplane-argocd-gitops").catch((e) => { errorHandler(app, "Pipeline construct failed because of error: ", e); }); ================================================ FILE: bin/custom-networking-ipv4.ts ================================================ import CustomNetworkingIPv4Construct from '../lib/custom-networking-ipv4-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new CustomNetworkingIPv4Construct(app, 'custom-networking-ipv4'); ================================================ FILE: bin/data-at-rest-encryption.ts ================================================ import EncryptionAtRestConstruct from "../lib/security/data-at-rest-encryption"; import { configureApp, errorHandler } from '../lib/common/construct-utils'; //-------------------------------------------------------------------------- // Security Patterns //-------------------------------------------------------------------------- const app = configureApp(); new EncryptionAtRestConstruct().buildAsync(app, "data-at-rest-encryption").catch((e) => { errorHandler(app, "EncryptionAtRestConstruct is not setup due to missing secrets for ArgoCD admin pwd", e); }); ================================================ FILE: 
bin/datadog.ts ================================================ import DatadogConstruct from '../lib/datadog-construct'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new DatadogConstruct().buildAsync(app, 'datadog').catch((error) => { /* Pass the caught error to errorHandler as its own argument, as the other bin/ entry points do, instead of concatenating it into the message. */ errorHandler(app, "Datadog pattern is not setup due to missing secrets: ", error); }); ================================================ FILE: bin/dynatrace-operator.ts ================================================ import DynatraceOperatorConstruct from '../lib/dynatrace-construct'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new DynatraceOperatorConstruct().buildAsync(app, "dynatrace-operator").catch((e) => { errorHandler(app, "Dynatrace pattern is not setup due to missing secrets for dynatrace-tokens.", e); }); ================================================ FILE: bin/ecr-image-scanning.ts ================================================ import { ImageScanningSetupStack } from "../lib/security/image-vulnerability-scanning/image-scanning-setup"; import { configureApp, errorHandler } from '../lib/common/construct-utils'; import ImageScanningWorkloadConstruct from "../lib/security/image-vulnerability-scanning"; const app = configureApp(); new ImageScanningSetupStack(app, "image-scanning-setup"); new ImageScanningWorkloadConstruct().buildAsync(app, "image-scanning-workload").catch((e) => { errorHandler(app, "ImageScanningWorkloadConstruct is not setup due to missing secrets for ArgoCD admin pwd", e); }); ================================================ FILE: bin/eks-config-rules.ts ================================================ import { configureApp } from '../lib/common/construct-utils'; import { EksConfigRulesSetup } from '../lib/security/eks-config-rules'; import { EksConfigSetup } from '../lib/security/eks-config-rules/config-setup'; const app = configureApp(); new EksConfigSetup(app, 'eks-config-setup'); new
EksConfigRulesSetup(app, 'eks-config-rules-setup'); ================================================ FILE: bin/emr.ts ================================================ import EmrEksConstruct from '../lib/emr-eks'; import { dataTeam } from '../lib/teams/team-emr-on-eks'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new EmrEksConstruct().build(app, 'emrOnEks', [dataTeam]); ================================================ FILE: bin/fargate.ts ================================================ import { configureApp } from '../lib/common/construct-utils'; import FargateConstruct from '../lib/fargate-construct'; new FargateConstruct(configureApp(), 'fargate'); ================================================ FILE: bin/generative-ai-showcase.ts ================================================ import GenAIShowcase from "../lib/generative-ai-showcase"; import { configureApp } from "../lib/common/construct-utils"; const app = configureApp(); new GenAIShowcase(app, 'generative-ai-showcase'); ================================================ FILE: bin/generic-cluster-provider.ts ================================================ import { configureApp } from '../lib/common/construct-utils'; import GenericClusterConstruct from '../lib/generic-cluster-construct'; const app = configureApp(); //------------------------------------------- // Single cluster with custom configuration. 
//------------------------------------------- new GenericClusterConstruct().build(app, 'generic-cluster'); ================================================ FILE: bin/gmaestro.ts ================================================ #!/usr/bin/env node import GmaestroConstruct from '../lib/gmaestro-construct'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new GmaestroConstruct().buildAsync(app, 'gmaestro').catch((error) => { /* Pass the caught error to errorHandler as its own argument, as the other bin/ entry points do, instead of concatenating it into the message. */ errorHandler(app, "Gmaestro is not setup due to missing secrets: ", error); }); ================================================ FILE: bin/gpu.ts ================================================ import { configureApp } from "../lib/common/construct-utils"; import GpuConstruct from "../lib/gpu-construct"; const app = configureApp(); new GpuConstruct().build(app, "gpu"); ================================================ FILE: bin/graviton.ts ================================================ import { configureApp } from "../lib/common/construct-utils"; import GravitonConstruct from "../lib/graviton-construct"; const app = configureApp(); new GravitonConstruct().build(app, "graviton"); ================================================ FILE: bin/guardduty.ts ================================================ import { GuardDutySetupStack } from "../lib/security/guardduty-construct/guardduty-setup"; import GuardDutyWorkloadConstruct from "../lib/security/guardduty-construct"; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new GuardDutySetupStack(app, "guardduty-setup"); new GuardDutyWorkloadConstruct().buildAsync(app, "guardduty").catch((e) => { errorHandler(app, "GuardDutyWorkloadConstruct is not setup due to missing secrets for ArgoCD admin pwd", e); }); ================================================ FILE: bin/import-cluster.ts ================================================ import { configureApp, errorHandler } from
'../lib/common/construct-utils'; import { ImportClusterConstruct } from '../lib/import-cluster'; const app = configureApp(); //------------------------------------------- // Import an existing cluster. //------------------------------------------- new ImportClusterConstruct().build(app).catch((error) => { errorHandler(app, "Import cluster construct failed to import cluster", error); }); ================================================ FILE: bin/instana-operator.ts ================================================ import { configureApp, errorHandler } from '../lib/common/construct-utils'; import InstanaConstruct from '../lib/instana-construct'; const app = configureApp(); new InstanaConstruct().buildAsync(app, "instana-operator").catch((error) => { /* Pass the caught error to errorHandler as its own argument, as the other bin/ entry points do, instead of concatenating it into the message. */ errorHandler(app, "Instana pattern is not setup due to missing secrets: ", error); }); ================================================ FILE: bin/ipv6.ts ================================================ import { configureApp } from "../lib/common/construct-utils"; import IpV6Construct from "../lib/ipv6-construct"; const app = configureApp(); new IpV6Construct().build(app, "ipv6"); ================================================ FILE: bin/jupyterhub.ts ================================================ import JupyterHubConstruct from '../lib/jupyterhub-construct'; import { configureApp } from '../lib/common/construct-utils'; const account = process.env.CDK_DEFAULT_ACCOUNT!; const region = process.env.CDK_DEFAULT_REGION!; const app = configureApp(); new JupyterHubConstruct(app, 'jupyterhub', { env: { account, region } }); ================================================ FILE: bin/karpenter.ts ================================================ import KarpenterConstruct from "../lib/karpenter-construct"; import { configureApp } from "../lib/common/construct-utils"; const app = configureApp(); new KarpenterConstruct(app, 'karpenter'); ================================================ FILE: bin/kasten.ts
================================================ import KastenK10Construct from '../lib/kasten-k10-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new KastenK10Construct(app, 'kasten'); ================================================ FILE: bin/keptn-control-plane.ts ================================================ import KeptnControlPlaneConstruct from '../lib/keptn-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new KeptnControlPlaneConstruct(app, 'keptn'); ================================================ FILE: bin/komodor.ts ================================================ // import KomodorConstruct from '../lib/komodor-construct'; // import { configureApp } from '../lib/common/construct-utils'; // const app = configureApp(); // new KomodorConstruct(app, 'komodor'); ================================================ FILE: bin/konveyor.ts ================================================ import { KonveyorConstruct } from "../lib/konveyor-construct"; import { configureApp } from "../lib/common/construct-utils"; const app = configureApp(); new KonveyorConstruct(app, 'konveyor-stack'); ================================================ FILE: bin/kubecost.ts ================================================ import KubecostConstruct from '../lib/kubecost-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new KubecostConstruct(app, 'kubecost'); ================================================ FILE: bin/kubeflow.ts ================================================ import KubeflowConstruct from '../lib/kubeflow-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new KubeflowConstruct(app, 'kubeflow'); ================================================ FILE: bin/kubeshark.ts ================================================ import KubesharkConstruct from 
'../lib/kubeshark-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new KubesharkConstruct(app, 'kubeshark'); ================================================ FILE: bin/multi-cluster-conformitron.ts ================================================ import { configureApp, errorHandler } from '../lib/common/construct-utils'; import { PipelineMultiCluster } from '../lib/multi-cluster-construct/pipeline'; const app = configureApp(); //------------------------------------------- // Multiple clusters, multiple regions. //------------------------------------------- new PipelineMultiCluster().buildAsync(app).catch((error) => { errorHandler(app, "Multi cluster pattern is not setup. It may be due to missing secrets: ", error); }); ================================================ FILE: bin/multi-region.ts ================================================ import { configureApp, errorHandler } from '../lib/common/construct-utils'; import MultiRegionConstruct from '../lib/multi-region-construct'; const app = configureApp(); //------------------------------------------- // Multiple clusters, multiple regions. //------------------------------------------- new MultiRegionConstruct().buildAsync(app, 'multi-region').catch((error) => { errorHandler(app, "Multi region pattern is not setup. It may be due to missing secrets: ", error); }); ================================================ FILE: bin/multi-team.ts ================================================ import { configureApp } from '../lib/common/construct-utils'; import MultiTeamConstruct from '../lib/multi-team-construct'; const app = configureApp(); //------------------------------------------- // Single Cluster with multiple teams. 
//------------------------------------------- new MultiTeamConstruct(app, 'multi-team'); ================================================ FILE: bin/newrelic.ts ================================================ import NewRelicConstruct from '../lib/newrelic-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new NewRelicConstruct(app, 'newrelic-cluster'); ================================================ FILE: bin/nginx.ts ================================================ #!/usr/bin/env node import NginxIngressConstruct from '../lib/nginx-ingress-construct'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new NginxIngressConstruct().buildAsync(app, 'nginx').catch((e) => { errorHandler(app, "NGINX Ingress pattern is not setup. This maybe due to missing secrets for ArgoCD admin pwd.", e); }); ================================================ FILE: bin/paralus.ts ================================================ import ParalusConstruct from '../lib/paralus-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new ParalusConstruct(app, 'paralus'); ================================================ FILE: bin/pipeline-multienv-gitops.ts ================================================ import { configureApp, errorHandler } from '../lib/common/construct-utils'; import PipelineMultiEnvGitops, { populateWithContextDefaults } from '../lib/pipeline-multi-env-gitops'; import * as cdk from 'aws-cdk-lib'; // CDK Default Environment - default account and region const account = process.env.CDK_DEFAULT_ACCOUNT!; const region = process.env.CDK_DEFAULT_REGION!; const env: cdk.Environment = { account: account, region: region }; const app = configureApp(); // These different CDK environments are meant to be used for multi-region/account usage, // where the pipeline, dev cluster, and prod cluster are deployed in separate environments const {
devEnv, pipelineEnv, prodEnv }: { devEnv: cdk.Environment; pipelineEnv: cdk.Environment; prodEnv: cdk.Environment; } = populateWithContextDefaults(app, account, region); //-------------------------------------------------------------------------- // Multiple clusters, multiple regions, multiple teams, GitOps bootstrapped. //-------------------------------------------------------------------------- new PipelineMultiEnvGitops() .buildAsync( app, 'pipeline-multi-env', { devTestEnv: devEnv, pipelineEnv: pipelineEnv, prodEnv: prodEnv, }, { env } ) .catch((e) => { errorHandler( app, 'Pipeline pattern is not setup due to missing secrets for GitHub access.', e ); }); ================================================ FILE: bin/pipeline-multienv-monitoring.ts ================================================ import { configureApp, errorHandler } from '../lib/common/construct-utils'; import { PipelineMultiEnvMonitoring } from '../lib/multi-account-monitoring'; const app = configureApp(); //-------------------------------------------------------------------------- // Multiple clusters, multiple accounts, pipeline and Monitoring //-------------------------------------------------------------------------- new PipelineMultiEnvMonitoring() .buildAsync(app) .catch((e) => { errorHandler(app, "Multi Account Monitoring pattern is not setup due to missing secrets for GitHub \ access and/or CDK Context. See Multi Account Monitoring in the readme for instructions", e); }); ================================================ FILE: bin/pipeline.ts ================================================ import { configureApp, errorHandler } from '../lib/common/construct-utils'; import PipelineConstruct from '../lib/pipeline-stack'; import * as cdk from 'aws-cdk-lib'; //------------------------------------------- // Multiple clusters with deployment pipeline.
//------------------------------------------- const account = process.env.CDK_DEFAULT_ACCOUNT!; const region = process.env.CDK_DEFAULT_REGION!; const env: cdk.Environment = { account: account, region: region }; const app = configureApp(); new PipelineConstruct().buildAsync(app, { env }).catch((e) => { errorHandler(app, "Pipeline pattern is not setup due to missing secrets for GitHub access.", e); }); ================================================ FILE: bin/rafay.ts ================================================ import RafayConstruct from '../lib/rafay-construct'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new RafayConstruct().buildAsync(app, 'rafay-cluster').catch((error) => { /* Pass the caught error to errorHandler as its own argument, as the other bin/ entry points do, instead of concatenating it into the message. */ errorHandler(app, "Rafay pattern is not setup due to missing secrets: ", error); }); ================================================ FILE: bin/secure-ingress-cognito.ts ================================================ import { SecureIngressCognito } from '../lib/secure-ingress-auth-cognito'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; //-------------------------------------------------------------------------- // Single Cluster, Secure Ingress Auth using cognito //-------------------------------------------------------------------------- const app = configureApp(); new SecureIngressCognito() .buildAsync(app, 'secure-ingress') .catch((e) => { errorHandler(app, "Secure Ingress Auth pattern is not setup due to missing secrets for ArgoCD admin pwd.
\ See Secure Ingress Auth in the readme for instructions", e); }); ================================================ FILE: bin/securityhub.ts ================================================ import { configureApp } from '../lib/common/construct-utils'; import { SecurityHubStackSetup } from '../lib/security/securityhub-construct'; const app = configureApp(); new SecurityHubStackSetup(app, 'securityhub-setup'); ================================================ FILE: bin/snyk.ts ================================================ import SnykConstruct from '../lib/snyk-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new SnykConstruct(app, 'snyk-monitor'); ================================================ FILE: bin/starter.ts ================================================ #!/usr/bin/env node import { configureApp } from '../lib/common/construct-utils'; import StarterConstruct from '../lib/starter-construct'; const app = configureApp(); new StarterConstruct().build(app, 'starter-construct'); ================================================ FILE: bin/unionai.ts ================================================ import UnionDataplaneConstruct from '../lib/union-dataplane-construct'; import { configureApp, errorHandler } from '../lib/common/construct-utils'; const app = configureApp(); new UnionDataplaneConstruct().buildAsync(app, 'union-ai-datplane').catch((e) => { errorHandler(app, "Union Dataplane Construct pattern is not setup due to missing secrets for Union client. 
See Union Dataplane Construct in the readme for instructions", e); }); ================================================ FILE: bin/windows.ts ================================================ import { configureApp } from "../lib/common/construct-utils"; import WindowsConstruct from "../lib/windows-construct"; const app = configureApp(); new WindowsConstruct().build(app, "windows"); ================================================ FILE: bin/workloads-codecommit.ts ================================================ import WorkloadsCodeCommitConstruct from '../lib/workloads-codecommit-construct'; import { configureApp } from '../lib/common/construct-utils'; const app = configureApp(); new WorkloadsCodeCommitConstruct(app, 'workloads-codecommit'); ================================================ FILE: ci/buildspec.yml ================================================ version: 0.2 env: variables: CONTEXT_LOCATION: COMMIT_ID: PR_NUMBER: PATTERN_NAME: phases: install: runtime-versions: nodejs: 18 commands: - n 20.10.0 - | if [ ! -z "${COMMIT_ID}" ]; then git fetch origin pull/${PR_NUMBER}/head:pr git checkout main # git merge needs user details, but we don't push anything, so the contents are unimportant git -c "user.name=CI Bot" -c "user.email=dev@null" merge --no-edit ${COMMIT_ID} fi - npm i - make build pre_build: commands: - | [ -z "$CONTEXT_LOCATION" ] || aws s3 cp $CONTEXT_LOCATION . 
build: commands: - export AWS_REGION=us-east-2 && make pattern "${PATTERN_NAME#/do-e2e-test } --verbose --all --require-approval never --force" # finally: # - make destroy-all ================================================ FILE: docs/index.md ================================================ --8<-- "README.md" ================================================ FILE: docs/patterns/backstage.md ================================================ # Backstage on EKS ## Objective [Backstage](https://backstage.io/) is an application that aims to facilitate introduction and maintenance of standards and best practices, across the organization, tying all infrastructure tooling, resources, owners, contributors, and administrators together in one place. The base functionality is provided by the Core component, which is assembled together with Plugins into an Application. Plugins extend the Core with additional functionalities that can be open source, or proprietary to a company. The objective of this pattern is to illustrate how to deploy a Backstage pre-built Docker image, using the [Amazon EKS Blueprints Backstage add-on](https://github.com/aws-quickstart/cdk-eks-blueprints/blob/main/docs/addons/backstage.md). 
## Architecture ![Backstage Architecture](./images/backstage-diagram.png) ## Approach This blueprint will include the following: - A new Well-Architected VPC with both Public and Private subnets - A new Well-Architected EKS cluster in the region and account you specify - An Application Load Balancer (ALB), implementing the Backstage Ingress rules - An Amazon RDS for PostgreSQL instance - A certificate, assigned to the ALB - A Secret in AWS Secrets Manager, storing the database credentials, imported into the cluster via [ExternalsSecretsAddOn](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/external-secrets/) - Other popular add-ons ## Prerequisites Ensure that you have installed the following tools on your machine: - [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) (also ensure it is [configured](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html#getting-started-quickstart-new)) - [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) - [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) - [tsc](https://www.typescriptlang.org/download) - [make](https://www.gnu.org/software/make/) - [Docker](https://docs.docker.com/get-docker/) Let’s start by setting the account and region environment variables: ```sh ACCOUNT_ID=$(aws sts get-caller-identity --query 'Account' --output text) AWS_REGION=$(aws configure get region) ``` Create the [Backstage application](https://backstage.io/docs/getting-started/create-an-app), command reported here for your convenience: ```sh npx @backstage/create-app@latest ``` Build the corresponding [Docker image](https://backstage.io/docs/deployment/docker), commands reported here for your convenience: ```sh cd ./backstage yarn install --frozen-lockfile yarn tsc yarn build:backend --config app-config.yaml ``` Note: if the above command throws an error caused by app-config.yaml not found, you can explicitly set the path to the file: 
```sh yarn build:backend --config $(pwd)/app-config.yaml ``` Then you can progress with the docker image build: ```sh docker image build . -f packages/backend/Dockerfile --tag backstage ``` Note: consider the platform you are building on, and the target platform the image will run on, you might want to use the [--platform option](https://docs.docker.com/engine/reference/commandline/buildx_build/), e.g.: ```sh docker buildx build ... --platform=... ``` Note: If you are running a version of Docker Engine version earlier than 23.0, you might need to enable BuildKit manually, like explained in the [Getting Started section](https://docs.docker.com/build/buildkit/#getting-started) of the BuildKit webpage. (Optional) to show examples on the UI, add to Docker file: ```sh COPY --chown=node:node examples /examples ``` Create an Amazon Elastic Container Registry (ECR) repository, named _backstage_: ```sh aws ecr create-repository --repository-name backstage ``` ```sh DOCKER_IMAGE_ID=... #see output of image id from above image creation aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com docker tag $DOCKER_IMAGE_ID $ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/backstage:latest docker push $ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/backstage:latest ``` Setup a Hosted Zone in Route 53, with your parent domain. The pattern will create a new subdomain with format _{backstage subdomain label}.{parent domain}_. The default value for _{backstage subdomain label}_ is _backstage_ (see parameters below). 
## Deployment Clone the repository: ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` Set the pattern's parameters in the CDK context by overriding the _cdk.json_ file (edit _PARENT_DOMAIN_NAME_ as it fits): ```sh PARENT_DOMAIN_NAME=example.com HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name $PARENT_DOMAIN_NAME --query "HostedZones[].Id" --output text | xargs basename) cat << EOF > cdk.json { "app": "npx ts-node dist/lib/common/default-main.js", "context": { "backstage.image.registry.name": "${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com", "backstage.parent.domain.name":"${PARENT_DOMAIN_NAME}", "backstage.hosted.zone.id": "${HOSTED_ZONE_ID}" } } EOF ``` (Optional) The full list of parameters you can set in the _context_ is: ``` "context": { "backstage.namespace.name": ..., "backstage.image.registry.name": ..., "backstage.image.repository.name": ..., "backstage.image.tag.name": ..., "backstage.parent.domain.name": ..., "backstage.subdomain.label": ..., "backstage.hosted.zone.id": ..., "backstage.certificate.resource.name": ..., "backstage.database.resource.name": ..., "backstage.database.instance.port": ..., "backstage.database.secret.resource.name": ..., "backstage.database.username": ..., "backstage.database.secret.target.name": ..., } ``` You can assign values to the above keys according to the following criteria (values are required where you don't see _default_ mentioned): - "backstage.namespace.name": Backstage's namespace, the default is "backstage" - "backstage.image.registry.name": the image registry for the Backstage Helm chart in Amazon ECR, a value similar to "youraccount.dkr.ecr.yourregion.amazonaws.com" - "backstage.image.repository.name": the image repository for the Backstage Helm chart, the default is "backstage" - "backstage.image.tag.name": the image tag, the default is "latest" - "backstage.parent.domain.name": the parent domain in your Hosted Zone - 
"backstage.subdomain.label": to be used as _{"subdomain.label"}.{"parent.domain.name"}_, the default is "backstage" - "backstage.hosted.zone.id": the Hosted zone ID (format: 20x chars/numbers) - "backstage.certificate.resource.name": resource name of the certificate, registered by the resource provider, the default is "backstage-certificate" - "backstage.database.resource.name": resource name of the database, registered by the resource provider, the default is "backstage-database" - "backstage.database.instance.port": the port the database will use, the default is 5432 - "backstage.database.secret.resource.name": resource name of the database's Secret, registered by the resource provider, the default is "backstage-database-credentials" - "backstage.database.username": the username for the database's credentials, the default is "postgres" - "backstage.database.secret.target.name": the name to be used when creating the Secret, the default is "backstage-database-secret" If you haven't done it before, [bootstrap your cdk account and region](https://docs.aws.amazon.com/cdk/v2/guide/bootstrapping.html). 
Run the following commands: ```sh make deps make build make pattern backstage deploy ``` When deployment completes, the output will be similar to the following: ![Backstage deployment output](./images/backstage-console-output.png) Navigate to the URL indicated by the first line in the output (_backstage-blueprint.BackstagebaseURL ..._); you should see the screen below: ![Backstage console](./images/backstage-screen.png) To see the deployed resources within the cluster, please run: ```sh kubectl get pod,svc,secrets,ingress -A ``` A sample output is shown below: ![Backstage kubectl output](./images/backstage-kubectl-output.png) ## Next steps You can go to the [AWS Blog](https://aws.amazon.com/blogs/) to explore how to use Backstage e.g., [as an API Developer Portal for Amazon API Gateway](https://aws.amazon.com/blogs/opensource/how-traveloka-uses-backstage-as-an-api-developer-portal-for-amazon-api-gateway/) or [to provision infrastructure using AWS Proton](https://aws.amazon.com/blogs/containers/provisioning-infrastructure-using-the-aws-proton-open-source-backstage-plugin/). On the Backstage website you can also see other examples of [how to use and expand Backstage](https://backstage.io/demos/). ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern backstage destroy ``` ================================================ FILE: docs/patterns/batch.md ================================================ # AWS Batch on Amazon EKS Pattern ## Objective AWS Batch helps you run batch computing workloads on AWS. Using Amazon EKS as the compute resource, you can now schedule and scale batch workloads into a new or existing EKS cluster. As part of the deployment, AWS Batch doesn't create, administer, or perform lifecycle operations of the EKS cluster, but will only scale up and down the nodes managed by AWS Batch and run pods on those nodes to complete batch jobs. 
The objective of this pattern is to deploy AWS Batch on Amazon EKS using EKS Blueprints with the following features in place: - Batch addon implemented - Batch Team defined with a sample compute environment and job queue (as defined under `lib/teams/team-batch`) - This can be customized based on your needs - Fluent Bit addon implemented to monitor AWS Batch on Amazon EKS jobs using CloudWatch, with the proper permissions for sending logs ================================================ FILE: docs/patterns/crossplane-argocd-gitops.md ================================================ # GitOps based Multi-cluster add-on and Apps Management using Crossplane and ArgoCD ## Objective The objective of this pattern is to provide centralized management of Amazon EKS add-ons, Kubernetes Applications and Helm charts in workload clusters. This approach consists of a Management Cluster and multiple workload clusters. The Management Cluster is created with ArgoCD and Crossplane add-ons. The platform team creates Crossplane Manifest files for Amazon EKS add-ons/Kubernetes Applications/Helm charts and pushes them to the GitOps Repo. The ArgoCD Application Controller in the Management Cluster reconciles these Crossplane Manifests and deploy them into Management Cluster. The Crossplane Controller in the Management Cluster deploys the Amazon EKS add-ons/Kubernetes Applications/Helm charts into the Workload Clusters. This helps platform teams to simplify the process of deploying add-ons and Apps from a central Management Cluster. In this Solution, we use CDK to deploy AWS CodePipeline which monitors this platform repo and deploy the Management and Workload Clusters using CDK EKS Blueprints. 
## Architecture ![crossplane-argocd-gitops](./images/crossplane-argocd-gitops.png) ## Approach This blueprint will include the following: * AWS CodePipeline which deploys the Management and Workload Clusters * A new Well-Architected EKS cluster `eks-mgmt-cluster` and two workload EKS Clusters `workload-amd-1-29-blueprint` and `workload-arm-1-29-blueprint` in the region and account you specify. * [Amazon VPC CNI add-on (VpcCni)](https://docs.aws.amazon.com/eks/latest/userguide/managing-vpc-cni.html) into your cluster to support native VPC networking for Amazon EKS. * The Management Cluster is deployed with the following add-ons. * Upbound Universal Crossplane Provider * Upbound AWS Family Crossplane Provider * Upbound AWS EKS Crossplane Provider * Kubernetes Crossplane Provider * Helm Crossplane Provider * Secrets Store AddOn * ArgoCD add-on * The ArgoCD add-on is bootstrapped with [GitOps](https://github.com/aws-samples/eks-blueprints-workloads) which contains Crossplane Manifest files to deploy EKS add-ons, Kubernetes Manifests and also Helm Charts. ## GitOps Configuration For GitOps, the blueprint bootstrap the ArgoCD add-on and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. [helm](https://helm.sh/docs/intro/install/) 6. GitHub Access Token for this repo and AWS secret ### Create AWS Secret Manager Secret Create a plain-text Amazon secret to hold a fine-grained GitHub access token for this repo in the desired region, and set its name as a value to the GITHUB_SECRET environment variable. 
Default value is `cdk_blueprints_github_secret`. > **WARNING:** When switching the CDK between regions, remember to replicate this secret! ```shell export ACCOUNT_ID=$(aws sts get-caller-identity --output text --query Account) export AWS_REGION="us-west-2" export CDK_REPO_GITHUB_PAT_TOKEN="REPLACE_WITH_YOUR_GITHUB_TOKEN" export CDK_REPO_AWS_SECRET_NAME="cdk_blueprints_github_secret" aws secretsmanager create-secret --region $AWS_REGION \ --name $CDK_REPO_AWS_SECRET_NAME \ --description "GitHub Personal Access Token for CodePipeline to access GitHub account" \ --secret-string $CDK_REPO_GITHUB_PAT_TOKEN ``` ## Deploy 1. Clone the repository and install dependency packages. This repository contains CDK v2 code written in TypeScript. ``` git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns npm i ``` 2. Execute the commands below to bootstrap the AWS environment ``` cdk bootstrap aws://$ACCOUNT_ID/$AWS_REGION ``` 3. Run the following command from the root of this repository to deploy the pipeline stack: ``` make clean make build make list make pattern crossplane-argocd-gitops deploy ``` ## Cluster Access ### View the CodePipeline ![codepipeline1](./images/codepipeline1.png) ![codepipeline2](./images/codepipeline2.png) ### Access the Management EKS cluster In this section, let us create a kube-context for the Management cluster and ensure that the ArgoCD and Crossplane add-ons are deployed successfully. 1. Run the below command to get the AWS command from CloudFormation Stack `eks-mgmt-cluster-stage-eks-mgmt-cluster-stage-blueprint` outputs. The example command looks like below. ```shell export CFNOutputKey=$(aws cloudformation describe-stacks \ --stack-name eks-mgmt-cluster-stage-eks-mgmt-cluster-stage-blueprint \ --query 'Stacks[].Outputs[].OutputKey' | jq -r '.[]|select(. 
| startswith("mgmtclusterstageblueprintConfigCommand"))') echo $CFNOutputKey export mgmtclusterstageblueprintConfigCommand=$(aws cloudformation describe-stacks \ --stack-name eks-mgmt-cluster-stage-eks-mgmt-cluster-stage-blueprint \ --query 'Stacks[].Outputs[?OutputKey==`'$CFNOutputKey'`].OutputValue' \ --output text) echo $mgmtclusterstageblueprintConfigCommand ``` 2. Run below command to create the kube-context for the Management cluster. ```shell $mgmtclusterstageblueprintConfigCommand ``` The output will look like below. ```shell Updated context arn:aws:eks:us-west-2:ACCOUNT_ID:cluster/eks-eks-mgmt-cluster in /Users//.kube/config ``` 3. Copy the context in the output above and set an environment variable ```shell export MANAGEMENT_CLUSTER_CONTEXT="arn:aws:eks:${AWS_REGION}:${ACCOUNT_ID}:cluster/eks-eks-mgmt-cluster" echo "export MANAGEMENT_CLUSTER_CONTEXT=${MANAGEMENT_CLUSTER_CONTEXT}" >> ~/.bash_profile ``` 4. Run below command to validate the access to the cluster ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get node ``` The output will like below. ```shell NAME STATUS ROLES AGE VERSION ip-10-0-137-3.ec2.internal Ready 18h v1.29.6-eks-1552ad0 ip-10-0-169-194.ec2.internal Ready 18h v1.29.6-eks-1552ad0 ``` 5. Run below command to get the list of Crossplane Providers deployed in the cluster ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get providers.pkg.crossplane.io ``` The output will like below. ```shell NAME INSTALLED HEALTHY PACKAGE AGE helm-provider True True xpkg.upbound.io/crossplane-contrib/provider-helm:v0.19.0 18h kubernetes-provider True True xpkg.upbound.io/crossplane-contrib/provider-kubernetes:v0.13.0 18h provider-aws-eks True True xpkg.upbound.io/upbound/provider-aws-eks:v1.1.0 18h upbound-provider-family-aws True True xpkg.upbound.io/upbound/provider-family-aws:v1.13.0 ``` 6. Run below command to get the Crossplane Providers pods in the `upbound-system` Namespace. 
```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get pod -n upbound-system ``` The output will look like below. ```shell NAME READY STATUS RESTARTS AGE crossplane-594b65bfdb-pgkxf 1/1 Running 0 6d8h crossplane-rbac-manager-86c74cf5d-tjcw8 1/1 Running 0 6d8h helm-provider-4d90a08b9ede-7c874b858b-pp26d 1/1 Running 0 47h kubernetes-provider-a3cbbe355fa7-55846cfbfb-6tpcl 1/1 Running 0 25h provider-aws-eks-23042d28ed58-66d9db8476-jr6mb 1/1 Running 0 6d8h upbound-provider-family-aws-bac5d48bd353-64845bdcbc-4vpn6 1/1 Running 0 6d8h ``` 7. Run below command to get the ArgoCD pods deployed in the `argocd` Namespace. ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get pod -n argocd ``` The output will look like below. ```shell NAME READY STATUS RESTARTS AGE blueprints-addon-argocd-application-controller-0 1/1 Running 0 24h blueprints-addon-argocd-applicationset-controller-7b78c7fc94ls9 1/1 Running 0 24h blueprints-addon-argocd-dex-server-6cf94ddc54-dfhv7 1/1 Running 0 24h blueprints-addon-argocd-notifications-controller-6f6b7d95cdd2tl 1/1 Running 0 24h blueprints-addon-argocd-redis-b8dbc7dc6-h4bs8 1/1 Running 0 24h blueprints-addon-argocd-repo-server-fd57dc686-zkbsm 1/1 Running 0 4h15m blueprints-addon-argocd-server-84c8b597c9-98c95 1/1 Running 0 24h ``` ### Access to the Workload clusters using IAM role `eks-workload-connector-role` Note that we create and add an IAM role eks-workload-connector-role with system:masters RBAC access to both of the workload clusters, i.e., workload-amd-1-29-blueprint and workload-arm-1-29-blueprint, as part of the Stack creation. The Upbound AWS EKS Provider Pod will use its IRSA role to assume the `eks-workload-connector-role` to gain access to the workload clusters. The `sts:AssumeRole` IAM permission is already added to the IRSA role during the Management cluster creation. 
We will create two Crossplane objects of type `ClusterAuth` to create kube-context to access the Workload clusters using the IAM role `eks-workload-connector-role` We will also create two Crossplane objects of type `Addon` to deploy Amazon EKS add-ons into the Workload clusters. To deploy add-ons, the AWS EKS Provider Pod needs `eks:*` IAM permissions, which are already added to `eks-workload-connector-role` during cluster creation. Note this IAM permissions can be made very granular to provide least privileged access to workload clusters. ### Access the Workload EKS cluster `workload-amd-1-29-blueprint` In this section, let us create a kube-context and verify access to the Workload cluster `workload-amd-1-29-blueprint` >Note that we have added an IAM role eks-workload-connector-role with system:masters RBAC access to both of the workload clusters i.e. workload-amd-1-29-blueprint and workload-arm-1-29-blueprint. 1. Run the command to create the kube-context for the cluster. ```shell aws eks update-kubeconfig --name workload-amd-1-29-blueprint --region ${AWS_REGION} --role-arn "arn:aws:iam::${ACCOUNT_ID}:role/eks-workload-connector-role" ``` 2. Copy the context in the output above and set an environment variable. ```shell export WORKLOAD_CLUSTER1_CONTEXT="arn:aws:eks:${AWS_REGION}:${ACCOUNT_ID}:cluster/workload-amd-1-29-blueprint" echo "export WORKLOAD_CLUSTER1_CONTEXT=${WORKLOAD_CLUSTER1_CONTEXT}" >> ~/.bash_profile ``` 3. Run below command to validate the access to the cluster. ```shell kubectl --context $WORKLOAD_CLUSTER1_CONTEXT get node ``` ### Access the Workload EKS cluster `workload-arm-1-29-blueprint` In this section, let us create a kube-context and verify access to the Workload cluster `workload-arm-1-29-blueprint` >Note that we have added an IAM role eks-workload-connector-role with system:masters RBAC access to both of the workload clusters i.e. workload-amd-1-29-blueprint and workload-arm-1-29-blueprint. 1. 
Run the command to create the kube-context for the cluster. ```shell aws eks update-kubeconfig --name workload-arm-1-29-blueprint --region ${AWS_REGION} --role-arn "arn:aws:iam::${ACCOUNT_ID}:role/eks-workload-connector-role" ``` 2. Copy the context in the output above and set an environment variable. ```shell export WORKLOAD_CLUSTER2_CONTEXT="arn:aws:eks:${AWS_REGION}:${ACCOUNT_ID}:cluster/workload-arm-1-29-blueprint" echo "export WORKLOAD_CLUSTER2_CONTEXT=${WORKLOAD_CLUSTER2_CONTEXT}" >> ~/.bash_profile ``` 3. Run below command to validate the access to the cluster. ```shell kubectl --context $WORKLOAD_CLUSTER2_CONTEXT get node ``` ## Test ### Install the ArgoCD CLI 1. Install the ArgoCD CLI as per the [docs](https://argo-cd.readthedocs.io/en/stable/cli_installation/) 2. Get the ArgoCD Admin password using below command. ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d; echo ``` 3. Open a **New Terminal** and Run a local proxy server for the ArgoCD Server. ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT port-forward svc/blueprints-addon-argocd-server -n argocd 8080:443 ``` 4. In the current Terminal run the ArgoCD login command, replacing `ARGOCD_ADMIN_PASSWORD` with the password retrieved in step 2. ```shell argocd login localhost:8080 --username admin --password ARGOCD_ADMIN_PASSWORD ``` 5. Add Management EKS cluster to ArgoCD. ```shell argocd cluster add $MANAGEMENT_CLUSTER_CONTEXT ``` The output will look like below. ```shell WARNING: This will create a service account `argocd-manager` on the cluster referenced by context `arn:aws:eks:us-west-2:ACCOUNT_ID:cluster/eks-mgmt-cluster` with full cluster level privileges. Do you want to continue [y/N]? y INFO[0004] ServiceAccount "argocd-manager" already exists in namespace "kube-system" INFO[0004] ClusterRole "argocd-manager-role" updated INFO[0005] ClusterRoleBinding "argocd-manager-role-binding" updated Cluster 'https://0F745A41ECA76297CBF070C032932033.sk1.us-west-2.eks.amazonaws.com' added ``` 6. 
Run the below command to get the list of ArgoCD Applications. ```shell argocd app list ``` The output will look like below. ```shell NAME CLUSTER NAMESPACE PROJECT STATUS HEALTH SYNCPOLICY CONDITIONS REPO PATH TARGET argocd/bootstrap-apps https://kubernetes.default.svc argocd default Synced Healthy Auto-Prune https://github.com/aws-samples/eks-blueprints-workloads ./crossplane-argocd-gitops/envs/dev main argocd/team-spock https://kubernetes.default.svc argocd default Synced Healthy Auto-Prune https://github.com/aws-samples/eks-blueprints-workloads ./teams/team-spock/dev main ``` ### Validate EKS add-ons deployment in Workload Clusters 1. Run the below command to get the list of `ProviderConfig` Crossplane CRD objects deployed in the Management cluster ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get providerconfigs.aws.upbound.io ``` The output will look like below. ```shell NAME AGE common-provider-config-aws 23h ``` 2. Run the below command to get the list of `Addon` Objects deployed in the Management cluster. ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get addons.eks.aws.upbound.io ``` The output will look like below. ```shell NAME READY SYNCED EXTERNAL-NAME AGE addon-eks-pod-identity-agent-amd-1-29 True True workload-amd-1-29-blueprint:eks-pod-identity-agent 4h15m addon-eks-pod-identity-agent-arm-1-29 True True workload-arm-1-29-blueprint:eks-pod-identity-agent 4h15m ``` 3. Go to the Workload EKS Clusters and Ensure that EKS add-on is deployed successfully. ![workload-amd-1-29-blueprint EKS add-on](./images/amd-add-on.png) ![workload-arm-1-29-blueprint EKS add-on](./images/arm-add-on.png) ### Validate Kubernetes Manifests deployment in Workload clusters 1. Run the below command to get the list of Crossplane Kubernetes `ProviderConfig` objects deployed in the Management cluster. ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get providerconfigs.kubernetes.crossplane.io ``` The output will look like below. 
```shell NAME AGE provider-config-k8s-workload-amd-1-29-blueprint 4h31m provider-config-k8s-workload-arm-1-29-blueprint 4h40m ``` 2. Run the below command to get the list of Namespaces in the Workload cluster `workload-amd-1-29-blueprint` ```shell kubectl --context $WORKLOAD_CLUSTER1_CONTEXT get ns ``` The output will look like below. ```shell NAME STATUS AGE default Active 8d external-secrets Active 8d kube-node-lease Active 8d kube-public Active 8d kube-system Active 8d test-namespace-workload-amd-1-29-blueprint Active 4h9m ``` 3. Run the below command to get the list of Namespaces in the Workload cluster `workload-arm-1-29-blueprint` ```shell kubectl --context $WORKLOAD_CLUSTER2_CONTEXT get ns ``` The output will look like below. ```shell NAME STATUS AGE default Active 8d external-secrets Active 8d kube-node-lease Active 8d kube-public Active 8d kube-system Active 8d test-namespace-workload-arm-1-29-blueprint Active 4h9m ``` ### Validate Helm Chart deployment in Workload clusters 1. Run the below command to get the list of Crossplane Helm Provider Objects deployed in the Management Cluster. ```shell kubectl --context $MANAGEMENT_CLUSTER_CONTEXT get providerconfigs.helm.crossplane.io ``` The output will look like below. ```shell NAME AGE provider-config-helm-workload-amd-1-29-blueprint 4h37m provider-config-helm-workload-arm-1-29-blueprint 4h46m ``` 2. Run the below command to get the list of helm charts in the Workload Cluster `workload-amd-1-29-blueprint` ```shell helm --kube-context $WORKLOAD_CLUSTER1_CONTEXT list -A ``` The output will look like below. ```shell NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION blueprints-addon-external-secrets external-secrets 1 2024-05-07 05:25:31.465715836 +0000 UTC deployed external-secrets-0.9.9 v0.9.9 test-helm-workload-amd-1-29-blueprint default 1 2024-05-15 06:39:17.325950143 +0000 UTC deployed nginx-17.0.1 1.26.0 ``` 3. 
Run the below command to get the list of Helm Charts in the Workload cluster `workload-arm-1-29-blueprint` ```shell helm --kube-context $WORKLOAD_CLUSTER2_CONTEXT list -A ``` The output will look like below. ```shell NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION blueprints-addon-external-secrets external-secrets 1 2024-05-07 05:26:52.028907405 +0000 UTC deployed external-secrets-0.9.9 v0.9.9 test-helm-workload-arm-1-29-blueprint default 1 2024-05-15 06:39:17.222351682 +0000 UTC deployed nginx-17.0.1 1.26.0 ``` ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern crossplane-argocd-gitops destroy ``` The above command deletes the AWS CodePipeline `crossplane-argocd-gitops`. However to complete the Cleanup, delete the following CloudFormation Stacks manually using AWS Console or AWS CLI using below commands. ```sh aws cloudformation delete-stack --stack-name workload-amd-1-29-workload-amd-1-29-blueprint aws cloudformation delete-stack --stack-name workload-arm-1-29-workload-arm-1-29-blueprint aws cloudformation delete-stack --stack-name mgmt-cluster-stage-mgmt-cluster-stage-blueprint ``` ================================================ FILE: docs/patterns/custom-networking-with-ipv4.md ================================================ # Custom Networking on EKS On Amazon EKS clusters, the default Container Networking Interface(CNI) is implemented by the Amazon VPC CNI plugin. When VPC CNI is used in EKS clusters, by default the VPC CNI assigns pods an IP address that's selected from the primary subnet of the VPC. The primary subnet is the subnet CIDR that the primary Elastic Network Interface(ENI) is attached to; usually it's the subnet of the worker node/host in the EKS cluster. If the primary subnet CIDR is too small, the CNI may not be able to have enough IP addresses to assign to the pods running in the cluster. This is a common challenge for EKS IPv4 clusters. 
Custom Networking provides a solution to the IP exhaustion issue by assigning the Pod IPs from secondary VPC address spaces(CIDR). When custom networking is enabled in VPC CNI, it creates secondary ENIs in the subnet defined under a custom resource named ENIConfig that includes an alternate subnet CIDR range (carved from a secondary VPC CIDR). The VPC CNI assigns pods IP addresses from the CIDR range defined in the ENIConfig Custom Resource Definition(CRD). Using the Custom Networking with IPv4 pattern, you should be able to stand up an EKS cluster with VPC CNI installed and configured with custom networking enabled. This pattern deploys the following resources: - Creates EKS Cluster Control plane with a managed node group - Deploys supporting add-ons: VpcCni, CoreDns, KubeProxy, AWSLoadBalancerController - Enables Custom Networking configuration in VpcCni AddOn ## Prerequisites: Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. [yq](https://github.com/mikefarah/yq/#install) 6. `make` Amazon EKS add-ons are only available with Amazon EKS clusters running Kubernetes version 1.18 and later. ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK ### Check Versions Make sure that, following versions are installed. Node version is a current stable node version 18.x. 
``` node -v v18.12.1 ``` NPM version must be 8.4 or above: ``` npm -v 8.19.2 ``` ### Clone the cdk-blueprints-patterns github repository ``` git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git ``` ### Install project dependencies Once you have cloned the above repository, you can open it using your favourite IDE and run the below command to install the dependencies and build the existing patterns. `make deps` ### To view patterns that are available to be deployed, execute the following: ``` npm i make build ``` To list the existing CDK EKS Blueprints patterns, run `make list` ### Bootstrap your CDK environment `npx cdk bootstrap` You can now proceed with deployment of the `custom-networking-ipv4` pattern. ### To deploy the custom-networking-ipv4 pattern, run `make pattern custom-networking-ipv4 deploy` Once the deployment is successful, run `update-kubeconfig` command to update the kubeconfig file with required access. You should be able to get the command from CDK output message. ``` aws eks update-kubeconfig --name custom-networking-ipv4-blueprint --region $AWS_REGION --role-arn arn:aws:iam::$AWS_ACCOUNT_ID:role/custom-networking-ipv4-bl-customnetworkingipv4blue-2SR7PW3UBLIH ``` You can verify the resources created by executing ``` kubectl get node -o wide ``` Output: ``` NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME ip-10-0-18-208.us-east-2.compute.internal Ready 70m v1.24.11-eks-a59e1f0 10.0.18.208 18.116.23.237 Amazon Linux 2 5.10.173-154.642.amzn2.x86_64 containerd://1.6.19 ip-10-0-61-228.us-east-2.compute.internal Ready 70m v1.24.11-eks-a59e1f0 10.0 ``` ### Under the Hood This pattern first creates secondary CIDRs and secondary subnets with specified range of CIDRs as shown below in resourceProvider. 
Then the VPC CNI addon sets up custom networking based on the parameters `awsVpcK8sCniCustomNetworkCfg`, `eniConfigLabelDef: "topology.kubernetes.io/zone"` for your Amazon EKS cluster workloads with secondary subnet ranges. * When the secondary CIDRs are passed to the VPC resource provider, the secondary subnets are created and registered under names `secondary-cidr-subnet-${order}` with the resource providers. * We enable CNI plugin with custom pod networking with below environment variables: * `AWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG` = `true` * `ENI_CONFIG_LABEL_DEF` = `topology.kubernetes.io/zone` This deploys an ENIConfig custom resource for pod subnets (one per availability zone). ``` import 'source-map-support/register'; import * as cdk from 'aws-cdk-lib'; import * as blueprints from '@aws-quickstart/eks-blueprints'; const app = new cdk.App(); const addOn = new blueprints.addons.VpcCniAddOn({ customNetworkingConfig: { subnets: [ blueprints.getNamedResource("secondary-cidr-subnet-0"), blueprints.getNamedResource("secondary-cidr-subnet-1"), blueprints.getNamedResource("secondary-cidr-subnet-2"), ] }, awsVpcK8sCniCustomNetworkCfg: true, eniConfigLabelDef: 'topology.kubernetes.io/zone' }); const blueprint = blueprints.EksBlueprint.builder() .addOns(addOn) .resourceProvider(blueprints.GlobalResources.Vpc, new blueprints.VpcProvider(undefined, { primaryCidr: "10.2.0.0/16", secondaryCidr: "100.64.0.0/16", secondarySubnetCidrs: ["100.64.0.0/24","100.64.1.0/24","100.64.2.0/24"] })) .build(app, 'my-stack-name'); ``` In the diagram shown below, a secondary CIDR (100.64) is assigned to each private subnet that gets created in an availability zone. Worker nodes in the EKS cluster still get an IP address from the Primary CIDRs (10.0) range whereas the pods get an IP address from the secondary CIDR range. 
![Custom-NW-IPv4](./images/custom-nw-mng.png) This can be verified by issuing the following command ``` kubectl get eniconfig ``` Output: ``` NAME AGE us-east-2a 47m us-east-2b 47m us-east-2c 47m ``` An ENIConfig custom resource is created in each AZ. Number of secondary ENIs associated with the Worker node varies by instance type. ![Custom-NW-MNG](./images/custom-nw-mng.png) ## Additional Configuration Options VPC CNI AddOn provides some knobs to add additional advanced configuration on top of custom networking. ### Prefix Delegation When using custom networking mode, since the node’s primary ENI is no longer used to assign Pod IP addresses, there is a decrease in the number of Pods that can run on a given EC2 instance type. To work around this limitation you can use prefix delegation with custom networking. This is an important capability because when you use custom networking, only Pods that are configured to use hostNetwork are “bound” to the host’s primary ENI. All other Pods are bound to secondary ENIs. However, with prefix delegation enabled, each secondary IP is replaced with a /28 prefix which negates the IP address loss when you use custom networking. By default, Prefix Delegation is turned off in Vpc Cni. To check this, run the following command. ``` kubectl get ds aws-node -o yaml -n kube-system | yq '.spec.template.spec.containers[].env' ``` Output: ``` [...] - name: ENABLE_PREFIX_DELEGATION value: "false" [...] ``` Consider the maximum number of Pods for an m5.large instance with custom networking. When using custom networking, the maximum number of Pods you can run without prefix delegation enabled is 20. 
Download and run max-pods-calculator.sh script to calculate the maximum number of pods: ``` curl -o max-pods-calculator.sh https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/files/max-pods-calculator.sh chmod +x max-pods-calculator.sh ./max-pods-calculator.sh \ --instance-type m5.large \ --cni-version 1.12.5-eksbuild.2 \ --cni-custom-networking-enabled ``` Output: ``` 20 ``` To turn on `Prefix Delegation`, use the following command ``` kubectl set env daemonset aws-node -n kube-system ENABLE_PREFIX_DELEGATION=true ``` Output: `110` ![Custom-NW-Bar-Chart](./images/Custom-nw-bar-chart.png) The reason max-pods is 110 instead of 290 is that the instance has a relatively low number of vCPUs. In addition, the Kubernetes community recommends setting max Pods no greater than 10 * number of cores, up to 110. Since Vpc Cni runs as a daemonset, you’d need to create new nodes for this to take effect. The number of ENIs and IP addresses in a pool are configured through environment variables called `WARM_ENI_TARGET`, `WARM_IP_TARGET`, `MINIMUM_IP_TARGET`. For more details on these options, please refer to [EKS Best Practices Networking](https://aws.github.io/aws-eks-best-practices/networking/vpc-cni/#overview) Guide. ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern custom-networking-ipv4 destroy ``` ================================================ FILE: docs/patterns/generative-ai/showcase.md ================================================ # Using Gen AI to run a prompt showcase with Bedrock and Amazon EKS ## Objective [Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service for using foundation models. It allows you to access models from Amazon and third parties with a single set of APIs for both text generation and image generation. [LangChain](https://python.langchain.com/) provides convenient functions for interacting with Amazon Bedrock's models and related services like vector databases. 
LangChain offers Python and JavaScript libraries. For this workshop, we will use the Python version of LangChain. [Streamlit](https://streamlit.io/) allows us to quickly create web front ends for our Python code, without needing front-end development skills. Streamlit is great for creating proofs-of-concepts that can be presented to a wide audience of both technical and non-technical people. In this pattern we will demonstrate a prompt showcase use case with Gen AI using Bedrock and Amazon EKS. This use case will demonstrate a prompt showcase which uses different prompt templates such as Summarization, Sentiment and Recommendation with user input to generate a response using Generative AI. In this model we will be running a containerized application on Amazon EKS which integrates with Bedrock to provide the required user response. ## Architecture ![Showcase Architecture](../images/generativeai-showcase-architecture.jpg) ## Prerequisites Ensure that you have installed the following tools on your machine: - [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) (also ensure it is [configured](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html#getting-started-quickstart-new)) - Bedrock is currently in preview. 
Please make sure your AWS account is enabled to use Bedrock - [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) - [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) - [tsc](https://www.typescriptlang.org/download) - [make](https://www.gnu.org/software/make/) - [Docker](https://docs.docker.com/get-docker/) Let’s start by setting the account and region environment variables: ```sh ACCOUNT_ID=$(aws sts get-caller-identity --query 'Account' --output text) AWS_REGION=$(aws configure get region) ``` Clone the repository: ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns/lib/generative-ai-showcase/python ``` Create the ECR image repository and push the docker image to ECR for your showcase app: ```sh IMAGE_NAME=bedrock-showcase IMAGE_TAG=v2 aws ecr create-repository --repository-name $IMAGE_NAME aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com docker build -t $IMAGE_NAME . docker tag bedrock-showcase:latest $ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$IMAGE_NAME:$IMAGE_TAG docker push $ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$IMAGE_NAME:$IMAGE_TAG cd ../../../../ ``` ## Deployment If you haven't done it before, [bootstrap your cdk account and region](https://docs.aws.amazon.com/cdk/v2/guide/bootstrapping.html). 
Set the pattern's parameters in the CDK context by overriding the _cdk.json_ file: ```sh cat << EOF > cdk.json { "app": "npx ts-node dist/lib/common/default-main.js", "context": { "bedrock.pattern.name": "showcase", "bedrock.pattern.namespace": "bedrock", "bedrock.pattern.image.name": "${ACCOUNT_ID}.dkr.ecr.$AWS_REGION.amazonaws.com/${IMAGE_NAME}", "bedrock.pattern.image.tag": "${IMAGE_TAG}" } } EOF ``` Run the following commands: ```sh make deps make build make pattern generative-ai-showcase deploy ``` When deployment completes, the output will be similar to the following: ```output ✅ generative-ai-showcase-blueprint ✨ Deployment time: 1287.16s Outputs: generative-ai-showcase-blueprint.generativeaishowcaseblueprintClusterNameA8D25DA0 = generative-ai-showcase-blueprint generative-ai-showcase-blueprint.generativeaishowcaseblueprintConfigCommandC6A8442C = aws eks update-kubeconfig --name generative-ai-showcase-blueprint --region us-east-1 --role-arn arn:aws:iam::XXXXXXXXXX:role/generative-ai-showcase-bl-generativeaishowcaseblue-L18IUPGQ8M2I generative-ai-showcase-blueprint.generativeaishowcaseblueprintGetTokenCommand5AE22878 = aws eks get-token --cluster-name generative-ai-showcase-blueprint --region us-east-1 --role-arn arn:aws:iam::XXXXXXXXXX:role/generative-ai-showcase-bl-generativeaishowcaseblue-L18IUPGQ8M2I Stack ARN: arn:aws:cloudformation:us-east-1:XXXXXXXXXX:stack/generative-ai-showcase-blueprint/cd2c4d90-5317-11ee-9c8d-0e69cfd9ba55 ✨ Total time: 1290.99s ``` To see the deployed resources within the cluster, please run: ```sh kubectl get pod,svc,secrets,ingress -A ``` A sample output is shown below: ```output NAME READY STATUS RESTARTS AGE pod/bedrock-showcase-model-586b558b46-bkwql 1/1 Running 0 60s NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE service/bedrock-showcase-model-service NodePort 172.20.12.47 80:30451/TCP 10m NAME CLASS HOSTS ADDRESS PORTS AGE ingress.networking.k8s.io/bedrock-showcase-model-ingress alb * 
k8s-bedrock-bedrocks-63d6186d4e-765982776.us-east-1.elb.amazonaws.com 80 10m ``` Next, navigate to the URL shown under Ingress to see the screen below and interact with the Generative AI showcase application by selecting different prompts and inputs and viewing the result: ![Showcase application](../images/generativeai-showcase-demo-output.jpg) ## Next steps You can go to [AWS Blogs](https://aws.amazon.com/blogs/) to learn about [New Tools for Building with Generative AI on AWS](https://aws.amazon.com/blogs/machine-learning/announcing-new-tools-for-building-with-generative-ai-on-aws/). Also check out another blog post on [Enabling Foundation Models to Complete Tasks With Agents for Amazon Bedrock](https://aws.amazon.com/blogs/aws/preview-enable-foundation-models-to-complete-tasks-with-agents-for-amazon-bedrock/). ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern generative-ai-showcase destroy ``` ================================================ FILE: docs/patterns/gmaestro.md ================================================ # gMaestro on EKS pattern gMaestro is a Kubernetes cost optimization solution that helps companies reduce spending on un-utilized resources. For additional information, visit [gMaestro documentation](https://docs.gomaestro.org/). This pattern deploys the following resources: - Creates a single EKS cluster that includes a managed node group - Deploys supporting add-ons: ClusterAutoScaler and MetricsServer - Deploys a single granulate-gmaestro deployment with a single pod on the EKS cluster ## Prerequisites Before using gMaestro, you need to: 1. [Sign up](https://app.granulate.io/gMaestroSignup) to the gMaestro platform 2. 
Download a config YAML file - After signing up to gMaestro, navigate to the [Deploy](https://app.granulate.io/deploy) page on the left-hand menu, fill in the required fields and click on "Generate Config File" as shown below: ![GmaestroGenerateConfigFile](images/gmaestro-generate-config-file.png) ![GmaestroConfigFile](images/gmaestro-config-file.png) 3. Create a secret (as a plaintext, not key/value) in AWS Secrets Manager: ```bash export MAESTRO_CLIENT_ID="" export MAESTRO_SECRET_NAME="" aws secretsmanager create-secret --name $MAESTRO_SECRET_NAME --region $AWS_REGION \ --description "Encrypted client ID for Granulate gMaestro" \ --secret-string "$MAESTRO_CLIENT_ID" ``` 4. Follow the usage [instructions](../../README.md#usage) to install the dependencies ## Deployment Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` If you haven't done it before, [bootstrap your cdk account and region](https://docs.aws.amazon.com/cdk/v2/guide/bootstrapping.html). Update `context` in `cdk.json` file located in the `cdk-eks-blueprints-patterns` directory as follows: ```json "context": { "clusterName": "", "namespace": "", } ``` Run the following commands: ```sh make deps make build make pattern gmaestro deploy ``` ## Verify the resources Use the following command to validate that gMaestro installed successfully: ```bash $ kubectl get pods -A | grep granulate-maestro NAMESPACE NAME READY STATUS RESTARTS AGE default granulate-maestro-6947dc87bc-k5nfc 1/1 Running 0 11m ``` After a few seconds, you will gain full visibility into your K8s cluster objects: ![GmaestroRecommendations](images/gmaestro-recommendations.png) The first rightsizing recommendations may take up to 5 minutes to load. ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern gmaestro destroy ``` ## Support If you have questions about gMaestro, catch us [on Slack](https://granulatecommunity.slack.com/archives/C03RK0HN2TU)! 
## Disclaimer This pattern relies on an open-source NPM package gmaestro-eks-blueprints-addon. Please refer to the package npm site for more information. If you have any questions about the npm package or find any defect, please post in the source repo at ================================================ FILE: docs/patterns/graviton.md ================================================ # Graviton on EKS AWS Graviton processors are designed by AWS to deliver the best price performance for your cloud workloads running in Amazon EC2. These processors are ARM chips running on aarch64 architecture. AWS Graviton processors are supported by many Linux operating systems including Amazon Linux 2, Red Hat Enterprise Linux, SUSE, and Ubuntu. Many popular applications and services for security, monitoring and management, containers, and continuous integration and delivery (CI/CD) from AWS and software partners also support AWS Graviton-based instances. AWS Graviton processors feature key capabilities that enable you to run cloud native applications securely, and at scale. EC2 instances powered by AWS Graviton processors are built on the AWS Nitro System that features the AWS Nitro security chip with dedicated hardware and software for security functions, and support for encrypted Amazon Elastic Block Store (EBS) volumes by default. ### Why an M7g instance? There are 7 families of Graviton instances split into 5 categories. General Purpose: M and T families Compute Optimized: C family Memory Optimized: R and X family Storage Optimized: I family Accelerated Computing: G family For a blueprint pattern, the General Purpose and Compute Optimized categories make the most sense, since they are the most common use cases and are all Nitro-Enabled instances. Being Nitro-Enabled means that these instances provide better networking security as well as increased performance compared to non Nitro-Enabled instances. 
In these categories, there are 7 different instance types: M7g, M6g, T4g, C7g, C7gn, C6g, and C6gn. T4g instances are specialized for burstable workloads, and both T4g and M6g instances are Graviton2 chips. M7g instances are Graviton3 chips, which offer 25% better compute performance than Graviton2 and support DDR5 memory that provides 50% more bandwidth compared to DDR4. C6g and C6gn instances are also Graviton2 chips, and C7g instances are specialized for high performance computing. For this general blueprint pattern, the M7g instance is the best option due to the high compute power, memory bandwidth, networking bandwidth, and broad use cases. This pattern deploys the following resources: - Creates EKS Cluster Control plane with a managed node group running on an M family Graviton3 processor ### Addons Not all of the listed EKS addons support the Graviton processors. To find a list of supported addons, visit the [documentation](https://github.com/aws-quickstart/cdk-eks-blueprints/blob/main/docs/addons/index.md). ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. `make` ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` Updating npm ```sh npm install -g npm@latest ``` To view patterns and deploy graviton pattern ```sh make list npx cdk bootstrap make pattern graviton deploy ``` ## Verify the resources Run the update-kubeconfig command. You should be able to get the command from the CDK output message. 
More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name graviton-blueprint --region --role-arn arn:aws:iam::xxxxxxxxx:role/graviton-construct-bluepr-gravitonconstructbluepri-1OZNO42GH3OCB ``` Let's verify the resources created from the steps above. ```sh kubectl get nodes -o json | jq -r '.items[] | "Name: ",.metadata.name,"\nInstance Type: ",.metadata.labels."beta.kubernetes.io/instance-type","\nArch: ",.metadata.labels."beta.kubernetes.io/arch","\n"' # Output shows node on M family Graviton3 processor and ARM architecture ``` ## Cleanup To clean up your EKS Blueprint, run the following command: ```sh make pattern graviton destroy ``` ================================================ FILE: docs/patterns/instana.md ================================================ # IBM Instana on EKS pattern The IBM® Instana® Addon for Amazon EKS Blueprint is designed to enhance observability, monitoring, and management capabilities for applications running on Amazon Elastic Kubernetes Service (EKS). Instana Addon focuses on enhancing the user experience by reducing the complexity and time required to install and configure an Instana host agent on Amazon EKS cluster. This Addon will use IBM® Instana® Agent Operator in the namespace ```instana-agent``` to install and manage Instana Agent. It also configures custom resource values to configure the operator. This pattern deploys the following resources: - Creates EKS Cluster Control plane with public endpoint (for demo purpose only) with a managed node group - Install and set up Instana Agent for monitoring your EKS workloads. (by using the provided environment variable and additional configuration parameters) ## Prerequisites: Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. 
[cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. Instana backend application - Use SaaS (eg [aws](https://aws.amazon.com/marketplace/pp/prodview-hnqy5e3t3fzda?sr=0-1&ref_=beagle&applicationId=AWSMPContessa)) or Install self-hosted Instana backend ([on-premises](https://www.ibm.com/docs/en/instana-observability/current?topic=installing-configuring-self-hosted-instana-backend-premises)) ## Project Setup Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git ``` Go inside project directory (eg. cdk-eks-blueprints-patterns) ```sh cd cdk-eks-blueprints-patterns ``` Install project dependencies. ```sh make deps ``` ## Instana Agent Configuration Go to your Instana Backend application (Instana User Interface), click ... More > Agents > Installing Instana Agents and select 'Kubernetes' platform to get the Instana Agent Key, Instana Service Endpoint, Instana Service port. These steps are also shown in the screenshot below. ![Instana Agent Configuration](./images/instana-agent.png) ## Usage : Using AWS Secret Manager Secrets ### AWS Secret Manager Secrets (Optional) If you wish to use AWS Secret Manager Secrets to pass Instana props (key, endpoint, and port), then you will be required to set up Secrets first. ```shell export SECRET_NAME= export INSTANA_AGENT_KEY= export INSTANA_ENDPOINT_HOST_URL= export INSTANA_ENDPOINT_HOST_PORT= aws secretsmanager create-secret \ --name $SECRET_NAME \ --secret-string "{\"INSTANA_AGENT_KEY\":\"${INSTANA_AGENT_KEY}\", \"INSTANA_ENDPOINT_HOST_URL\":\"${INSTANA_ENDPOINT_HOST_URL}\", \"INSTANA_ENDPOINT_HOST_PORT\":\"${INSTANA_ENDPOINT_HOST_PORT}\" }" ``` secret_name = AWS Secret Manager Secret name (eg. *instana-secret-params*). ### Using AWS Secret Manager Secrets To use AWS Secret Manager Secrets follow these steps: 1. 
The actual settings for the secret name (```secretParamName```) are expected to be specified in the CDK context. Generically it is inside the cdk.context.json file of the current directory or in `~/.cdk.json` in your home directory. Example settings: Update the context in `cdk.json` file located in `cdk-eks-blueprints-patterns` directory ```json "context": { "secretParamName": "instana-secret-params" } ``` 2. Go to project/lib/instana-construct/index.ts ```typescript import { loadYaml } from "@aws-quickstart/eks-blueprints/dist/utils"; import * as cdk from "aws-cdk-lib"; import { InstanaOperatorAddon } from "@instana/aws-eks-blueprint-addon"; import { EksBlueprint, utils } from "@aws-quickstart/eks-blueprints"; import { prevalidateSecrets } from "../common/construct-utils"; export const instanaProps: { [key: string]: any } = {}; export default class InstanaConstruct { async buildAsync(scope: cdk.App, id: string) { try { await prevalidateSecrets(InstanaConstruct.name, undefined, 'instana-secret-params'); const secretParamName: string = utils.valueFromContext(scope, "secretParamName", undefined); if(secretParamName != undefined) { instanaProps.secretParamName = secretParamName; } const yamlObject = loadYaml(JSON.stringify(instanaProps)); const stackId = `${id}-blueprint`; const addOns = new InstanaOperatorAddon(yamlObject); EksBlueprint.builder() .account(process.env.CDK_DEFAULT_ACCOUNT!) .region(process.env.CDK_DEFAULT_REGION!) 
.addOns(addOns) .build(scope, stackId); console.log("Blueprint built successfully."); } catch (error) { console.error("Error:", error); throw new Error(`environment variables must be setup for the instana-operator pattern to work`); } } } ``` ## Usage : Using Secrets in the Code ### Setting up environment variable To set the following environment variables from the CLI, use the corresponding values obtained from the Instana Service Endpoint and Port (as shown in the above screenshot), and the Instana Application Key (also shown in the above screenshot): - Set the value of **INSTANA_ENDPOINT_HOST_URL** to the Instana Service Endpoint. - Set the value of **INSTANA_ENDPOINT_HOST_PORT** to the Instana Service Port. - Set the value of **INSTANA_AGENT_KEY** to the Instana Application Key. Set the value of the following environment variable and run it on CLI to set those variables. For an example: ```shell export INSTANA_AGENT_KEY=abc123 export INSTANA_ENDPOINT_HOST_URL=instana.example.com export INSTANA_ENDPOINT_HOST_PORT="443" ``` ### Configure additional configuration parameters. To configure additional parameters for Instana Agent according to your specific use case, follow these steps: - Go to project/lib/instana-construct/index.ts - Add the additional configuration parameters under ```const instanaProps``` variable. For an example: ```typescript export const instanaProps = { agent: { key: process.env.INSTANA_AGENT_KEY,// Mandatory Parameter endpointHost: process.env.INSTANA_ENDPOINT_HOST_URL,//Mandatory Parameter endpointPort: process.env.INSTANA_ENDPOINT_HOST_PORT, // Mandatory Parameter, env: { INSTANA_AGENT_TAGS: "staging", } } }; ``` ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK To view patterns and deploy ```instana-operator``` pattern ```sh make deps make build cdk bootstrap make pattern instana-operator deploy ``` ## Verify the resources Run update-kubeconfig command. You should be able to get the command from CDK output message. 
More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name --region --role-arn arn:aws:iam::xxxxxxxxx:role/eks-blue1-eksblue1AccessRole32C5DF05-1NBFCH8INI08A ``` Let's verify the resources created by the steps above. ```sh kubectl get pods -n instana-agent # Output shows the EKS Managed Node group nodes under instana-agent namespace ``` Output of the above command will be similar to the one below: ```output NAMESPACE NAME READY STATUS RESTARTS AGE instana-agent controller-manager-78479cb596-sktg9 1/1 Running 0 56m instana-agent controller-manager-78479cb596-xz8kn 1/1 Running 0 56m instana-agent instana-agent-gsqx8 1/1 Running 0 56m ``` Run the following command to verify Instana Agent logs ```shell kubectl logs -n instana-agent # Output shows instana agent logs. pod name in this example is instana-agent-gsqx8 ``` ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern instana-operator destroy ``` ## Disclaimer This pattern relies on an open source NPM package [aws-eks-blueprint-addon](https://www.npmjs.com/package/%40instana/aws-eks-blueprint-addon). Please refer to the package npm site for more information. ``` https://www.npmjs.com/package/@instana/aws-eks-blueprint-addon ``` If you have any questions about the npm package or find any defect, please post in the source repo at: https://github.com/instana/instana-eks-blueprint-addon/issues ================================================ FILE: docs/patterns/jupyterhub.md ================================================ # JupyterHub on EKS Pattern ## Objective JupyterHub is a multi-user Hub that spawns, manages, and proxies multiple instances of the single-user Jupyter notebook server. The hub can offer notebook servers to a class of students, a corporate data science workgroup, a scientific research project, or a high-performance computing group. 
The objective of this pattern is to deploy JupyterHub on EKS using EKS Blueprints with the following features in place: - JupyterHub is hosted behind an ALB on EKS cluster across multiple AZs - JupyterHub allows for user friendly DNS name to route traffic to the load balancer, which is a subdomain of a parent domain in a separate account. This is representative of a typical global enterprise domain setup, where a central, global DNS account defines the parent domain (in Route53). The subdomain will be defined in Route53 from this account where the JupyterHub cluster is provisioned. - JupyterHub leverages an identity provider for user authentication. - JupyterHub uses persistent storage that is provided within a file system (i.e. EFS) when the user logs in - JupyterHub uses certificates to provide secured connection to the hub (the load balancer) - The hub has a persistent storage with an EBS volume ## Approach Since we will be defining subdomains for a global enterprise domain across multiple environments, which are as a rule placed in separate AWS accounts, the root domain should be defined in a separate account. Let's call it global DNS account. Our blueprint will then include the following: 1. AWS Loadbalancer controller to provision an ALB instance fronting the Kubernetes Ingress resource for the JupyterHub server. Deployed with a public certificate created from ACM (Certificate ARN must be provided post-creation via CDK context) 2. External DNS to integrate ALB with Route53 and use custom domain to access the hub. 3. Configurations to leverage existing user management via OAuth 2.0 protocol standard (i.e. Auth0). 4. EFS file server for user persistent storage using the Blueprints. 5. EBS volume for hub persistent storage. ## Prerequisites 1. Identity Provider that can be leveraged using OAuth 2.0 protocol. The actual settings are expected to be specified in the CDK context. 
Generically it is inside the cdk.context.json file of the current directory or in `~/.cdk.json` in your home directory. Example settings: ``` { "context": { "callbackUrl": "https://your.hub.domain.com/hub/oauth_callback", "authUrl": "https://some.auth.address.com/authorize", "tokenUrl": "https://some.auth.address.com/oauth/token", "userDataUrl": "https://some.auth.address.com/userinfo", "clientId": "someClientID", "clientSecret": "someClientSecret", "scope": ["openid","name","profile","email"], "usernameKey": "name" } } ``` 2. The parent domain must be defined in a separate account (GLOBAL_DNS_ACCOUNT). 3. The GLOBAL_DNS_ACCOUNT must contain a role with a trust policy to the workload(s) account. We named it `DomainOperatorRole` but you can choose any arbitrary name for it. 1. Policies: `arn:aws:iam::aws:policy/AmazonRoute53DomainsFullAccess` or alternatively you can provide `arn:aws:iam::aws:policy/AmazonRoute53ReadOnlyAccess` and `arn:aws:iam::aws:policy/AmazonRoute53AutoNamingFullAccess`. 2. Trust relationship to allow workload accounts to create subdomains (replace `` with the actual value): ``` { "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "AWS": "arn:aws:iam:::root" }, "Action": "sts:AssumeRole", "Condition": {} } ] } ``` 4. The actual settings for the GLOBAL_DNS_ACCOUNT, hosted zone name, subzone name, and the JupyterHub hub subdomain names are expected to be specified in the CDK context. Generically it is inside the cdk.context.json file of the current directory or in `~/.cdk.json` in your home directory. Example settings: ``` { "context": { "parent.dns.account": "", "parent.hostedzone.name": "domain.com", "dev.subzone.name": "hub.domain.com", "jupyterhub.subzone.name":"your.hub.domain.com" } } ``` ## Deploying Once all pre-requisites are set you should be able to get a working cluster with all the objectives met, including a JupyterHub where users can log in using their credentials from the identity provider given. 
================================================ FILE: docs/patterns/karpenter.md ================================================ # Karpenter on EKS Karpenter add-on is based on the [Karpenter](https://github.com/aws/karpenter) open source node provisioning project. It provides a more efficient and cost-effective way to manage workloads by launching just the right compute resources to handle a cluster's application. Karpenter works by: - Watching for pods that the Kubernetes scheduler has marked as unschedulable, - Evaluating scheduling constraints (resource requests, nodeselectors, affinities, tolerations, and topology spread constraints) requested by the pods, - Provisioning nodes that meet the requirements of the pods, - Scheduling the pods to run on the new nodes, and - Removing the nodes when the nodes are no longer needed To learn more about Karpenter add on usage, please visit the documentation [here](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/karpenter/) This pattern deploys the following resources: - Creates EKS Cluster Control plane with public endpoint (for demo purpose only) with a managed node group - Deploys supporting add-ons: AwsLoadBalancerController, VpcCni, CoreDns, KubeProxy, CertManagerAddOn, KubeStateMetricsAddOn, MetricsServer - Deploy Karpenter on the EKS cluster ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. 
`make` ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` Updating npm ```sh npm install -g npm@latest ``` To view patterns and deploy karpenter pattern ```sh make list npx cdk bootstrap make pattern karpenter deploy ``` ## Verify the resources Run the update-kubeconfig command. You should be able to get the command from the CDK output message. More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name karpenter-blueprint --region --role-arn arn:aws:iam::xxxxxxxxx:role/karpenter-construct-bluepr-karpenterconstructbluepri-1OZNO42GH3OCB ``` Let's verify the resources created from the steps above. ```bash # Assuming add-on is installed in the karpenter namespace. $ kubectl get po -n karpenter NAME READY STATUS RESTARTS AGE karpenter-54fd978b89-hclmp 2/2 Running 0 99m ``` ### Testing with a sample deployment Now that the provisioner is deployed, Karpenter is active and ready to provision nodes. 
Create some pods using a deployment: ```bash cat < ## Prerequisites Ensure that you have installed the following tools on your machine: - [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) (also ensure it is [configured](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html#getting-started-quickstart-new)) - [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) - [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) - [tsc](https://www.typescriptlang.org/download) - [make](https://www.gnu.org/software/make/) Let’s start by setting the account and region environment variables: ```sh ACCOUNT_ID=$(aws sts get-caller-identity --query 'Account' --output text) AWS_REGION=$(aws configure get region) ``` ## Deployment Clone the repository: ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` Set the pattern's parameters in the CDK context by overriding the _cdk.json_ file (edit _PARENT_DOMAIN_NAME_ as it fits): ```sh PARENT_DOMAIN_NAME=example.com HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name $PARENT_DOMAIN_NAME --query "HostedZones[].Id" --output text | xargs basename) cat << EOF > cdk.json { "app": "npx ts-node dist/lib/common/default-main.js", "context": { "konveyor.parent.domain.name":"${PARENT_DOMAIN_NAME}", "konveyor.hosted.zone.id": "${HOSTED_ZONE_ID}" } } EOF ``` (Optional) The full list of parameters you can set in the _context_ is: ``` "context": { "konveyor.namespace.name": ..., "konveyor.parent.domain.name": ..., "konveyor.subdomain.label": ..., "konveyor.hosted.zone.id": ..., "konveyor.certificate.resource.name": ..., } ``` You can assign values to the above keys according to the following criteria (values are required where you don't see _default_ mentioned): - "konveyor.namespace.name": Konveyor's namespace, the default is "konveyor" - "konveyor.parent.domain.name": the parent 
domain in your Hosted Zone - "konveyor.subdomain.label": to be used as _{"subdomain.label"}.{"parent.domain.name"}_, the default is "backstage" - "konveyor.hosted.zone.id": the Hosted zone ID (format: 20x chars/numbers) - "konveyor.certificate.resource.name": resource name of the certificate, registered by the resource provider, the default is "konveyor-certificate" If you haven't done it before, [bootstrap your cdk account and region](https://docs.aws.amazon.com/cdk/v2/guide/bootstrapping.html). Run the following commands: ```sh make deps make build make pattern konveyor deploy ``` When deployment completes, the output will be similar to the following: ## Log in Once the deployment ends, navigate to `https://.` and enter the default admin credentials: - Username: `admin` - Password: `Passw0rd!` ## Konveyor UI Login page Home Page ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh make pattern konveyor destroy ``` ================================================ FILE: docs/patterns/kubeflow.md ================================================ # Kubeflow on EKS The Kubeflow project is dedicated to making deployments of machine learning (ML) workflows on Kubernetes simple, portable and scalable. Our goal is not to recreate other services, but to provide a straightforward way to deploy best-of-breed open-source systems for ML to diverse infrastructures. Anywhere you are running Kubernetes, you should be able to run Kubeflow. This pattern deploys the following resources: - Creates EKS Cluster Control plane with public endpoint (for demo purpose only) with a managed node group - Deploys supporting add-ons: ClusterAutoScaler, AwsLoadBalancerController, VpcCni, CoreDns, KubeProxy, EbsCsiDriver, CertManagerAddOn, KubeStateMetricsAddOn, PrometheusNodeExporterAddOn, AdotCollectorAddOn, AmpAddOn - Deploy Kubeflow on the EKS cluster ## Prerequisites: Ensure that you have installed the following tools on your machine. 1. 
[aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git ``` Updating npm ```sh npm install -g npm@latest ``` To view patterns and deploy kubeflow pattern ```sh make list cdk bootstrap make pattern kubeflow deploy ``` ## Verify the resources Run the update-kubeconfig command. You should be able to get the command from the CDK output message. More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name --region --role-arn arn:aws:iam::xxxxxxxxx:role/kubeflow-blueprint-kubeflowblueprintMastersRole0C1-saJBO ``` Let’s verify the resources created by the steps above. ```sh kubectl get nodes # Output shows the EKS Managed Node group nodes kubectl get ns | grep kubeflow # Output shows kubeflow namespace kubectl get pods --namespace=kubeflow-pipelines # Output shows kubeflow pods ``` ## Execute Machine learning jobs on Kubeflow Log into the Kubeflow pipeline UI by creating a port-forward to the ml-pipeline-ui service:
```sh kubectl port-forward svc/ml-pipeline-ui 9000:80 -n kubeflow-pipelines ``` Then open this URL in your browser: http://localhost:9000/#/pipelines More pipeline examples can be found at https://www.kubeflow.org/docs/components/pipelines/legacy-v1/tutorials/ ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh cdk destroy kubeflow-blueprint ``` ## Disclaimer This pattern relies on an open source NPM package eks-blueprints-cdk-kubeflow-ext. Please refer to the package npm site for more information. https://www.npmjs.com/package/eks-blueprints-cdk-kubeflow-ext ================================================ FILE: docs/patterns/kubeshark.md ================================================ # Kubeshark AddOn [kubeshark](https://github.com/kubeshark/kubeshark) is an API Traffic Analyzer for Kubernetes providing real-time, protocol-level visibility into Kubernetes’ internal network, capturing and monitoring all traffic and payloads going in, out and across containers, pods, nodes and clusters. This pattern deploys the following resources: - Creates EKS Cluster Control plane with managed nodegroup - Install and set up kubeshark ## Prerequisites: Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) ## Project Setup 1.) Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git ``` 2.) Go inside project directory (eg. cdk-eks-blueprints-patterns) ```sh cd cdk-eks-blueprints-patterns ``` 3.) Install project dependencies. ```sh make deps ``` 4.) Import kubeshark ``` npm i kubeshark ``` 5.) To view patterns and deploy kubeshark pattern, run the below command. 
``` make list cdk bootstrap make pattern kubeshark deploy ``` ## Verify the resources Run update-kubeconfig command. You should be able to get the command from CDK output message. More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name --region --role-arn arn:aws:iam::xxxxxxxxx:role/eks-blue1-eksblue1AccessRole32C5DF05-1NBFCH8INI08A ``` 1.) verify the resources created by Steps above. ```sh $ kubectl get deployments -n kube-system NAME READY UP-TO-DATE AVAILABLE AGE blueprints-addon-kubeshark 1/1 1 1 20m ``` 2.) Access to kubeshark. ```sh $ kubectl -n kube-system port-forward svc/kubeshark-front 3000:80 ``` Open the [dashboard](http://localhost:3000) Then you should be able to see view like this ![dashboard](https://raw.githubusercontent.com/kubeshark/assets/master/png/kubeshark-ui.png) 3.) deploy nginx pod using the below command. ``` kubectl apply -f - < cdk.json { "app": "npx ts-node dist/lib/common/default-main.js", "context": { "conformitron.amp.endpoint": "https://aps-workspaces.${AWS_REGION}.amazonaws.com/workspaces/${AMP_WS_ID}/", "conformitron.amp.arn":"arn:aws:aps:${AWS_REGION}:${ACCOUNT_ID}:workspace/${AMP_WS_ID}", "conformitron.amg.endpoint": "${AMG_ENDPOINT_URL}", "conformitron.version": ["1.28","1.29","1.30"], "fluxRepository": { "name": "grafana-dashboards", "namespace": "grafana-operator", "repository": { "repoUrl": "https://github.com/aws-observability/aws-observability-accelerator", "name": "grafana-dashboards", "targetRevision": "main", "path": "./artifacts/grafana-operator-manifests/eks/infrastructure" }, "values": { "GRAFANA_CLUSTER_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/cluster.json", "GRAFANA_KUBELET_DASH_URL" : 
"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/kubelet.json", "GRAFANA_NSWRKLDS_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/namespace-workloads.json", "GRAFANA_NODEEXP_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/nodeexporter-nodes.json", "GRAFANA_NODES_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/nodes.json", "GRAFANA_WORKLOADS_DASH_URL" : "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/workloads.json" }, "kustomizations": [ { "kustomizationPath": "./artifacts/grafana-operator-manifests/eks/infrastructure" } ] } } } EOF ``` You are now ready to deploy the pipeline. Run the following command from the root of this repository to deploy the pipeline stack: ```bash make pattern multi-cluster-conformitron deploy multi-cluster-central-pipeline ``` Now you can go to [AWS CodePipeline console](https://eu-west-1.console.aws.amazon.com/codesuite/codepipeline/pipelines), and see how it was automatically created to deploy multiple Amazon EKS clusters to different environments. ## Grafana Dashboards ![Dashboard 1](images/ConformitronDashboard1.png) ![Dashboard 2](images/ConformitronDashboard2.png) ![Dashboard 3](images/ConformitronDashboard3.png) # SSM Cost Optimizations for conformitron clusters Running all the clusters by default for 24 hours results in a daily spend of $300+ To minimize these costs we have written a systems manager automation which automatically scales down autoscaling group to 0 desired nodes during off-business hours. 
On weekdays 5 PM PST clusters are scaled to 0 -> CRON EXPRESSION: `0 17 ? * MON-FRI *` On weekdays 5 AM PST clusters are scaled to 1 -> CRON EXPRESSION: `0 05 ? * MON-FRI *` On weekends clusters stay scaled to 0. These optimizations bring down the weekly cost to less than $1000, a cost saving of more than 60%. Please find the SSM Automation documents `lib/multi-cluster-construct/resources/cost-optimization/scaleDownEksToZero.yml` and `lib/multi-cluster-construct/resources/cost-optimization/scaleUpEksToOne.yml`. Let's take a look at one of the scripts, `scaleDownEksToZero.yml`: ```yaml schemaVersion: '0.3' ... ... mainSteps: ... ... inputs: Service: eks Api: UpdateNodegroupConfig <---- Update the managed node group clusterName: arm-1-26-blueprint <---- Modify according to your naming convention nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 <---- New Scaling Configuration maxSize: 1 desiredSize: 0 <---- Scale To zero ``` By triggering this automation at 5PM on Weekdays we automatically scale down clusters during off-hours. To run these scripts, you will first have to update them with your own account ID. We will use the `sed` command to automatically update the files: ```bash sed "s/ACCOUNT_ID/$ACCOUNT_ID/g" scaleDownEksToZero.yml > scaleDownEksToZeroNew.yml sed "s/ACCOUNT_ID/$ACCOUNT_ID/g" scaleUpEksToOne.yml > scaleUpEksToOneNew.yml ``` 1. Then navigate to the Systems Manager > Documents and Create a new Automation. ![Cost Optimization Step 1](images/CostOptimizationSSM1.png) 1. Click on JSON and copy over the yml content to create a new runbook ![Cost Optimization Step 2](images/CostOptimizationSSM2.png) 1. Once saved, navigate to EventBridge > Scheduler > Schedules ![Cost Optimization Step 3](images/CostOptimizationEventBridge.png) 1. Create a new schedule with the CRON expression specified above ![Cost Optimization Step 4](images/CostOptimizationEventBridge2.png) 1. 
For Target select "StartAutomationExecution" and type in the document name from step 2 ![Cost Optimization Step 5](images/CostOptimizationEventBridge3.png) ================================================ FILE: docs/patterns/nginx.md ================================================ # NGINX Pattern ## Objective When setting up a target platform across multiple dimensions, the question of ingress must be solved. Ideally, it should work in such a way that workloads provisioned on the target environments could be accessible via internet exposing sub-domains of some predefined global domain name. Communication with the workloads should leverage secure TLS protected Load balancer with proper public (or private) certificate. A single cluster will deploy workloads from multiple teams and each of them should be able to expose workloads routed to their corresponding namespace. So, teams are expected to define ingress objects. In addition, this approach should work not only for a single cluster, but also across multiple regions and environments. ## Approach Since we will be defining subdomains for a global enterprise domain across multiple environments, which are as a rule placed in separate AWS accounts, the root domain should be defined in a separate account. Let's call it the global DNS account. Sub-domains are then defined in the target accounts (let's call them workload accounts). Our blueprint will then include the following: 1. NGINX ingress controller to enable teams to create/configure their ingress objects. 2. External DNS to integrate NGINX and public-facing NLB with Route53. 3. AWS Loadbalancer controller to provision an NLB instance with each cluster fronting the NGINX ingress. Deployed with a public certificate that will also be provisioned as part of the blueprint. 4. Team onboarding that leverages the ingress capabilities through ArgoCD. 5. Other popular add-ons. ## Prerequisites 1. 
`argo-admin-password` secret must be defined as plain text (not key/value) in the `us-west-2` region. 2. The parent domain must be defined in a separate account (GLOBAL_DNS_ACCOUNT). 3. The GLOBAL_DNS_ACCOUNT must contain a role with a trust policy to the workload(s) account. We named it `DomainOperatorRole` but you can choose any arbitrary name for it. 1. Policies: `arn:aws:iam::aws:policy/AmazonRoute53DomainsFullAccess` or alternatively you can provide `arn:aws:iam::aws:policy/AmazonRoute53ReadOnlyAccess` and `arn:aws:iam::aws:policy/AmazonRoute53AutoNamingFullAccess`. 2. Trust relationship to allow workload accounts to create subdomains (replace `` with the actual value): ``` { "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "AWS": "arn:aws:iam:::root" }, "Action": "sts:AssumeRole", "Condition": {} } ] } ``` 4. The actual settings for the GLOBAL_DNS_ACCOUNT, hosted zone name and expected subzone name are expected to be specified in the CDK context. Generically it is inside the cdk.context.json file of the current directory or in `~/.cdk.json` in your home directory. Example settings: ``` { "context": { "parent.dns.account": "", "parent.hostedzone.name": "mycompany.a2z.com", "dev.subzone.name": "dev.mycompany.a2z.com" } } ``` ## Deploying Once all pre-requisites are set you should be able to get a working cluster with all the objectives met, including workloads with an example of team-specific ingress objects. 
================================================ FILE: docs/patterns/observability/existing-eks-apiserver-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/existing-eks-observability-accelerators/existing-eks-apiserver-observability.md', '') }} ================================================ FILE: docs/patterns/observability/existing-eks-awsnative-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/existing-eks-observability-accelerators/existing-eks-awsnative-observability.md', '') }} ================================================ FILE: docs/patterns/observability/existing-eks-mixed-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/existing-eks-observability-accelerators/existing-eks-mixed-observability.md', '') }} ================================================ FILE: docs/patterns/observability/existing-eks-nginx-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/existing-eks-observability-accelerators/existing-eks-nginx-observability.md', '') }} ================================================ FILE: docs/patterns/observability/existing-eks-opensource-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/existing-eks-observability-accelerators/existing-eks-opensource-observability.md', '') }} ================================================ FILE: 
docs/patterns/observability/multi-acc-new-eks-mixed-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/multi-new-eks-observability-accelerators/multi-acc-new-eks-mixed-observability.md', '') }} ================================================ FILE: docs/patterns/observability/multi-account-monitoring.md ================================================ # Multi Account Open Source Observability Pattern. ## Architecture The following figure illustrates the architecture of the pattern we will be deploying for Multi Account Observability pattern using open source tooling such as AWS Distro for Open Telemetry (ADOT), Amazon Managed Service for Prometheus (AMP), Amazon Managed Grafana : ![Architecture](../images/setup_amg-cross-account.png) ## Objective 1. Deploying two production grade Amazon EKS clusters across 2 AWS Accounts ( Prod1, Prod2 account ) through a Continuous Deployment infrastructure pipeline triggered upon a commit to the repository that holds the pipeline configuration in another AWS account (pipeline account). 1. Deploying ADOT add-on, AMP add-on to Prod 1 Amazon EKS Cluster to remote write metrics to AMP workspace in Prod 1 AWS Account. Deploying ADOT add-on, CloudWatch add-on to Prod 2 Amazon EKS Cluster to write metrics to CloudWatch in Prod 2 AWS Account. 1. Configuring GitOps tooling (ArgoCD addon) to support deployment of [ho11y](https://github.com/aws-observability/aws-o11y-recipes/tree/main/sandbox/ho11y) and [yelb](https://github.com/mreferre/yelb) sample applications, in a way that restricts each application to be deployed only into the team namespace, by using ArgoCD projects. 1. Setting up IAM roles in Prod 1 and Prod 2 Accounts to allow an AMG service role in the Monitoring account (4th AWS account) to access metrics from AMP workspace in Prod 1 account and CloudWatch namespace in Prod 2 account. 1. 
Setting Amazon Managed Grafana to visualize AMP metrics from the Amazon EKS cluster in Prod account 1 and CloudWatch metrics on workloads in the Amazon EKS cluster in Prod account 2. ### GitOps configuration For GitOps, the blueprint bootstraps the ArgoCD addon and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. You can find the team-geordie configuration for this pattern in the workload repository under the folder [`team-geordie`](https://github.com/aws-samples/eks-blueprints-workloads/tree/main/teams/team-geordie). ## Prerequisites 1. AWS Control Tower deployed in your AWS environment in the management account. If you have not already installed AWS Control Tower, follow the [Getting Started with AWS Control Tower documentation](https://docs.aws.amazon.com/controltower/latest/userguide/getting-started-with-control-tower.html), or you can enable AWS Organizations in the AWS Management Console account and enable AWS SSO. 1. An AWS account under AWS Control Tower called Prod 1 Account(Workloads Account A aka prodEnv1) provisioned using the AWS Service Catalog Account Factory product AWS Control Tower Account vending process or AWS Organization. 1. An AWS account under AWS Control Tower called Prod 2 Account(Workloads Account B aka prodEnv2) provisioned using the AWS Service Catalog Account Factory product AWS Control Tower Account vending process or AWS Organization. 1. An AWS account under AWS Control Tower called Pipeline Account (aka pipelineEnv) provisioned using the AWS Service Catalog Account Factory product AWS Control Tower Account vending process or AWS Organization. 1. An AWS account under AWS Control Tower called Monitoring Account (Grafana Account aka monitoringEnv) provisioned using the AWS Service Catalog Account Factory product AWS Control Tower Account vending process or AWS Organization. ## Deploying 1. Fork this repository to your GitHub organisation/user. 1. Clone your forked repository. 1. 
Set environment variable `AWS_REGION` with region from where `pipelineEnv` account will be bootstrapped. ```bash export AWS_REGION= ``` 1. Install the AWS CDK Toolkit globally on your machine using ```bash npm install -g aws-cdk ``` 1. Create secret `github-ssh-key` in `AWS_REGION` of `pipelineEnv` account. This secret must contain GitHub SSH private key as a JSON structure containing fields `sshPrivateKey` and `url` in `pipelineEnv` account. This will be used by ArgoCD addon to authenticate against any GitHub repository (private or public). The secret is expected to be defined in the region where the pipeline will be deployed to. For more information on SSH credentials setup see [ArgoCD Secrets Support](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/#secrets-support). ```bash aws secretsmanager create-secret --region $AWS_REGION \ --name github-ssh-key \ --description "SSH private key for ArgoCD authentication to GitHub repository" \ --secret-string '{ "sshPrivateKey":"", "url":"git@github" }' ``` 1. Create `github-token` secret in `AWS_REGION` of `pipelineEnv` account. This secret must be stored as a plain text in AWS Secrets Manager for the GitHub pipeline in `pipelineEnv` account. For more information on how to set it up, please refer to the [docs](https://docs.aws.amazon.com/codepipeline/latest/userguide/GitHub-create-personal-token-CLI.html). The GitHub Personal Access Token should have these scopes: 1. *repo* - to read the repository 2. *admin:repo_hook* - if you plan to use webhooks (enabled by default) ```bash aws secretsmanager create-secret --region $AWS_REGION \ --name github-token \ --description "GitHub Personal Access Token for CodePipeline to access GitHub account" \ --secret-string "" ``` 1. Create secret `cdk-context` in `us-east-1` region as a plain text in AWS Secrets Manager for the GitHub pipeline in `pipelineEnv` account. 
`cdk-context` secret must be stored as a plain text in the following format in AWS Secrets Manager for cdk context for all the 4 AWS accounts used by the solution in `pipelineEnv` account. This secret must be created in `us-east-1` region. ```bash aws secretsmanager create-secret --region us-east-1 \ --name cdk-context \ --description "AWS account details of different environments used by Multi account open source Observability pattern" \ --secret-string '{ "context": { "prodEnv1": { "account": "", "region": "" }, "prodEnv2": { "account": "", "region": "" }, "pipelineEnv": { "account": "", "region": "" }, "monitoringEnv": { "account": "", "region": "" } } }' ``` 1. Create the following IAM users and attach `administrator` policy to required accounts. 1. IAM user `pipeline-admin` with `administrator` policy in Pipeline AWS Account ```bash aws iam create-user \ [--profile pipelineEnv-admin-profile] \ --user-name pipeline-admin aws iam attach-user-policy \ [--profile pipelineEnv-admin-profile] \ --user-name pipeline-admin \ --policy-arn arn:aws:iam::aws:policy/AdministratorAccess ``` 1. IAM user `prod1-admin` with `administrator` policy in Prod 1 AWS Account ```bash aws iam create-user \ [--profile prodEnv1-admin-profile] \ --user-name prod1-admin aws iam attach-user-policy \ [--profile prodEnv1-admin-profile] \ --user-name prod1-admin \ --policy-arn arn:aws:iam::aws:policy/AdministratorAccess ``` 1. IAM user `prod2-admin` with `administrator` policy in Prod 2 AWS Account ```bash aws iam create-user \ [--profile prodEnv2-admin-profile] \ --user-name prod2-admin aws iam attach-user-policy \ [--profile prodEnv2-admin-profile] \ --user-name prod2-admin \ --policy-arn arn:aws:iam::aws:policy/AdministratorAccess ``` 1. 
IAM user `mon-admin` with `administrator` policy in Monitoring AWS Account ```bash aws iam create-user \ [--profile monitoringEnv-admin-profile] \ --user-name mon-admin aws iam attach-user-policy \ [--profile monitoringEnv-admin-profile] \ --user-name mon-admin \ --policy-arn arn:aws:iam::aws:policy/AdministratorAccess ``` 1. IAM user `team-geordi` in Prod 1 and Prod 2 AWS Account ```bash aws iam create-user \ [--profile prodEnv1-admin-profile] \ --user-name team-geordi aws iam create-user \ [--profile prodEnv2-admin-profile] \ --user-name team-geordi ``` 1. IAM user `team-platform` in Prod 1 and Prod 2 AWS Account ```bash aws iam create-user \ [--profile prodEnv1-admin-profile] \ --user-name team-platform aws iam create-user \ [--profile prodEnv2-admin-profile] \ --user-name team-platform ``` 1. Install project dependencies by running `npm install` in the main folder of this cloned repository 1. Bootstrap all 4 AWS accounts using step mentioned for **different environment for deploying CDK applications** in [Deploying Pipelines](https://aws-quickstart.github.io/cdk-eks-blueprints/pipelines/#deploying-pipelines). If you have bootstrapped earlier, please remove them before proceeding with this step. Remember to set `pipelineEnv` account number in `--trust` flag. 
You can also refer to commands mentioned below: ```bash # bootstrap prodEnv1 account with trust access from pipelineEnv account env CDK_NEW_BOOTSTRAP=1 npx cdk bootstrap \ [--profile prodEnv1-admin-profile] \ --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess \ --trust \ aws:///$AWS_REGION # bootstrap prodEnv2 account with trust access from pipelineEnv account env CDK_NEW_BOOTSTRAP=1 npx cdk bootstrap \ [--profile prodEnv2-admin-profile] \ --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess \ --trust \ aws:///$AWS_REGION # bootstrap pipelineEnv account WITHOUT explicit trust env CDK_NEW_BOOTSTRAP=1 npx cdk bootstrap \ [--profile pipelineEnv-admin-profile] \ --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess \ aws:///$AWS_REGION # bootstrap monitoringEnv account with trust access from pipelineEnv account env CDK_NEW_BOOTSTRAP=1 npx cdk bootstrap \ [--profile monitoringEnv-admin-profile] \ --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess \ --trust \ aws:///$AWS_REGION ``` 1. Modify the code of `lib/pipeline-multi-env-gitops/index.ts` and `lib/multi-account-monitoring/pipeline.ts` in your forked repo to point to your GitHub username/organisation. Look for the declared const of `gitOwner` and change it to your GitHub username and commit changes to your forked repo. This is needed because the AWS CodePipeline that will be automatically created will be triggered upon commits that are made in your forked repo. 1. Once all pre-requisites are set you are ready to deploy the pipeline. Run the following command from the root of this repository to deploy the pipeline stack in `pipelineEnv` account: ```bash make build make pattern pipeline-multienv-monitoring deploy multi-account-central-pipeline ``` 1. 
Now you can go to [AWS CodePipeline console](https://eu-west-1.console.aws.amazon.com/codesuite/codepipeline/pipelines), and see how it was automatically created to deploy multiple Amazon EKS clusters to different environments. 1. The deployment automation will create `ampPrometheusDataSourceRole` with permissions to retrieve metrics from AMP in Prod 1 Account, `cloudwatchDataSourceRole` with permissions to retrieve metrics from CloudWatch in Prod 2 Account and `amgWorkspaceIamRole` in monitoring account to assume roles in Prod 1 and Prod 2 account for retrieving and visualizing metrics in Grafana. 1. Next, manually follow the following steps from [AWS Open Source blog](https://aws.amazon.com/blogs/opensource/setting-up-amazon-managed-grafana-cross-account-data-source-using-customer-managed-iam-roles/#:~:text=AWS%20SSO%20in%20the%20management%20account) : 1. AWS SSO in the management account 2. Query metrics in Monitoring account from Amazon Managed Prometheus workspace in Prod 1 Account 3. Query metrics in the Monitoring account from Amazon CloudWatch in Prod 1 Account ![Metrics from AMP](../images/AMG%20-%20Metrics%20from%20AMP.png) ![Metrics from CloudWatch](../images/AMG%20-%20Metrics%20from%20CloudWatch.png) ### Validating Custom Metrics and Traces from ho11y App 1. 
Run the below command in both clusters to generate traces to X-Ray and Amazon Managed Grafana Console out the sample `ho11y` app : ``` frontend_pod=`kubectl get pod -n geordie --no-headers -l app=frontend -o jsonpath='{.items[*].metadata.name}'` loop_counter=0 while [ $loop_counter -le 5000 ] ; do kubectl exec -n geordie -it $frontend_pod -- curl downstream0.geordie.svc.cluster.local; echo ; loop_counter=$[$loop_counter+1]; done ``` ### Traces and Service Map screenshots from X-Ray Console ![Traces of ho11y App on X-Ray Console](../images/XRAY%20-%20Traces.png) ![Service Map of ho11y App on X-Ray Console](../images/XRAY%20-%20Service%20Map.png) ### Custom Metrics from ho11y App on Amazon Managed Grafana Console using AMP as data source ![Exploring Metrics from ho11y with AMP as Data source in AMG Console](../images/Explore%20AMG.png) ### Custom Metrics from ho11y App on Amazon Managed Grafana Console using CloudWatch as data source ![Exploring Metrics from ho11y with CloudWatch as Data source in AMG Console](../images/Explore%20AMG.png) ### Notes This pattern consumes multiple Elastic IP addresses, because 3 VPCs with 3 subnets are created by this pattern in Prod 1 and Prod 2 AWS Accounts. Make sure your account limits for EIP are increased to support additional 9 EIPs (1 per Subnets). 
================================================ FILE: docs/patterns/observability/single-new-eks-apiserver-opensource-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-apiserver-opensource-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-awsnative-fargate-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-fargate-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-gpu-opensource-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-gpu-opensource-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-graviton-opensource-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-graviton-opensource-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-java-opensource-observability.md ================================================ {{ 
external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-java-opensource-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-mixed-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-mixed-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-native.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-nginx-opensource-observability.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-nginx-opensource-observability.md', '') }} ================================================ FILE: docs/patterns/observability/single-new-eks-opensource.md ================================================ {{ external_markdown('https://raw.githubusercontent.com/aws-observability/cdk-aws-observability-accelerator/main/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-opensource-observability.md', '') }} ================================================ FILE: docs/patterns/paralus.md ================================================ # Paralus on EKS The Paralus project is a free open-source tool that enables controlled 
audited access to Kubernetes infrastructure. It comes with just-in-time service account creation and user-level credential management that integrates with your existing RBAC and SSO providers of choice. Learn more by visiting the official documentation page: https://www.paralus.io/docs/ This pattern deploys the following resources: - Creates a single EKS cluster with a public endpoint (for demo purposes only) that includes a managed node group - Deploys supporting AddOns: AwsLoadBalancerController, VpcCni, KubeProxy, EbsCsiDriverAddOn - Deploys Paralus on the EKS cluster **NOTE: Paralus installs a few dependent modules such as Postgres, Kratos, and also comes with a built-in dashboard. At its core, Paralus works atop domain-based routing, inter-service communication, and supports the AddOns mentioned above.** ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) ## Deploy an EKS Cluster using Amazon EKS Blueprints for CDK Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git ``` Update FQDN information for your installation following the example below: ```json fqdn: { "domain": "yourdomain.com", "hostname": "console-eks", "coreConnectorSubdomain": "*.core-connector.eks", "userSubdomain": "*.user.eks" } ``` Updating npm ```sh npm install -g npm@latest ``` To view patterns and deploy the Paralus pattern, run the commands below: ```sh cdk list cdk bootstrap make pattern paralus deploy ``` ## Verify the resources Run the update-kubeconfig command below. You should be able to get the command from the CDK output message once your cluster has finished deploying. 
More information can be found at ```sh aws eks update-kubeconfig --name --region --role-arn arn:aws:iam::1234567890121:role/paralus-blueprint-paralusblueprintMastersRoleF3287-EI3XEBO1107B ``` Let’s verify the resources created by steps above. ```sh kubectl get nodes # Output will provide list of running nodes in your cluster kubectl get ns | grep paralus # Output shows Paralus namespace kubectl get pods --namespace=paralus-system # Output shows Paralus pods blueprints-addon-paralus-contour-contour-7857f4cd9-kqhgp 1/1 Running blueprints-addon-paralus-contour-envoy-mx8z7 2/2 Running blueprints-addon-paralus-fluent-bit-525tt 1/1 Running blueprints-addon-paralus-kratos-588775bc47-wf5gf 2/2 Running blueprints-addon-paralus-kratos-courier-0 2/2 Running blueprints-addon-paralus-postgresql-0 1/1 Running dashboard-6d8b54d78b-d8cks 1/1 Running paralus-66d9bbf698-qznzl 2/2 Running prompt-54d45cff79-h9x95 2/2 Running relay-server-79448564cb-nf5tj 2/2 Running ``` [Learn more](https://www.paralus.io/docs/architecture/core-components) about the various components that are deployed as part of Paralus. ## Configure DNS Settings Once Paralus is installed, continue with following steps to configure DNS settings, reset default password and start using Paralus. 
Obtain the external ip address by executing below command against the installation `kubectl get svc blueprints-addon-paralus-contour-envoy -n paralus-system` ```sh NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE blueprints-addon-paralus-contour-envoy LoadBalancer 10.100.101.216 a814da526d40d4661bf9f04d66ca53b5-65bfb655b5662d24.elb.us-west-2.amazonaws.com 80:31810/TCP,443:30292/TCP 10m ``` Update the DNS settings to add CNAME records: ```sh name: console-eks value: a814da526d40d4661bf9f04d66ca53b5-65bfb655b5662d24.elb.us-west-2.amazonaws.com name: *.core-connector.eks value: a814da526d40d4661bf9f04d66ca53b5-65bfb655b5662d24.elb.us-west-2.amazonaws.com name: *.user.eks value: a814da526d40d4661bf9f04d66ca53b5-65bfb655b5662d24.elb.us-west-2.amazonaws.com ``` Obtain your default password and reset it upon first login `kubectl logs -f --namespace paralus-system $(kubectl get pods --namespace paralus-system -l app.kubernetes.io/name='paralus' -o jsonpath='{ .items[0].metadata.name }') initialize | grep 'Org Admin default password:'` You can now access dashboard with ( refers to the hostname.domain specified during installation ), start importing clusters and using paralus. Note: you can also refer to this [paralus eks blogpost](https://www.paralus.io/blog/eks-quickstart#configuring-dns-settings) ## Paralus Features & Usage ## Configuring centralized kubectl access to clusters Kubectl is one of the most widely used tools to interact with Kubernetes. The command line tool allows you to deploy applications, inspect, and manage resources. It authenticates with the control plane for your cluster and makes API calls to the Kubernetes API. In short if you are working with Kubernetes - you will use kubectl the most. In most modern day scenarios, there are multiple users who are accessing various clusters. This makes it all more important to ensure that every user or group has access to only those resources that they are allowed to. 
A couple of different approaches to achieve this include using namespaces and role-based access control. While these are good, most enterprise-grade application deployments require something more robust. That’s where Paralus comes in. It allows you to configure centralized kubectl access to multiple clusters all from a single dashboard. It allows you to create groups, assign projects and users, and provide access. Check out this blog post for a deep dive into how you can use Paralus to import different clusters to Paralus and configure access to them using zero trust principles built in. [Read More](https://www.paralus.io/blog/centralized-kubectl-access#the-use-case) ## Cleanup To clean up your EKS Blueprints, run the following commands: ```sh cdk destroy paralus-blueprint ``` ## Troubleshooting If the Postgres PVC is not getting a volume allocated, it is probably due to missing IAM permissions. Please refer to the Amazon EBS CSI driver IAM role documentation to assign appropriate policies to the Kubernetes service account. ## Disclaimer This pattern relies on an open-source NPM package paralus-eks-blueprints-addon. Please refer to the package npm site for more information. If you have any questions about the npm package or find any defect, please post in the source repo at ================================================ FILE: docs/patterns/pipeline-multi-env-gitops.md ================================================ # Pipeline Multi Environment Pattern ## Objective 1. Deploying an EKS cluster across 3 environments (dev, test, and prod), with a Continuous Deployment pipeline triggered upon a commit to the repository that holds the pipeline configuration. 2. 
Configuring GitOps tooling (ArgoCD addon) to support multi-team and multi-repositories configuration, in a way that restricts each application to be deployed only into the team namespace, by using ArgoCD projects ### GitOps configuration For GitOps, the blueprint bootstraps the ArgoCD addon and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. The pattern uses the ECSDEMO applications as sample applications to demonstrate how to set up a GitOps configuration with multiple teams and multiple applications. The pattern includes the following configurations: 1. Application team - it defines 3 application teams that correspond with the 3 sample applications used 2. ArgoCD bootstrap - the pattern configures the ArgoCD addon to point to the [workload repository](https://github.com/aws-samples/eks-blueprints-workloads) of the EKS Blueprints samples 3. ArgoCD projects - as part of the ArgoCD addon bootstrap, the pattern generates an ArgoCD project for each application team. The ArgoCD projects are used in order to restrict the deployment of an application to a specific target namespace You can find the App of Apps configuration for this pattern in the workload repository under the folder [`multi-repo`](https://github.com/aws-samples/eks-blueprints-workloads/tree/main/multi-repo). ## Prerequisites 1. Fork this repository to your GitHub organisation/user 2. Clone your forked repository 3. Install the AWS CDK Toolkit globally on your machine using ```bash npm install -g aws-cdk ``` 4. `github-ssh-key` - must contain GitHub SSH private key as a JSON structure containing fields `sshPrivateKey` and `url`. This will be used by ArgoCD addon to authenticate against any GitHub repository (private or public). The secret is expected to be defined in the region where the pipeline will be deployed to. 
For more information on SSH credentials setup see [ArgoCD Secrets Support](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/#secrets-support). 5. `github-token` secret must be stored in AWS Secrets Manager for the GitHub pipeline. For more information on how to set it up, please refer to the [docs](https://docs.aws.amazon.com/codepipeline/latest/userguide/GitHub-create-personal-token-CLI.html). The GitHub Personal Access Token should have these scopes: 1. *repo* - to read the repository 2. *admin:repo_hook* - if you plan to use webhooks (enabled by default) 6. Create the relevant users that will be used by the different teams ```bash aws iam create-user --user-name frontend-user aws iam create-user --user-name nodejs-user aws iam create-user --user-name crystal-user aws iam create-user --user-name platform-user ``` 7. Install project dependencies by running `npm install` in the main folder of this cloned repository 8. In case you haven't done this before, bootstrap your AWS Account for use with the AWS CDK by running: ```bash cdk bootstrap ``` 9. Modify the code in your forked repo to point to your GitHub username/organisation. This is needed because the AWS CodePipeline that will be automatically created will be triggered upon commits that are made in your forked repo. Open the [pattern file source code](../../lib/pipeline-multi-env-gitops/index.ts) and look for the declared const of `gitOwner`. Change it to your GitHub username. 10. *OPTIONAL* - As mentioned above, this pattern uses another repository for GitOps. This is the ArgoCD App of Apps configuration that resides in the [aws-samples](https://github.com/aws-samples/eks-blueprints-workloads/tree/main/multi-repo) organisation. If you would like to modify the App of Apps configuration and customise it to your needs, then use the following instructions: 1. Fork the [App of Apps](https://github.com/aws-samples/eks-blueprints-workloads/tree/main/multi-repo) workloads repo to your GitHub username 2. 
Modify the [pattern code](../../lib/pipeline-multi-env-gitops/index.ts) with the following changes: 1. Change the consts of `devArgoAddonConfig`, `testArgoAddonConfig`, and `prodArgoAddonConfig` to point to your GitHub username 2. In the `createArgoAddonConfig` function, look for the `git@github.com:aws-samples/eks-blueprints-workloads.git` code under the `sourceRepos` configurations, and add another reference to your forked workload repository ## Deploying Once all pre-requisites are set you are ready to deploy the pipeline. Run the following command from the root of this repository to deploy the pipeline stack: ```bash make pattern pipeline-multienv-gitops deploy eks-blueprint-pipeline-stack ``` Now you can go to the [AWS CodePipeline console](https://eu-west-1.console.aws.amazon.com/codesuite/codepipeline/pipelines) and see the pipeline that was automatically created to deploy multiple Amazon EKS clusters to different environments. ### Notes 1. In case your pipeline fails on the first run, it's because the AWS CodeBuild step needs elevated permissions at build time. This is described in the official [docs](https://aws-quickstart.github.io/cdk-eks-blueprints/pipelines/#troubleshooting). To resolve this, locate `AccessDeniedException` in the CodeBuild build logs, and attach the following inline policy to the IAM role named in that error: ```json { "Version": "2012-10-17", "Statement": [ { "Sid": "VisualEditor0", "Effect": "Allow", "Action": [ "sts:AssumeRole", "secretsmanager:GetSecretValue", "secretsmanager:DescribeSecret", "cloudformation:*" ], "Resource": "*" } ] } ``` The above inconvenience has been fixed in the Blueprints framework as well as in the pattern, so please report such cases if you encounter them. This item is left here for reference in case customers modify the pattern to require additional permissions at build time. 2. This pattern consumes multiple Elastic IP addresses, because 3 VPCs with 3 subnets are created by this pattern. 
Make sure your account limits for EIPs are increased to support an additional 9 EIPs (1 per subnet) ================================================ FILE: docs/patterns/secureingresscognito.md ================================================ # Secure Ingress using Cognito Pattern ## Objective The objective of this pattern is to provide a secure authentication mechanism for customer applications using Amazon Cognito, ALB, and Route53, ensuring that only authorized users can access the application. The Kubecost tool is used as a reference or sample implementation to demonstrate the pattern's capabilities. To achieve this objective, the pattern utilizes Amazon Cognito to provide user authentication for the application's ingress, with ALB's built-in support for user authentication handling routine tasks such as user sign-up, sign-in, and sign-out. In addition to Amazon Cognito, ALB integrates with any OpenID Connect compliant identity provider (IdP) for a single sign-on experience across applications. ACM and Route53 provide SSL/TLS certificates to secure connections to ALB and authenticate users, preventing sensitive information from being intercepted or tampered with during transmission. The pattern also leverages Kubecost to provide real-time cost visibility and analysis for Kubernetes clusters, enabling customers to make informed decisions about resource allocation and utilization. This pattern can be easily adapted and extended to secure ingress for any application, providing a unified and secure solution for user authentication while optimizing costs. By implementing this solution, Amazon EKS customers can have a reliable, scalable, and secure authentication mechanism for their applications, with a cost optimization tool to manage and reduce the costs associated with their Kubernetes clusters. 
## Architecture ![Kubecost Architecture](./images/secure-ingress-kubecost-new.png) ## Approach This blueprint will include the following: * A new Well-Architected VPC with both Public and Private subnets. * A new Well-Architected EKS cluster in the region and account you specify. * [EBS CSI Driver Amazon EKS Add-on](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/ebs-csi-driver/) allows Amazon Elastic Kubernetes Service (Amazon EKS) clusters to manage the lifecycle of Amazon EBS volumes for persistent volumes. * AWS and Kubernetes resources needed to support [AWS Load Balancer Controller](https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html). * [Amazon VPC CNI add-on (VpcCni)](https://docs.aws.amazon.com/eks/latest/userguide/managing-vpc-cni.html) into your cluster to support native VPC networking for Amazon EKS. * [External-DNS](https://github.com/kubernetes-sigs/external-dns) allows integration of exposed Kubernetes services and Ingresses with DNS providers * [Kubecost](https://kubecost.com/) provides real-time cost visibility and insights by uncovering patterns that create overspending on infrastructure to help teams prioritize where to focus optimization efforts * [Argo CD](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/) is a declarative, GitOps continuous delivery tool for Kubernetes. The Argo CD add-on provisions Argo CD into an EKS cluster, and bootstraps your workloads from public and private Git repositories. * The necessary Cognito resources, such as the user pool, user pool client, domain, and [Pre sign-up Lambda trigger and Pre authentication Lambda triggers](https://docs.aws.amazon.com/cognito/latest/developerguide/cognito-user-identity-pools-working-with-aws-lambda-triggers.html), which are passed to the Argo CD app of apps pattern so that ingress resources can reference them. 
## GitOps configuration For GitOps, the blueprint bootstraps the ArgoCD addon and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) ## Deploy 1. Let’s start by setting a few environment variables. Change the Region as needed. ``` ACCOUNT_ID=$(aws sts get-caller-identity --query 'Account' --output text) AWS_REGION=us-west-2 ``` 2. Clone the repository and install dependency packages. This repository contains CDK v2 code written in TypeScript. ``` git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns npm i ``` 3. argo-admin-password secret must be defined as plain text (not key/value) in `us-west-2` region. ``` aws secretsmanager create-secret --name argo-admin-secret \ --description "Admin Password for ArgoCD" \ --secret-string "password123$" \ --region "us-west-2" ``` 4. The CDK code expects the allowed domain and subdomain names in the CDK context file (cdk.json). Create two environment variables. The PARENT_HOSTED_ZONE variable contains your company’s domain name. The DEV_SUBZONE_NAME will be the address for your Kubecost dashboard. Generate the cdk.json file: ``` PARENT_HOSTED_ZONE=mycompany.a2z.com DEV_SUBZONE_NAME=dev.mycompany.a2z.com cat << EOF > cdk.json { "app": "npx ts-node dist/lib/common/default-main.js", "context": { "parent.hostedzone.name": "${PARENT_HOSTED_ZONE}", "dev.subzone.name": "${DEV_SUBZONE_NAME}" } } EOF ``` 5. In this solution, we’ll allow access to the Kubecost dashboard based on user email addresses. 
You can control access to the dashboard by allow-listing an entire domain or individual email addresses. Users are required to sign-up before they can access the Kubecost dashboard. The pre sign-up Lambda trigger only allows sign-ups when user’s email domain matches allow-listed domains. When users sign-up, Cognito sends a verification code to their email address. Users have to verify access (using the one time valid code) to their email before they get access to the dashboard. If you’d like to limit access to the dashboard by email addresses, you can also create a parameter to store allowed email addresses and add a logic to the pre authentication Lambda trigger. Create below parameters with allowed email addresses and domains in the AWS Systems Manager Parameter Store: ``` export SSM_PARAMETER_KEY="/secure-ingress-auth-cognito/ALLOWED_DOMAINS" export SSM_PARAMETER_VALUE="emaildomain1.com,emaildomain2.com" aws ssm put-parameter \ --name "$SSM_PARAMETER_KEY" \ --value "$SSM_PARAMETER_VALUE" \ --type "String" \ --region $AWS_REGION ``` 6. Execute the commands below to bootstrap the AWS environment in `us-west-2` ``` cdk bootstrap aws://$ACCOUNT_ID/$AWS_REGION ``` 7. 
Run the following command from the root of this repository to deploy the blueprint stack: ``` make build make pattern secure-ingress-cognito deploy secure-ingress-blueprint ``` ## Cluster Access Once the deployment completes, you will see output in your terminal window similar to the following: ``` Outputs: secure-ingress-blueprint.secureingressblueprintClusterNameD6A1BE5C = secure-ingress-blueprint secure-ingress-blueprint.secureingressblueprintConfigCommandD0275968 = aws eks update-kubeconfig --name secure-ingress-blueprint --region us-west-2 --role-arn arn:aws:iam:::role/secure-ingress-blueprint-secureingressblueprintMas-7JD5S67SG7M0 secure-ingress-blueprint.secureingressblueprintGetTokenCommand21BE2184 = aws eks get-token --cluster-name secure-ingress-blueprint --region us-west-2 --role-arn arn:aws:iam:::role/secure-ingress-blueprint-secureingressblueprintMas-7JD5S67SG7M0 ``` ``` Stack ARN: arn:aws:cloudformation:us-west-2::stack/secure-ingress-blueprint/64017120-91ce-11ed-93b2-0a67951f5d5d ``` To update your Kubernetes config for your new cluster, copy and run the secure-ingress-blueprint.secureingressblueprintConfigCommandD0275968 command (the second command) in your terminal. ``` aws eks update-kubeconfig --name secure-ingress-blueprint --region us-west-2 --role-arn arn:aws:iam:::role/secure-ingress-blueprint-secureingressblueprintMas-7JD5S67SG7M0 ``` Validate that you now have kubectl access to your cluster via the following: ``` kubectl get all -n kubecost ``` You should see output that lists all resources in the `kubecost` namespace of your cluster. ## Test authentication Point your browser to the URL of the Kubecost app in your cluster. You can get the URL from the cdk.json file using the below command. ``` awk -F':' '/dev.subzone.name/ {print $2}' cdk.json | tr -d '",' | xargs echo ``` Your browser will be redirected to a sign-in page. This page is provided by Amazon Cognito hosted UI. Since this is your first time accessing the application, sign up as a new user. 
The data you input here will be saved in the Amazon Cognito user pool you created earlier in the post. ![Cognito Signup Process](./images/Cognito-Signup-1.png) Select “Sign up” and use your email address and create a password ![Cognito Signup Process](./images/Cognito-Signup-2.png) ![Cognito Signup Process](./images/Cognito-Signup-3.png) Use the verification code received in your email and confirm the account. Once you sign in, ALB will send you to the Kubecost app’s UI: ![Kubecost](./images/Cognito-Kubecost-1.png) Select the “AWS Cluster #1” to view the cost overview, savings and efficiency details. ![Kubecost Dashboard](./images/Cognito-Kubecost-2.png) ================================================ FILE: docs/patterns/security/eks-config-rules.md ================================================ # Security Best Practices for Amazon EKS ## Objective The objective of this pattern is to demonstrate how to enable AWS Config configuration recorder and AWS Config managed rules for EKS security best practices in your AWS account `CDK_DEFAULT_ACCOUNT` and region `CDK_DEFAULT_REGION` and verify the status of the rules. ## Prerequisites 1. Follow the usage [instructions](https://github.com/aws-samples/cdk-eks-blueprints-patterns/blob/main/README.md#usage) to install the dependencies and perform the repository setup. 2. `argo-admin-password` secret must be defined in Secrets Manager in the same region as the EKS cluster. 
## Deploy To bootstrap the CDK toolkit and list all stacks in the app, run the following commands: ```bash cdk bootstrap make list ``` ### Deploy AWS Config Configuration Recorder Use the AWS Config setup blueprints pattern to enable AWS Config in your account and region by running the following command: ```bash make pattern eks-config-rules deploy eks-config-setup ``` ### Deploy Config Rules for EKS Security Best Practices Now enable the AWS Config managed rules for EKS security best practices by running the following command: ```bash make pattern eks-config-rules deploy eks-config-rules-setup ``` ## Verify ### Verify the status of the AWS Config managed rules for EKS security best practices Using the following AWS CLI command, get a list of the AWS Config rules with their evaluation status. ```bash aws configservice describe-config-rule-evaluation-status ``` The output will look something like the following. ```json { "ConfigRulesEvaluationStatus": [ ... { "ConfigRuleName": "eks-config-rules-setup-EksEndpointNoPublicAccess49-37QJEXYZALLB", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-luqz0p", "ConfigRuleId": "config-rule-luqz0p", "LastSuccessfulInvocationTime": "2023-05-30T00:33:26.878000+00:00", "LastSuccessfulEvaluationTime": "2023-05-30T00:33:27.539000+00:00", "FirstActivatedTime": "2023-05-27T00:32:41.020000+00:00", "FirstEvaluationStarted": true }, { "ConfigRuleName": "eks-config-rules-setup-EksOldestSupportedVersionAD-Z65N0TEQSF96", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-psbc54", "ConfigRuleId": "config-rule-psbc54", "LastSuccessfulInvocationTime": "2023-05-27T07:56:05.182000+00:00", "LastSuccessfulEvaluationTime": "2023-05-27T07:56:07.542000+00:00", "FirstActivatedTime": "2023-05-25T22:44:21.666000+00:00", "FirstEvaluationStarted": true }, { "ConfigRuleName": "eks-config-rules-setup-EksSecretsEncrypted7566BFCD-HUQX4WXUDEFA", "ConfigRuleArn": 
"arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-kzohng", "ConfigRuleId": "config-rule-kzohng", "LastSuccessfulInvocationTime": "2023-05-30T00:33:26.902000+00:00", "LastSuccessfulEvaluationTime": "2023-05-30T00:33:27.616000+00:00", "FirstActivatedTime": "2023-05-27T00:32:41.006000+00:00", "FirstEvaluationStarted": true }, { "ConfigRuleName": "eks-config-rules-setup-EksSupportedVersionCDB3159A-1VNH10LGMMJX", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-oaio54", "ConfigRuleId": "config-rule-oaio54", "LastSuccessfulInvocationTime": "2023-05-27T07:56:05.223000+00:00", "LastSuccessfulEvaluationTime": "2023-05-27T07:56:05.420000+00:00", "FirstActivatedTime": "2023-05-25T22:51:26.563000+00:00", "FirstEvaluationStarted": true } ... ] } ``` You can search for the EKS specific rules. Make a note of the unique `ConfigRuleName` of each of the AWS Config rules for EKS security best practices. Using the unique names of the EKS Config rules from **your account and region** shown after running the previous AWS CLI command, you can verify each EKS Config rule configuration and state using the following AWS CLI command (remember to replace the rule names below with your rule names). 
```bash aws configservice describe-config-rules --config-rule-names "eks-config-rules-setup-EksEndpointNoPublicAccess" "eks-config-rules-setup-EksOldestSupportedVersion" "eks-config-rules-setup-EksSecretsEncrypted" "eks-config-rules-set up-EksSupportedVersion" ``` ```json { "ConfigRules": [ { "ConfigRuleName": "eks-config-rules-setup-EksEndpointNoPublicAccess49-37QJEXYZALLB", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-luqz0p", "ConfigRuleId": "config-rule-luqz0p", "Source": { "Owner": "AWS", "SourceIdentifier": "EKS_ENDPOINT_NO_PUBLIC_ACCESS" }, "ConfigRuleState": "ACTIVE", "EvaluationModes": [ { "Mode": "DETECTIVE" } ] }, { "ConfigRuleName": "eks-config-rules-setup-EksOldestSupportedVersionAD-Z65N0TEQSF96", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-psbc54", "ConfigRuleId": "config-rule-psbc54", "Source": { "Owner": "AWS", "SourceIdentifier": "EKS_CLUSTER_OLDEST_SUPPORTED_VERSION" }, "InputParameters": "{\"oldestVersionSupported\":\"1.25\"}", "ConfigRuleState": "ACTIVE", "EvaluationModes": [ { "Mode": "DETECTIVE" } ] }, { "ConfigRuleName": "eks-config-rules-setup-EksSecretsEncrypted7566BFCD-HUQX4WXUDEFA", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-kzohng", "ConfigRuleId": "config-rule-kzohng", "Source": { "Owner": "AWS", "SourceIdentifier": "EKS_SECRETS_ENCRYPTED" }, "ConfigRuleState": "ACTIVE", "EvaluationModes": [ { "Mode": "DETECTIVE" } ] }, { "ConfigRuleName": "eks-config-rules-setup-EksSupportedVersionCDB3159A-1VNH10LGMMJX", "ConfigRuleArn": "arn:aws:config:us-east-1:XXXXXXXXXXX:config-rule/config-rule-oaio54", "ConfigRuleId": "config-rule-oaio54", "Source": { "Owner": "AWS", "SourceIdentifier": "EKS_CLUSTER_SUPPORTED_VERSION" }, "InputParameters": "{\"oldestVersionSupported\":\"1.25\"}", "ConfigRuleState": "ACTIVE", "EvaluationModes": [ { "Mode": "DETECTIVE" } ] } ] } ``` Note that you can see the parameter value of the rules with required 
`InputParameters` (`EKS_CLUSTER_OLDEST_SUPPORTED_VERSION` and `EKS_CLUSTER_SUPPORTED_VERSION`), and the `ConfigRuleState` for each of the rules which is `ACTIVE`. ================================================ FILE: docs/patterns/security/encryption-at-rest.md ================================================ # Data at Rest Encryption ## Objective The objective of this pattern is to demonstrate how to enable encryption at rest for an EKS cluster using EBS/EFS storage. To achieve this objective, the pattern utilizes [EBS CSI Driver Amazon EKS Add-on](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/ebs-csi-driver/) to enable encryption-at-rest for EBS volumes and [EFS CSI Driver Amazon EKS Add-on](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/efs-csi-driver/) to enable encryption-at-rest for EFS volumes. The pattern also leverages [KMS resource provider](https://aws-quickstart.github.io/cdk-eks-blueprints/resource-providers/kms-key-providers/) to create KMS keys for EBS/EFS encryption-at-rest and [EFS File System resource provider](https://aws-quickstart.github.io/cdk-eks-blueprints/resource-providers/efs-providers/) to create an encrypted EFS file system. ## GitOps configuration For GitOps, the blueprint bootstraps the ArgoCD addon and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. The sample repository contains the following workloads: 1. team-platform creates a storage class for EBS and EFS volumes. 2. team-data creates a persistent volume claim for EBS and EFS volumes and a pod that mounts the volumes. ## Prerequisites 1. Follow the usage [instructions](https://github.com/aws-samples/cdk-eks-blueprints-patterns/blob/main/README.md#usage) to install the dependencies and perform the repository setup. 2. `argo-admin-password` secret must be defined in Secrets Manager in the same region as the EKS cluster. 
## Deploy To bootstrap the CDK toolkit and list all stacks in the app, run the following commands: ```bash cdk bootstrap make list ``` To deploy the pattern, run the following command: ```bash make pattern data-at-rest-encryption deploy ``` ## Verify Now you can verify that the EBS and EFS volumes are encrypted. ### EBS To list all the PersistentVolumeClaims (PVCs) that exist in the Kubernetes cluster's namespace named "data", run the following command: ```bash kubectl get pvc -n data ``` The output should look similar to the following: ```bash NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE gp2-encrypted-pvc Bound pvc-78bd070e-8eba-4b01-a378-462bb806beb3 10Gi RWO gp2-encrypted 14m ``` To describe an Amazon Elastic Block Store (EBS) volume that is associated with a PersistentVolume (PV) in Kubernetes, run the following command (please replace `<PVC-IDENTIFIER>` with the value of the VOLUME column from the previous step): ```bash aws ec2 describe-volumes --region us-east-1 --filters "Name=tag:kubernetes.io/created-for/pv/name,Values=<PVC-IDENTIFIER>" --query 'Volumes[*].{VolumeId:VolumeId, Encrypted:Encrypted, KmsKeyId:KmsKeyId}' ``` The output should look similar to the following: ```bash [ { "VolumeId": "vol-09332f96a58e67385", "Encrypted": true, "KmsKeyId": "arn:aws:kms:us-east-1:111122223333:key/a8b9fa0b-955f-4f85-85c1-8f911003390e" } ] ``` ### EFS To list all the StorageClasses that are defined in the Kubernetes cluster, run the following command: ```bash kubectl get storageclass ``` The output should look similar to the following: ```bash NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE efs-encrypted (default) efs.csi.aws.com Delete Immediate false 70m ``` To retrieve the KMS Key ID parameter of a specific StorageClass named "efs-encrypted" in the Kubernetes cluster, run the following command: ```bash kubectl get storageclass efs-encrypted -o jsonpath='{.parameters.kmsKeyId}' ``` The output should look similar to the following: ```bash 
arn:aws:kms:us-east-1:111222333444:key/19f4f602-dcf3-42a5-8eef-38f2af4b3626% ``` To list all the PersistentVolumeClaims (PVCs) that exist in the Kubernetes cluster's namespace named "data", run the following command: ```bash kubectl get pvc -n data ``` The output should look similar to the following: ```bash NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE efs-encrypted-claim Bound pvc-06df2640-ae2f-44ae-8d5c-82c72e56a9ae 10Gi RWX efs-encrypted 63m ``` To list all the pods that are running in the Kubernetes cluster's namespace named "data", run the following command: ```bash kubectl get pods -n data ``` The output should look similar to the following: ```bash NAME READY STATUS RESTARTS AGE efs-encryption-app 1/1 Running 0 63m ``` To get detailed information about a PersistentVolumeClaim (PVC) named "efs-encrypted-claim" in the "data" namespace of the Kubernetes cluster, run the following command: ```bash kubectl describe pvc efs-encrypted-claim -n data ``` The output should look similar to the following: ```bash Name: efs-encrypted-claim Namespace: data StorageClass: efs-encrypted Status: Bound Volume: pvc-06df2640-ae2f-44ae-8d5c-82c72e56a9ae Labels: argocd.argoproj.io/instance=team-data Annotations: pv.kubernetes.io/bind-completed: yes pv.kubernetes.io/bound-by-controller: yes volume.beta.kubernetes.io/storage-provisioner: efs.csi.aws.com volume.kubernetes.io/storage-provisioner: efs.csi.aws.com Finalizers: [kubernetes.io/pvc-protection] Capacity: 10Gi Access Modes: RWX VolumeMode: Filesystem Used By: efs-encryption-app Events: ``` ================================================ FILE: docs/patterns/security/guardduty.md ================================================ # Amazon GuardDuty Protection ## Objective The objective of this pattern is to demonstrate how to enable Amazon GuardDuty Detector across your AWS accounts, use GuardDuty optional features, and how to automate notifications via Amazon SNS based on security findings generated by GuardDuty. 
Supported features: - [Foundational data sources](https://docs.aws.amazon.com/guardduty/latest/ug/guardduty_data-sources.html) - these data sources are enabled by default, no need to mention them in the pattern input - [EKS Audit Log Monitoring](https://docs.aws.amazon.com/guardduty/latest/ug/guardduty-eks-audit-log-monitoring.html) - [EKS Runtime Monitoring](https://docs.aws.amazon.com/guardduty/latest/ug/guardduty-eks-runtime-monitoring.html) - [Malware Protection in Amazon GuardDuty](https://docs.aws.amazon.com/guardduty/latest/ug/malware-protection.html) - [GuardDuty RDS Protection](https://docs.aws.amazon.com/guardduty/latest/ug/rds-protection.html) - [Amazon S3 Protection in Amazon GuardDuty](https://docs.aws.amazon.com/guardduty/latest/ug/s3-protection.html) The pattern consists of two components: - `GuardDutySetupStack` - enables GuardDuty Detector for the account. The stack also creates an SNS topic, SNS Subscription, and Amazon EventBridge Rule. - A blueprint that deploys a sample GitOps workload that triggers a GuardDuty finding. The list of optional features is adjustable via the `features` parameter in the [GuardDutySetupStack](../../../lib/security/guardduty-construct/guardduty-setup.ts) stack. ## GitOps configuration For GitOps, the blueprint bootstraps the ArgoCD addon and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. The sample repository contains the following workloads: - `team-danger` runs a pod in a privileged mode which is a [security anti-pattern](https://docs.aws.amazon.com/guardduty/latest/ug/guardduty_finding-types-kubernetes.html#privilegeescalation-kubernetes-privilegedcontainer) - `team-danger` runs a pod with a [malicious file](https://docs.aws.amazon.com/guardduty/latest/ug/findings-malware-protection.html#execution-malware-kubernetes-maliciousfile) ## Prerequisites 1. 
Follow the usage [instructions](https://github.com/aws-samples/cdk-eks-blueprints-patterns/blob/main/README.md#usage) to install the dependencies and perform the repository setup. 2. `argo-admin-password` secret must be defined in Secrets Manager in the same region as the EKS cluster. ## Deploy To bootstrap the CDK toolkit and list all stacks in the app, run the following commands: ```bash cdk bootstrap make list ``` ### Deploying the `GuardDutySetupStack` stack The `GuardDutySetupStack` stack enables GuardDuty Detector for the account with all the features of your choice enabled. To deploy the stack, run the following command: ```bash make pattern guardduty deploy guardduty-setup ``` ### Deploying the blueprint workload The blueprint deploys a sample GitOps workload that triggers a GuardDuty finding. To deploy the blueprint, run the following command: ```bash make pattern guardduty deploy guardduty-blueprint ``` ## Verify Run the `update-kubeconfig` command. You should be able to get the command from the CDK output message. More information can be found [here](https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access). Please replace `<your cluster name>`, `<your region>`, and `<your role arn>` with the values from the CDK output message. ```bash aws eks update-kubeconfig --name <your cluster name> --region <your region> --role-arn <your role arn> ``` ### Verifying that the GuardDuty detector is enabled Now you can check that the GuardDuty detector is successfully enabled with all the required data sources.
To list all detectors in the region, run the following command: ```bash aws guardduty list-detectors --region us-east-1 ``` The output should look like this: ```json { "DetectorIds": [ "80c3c03d44819a984b035b000aa9b3da" ] } ``` To check the detector's configuration, run the following command (please replace `<detector-id>` with the ID of the detector): ```bash aws guardduty get-detector --detector-id <detector-id> --region us-east-1 ``` The output should look like this: ```json { "CreatedAt": "2023-04-14T15:55:27.088Z", "FindingPublishingFrequency": "SIX_HOURS", "ServiceRole": "arn:aws:iam::123456789012:role/aws-service-role/guardduty.amazonaws.com/AWSServiceRoleForAmazonGuardDuty", "Status": "ENABLED", "UpdatedAt": "2023-04-14T15:55:27.088Z", "DataSources": { "CloudTrail": { "Status": "ENABLED" }, "DNSLogs": { "Status": "ENABLED" }, "FlowLogs": { "Status": "ENABLED" }, "S3Logs": { "Status": "ENABLED" }, "Kubernetes": { "AuditLogs": { "Status": "ENABLED" } }, "MalwareProtection": { "ScanEc2InstanceWithFindings": { "EbsVolumes": { "Status": "ENABLED" } }, "ServiceRole": "arn:aws:iam::123456789012:role/aws-service-role/malware-protection.guardduty.amazonaws.com/AWSServiceRoleForAmazonGuardDutyMalwareProtection" } }, "Tags": {}, "Features": [ { "Name": "CLOUD_TRAIL", "Status": "ENABLED", "UpdatedAt": "2023-04-14T11:08:44-05:00" }, { "Name": "DNS_LOGS", "Status": "ENABLED", "UpdatedAt": "2023-04-14T11:08:44-05:00" }, { "Name": "FLOW_LOGS", "Status": "ENABLED", "UpdatedAt": "2023-04-14T11:08:44-05:00" }, { "Name": "S3_DATA_EVENTS", "Status": "ENABLED", "UpdatedAt": "2023-04-14T10:55:27-05:00" }, { "Name": "EKS_AUDIT_LOGS", "Status": "ENABLED", "UpdatedAt": "2023-04-14T10:55:27-05:00" }, { "Name": "EBS_MALWARE_PROTECTION", "Status": "ENABLED", "UpdatedAt": "2023-04-14T10:55:27-05:00" }, { "Name": "RDS_LOGIN_EVENTS", "Status": "ENABLED", "UpdatedAt": "2023-04-14T10:55:27-05:00" }, { "Name": "EKS_RUNTIME_MONITORING", "Status": "ENABLED", "UpdatedAt": "2023-04-14T10:55:27-05:00",
"AdditionalConfiguration": [ { "Name": "EKS_ADDON_MANAGEMENT", "Status": "ENABLED", "UpdatedAt": "2023-04-14T10:55:27-05:00" } ] } ] } ``` ### Verifying that the GuardDuty findings are generated To list all findings in the region, run the following command (please replace `<detector-id>` with the ID of the detector): ```bash aws guardduty list-findings --detector-id <detector-id> --region us-east-1 ``` The output should look like this: ```json { "FindingIds": [ "f2c3859c6ca25b3057d13470a992bbd7" ] } ``` To check the finding's details, run the following command (please replace `<detector-id>` and `<finding-id>` with the ID of the detector and the ID of the finding): ```bash aws guardduty get-findings --detector-id <detector-id> --finding-ids <finding-id> --region us-east-1 ``` The list of findings contains `PrivilegeEscalation:Kubernetes/PrivilegedContainer` as expected: ```json { "Findings": [ { "AccountId": "123456789012", "Arn": "arn:aws:guardduty:us-east-1:123456789012:detector/94c3858788bc1444ceedab472bab5d7e/finding/f2c3859c6ca25b3057d13470a992bbd7", "CreatedAt": "2023-03-22T21:28:07.748Z", "Description": "A privileged container with root level access was launched on EKS Cluster guardduty-blueprint.
If this behavior is not expected, it may indicate that your credentials are compromised.", "Id": "f2c3859c6ca25b3057d13470a992bbd7", "Partition": "aws", "Region": "us-east-1", "Resource": { "EksClusterDetails": { "Name": "guardduty-blueprint", "Arn": "arn:aws:eks:us-east-1:123456789012:cluster/guardduty-blueprint", "VpcId": "vpc-02b68c9ddc1d403ab", "Status": "ACTIVE", "Tags": [], "CreatedAt": "2023-03-22T15:48:25.752000-05:00" }, "KubernetesDetails": { "KubernetesUserDetails": { "Username": "system:serviceaccount:argocd:argocd-application-controller", "Uid": "1871d525-442e-487f-ae60-81336d1ff0cf", "Groups": [ "system:serviceaccounts", "system:serviceaccounts:argocd", "system:authenticated" ] }, "KubernetesWorkloadDetails": { "Name": "privileged-pod", "Type": "pods", "Uid": "33a3c89e-3280-474d-b8cb-fdf03394fc15", "Namespace": "argocd", "HostNetwork": false, "Containers": [ { "Name": "app", "Image": "centos", "ImagePrefix": "", "SecurityContext": { "Privileged": true } } ] } }, "ResourceType": "EKSCluster" }, "SchemaVersion": "2.0", "Service": { "Action": { "ActionType": "KUBERNETES_API_CALL", "KubernetesApiCallAction": { "RequestUri": "/api/v1/namespaces/argocd/pods", "Verb": "create", "UserAgent": "argocd-application-controller/v0.0.0 (linux/amd64) kubernetes/$Format", "RemoteIpDetails": { "City": { "CityName": "UNKNOWN" }, "Country": {}, "GeoLocation": { "Lat": 0.0, "Lon": 0.0 }, "IpAddressV4": "10.0.205.129", "Organization": { "Asn": "0", "AsnOrg": "UNKNOWN", "Isp": "UNKNOWN", "Org": "UNKNOWN" } }, "StatusCode": 201 } }, "Archived": false, "Count": 1, "DetectorId": "94c3858788bc1444ceedab472bab5d7e", "EventFirstSeen": "2023-03-22T21:27:18.186Z", "EventLastSeen": "2023-03-22T21:27:18.630Z", "ResourceRole": "TARGET", "ServiceName": "guardduty", "AdditionalInfo": { "Value": "{}", "Type": "default" } }, "Severity": 5, "Title": "Privileged container with root level access launched on the EKS Cluster.", "Type": "PrivilegeEscalation:Kubernetes/PrivilegedContainer", 
"UpdatedAt": "2023-03-22T21:28:07.748Z" } ] } ``` ### Verifying that the GuardDuty Runtime Monitoring agents are automatically deployed To verify that the GuardDuty Runtime Monitoring agents are automatically deployed, run the following command: ```bash kubectl get pods -A ``` The output should look like this: ```bash NAMESPACE NAME READY STATUS RESTARTS AGE amazon-guardduty aws-guardduty-agent-qrm22 1/1 Running 0 25m argocd blueprints-addon-argocd-application-controller-0 1/1 Running 0 3m25s argocd blueprints-addon-argocd-applicationset-controller-7c4c75877579s 1/1 Running 0 3m25s argocd blueprints-addon-argocd-dex-server-c6687d84f-q4697 1/1 Running 1 (3m21s ago) 3m25s argocd blueprints-addon-argocd-notifications-controller-7c74f76c5wh4nb 1/1 Running 0 3m25s argocd blueprints-addon-argocd-redis-595cc69fff-9985j 1/1 Running 0 3m25s argocd blueprints-addon-argocd-repo-server-7f75c7796c-229c4 1/1 Running 0 3m25s argocd blueprints-addon-argocd-server-86867c9dd8-p6qk7 1/1 Running 0 3m25s argocd privileged-pod 1/1 Running 0 115s kube-system aws-node-4lhp7 1/1 Running 0 26m kube-system coredns-79989457d9-jncrb 1/1 Running 0 32m kube-system coredns-79989457d9-l5jcg 1/1 Running 0 32m kube-system kube-proxy-hwkwm 1/1 Running 0 26m ``` As you can see, the GuardDuty Runtime Monitoring agent is deployed in the `amazon-guardduty` namespace. ================================================ FILE: docs/patterns/security/image-scanning.md ================================================ # Amazon ECR Image Scanning ## Objective The objective of this pattern is to demonstrate how to enable and configure Amazon ECR image scanning. The following scanning types are offered: - **Enhanced scanning** — Amazon ECR integrates with Amazon Inspector to provide automated, continuous scanning of your repositories. Your container images are scanned for both operating system and programming language package vulnerabilities.
As new vulnerabilities appear, the scan results are updated and Amazon Inspector emits an event to EventBridge to notify you. - **Basic scanning** — Amazon ECR uses the Common Vulnerabilities and Exposures (CVEs) database from the open-source Clair project. With basic scanning, you configure your repositories to scan on push or you can perform manual scans and Amazon ECR provides a list of scan findings. The pattern consists of two components: - `ImageScanningSetupStack` - configures the Amazon ECR image scanning and the ECR automated re-scan duration in Inspector. - A blueprint that deploys a sample GitOps workload that pushes images to Amazon ECR and triggers the image scanning. ## Configuration You can configure the following parameters in the [ImageScanningSetupStack](../../../lib/security/image-vulnerability-scanning/image-scanning-setup.ts) stack: - `scanType` - The type of scan to perform. Valid values are `BASIC` and `ENHANCED`. - Enhanced scanning only: - `enhancedContinuousScanDuration` - the Amazon ECR automated re-scan duration setting determines how long Amazon Inspector continuously monitors images pushed into repositories. When the number of days from when an image is first pushed exceeds the automated re-scan duration configuration, Amazon Inspector stops monitoring the image. When Amazon Inspector stops monitoring an image, the scan status of the image is changed to inactive with a reason code of expired, and all associated findings for the image are scheduled to be closed. Valid values are `LIFETIME`, `DAYS_30`, and `DAYS_180`. - `enhancedScanRules` - the scanning rules. - Basic scanning only: - `basicScanRules` - the scanning rules. Please refer to the [Amazon ECR Image Scanning](https://docs.aws.amazon.com/AmazonECR/latest/userguide/image-scanning.html) documentation for more information and how to use filters. 
## GitOps configuration For GitOps, the blueprint bootstraps the ArgoCD addon and points to the [EKS Blueprints Workload](https://github.com/aws-samples/eks-blueprints-workloads) sample repository. The sample repository contains the following workloads: - `team-scan` pushes a Docker image to Amazon ECR and triggers the image scanning. ## Prerequisites 1. Follow the usage [instructions](https://github.com/aws-samples/cdk-eks-blueprints-patterns/blob/main/README.md#usage) to install the dependencies and perform the repository setup. 2. `argo-admin-password` secret must be defined in Secrets Manager in the same region as the EKS cluster. ## Deploy To bootstrap the CDK toolkit and list all stacks in the app, run the following commands: ```bash cdk bootstrap make list ``` ### Deploying the `ImageScanningSetupStack` stack The `ImageScanningSetupStack` configures the Amazon ECR image scanning and the ECR automated re-scan duration in Inspector. To deploy the stack, run the following command: ```bash make pattern ecr-image-scanning deploy image-scanning-setup ``` ### Deploying the blueprint The blueprint deploys a sample GitOps workload that pushes images to Amazon ECR and triggers the image scanning.
To deploy the blueprint, run the following command: ```bash make pattern ecr-image-scanning deploy image-scanning-workload-blueprint ``` ## Verify ### Verifying that the image scanning is enabled To verify that the image scanning is enabled at the registry level, run the following command: ```bash aws ecr get-registry-scanning-configuration ``` The output should look similar to the following: ```json { "registryId": "123456789012", "scanningConfiguration": { "scanType": "ENHANCED", "rules": [ { "scanFrequency": "CONTINUOUS_SCAN", "repositoryFilters": [ { "filter": "prod", "filterType": "WILDCARD" } ] }, { "scanFrequency": "SCAN_ON_PUSH", "repositoryFilters": [ { "filter": "*", "filterType": "WILDCARD" } ] } ] } } ``` ### Verifying that the image is pushed to Amazon ECR To verify that the image is pushed to Amazon ECR, run the following command (please replace `<repository-name>` with the repository name): ```bash aws ecr describe-images --repository-name <repository-name> ``` The output should look similar to the following: ```json { "imageDetails": [ { "registryId": "123456789012", "repositoryName": "image-scanning-workload-blueprint-imagescanningrepository754c6116-arh0wk3afnkw", "imageDigest": "sha256:a1801b843b1bfaf77c501e7a6d3f709401a1e0c83863037fa3aab063a7fdb9dc", "imageTags": [ "latest" ], "imageSizeInBytes": 83520228, "imagePushedAt": "2023-04-17T17:22:33-05:00", "imageManifestMediaType": "application/vnd.docker.distribution.manifest.v2+json", "artifactMediaType": "application/vnd.docker.container.image.v1+json", "lastRecordedPullTime": "2023-04-17T17:22:33.966000-05:00" } ] } ``` ### Checking the image scanning findings To check the image scanning findings, run the following command (please replace `<repository-name>` with the repository name): ```bash aws ecr describe-image-scan-findings --repository-name <repository-name> --image-id imageTag=latest ``` The output should look similar to the following: ```json { "imageScanFindings": { "enhancedFindings": [ { "awsAccountId": "123456789012", "description": "basic/unit-name.c in
systemd prior to 246.15, 247.8, 248.5, and 249.1 has a Memory Allocation with an Excessive Size Value (involving strdupa and alloca for a pathname controlled by a local attacker) that results in an operating system crash.", "findingArn": "arn:aws:inspector2:us-east-1:123456789012:finding/0407d7719da0fc8a8f44991f0bf524d6", "firstObservedAt": "2023-04-17T17:40:39.940000-05:00", "lastObservedAt": "2023-04-17T17:40:39.940000-05:00", "packageVulnerabilityDetails": { "cvss": [ { "baseScore": 4.9, "scoringVector": "AV:L/AC:L/Au:N/C:N/I:N/A:C", "source": "NVD", "version": "2.0" }, { "baseScore": 5.5, "scoringVector": "CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H", "source": "NVD", "version": "3.1" } ], "referenceUrls": [ "https://www.debian.org/security/2021/dsa-4942", "https://lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/42TMJVNYRY65B4QCJICBYOEIVZV3KUYI/", "https://lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/2LSDMHAKI4LGFOCSPXNVVSEWQFAVFWR7/", "https://security.gentoo.org/glsa/202107-48", "https://cert-portal.siemens.com/productcert/pdf/ssa-222547.pdf" ], "relatedVulnerabilities": [], "source": "NVD", "sourceUrl": "https://nvd.nist.gov/vuln/detail/CVE-2021-33910", "vendorCreatedAt": "2021-07-20T14:15:00-05:00", "vendorSeverity": "MEDIUM", "vendorUpdatedAt": "2022-06-14T06:15:00-05:00", "vulnerabilityId": "CVE-2021-33910", "vulnerablePackages": [ { "arch": "X86_64", "epoch": 0, "name": "systemd-pam", "packageManager": "OS", "release": "45.el8", "sourceLayerHash": "sha256:a1d0c75327776413fa0db9ed3adcdbadedc95a662eb1d360dad82bb913f8a1d1", "version": "239" }, { "arch": "X86_64", "epoch": 0, "name": "systemd", "packageManager": "OS", "release": "45.el8", "sourceLayerHash": "sha256:a1d0c75327776413fa0db9ed3adcdbadedc95a662eb1d360dad82bb913f8a1d1", "version": "239" }, { "arch": "X86_64", "epoch": 0, "name": "systemd-libs", "packageManager": "OS", "release": "45.el8", "sourceLayerHash": 
"sha256:a1d0c75327776413fa0db9ed3adcdbadedc95a662eb1d360dad82bb913f8a1d1", "version": "239" }, { "arch": "X86_64", "epoch": 0, "name": "systemd-udev", "packageManager": "OS", "release": "45.el8", "sourceLayerHash": "sha256:a1d0c75327776413fa0db9ed3adcdbadedc95a662eb1d360dad82bb913f8a1d1", "version": "239" } ] }, "remediation": { "recommendation": { "text": "None Provided" } }, "resources": [ { "details": { "awsEcrContainerImage": { "architecture": "amd64", "imageHash": "sha256:a1801b843b1bfaf77c501e7a6d3f709401a1e0c83863037fa3aab063a7fdb9dc", "imageTags": [ "latest" ], "platform": "CENTOS_8", "pushedAt": "2023-04-17T17:22:33-05:00", "registry": "123456789012", "repositoryName": "image-scanning-workload-blueprint-imagescanningrepository754c6116-arh0wk3afnkw" } }, "id": "arn:aws:ecr:us-east-1:123456789012:repository/image-scanning-workload-blueprint-imagescanningrepository754c6116-arh0wk3afnkw/sha256:a1801b843b1bfaf77c501e7a6d3f709401a1e0c83863037fa3aab063a7fdb9dc", "tags": {}, "type": "AWS_ECR_CONTAINER_IMAGE" } ], "score": 5.5, "scoreDetails": { "cvss": { "adjustments": [], "score": 5.5, "scoreSource": "NVD", "scoringVector": "CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H", "version": "3.1" } }, "severity": "MEDIUM", "status": "ACTIVE", "title": "CVE-2021-33910 - systemd-pam, systemd and 2 more", "type": "PACKAGE_VULNERABILITY", "updatedAt": "2023-04-17T17:40:39.940000-05:00" }, } ... ``` You can also check the findings in Inspector2. ```bash aws inspector2 list-findings ``` The output should look similar to the following: ```json { "findings": [ { "awsAccountId": "123456789012", "description": "When curl is instructed to get content using the metalink feature, and a user name and password are used to download the metalink XML file, those same credentials are then subsequently passed on to each of the servers from which curl will download or try to download the contents from. 
Often contrary to the user's expectations and intentions and without telling the user it happened.", "exploitAvailable": "NO", "findingArn": "arn:aws:inspector2:us-east-1:123456789012:finding/006e8eac196bf27417099413ce74eb1a", "firstObservedAt": "2023-04-14T21:03:02.932000-05:00", "fixAvailable": "YES", "inspectorScore": 5.3, "inspectorScoreDetails": { "adjustedCvss": { "adjustments": [], "cvssSource": "NVD", "score": 5.3, "scoreSource": "NVD", "scoringVector": "CVSS:3.1/AV:N/AC:H/PR:N/UI:R/S:U/C:H/I:N/A:N", "version": "3.1" } }, "lastObservedAt": "2023-04-17T13:34:41.687000-05:00", "packageVulnerabilityDetails": { "cvss": [ { "baseScore": 2.6, "scoringVector": "AV:N/AC:H/Au:N/C:P/I:N/A:N", "source": "NVD", "version": "2.0" }, { "baseScore": 5.3, "scoringVector": "CVSS:3.1/AV:N/AC:H/PR:N/UI:R/S:U/C:H/I:N/A:N", "source": "NVD", "version": "3.1" } ], "referenceUrls": [ "https://hackerone.com/reports/1213181", "https://security.gentoo.org/glsa/202212-01", "https://cert-portal.siemens.com/productcert/pdf/ssa-389290.pdf", "https://lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/FRUCW2UVNYUDZF72DQLFQR4PJEC6CF7V/", "https://www.oracle.com/security-alerts/cpuoct2021.html" ], "relatedVulnerabilities": [], "source": "NVD", "sourceUrl": "https://nvd.nist.gov/vuln/detail/CVE-2021-22923", "vendorCreatedAt": "2021-08-05T16:15:00-05:00", "vendorSeverity": "MEDIUM", "vendorUpdatedAt": "2023-01-05T12:17:00-06:00", "vulnerabilityId": "CVE-2021-22923", "vulnerablePackages": [ { "arch": "AARCH64", "epoch": 0, "fixedInVersion": "0:7.61.1-18.el8_4.1", "name": "curl", "packageManager": "OS", "release": "18.el8", "remediation": "dnf update curl", "sourceLayerHash": "sha256:52f9ef134af7dd14738733e567402af86136287d9468978d044780a6435a1193", "version": "7.61.1" }, { "arch": "AARCH64", "epoch": 0, "fixedInVersion": "0:7.61.1-18.el8_4.1", "name": "libcurl-minimal", "packageManager": "OS", "release": "18.el8", "remediation": "dnf update libcurl-minimal", 
"sourceLayerHash": "sha256:52f9ef134af7dd14738733e567402af86136287d9468978d044780a6435a1193", "version": "7.61.1" } ] }, "remediation": { "recommendation": { "text": "None Provided" } }, "resources": [ { "details": { "awsEcrContainerImage": { "architecture": "arm64", "imageHash": "sha256:65a4aad1156d8a0679537cb78519a17eb7142e05a968b26a5361153006224fdc", "imageTags": [ "latest" ], "platform": "CENTOS_8", "pushedAt": "2023-04-17T13:34:34-05:00", "registry": "123456789012", "repositoryName": "cdk-hnb659fds-container-assets-123456789012-us-east-1" } }, "id": "arn:aws:ecr:us-east-1:123456789012:repository/cdk-hnb659fds-container-assets-123456789012-us-east-1/sha256:65a4aad1156d8a0679537cb78519a17eb7142e05a968b26a5361153006224fdc", "partition": "aws", "region": "us-east-1", "tags": {}, "type": "AWS_ECR_CONTAINER_IMAGE" } ], "severity": "MEDIUM", "status": "CLOSED", "title": "CVE-2021-22923 - curl, libcurl-minimal", "type": "PACKAGE_VULNERABILITY", "updatedAt": "2023-04-17T13:36:44.258000-05:00" }, ... ``` ================================================ FILE: docs/patterns/security/securityhub.md ================================================ # AWS Security Hub Monitoring ## Objective The objective of this pattern is to demonstrate how to enable AWS Security Hub and default security standards in your AWS account, verify that it is enabled, and get findings from AWS Security Hub. The pattern will enable AWS Security Hub in the `CDK_DEFAULT_ACCOUNT` and `CDK_DEFAULT_REGION`. ## Prerequisites 1. Follow the usage [instructions](https://github.com/aws-samples/cdk-eks-blueprints-patterns/blob/main/README.md#usage) to install the dependencies and perform the repository setup. 2. `argo-admin-password` secret must be defined in Secrets Manager in the same region as the EKS cluster. 3. Complete the steps to [enable AWS Config and deploy the Security Best Practices for Amazon EKS AWS Config managed rules](eks-config-rules.md). 
**Optional (but recommended):** If you have not done so already, follow the steps to deploy the [Amazon GuardDuty stack and blueprint](guardduty.md). Since Amazon GuardDuty automatically sends its findings to AWS Security Hub, the sample EKS finding will appear in AWS Security Hub about five minutes after it has been enabled in the same region. ## Deploy To bootstrap the CDK toolkit and list all stacks in the app, run the following commands: ```bash cdk bootstrap make list ``` ### Deploy AWS Security Hub To enable AWS Security Hub in the account and region, deploy the stack by running the following command: ```bash make pattern securityhub deploy securityhub-setup ``` Once deployed, AWS Security Hub will automatically enable all controls that are part of the default security standards. Currently, the default security standards that are automatically enabled are [AWS Foundational Security Best Practices](https://docs.aws.amazon.com/securityhub/latest/userguide/fsbp-standard.html) and the [Center for Internet Security (CIS) AWS Foundations Benchmark v1.2.0](https://docs.aws.amazon.com/securityhub/latest/userguide/cis-aws-foundations-benchmark.html). ## Verify ### Verify that AWS Security Hub is enabled Now you can check that AWS Security Hub is successfully enabled by using the AWS CLI to query the same account and region. Using the AWS CLI, run the following command in the same account and region where you deployed the stack. ```bash aws securityhub describe-hub ``` If you successfully enabled AWS Security Hub, you will see output similar to the following. ```json { "HubArn": "arn:aws:securityhub:us-east-1:XXXXXXXXXXXX:hub/default", "SubscribedAt": "2021-08-18T00:52:40.624Z", "AutoEnableControls": true, "ControlFindingGenerator": "SECURITY_CONTROL" } ``` ### View findings in AWS Security Hub Use the following AWS CLI commands to view your findings in AWS Security Hub.
To list critical findings and findings related to controls that have a failed status according to the enabled [AWS Security Hub security standards](https://docs.aws.amazon.com/securityhub/latest/userguide/securityhub-standards.html) in the same account and region, run the following command: ```bash aws securityhub get-findings --filter 'SeverityLabel={Value=CRITICAL,Comparison=EQUALS},ComplianceStatus={Value=FAILED,Comparison=EQUALS}' ``` Below is an example of an IAM finding that relates to a [failed IAM control](https://docs.aws.amazon.com/securityhub/latest/userguide/iam-controls.html#iam-6) that AWS Security Hub found related to the enabled [security standards](https://docs.aws.amazon.com/securityhub/latest/userguide/standards-reference.html), and will likely be present in your list of findings if you or your organization are not using a hardware MFA device for your AWS root account: ```json { "Findings": [ { "SchemaVersion": "2018-10-08", "Id": "arn:aws:securityhub:us-east-1:XXXXXXXXXXX:security-control/IAM.6/finding/494ffa38-0b6e-46d1-98f4-e605ec09d045", "ProductArn": "arn:aws:securityhub:us-east-1::product/aws/securityhub", "ProductName": "Security Hub", "CompanyName": "AWS", "Region": "us-east-1", "GeneratorId": "security-control/IAM.6", "AwsAccountId": "XXXXXXXXXXX", "Types": [ "Software and Configuration Checks/Industry and Regulatory Standards" ], "FirstObservedAt": "2023-03-04T00:54:44.307Z", "LastObservedAt": "2023-05-31T01:20:18.210Z", "CreatedAt": "2023-03-04T00:54:44.307Z", "UpdatedAt": "2023-05-31T01:20:05.845Z", "Severity": { "Label": "CRITICAL", "Normalized": 90, "Original": "CRITICAL" }, "Title": "Hardware MFA should be enabled for the root user", "Description": "This AWS control checks whether your AWS account is enabled to use a hardware multi-factor authentication (MFA) device to sign in with root user credentials.", "Remediation": { "Recommendation": { "Text": "For information on how to correct this issue, consult the AWS Security Hub 
controls documentation.", "Url": "https://docs.aws.amazon.com/console/securityhub/IAM.6/remediation" } }, "ProductFields": { "RelatedAWSResources:0/name": "securityhub-root-account-hardware-mfa-enabled-24e3b344", "RelatedAWSResources:0/type": "AWS::Config::ConfigRule", "aws/securityhub/ProductName": "Security Hub", "aws/securityhub/CompanyName": "AWS", "Resources:0/Id": "arn:aws:iam::XXXXXXXXXXX:root", "aws/securityhub/FindingId": "arn:aws:securityhub:us-east-1::product/aws/securityhub/arn:aws:securityhub:us-east-1:XXXXXXXXXXX:security-control/IAM.6/finding/494ffa38-0b6e-46d1-98f4-e605ec09d045" }, "Resources": [ { "Type": "AwsAccount", "Id": "AWS::::Account:XXXXXXXXXXX", "Partition": "aws", "Region": "us-east-1" } ], "Compliance": { "Status": "FAILED", "RelatedRequirements": [ "CIS AWS Foundations Benchmark v1.2.0/1.14", "CIS AWS Foundations Benchmark v1.4.0/1.6", "NIST.800-53.r5 AC-2(1)", "NIST.800-53.r5 AC-3(15)", "NIST.800-53.r5 IA-2(1)", "NIST.800-53.r5 IA-2(2)", "NIST.800-53.r5 IA-2(6)", "NIST.800-53.r5 IA-2(8)", "PCI DSS v3.2.1/8.3.1" ], "SecurityControlId": "IAM.6", "AssociatedStandards": [ { "StandardsId": "ruleset/cis-aws-foundations-benchmark/v/1.2.0" }, { "StandardsId": "standards/aws-foundational-security-best-practices/v/1.0.0" }, { "StandardsId": "standards/cis-aws-foundations-benchmark/v/1.4.0" }, { "StandardsId": "standards/nist-800-53/v/5.0.0" }, { "StandardsId": "standards/pci-dss/v/3.2.1" } ] }, "WorkflowState": "NEW", "Workflow": { "Status": "NEW" }, "RecordState": "ACTIVE", "FindingProviderFields": { "Severity": { "Label": "CRITICAL", "Original": "CRITICAL" }, "Types": [ "Software and Configuration Checks/Industry and Regulatory Standards" ] } } ] } ``` To search for findings related to the Security Best Practices for Amazon EKS Config managed rules, run the following AWS CLI command: ```bash aws securityhub get-findings --filters 'GeneratorId={Value="security-control/EKS.1", Comparison="EQUALS"}' ``` You might see a finding such as the 
following: ```json { "Findings": [ { "SchemaVersion": "2018-10-08", "Id": "arn:aws:securityhub:us-east-1:XXXXXXXXXXX:security-control/EKS.1/finding/931a06d9-1b1d-431b-8b91-1ff86829b400", "ProductArn": "arn:aws:securityhub:us-east-1::product/aws/securityhub", "ProductName": "Security Hub", "CompanyName": "AWS", "Region": "us-east-1", "GeneratorId": "security-control/EKS.1", "AwsAccountId": "XXXXXXXXXXX", "Types": [ "Software and Configuration Checks/Industry and Regulatory Standards" ], "FirstObservedAt": "2023-05-09T10:34:36.736Z", "LastObservedAt": "2023-05-30T10:27:41.205Z", "CreatedAt": "2023-05-09T10:34:36.736Z", "UpdatedAt": "2023-05-30T10:27:34.574Z", "Severity": { "Label": "HIGH", "Normalized": 70, "Original": "HIGH" }, "Title": "EKS cluster endpoints should not be publicly accessible", "Description": "This control checks whether an Amazon EKS cluster endpoint is publicly accessible. The control fails if an EKS cluster has an endpoint that is publicly accessible.", "Remediation": { "Recommendation": { "Text": "For information on how to correct this issue, consult the AWS Security Hub controls documentation.", "Url": "https://docs.aws.amazon.com/console/securityhub/EKS.1/remediation" } }, "ProductFields": { "RelatedAWSResources:0/name": "securityhub-eks-endpoint-no-public-access-f5aecad6", "RelatedAWSResources:0/type": "AWS::Config::ConfigRule", "aws/securityhub/ProductName": "Security Hub", "aws/securityhub/CompanyName": "AWS", "aws/securityhub/annotation": "Cluster Endpoint of starter-blueprint is Publicly accessible", "Resources:0/Id": "arn:aws:eks:us-east-1:XXXXXXXXXXX:cluster/starter-blueprint", "aws/securityhub/FindingId": "arn:aws:securityhub:us-east-1::product/aws/securityhub/arn:aws:securityhub:us-east-1:XXXXXXXXXXX:security-control/EKS.1/finding/931a06d9-1b1d-431b-8b91-1ff86829b400" }, "Resources": [ { "Type": "AwsEksCluster", "Id": "arn:aws:eks:us-east-1:XXXXXXXXXXX:cluster/starter-blueprint", "Partition": "aws", "Region": "us-east-1" } ], 
"Compliance": { "Status": "FAILED", "RelatedRequirements": [ "NIST.800-53.r5 AC-21", "NIST.800-53.r5 AC-3", "NIST.800-53.r5 AC-3(7)", "NIST.800-53.r5 AC-4", "NIST.800-53.r5 AC-4(21)", "NIST.800-53.r5 AC-6", "NIST.800-53.r5 SC-7", "NIST.800-53.r5 SC-7(11)", "NIST.800-53.r5 SC-7(16)", "NIST.800-53.r5 SC-7(20)", "NIST.800-53.r5 SC-7(21)", "NIST.800-53.r5 SC-7(3)", "NIST.800-53.r5 SC-7(4)", "NIST.800-53.r5 SC-7(9)" ], "SecurityControlId": "EKS.1", "AssociatedStandards": [ { "StandardsId": "standards/aws-foundational-security-best-practices/v/1.0.0" }, { "StandardsId": "standards/nist-800-53/v/5.0.0" } ] }, "WorkflowState": "NEW", "Workflow": { "Status": "NEW" }, "RecordState": "ACTIVE", "FindingProviderFields": { "Severity": { "Label": "HIGH", "Original": "HIGH" }, "Types": [ "Software and Configuration Checks/Industry and Regulatory Standards" ] } } ] } ``` To see any findings generated by Amazon GuardDuty in AWS Security Hub, run the following command: ```bash aws securityhub get-findings --filters 'ProductName={Value="GuardDuty",Comparison="EQUALS"}' ``` ```json { "Findings": [ { "SchemaVersion": "2018-10-08", "Id": "arn:aws:guardduty:us-east-1:XXXXXXXXXXX:detector/68b6db88cfef1e59333ecbccd8e816b5/finding/0ec437473c147f649d1437f94d615224", "ProductArn": "arn:aws:securityhub:us-east-1::product/aws/guardduty", "ProductName": "GuardDuty", "CompanyName": "Amazon", "Region": "us-east-1", "GeneratorId": "arn:aws:guardduty:us-east-1:XXXXXXXXXXX:detector/68b6db88cfef1e59333ecbccd8e816b5", "AwsAccountId": "XXXXXXXXXXX", "Types": [ "TTPs/PrivilegeEscalation/PrivilegeEscalation:Kubernetes-PrivilegedContainer" ], ... "Severity": { "Product": 5, "Label": "MEDIUM", "Normalized": 50 }, "Title": "Privileged container with root level access launched on the EKS Cluster.", "Description": "A privileged container with root level access was launched on EKS Cluster guardduty-blueprint. 
If this behavior is not expected, it may indicate that your credentials are compromised.", "SourceUrl": "https://us-east-1.console.aws.amazon.com/guardduty/home?region=us-east-1#/findings?macros=current&fId=0ec437473c147f649d1437f94d615224", "ProductFields": { ... }, "Resources": [ { ... } ], "WorkflowState": "NEW", "Workflow": { "Status": "NEW" }, "RecordState": "ACTIVE", "FindingProviderFields": { "Severity": { "Label": "MEDIUM" }, "Types": [ "TTPs/PrivilegeEscalation/PrivilegeEscalation:Kubernetes-PrivilegedContainer" ] }, "Sample": false } ] } ``` If you deployed the [Amazon GuardDuty Protection EKS Blueprints pattern](https://github.com/aws-samples/cdk-eks-blueprints-patterns/blob/main/docs/patterns/security/guardduty.md) to the same account and region where you enabled AWS Security Hub, you should see Amazon GuardDuty findings like the one above. The sample workload deployed with the [Amazon GuardDuty pattern](guardduty.md) which contains a privileged container is detected by Amazon GuardDuty and generates the `Kubernetes-PrivilegedContainer` finding. Amazon GuardDuty automatically sent this finding to AWS Security Hub where it can be viewed and triaged. ================================================ FILE: docs/patterns/union.md ================================================ # Union.ai on EKS Pattern Union.ai empowers AI development teams to rapidly ship high-quality code to production by offering optimized performance, resource efficiency, and workflow authoring experience. With Union.ai your team can: - Run complex AI workloads with performance, scale, and efficiency. - Scale out to multiple regions, clusters, and clouds as needed for resource availability, scale, or compliance. Union.ai’s modular architecture allows for great flexibility and control. The customer can decide how many clusters to have, their shape, and who has access to what. All communication is encrypted.

Union Self-managed Architecture

## Prerequisites Ensure that you have installed the following tools on your machine: - [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) (also ensure it is [configured](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html#getting-started-quickstart-new)) - [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) - [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) - [tsc](https://www.typescriptlang.org/download) - [make](https://www.gnu.org/software/make/) - uctl ### Installing `uctl` On Mac: ```bash brew tap unionai/homebrew-tap brew install uctl ``` With cURL: ```bash curl -sL https://raw.githubusercontent.com/unionai/uctl/main/install.sh | bash ``` ## Deployment ### Setup Union Credentials Both the control plane URL and cluster name will be provided by Union. Union will also provide authentication information for your account to access the hosted control plane. ```bash export UNION_CONTROL_PLANE_URL= export UNION_CLUSTER_NAME= export UNION_ORG_NAME= uctl config init --host=$UNION_CONTROL_PLANE_URL uctl selfserve provision-dataplane-resources --clusterName $UNION_CLUSTER_NAME --provider aws ``` This command will output the ID, name, and secret used by Union services to communicate with the control plane. 
### Create Union Secrets in AWS Secrets Manager ```bash export UNION_SECRET_NAME=union-secret aws secretsmanager create-secret --name $UNION_SECRET_NAME \ --secret-string "{\"host\":\"$UNION_CONTROL_PLANE_URL\",\"clusterName\":\"$UNION_CLUSTER_NAME\",\"orgName\":\"$UNION_ORG_NAME\"}" export UNION_CLIENT_SECRET_NAME=union-client-secret export UNION_CLIENT_ID_SECRET_VALUE= export UNION_SECRET_SECRET_VALUE= aws secretsmanager create-secret --name $UNION_CLIENT_SECRET_NAME \ --secret-string "{\"clientId\":\"$UNION_CLIENT_ID_SECRET_VALUE\",\"clientSecret\":\"$UNION_SECRET_SECRET_VALUE\"}" ``` ### Clone the repository: ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` ### Run the following commands: ```sh make deps make build make pattern unionai deploy ``` ### Validation Run the command: ```bash kubectl get deploy -A ``` Output should be: ```bash NAMESPACE NAME READY UP-TO-DATE AVAILABLE AGE kube-system blueprints-addon-metrics-server 1/1 1 1 57d kube-system blueprints-addon-union-dataplane-kube-state-metrics 1/1 1 1 57d unionai executor 1/1 1 1 57d unionai flytepropeller 1/1 1 1 57d unionai flytepropeller-webhook 1/1 1 1 57d unionai opencost 1/1 1 1 57d unionai prometheus-operator 1/1 1 1 57d unionai syncresources 1/1 1 1 57d unionai union-operator 1/1 1 1 57d unionai union-operator-proxy 1/1 1 1 57d ``` To validate the cluster has been successfully registered to the Union control plane run the command: ```bash uctl get cluster ``` Output should be: ```bash ----------- ------- --------------- ----------- | NAME | ORG | STATE | HEALTH | ----------- ------- --------------- ----------- | | | STATE_ENABLED | HEALTHY | ----------- ------- --------------- ----------- 1 rows ``` ### 8. 
Register and run example workflows ```bash uctl register examples --project=union-health-monitoring --domain=development uctl validate snacks --project=union-health-monitoring --domain=development ---------------------- ----------------------------------- ---------- -------------------------------- -------------- ----------- --------------- | NAME | LAUNCH PLAN NAME | VERSION | STARTED AT | ELAPSED TIME | RESULT | ERROR MESSAGE | ---------------------- ----------------------------------- ---------- -------------------------------- -------------- ----------- --------------- | alskkhcd6wx5m6cqjlwm | basics.hello_world.hello_world_wf | v0.3.341 | 2025-05-09T18:30:02.968183352Z | 4.452440953s | SUCCEEDED | | ---------------------- ----------------------------------- ---------- -------------------------------- -------------- ----------- --------------- 1 rows ``` ================================================ FILE: docs/patterns/windows.md ================================================ # Windows Nodes on EKS We (AWS) have received many requests to add Windows node group support from customers who run their workloads on Windows. Customers want to scale these workloads on Kubernetes alongside their Linux workloads. Amazon EKS supports Windows node groups, and you can add a Windows worker node group to an Amazon EKS cluster. This pattern creates an EKS cluster control plane with a managed node group running Windows nodes. Please check our AWS doc on [Enabling Windows support for your Amazon EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/windows-support.html) to learn more about the considerations and prerequisites for running Windows nodes with an EKS cluster. Also please refer to this AWS doc to learn about [Amazon EKS optimized Windows AMIs](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-windows-ami.html). ### Addons Not all of the listed EKS addons support Windows.
We are currently working on documentation listing the supported addons, which will be published [here](https://github.com/aws-quickstart/cdk-eks-blueprints/blob/main/docs/addons/index.md). ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. `make` ## Configuration Options The pattern exposes the `WindowsBuilder` construct to build a cluster with Windows node groups. At the moment, adding Windows nodes to the cluster requires at least one Linux node group to be present to deploy core add-ons, such as VPC-CNI and CoreDNS. The `WindowsBuilder` provides a set of options, most of which are similar to managed node groups. In addition, it provides an attribute `noScheduleForWindowsNodes : true | false`. When set to `true` it will automatically add a `NoSchedule` taint to the Windows nodes. This approach is a safe way to disallow any application that does not provide proper tolerations to be scheduled on Windows nodes. In this scenario, in order to schedule a workload (application/add-on) on Windows nodes, customers can apply the following node selectors and tolerations to their deployments: ```yaml nodeSelector: kubernetes.io/os: windows tolerations: - key: "os" operator: "Equal" value: "windows" effect: "NoSchedule" ``` ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` Updating npm ```sh npm install -g npm@latest ``` To view patterns and deploy the windows pattern ```sh make list npx cdk bootstrap make pattern windows deploy ``` ## Verify the resources Run the update-kubeconfig command.
You should be able to get the command from the CDK output message. More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name windows-eks-blueprint --region <your-region> --role-arn arn:aws:iam::xxxxxxxxx:role/windows-construct-bluepr-windowsconstructbluepri-1OZNO42GH3OCB ``` Let's verify the resources created from the steps above. ```sh kubectl get nodes -o json | jq -r '.items[] | "Name: ",.metadata.name,"\nInstance Type: ",.metadata.labels."beta.kubernetes.io/instance-type","\nOS Type: ",.metadata.labels."beta.kubernetes.io/os","\n"' # Output shows Windows and Linux Nodes ``` ## Deploy sample windows application Create a namespace for the windows app called windows ```sh kubectl create ns windows ``` Create a yaml file for the app from the configuration below and save it as windows-server-2022.yaml ```yaml --- apiVersion: apps/v1 kind: Deployment metadata: name: windows-server-iis-ltsc2022 namespace: windows spec: selector: matchLabels: app: windows-server-iis-ltsc2022 tier: backend track: stable replicas: 2 template: metadata: labels: app: windows-server-iis-ltsc2022 tier: backend track: stable spec: containers: - name: windows-server-iis-ltsc2022 image: mcr.microsoft.com/windows/servercore/iis:windowsservercore-ltsc2022 ports: - name: http containerPort: 80 imagePullPolicy: IfNotPresent command: - powershell.exe - -command - "Add-WindowsFeature Web-Server; Invoke-WebRequest -UseBasicParsing -Uri 'https://dotnetbinaries.blob.core.windows.net/servicemonitor/2.0.1.6/ServiceMonitor.exe' -OutFile 'C:\\ServiceMonitor.exe'; echo '

Our first pods running on Windows managed node groups! Powered by Windows Server LTSC 2022.

' > C:\\inetpub\\wwwroot\\iisstart.htm; C:\\ServiceMonitor.exe 'w3svc'; " nodeSelector: kubernetes.io/os: windows tolerations: - key: "os" operator: "Equal" value: "windows" effect: "NoSchedule" --- apiVersion: v1 kind: Service metadata: name: windows-server-iis-ltsc2022-service namespace: windows spec: ports: - port: 80 protocol: TCP targetPort: 80 selector: app: windows-server-iis-ltsc2022 tier: backend track: stable sessionAffinity: None type: LoadBalancer ``` Deploy the sample app ```sh kubectl apply -f windows-server-2022.yaml ``` Verify the resources created successfully ```sh kubectl get -n windows svc,deploy,pods ``` ### Reference Please reference our [blog](https://aws.amazon.com/blogs/containers/deploying-amazon-eks-windows-managed-node-groups/#:~:text=2.-,Deploy%20a%20sample%20application,-Now%20that%20our) on Deploying Amazon EKS Windows managed node groups to learn more about this topic.
## Cleanup First delete the windows app ```sh kubectl delete -f windows-server-2022.yaml kubectl delete ns windows ``` To clean up your EKS Blueprint, run the following command: ```sh make pattern windows destroy ``` ================================================ FILE: docs/patterns/workloads-codecommit.md ================================================ # EKS Cluster with ArgoCD and Workloads in private AWS CodeCommit repository ## Objective This example shows how to provision an EKS cluster with: - ArgoCD - Workloads deployed by ArgoCD - Private AWS CodeCommit repository to store the configurations of workloads - Setup to trigger ArgoCD projects sync on git push to AWS CodeCommit repository Pattern source: /lib/workloads-codecommit-construct/index.ts ## Architecture ![Architectural diagram](./images/argocd-cc.png) To better understand how ArgoCD works with EKS Blueprints, read the EKS Blueprints ArgoCD [Documentation](https://aws-quickstart.github.io/cdk-eks-blueprints/addons/argo-cd/) - After a push to AWS CodeCommit repository notification trigger calls AWS Lambda - AWS Lambda calls ArgoCD webhook URL to trigger ArgoCD projects sync ## Prerequisites Ensure that you have installed the following tools on your machine. 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 2. [kubectl](https://Kubernetes.io/docs/tasks/tools/) 3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install) 4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install) 5. [jq](https://jqlang.github.io/jq/) 6. `make` ## Deploy EKS Cluster with Amazon EKS Blueprints for CDK 1. Clone the repository ```sh git clone https://github.com/aws-samples/cdk-eks-blueprints-patterns.git cd cdk-eks-blueprints-patterns ``` 2. Update npm ```sh npm install -g npm@latest ``` 3. View patterns and deploy workloads-codecommit pattern ```sh make list npx cdk bootstrap make pattern workloads-codecommit deploy ``` ## Verify the resources 1. 
Run the update-kubeconfig command. You should be able to get the command from the CDK output message. More information can be found at https://aws-quickstart.github.io/cdk-eks-blueprints/getting-started/#cluster-access ```sh aws eks update-kubeconfig --name workloads-codecommit-blueprint --region --role-arn arn:aws:iam::xxxxxxxxx:role/workloads-codecommit-blue-workloadscodecommitbluepr-VH6YOKWPAt5H ``` 2. Verify the resources created from the steps above. ```bash $ kubectl get po -n argocd NAME READY STATUS RESTARTS AGE blueprints-addon-argocd-application-controller-0 1/1 Running 0 1h blueprints-addon-argocd-applicationset-controller-7b78c7fc5dmkx 1/1 Running 0 1h blueprints-addon-argocd-dex-server-6cf94ddc54-p68pl 1/1 Running 0 1h blueprints-addon-argocd-notifications-controller-6f6b7d95ckhf6p 1/1 Running 0 1h blueprints-addon-argocd-redis-b8dbc7dc6-dvbkr 1/1 Running 0 1h blueprints-addon-argocd-repo-server-66df7f448f-kvwmw 1/1 Running 0 1h blueprints-addon-argocd-server-584db5f545-8xp48 1/1 Running 0 1h ``` ## Get ArgoCD Url and credentials ```bash until kubectl get svc blueprints-addon-argocd-server -n argocd -o json | jq --raw-output '.status.loadBalancer.ingress[0].hostname' | grep -m 1 "elb.amazonaws.com"; do sleep 5 ; done; export ARGOCD_SERVER=`kubectl get svc blueprints-addon-argocd-server -n argocd -o json | jq --raw-output '.status.loadBalancer.ingress[0].hostname'` export CC_REPO_NAME=eks-blueprints-workloads-cc echo "ArgoCD URL: https://$ARGOCD_SERVER" echo "ArgoCD server user: admin" echo "ArgoCD admin password: $(kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d)" ``` ## Create notification trigger from AWS CodeCommit push to ArgoCD Sync ```bash export LAMBDA_ARN=$(aws lambda get-function --function-name eks-blueprints-workloads-cc-webhook | jq -r .Configuration.FunctionArn) cat > trigger.json </test'], testMatch: ['**/*.test.ts'], transform: { '^.+\\.tsx?$': 'ts-jest' } }; 
================================================ FILE: lib/amp-monitoring/index.ts ================================================
import { Construct } from 'constructs';

// Blueprints Lib
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as amp from 'aws-cdk-lib/aws-aps';

// Team implementations
import * as team from '../teams/multi-account-monitoring';

/**
 * Demonstrates a monitoring cluster that registers an Amazon Managed Service
 * for Prometheus (AMP) workspace as a named resource and wires its endpoint
 * into the AMP add-on, alongside the ADOT collector and supporting add-ons.
 */
export default class AmpMonitoringConstruct {

    /**
     * Creates the blueprint builder via {@link create} and builds the stack.
     *
     * @param scope parent construct for the generated stack
     * @param id pattern id; the stack is named `${id}-blueprint`
     * @param account target account, defaults to `CDK_DEFAULT_ACCOUNT`
     * @param region target region, defaults to `CDK_DEFAULT_REGION`
     */
    build(scope: Construct, id: string, account?: string, region?: string ) {
        // Resolve the target environment, falling back to the CDK defaults.
        const accountID = account ?? process.env.CDK_DEFAULT_ACCOUNT! ;
        const awsRegion = region ?? process.env.CDK_DEFAULT_REGION! ;
        const stackID = `${id}-blueprint`;

        this.create(scope, accountID, awsRegion)
            .build(scope, stackID);
    }

    /**
     * Returns a configured (but not yet built) blueprint builder.
     *
     * @param scope unused by this method; kept for interface compatibility
     * @param account target account, defaults to `CDK_DEFAULT_ACCOUNT`
     * @param region target region, defaults to `CDK_DEFAULT_REGION`
     * @returns the blueprint builder with resource provider, add-ons and teams set
     */
    create(scope: Construct, account?: string, region?: string ) {
        // Resolve the target environment, falling back to the CDK defaults.
        const accountID = account ?? process.env.CDK_DEFAULT_ACCOUNT! ;
        const awsRegion = region ?? process.env.CDK_DEFAULT_REGION! ;

        const ampWorkspaceName = "multi-account-monitoring";
        // The workspace is registered as a resource provider under the same
        // name below; the double cast exposes the CfnWorkspace attribute on
        // whatever getNamedResource returns.
        const ampPrometheusEndpoint = (blueprints.getNamedResource(ampWorkspaceName) as unknown as amp.CfnWorkspace).attrPrometheusEndpoint;

        return blueprints.EksBlueprint.builder()
            .account(accountID)
            .region(awsRegion)
            .version('auto')
            .resourceProvider(ampWorkspaceName, new blueprints.CreateAmpProvider(ampWorkspaceName, ampWorkspaceName))
            .addOns(
                new blueprints.AwsLoadBalancerControllerAddOn(),
                new blueprints.CertManagerAddOn(),
                new blueprints.KubeStateMetricsAddOn(),
                new blueprints.PrometheusNodeExporterAddOn(),
                new blueprints.AdotCollectorAddOn(),
                new blueprints.addons.AmpAddOn({
                    ampPrometheusEndpoint: ampPrometheusEndpoint,
                }),
                new blueprints.XrayAdotAddOn(),
                new blueprints.NginxAddOn(),
                new blueprints.ClusterAutoScalerAddOn(),
                new blueprints.SecretsStoreAddOn()
            )
            .teams(new team.TeamGeordi(), new team.CorePlatformTeam());
    }
}

================================================ FILE: lib/argo-config-managent/index.ts ================================================

================================================ FILE: lib/aws-batch-on-eks-construct/index.ts ================================================
import { Construct } from 'constructs';
import * as blueprints from '@aws-quickstart/eks-blueprints';
import { BatchEksTeam } from '@aws-quickstart/eks-blueprints';
import { PolicyStatement, Effect } from 'aws-cdk-lib/aws-iam';

/**
 * Builds an EKS blueprint with the AWS Batch add-on and the AWS for Fluent Bit
 * add-on configured to ship logs to CloudWatch.
 */
export default class BatchOnEKSConstruct {
    /**
     * @param scope parent construct for the generated stack
     * @param id pattern id; the stack is named `${id}-blueprint`
     * @param teams Batch teams to register with the blueprint
     */
    build(scope: Construct, id: string, teams: BatchEksTeam[]) {

        // Permissions handed to the Fluent Bit add-on through `iamPolicies`.
        const batchIamPolicy = new PolicyStatement({
            effect: Effect.ALLOW,
            actions: [
                "cloudwatch:PutMetricData",
                "ec2:DescribeVolumes",
                "ec2:DescribeTags",
                "logs:PutLogEvents",
                "logs:DescribeLogStreams",
                "logs:DescribeLogGroups",
                "logs:CreateLogStream",
                "logs:CreateLogGroup"
            ],
            resources: ["*"],
        });

        const stackID = `${id}-blueprint`;

        blueprints.EksBlueprint.builder()
            .account(process.env.CDK_DEFAULT_ACCOUNT!)
            .region(process.env.CDK_DEFAULT_REGION!)
            .addOns(
                new blueprints.AwsBatchAddOn(),
                new blueprints.AwsForFluentBitAddOn({
                    iamPolicies:[batchIamPolicy],
                    values: {
                        cloudWatch: {
                            enabled: true,
                            region: process.env.CDK_DEFAULT_REGION!,
                            logGroupName: '/aws/batch/batch-team-a-logs'
                        },
                        // Tolerate the Batch node taint key.
                        tolerations: [{
                            "key": "batch.amazonaws.com/batch-node", "operator": "Exists"
                        }]
                    }
                })
            )
            .teams(...teams)
            .version('auto')
            .build(scope, stackID);
    }
}

================================================ FILE: lib/backstage-construct/backstage-secret-addon.ts ================================================
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as eks from "aws-cdk-lib/aws-eks";
import { Construct } from 'constructs';
import { dependable } from "@aws-quickstart/eks-blueprints/dist/utils";
import { ISecret } from 'aws-cdk-lib/aws-secretsmanager';

export interface BackstageSecretAddOnProps {
    /**
     * Backstage Namespace
     */
    namespace: string,

    /**
     * The name of the Secret
     */
    databaseSecretTargetName: string,

    /**
     * The name of the Secret from the Resource Provider
     */
    databaseSecretResourceName: string
}

/**
 * Add-on that creates a `ClusterSecretStore` backed by AWS Secrets Manager and
 * an `ExternalSecret` exposing the database credentials as
 * `POSTGRES_USER` / `POSTGRES_PASSWORD` in the Backstage namespace.
 */
export class BackstageSecretAddOn implements blueprints.ClusterAddOn {
    readonly props: BackstageSecretAddOnProps;

    constructor(props: BackstageSecretAddOnProps) {
        this.props = props;
    }

    /**
     * Deploys the secret store and the external secret manifests.
     *
     * @param clusterInfo cluster the manifests are applied to
     * @returns a promise resolving to the `ClusterSecretStore` manifest
     * @throws Error when the database secret resource is not registered
     */
    @dependable(blueprints.addons.ExternalsSecretsAddOn.name)
    deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
        const cluster = clusterInfo.cluster;

        const secretStoreName = "secret-manager-store";
        const secretStore = new eks.KubernetesManifest(cluster.stack, "ClusterSecretStore", {
            cluster: cluster,
            manifest: [
                {
                    apiVersion: "external-secrets.io/v1beta1",
                    kind: "ClusterSecretStore",
                    metadata: {
                        name: secretStoreName,
                        namespace: this.props.namespace
                    },
                    spec: {
                        provider: {
                            aws: {
                                service: "SecretsManager",
                                region: cluster.stack.region,
                                auth: {
                                    jwt: {
                                        serviceAccountRef: {
                                            name: "external-secrets-sa",
                                            namespace: "external-secrets",
                                        },
                                    },
                                },
                            },
                        },
                    },
                },
            ],
        });

        const databaseCredentialsSecret: ISecret | undefined = clusterInfo.getResource(this.props.databaseSecretResourceName);
        if (databaseCredentialsSecret === undefined) {
            throw new Error("Database Secret not found in context");
        }

        const databaseInstanceCredentialsSecretName = databaseCredentialsSecret.secretName;
        const externalSecret = new eks.KubernetesManifest(cluster.stack, "BackstageDatabaseExternalSecret", {
            cluster: cluster,
            manifest: [
                {
                    apiVersion: "external-secrets.io/v1beta1",
                    kind: "ExternalSecret",
                    metadata: {
                        name: "external-backstage-db-secret",
                        namespace: this.props.namespace
                    },
                    spec: {
                        secretStoreRef: {
                            name: secretStoreName,
                            kind: "ClusterSecretStore",
                        },
                        target: {
                            name: this.props.databaseSecretTargetName,
                        },
                        data: [
                            {
                                secretKey: "POSTGRES_PASSWORD",
                                remoteRef: {
                                    key: databaseInstanceCredentialsSecretName,
                                    property: "password"
                                }
                            },
                            {
                                secretKey: "POSTGRES_USER",
                                remoteRef: {
                                    key: databaseInstanceCredentialsSecretName,
                                    property: "username"
                                }
                            },
                        ],
                    },
                },
            ],
        });
        // The ExternalSecret references the store, so it must be applied after it.
        externalSecret.node.addDependency(secretStore);

        // NOTE(review): resolves with the store, not the ExternalSecret that
        // depends on it — confirm dependents do not need the secret itself.
        return Promise.resolve(secretStore);
    }
}

================================================ FILE: lib/backstage-construct/database-credentials.ts ================================================
import { Secret,ISecret } from 'aws-cdk-lib/aws-secretsmanager';
import { ResourceContext, ResourceProvider } from '@aws-quickstart/eks-blueprints';

export interface DatabaseInstanceCredentialsProviderProps {
    /**
     * The username for the database secret
     */
    username: string,
}

/**
 * Resource provider that creates a Secrets Manager secret holding the
 * configured username and a generated password.
 */
export class DatabaseInstanceCredentialsProvider implements ResourceProvider<ISecret> {
    readonly props: DatabaseInstanceCredentialsProviderProps;

    constructor(props: DatabaseInstanceCredentialsProviderProps) {
        this.props = props;
    }

    /**
     * @param context resource context whose scope owns the new secret
     * @returns the created secret (JSON with `username` and generated `password`)
     */
    provide(context: ResourceContext): ISecret {
        return new Secret(context.scope, "database-secret", {
            generateSecretString: {
                secretStringTemplate: JSON.stringify({
                    username: this.props.username,
                }),
                excludePunctuation: true,
                includeSpace: false,
                generateStringKey: "password"
            }
        });
    }
}
================================================ FILE: lib/backstage-construct/index.ts ================================================
import { Construct } from 'constructs';
import * as blueprints from '@aws-quickstart/eks-blueprints';
import { BackstageSecretAddOn, BackstageSecretAddOnProps } from './backstage-secret-addon';
import { DatabaseInstanceCredentialsProvider, DatabaseInstanceCredentialsProviderProps } from './database-credentials';
import * as databaseInstanceProvider from './rds-database-instance';

/**
 * Builds an EKS blueprint running Backstage. Settings are read from CDK
 * context (with the defaults below), and the VPC, hosted zone, certificate,
 * database credentials and database instance are registered as resource
 * providers.
 */
export class BackstageConstruct extends Construct {
    /**
     * @param scope parent construct; also the source of context values
     * @param id construct id
     */
    constructor(scope: Construct, id: string) {
        super(scope, id);

        // Each setting comes from CDK context, falling back to the default given here.
        const props = {
            account: process.env.CDK_DEFAULT_ACCOUNT,
            region: process.env.CDK_DEFAULT_REGION,
            namespace: blueprints.utils.valueFromContext(scope, "backstage.namespace.name", "backstage"),
            backstageImageRegistry: blueprints.utils.valueFromContext(scope, "backstage.image.registry.name", "youraccount.dkr.ecr.yourregion.amazonaws.com"),
            backstageImageRepository: blueprints.utils.valueFromContext(scope, "backstage.image.repository.name", "backstage"),
            backstageImageTag: blueprints.utils.valueFromContext(scope, "backstage.image.tag.name", "latest"),
            parentDomain: blueprints.utils.valueFromContext(scope, "backstage.parent.domain.name", "example.com"),
            backstageLabel: blueprints.utils.valueFromContext(scope, "backstage.subdomain.label", "backstage"),
            hostedZoneId: blueprints.utils.valueFromContext(scope, "backstage.hosted.zone.id", "1234"),
            certificateResourceName: blueprints.utils.valueFromContext(scope, "backstage.certificate.resource.name", "backstage-certificate"),
            databaseResourceName: blueprints.utils.valueFromContext(scope, "backstage.database.resource.name", "backstage-database"),
            databaseInstancePort: blueprints.utils.valueFromContext(scope, "backstage.database.instance.port", 5432),
            databaseSecretResourceName: blueprints.utils.valueFromContext(scope, "backstage.database.secret.resource.name", "backstage-database-credentials"),
            username: blueprints.utils.valueFromContext(scope, "backstage.database.username", "postgres"),
            databaseSecretTargetName: blueprints.utils.valueFromContext(scope, "backstage.database.secret.target.name", "backstage-database-secret"),
        };

        const subdomain = props.backstageLabel+"."+props.parentDomain;

        const databaseInstanceCredentialsProviderProps = {
            username: props.username
        } as DatabaseInstanceCredentialsProviderProps;

        const databaseInstanceProps = {
            vpcResourceName: blueprints.GlobalResources.Vpc,
            databaseInstancePort: props.databaseInstancePort,
            databaseSecretResourceName: props.databaseSecretResourceName
        } as databaseInstanceProvider.DatabaseInstanceProviderProps;

        const backstageSecretAddOnProps = {
            namespace: props.namespace,
            databaseSecretResourceName: props.databaseSecretResourceName,
            databaseSecretTargetName: props.databaseSecretTargetName
        } as BackstageSecretAddOnProps;

        const backstageAddOnProps = {
            namespace: props.namespace,
            subdomain: subdomain,
            certificateResourceName: props.certificateResourceName,
            imageRegistry: props.backstageImageRegistry,
            imageRepository: props.backstageImageRepository,
            imageTag: props.backstageImageTag,
            databaseResourceName: props.databaseResourceName,
            databaseSecretTargetName: props.databaseSecretTargetName
        } as blueprints.BackstageAddOnProps;

        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.CalicoOperatorAddOn(),
            new blueprints.ClusterAutoScalerAddOn(),
            new blueprints.AwsLoadBalancerControllerAddOn(),
            new blueprints.VpcCniAddOn(),
            new blueprints.CoreDnsAddOn(),
            new blueprints.KubeProxyAddOn(),
            new blueprints.ExternalDnsAddOn({
                hostedZoneResources: [blueprints.GlobalResources.HostedZone]
            }),
            new blueprints.addons.ExternalsSecretsAddOn({}),
            new blueprints.addons.SSMAgentAddOn(),
            new BackstageSecretAddOn(backstageSecretAddOnProps),
            new blueprints.BackstageAddOn(backstageAddOnProps)
        ];

        blueprints.EksBlueprint.builder()
            .account(props.account)
            .region(props.region)
            .resourceProvider(blueprints.GlobalResources.Vpc, new blueprints.VpcProvider())
            .resourceProvider(blueprints.GlobalResources.HostedZone, new blueprints.ImportHostedZoneProvider(props.hostedZoneId, props.parentDomain))
            .resourceProvider(props.certificateResourceName, new blueprints.CreateCertificateProvider("elb-certificate", subdomain, blueprints.GlobalResources.HostedZone))
            .resourceProvider(props.databaseSecretResourceName, new DatabaseInstanceCredentialsProvider(databaseInstanceCredentialsProviderProps))
            .resourceProvider(props.databaseResourceName, new databaseInstanceProvider.DatabaseInstanceProvider(databaseInstanceProps))
            .addOns(...addOns)
            .version('auto')
            .teams()
            .build(scope, props.backstageLabel+"-blueprint");
    }
}

================================================ FILE: lib/backstage-construct/rds-database-instance.ts ================================================
import * as rds from 'aws-cdk-lib/aws-rds';
import { ISecret } from 'aws-cdk-lib/aws-secretsmanager';
import { IVpc, Peer, SecurityGroup, SubnetType, Port } from 'aws-cdk-lib/aws-ec2';
import { ResourceContext, ResourceProvider } from '@aws-quickstart/eks-blueprints';

export interface DatabaseInstanceProviderProps {
    /**
     * Name of the VPC registered as a resource
     */
    vpcResourceName: string,

    /**
     * Port to be used by the database
     */
    databaseInstancePort: number,

    /**
     * The name of the Secret registered as a resource
     */
    databaseSecretResourceName: string
}

/**
 * Resource provider that creates a PostgreSQL RDS instance in the registered
 * VPC, using the registered secret for credentials and a security group that
 * admits traffic from the VPC CIDR on the configured port.
 */
export class DatabaseInstanceProvider implements ResourceProvider<rds.IDatabaseInstance> {
    readonly props: DatabaseInstanceProviderProps;

    constructor(props: DatabaseInstanceProviderProps) {
        this.props = props;
    }

    /**
     * @param context resource context used to look up the secret and the VPC
     * @returns the created database instance
     * @throws Error when the secret or the VPC is not registered in the context
     */
    provide(context: ResourceContext): rds.IDatabaseInstance {
        const id = context.scope.node.id;

        const databaseCredentialsSecret = context.get<ISecret>(this.props.databaseSecretResourceName);
        if (databaseCredentialsSecret === undefined) {
            throw new Error("Database Secret not found in context");
        }

        const vpc = context.get<IVpc>(this.props.vpcResourceName);
        if (vpc === undefined) {
            throw new Error("VPC not found in context");
        }

        const dbSecurityGroup = new SecurityGroup(context.scope, id+"-security-group", {
            vpc: vpc
        });

        dbSecurityGroup.addIngressRule(Peer.ipv4(vpc.vpcCidrBlock), Port.tcp(this.props.databaseInstancePort), "Connect from within VPC");

        // NOTE(review): databaseInstancePort only opens the security group; it
        // is not passed to the instance, which therefore uses the engine's
        // default port. Confirm before configuring a non-default port.
        const rdsConfig: rds.DatabaseInstanceProps = {
            engine: rds.DatabaseInstanceEngine.POSTGRES,
            vpc,
            vpcSubnets: {
                subnetType: SubnetType.PRIVATE_WITH_EGRESS,
            },
            securityGroups: [dbSecurityGroup],
            credentials: rds.Credentials.fromSecret(databaseCredentialsSecret),
        };

        return new rds.DatabaseInstance(context.scope, id+"-database-instance", rdsConfig);
    }
}

================================================ FILE: lib/bottlerocket-construct/index.ts ================================================
import * as eks from 'aws-cdk-lib/aws-eks';
import { Construct } from 'constructs';
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as team from '../teams';

/**
 * Bottlerocket pattern shows how to specify the OS for the node group
 * and leverage container-optimized Bottlerocket OS: https://aws.amazon.com/bottlerocket/
 */
export default class BottlerocketConstruct {

    /**
     * @param scope parent construct for the generated stack
     * @param id pattern id; the stack is named `${id}-blueprint`
     */
    build(scope: Construct, id: string) {

        const stackID = `${id}-blueprint`;
        const accountID = process.env.CDK_DEFAULT_ACCOUNT!;
        const platformTeam = new team.TeamPlatform(accountID);

        // Managed node group using the Bottlerocket x86_64 AMI type.
        const clusterProvider = new blueprints.MngClusterProvider({
            version: eks.KubernetesVersion.V1_25,
            amiType: eks.NodegroupAmiType.BOTTLEROCKET_X86_64
        });

        // NOTE(review): region and Kubernetes version are hard-coded here,
        // unlike the other patterns, which use CDK_DEFAULT_REGION and 'auto'.
        blueprints.EksBlueprint.builder()
            .account(accountID)
            .region('us-east-1')
            .clusterProvider(clusterProvider)
            .addOns(
                new blueprints.AwsLoadBalancerControllerAddOn(),
                new blueprints.CertManagerAddOn(),
                new blueprints.AdotCollectorAddOn(),
                new blueprints.AppMeshAddOn(),
                new blueprints.ClusterAutoScalerAddOn(),
                new blueprints.NginxAddOn(),
                new blueprints.ArgoCDAddOn(),
                new blueprints.CalicoOperatorAddOn(),
                new blueprints.MetricsServerAddOn(),
                new blueprints.CloudWatchAdotAddOn(),
                new blueprints.SecretsStoreAddOn()
            )
            .teams(platformTeam)
            .build(scope, stackID);
    }
}
================================================ FILE: lib/cloudwatch-monitoring/index.ts ================================================ import { Construct } from 'constructs'; // Blueprints Lib import * as blueprints from '@aws-quickstart/eks-blueprints'; import { cloudWatchDeploymentMode } from '@aws-quickstart/eks-blueprints'; // Team implementation import * as team from '../teams/multi-account-monitoring'; /** * Demonstration of how to use CloudWatch Adot add-on. */ export default class CloudWatchMonitoringConstruct { build(scope: Construct, id: string, account?: string, region?: string ) { // Setup platform team const accountID = account ?? process.env.CDK_DEFAULT_ACCOUNT! ; const awsRegion = region ?? process.env.CDK_DEFAULT_REGION! ; const stackID = `${id}-blueprint`; this.create(scope, accountID, awsRegion) .build(scope, stackID); } create(scope: Construct, account?: string, region?: string ) { // Setup platform team const accountID = account ?? process.env.CDK_DEFAULT_ACCOUNT! ; const awsRegion = region ?? process.env.CDK_DEFAULT_REGION! 
; const cloudWatchAdotAddOn = new blueprints.addons.CloudWatchAdotAddOn({ deploymentMode: cloudWatchDeploymentMode.DEPLOYMENT, namespace: 'default', name: 'adot-collector-cloudwatch', metricsNameSelectors: ['apiserver_request_.*', 'container_memory_.*', 'container_threads', 'otelcol_process_.*', 'ho11y*'], podLabelRegex: 'frontend|downstream(.*)' }); return blueprints.EksBlueprint.builder() .account(accountID) .region(awsRegion) .version('auto') .addOns( new blueprints.AwsLoadBalancerControllerAddOn, new blueprints.CertManagerAddOn, new blueprints.KubeStateMetricsAddOn, new blueprints.PrometheusNodeExporterAddOn, new blueprints.AdotCollectorAddOn, cloudWatchAdotAddOn, new blueprints.XrayAdotAddOn, new blueprints.NginxAddOn, new blueprints.ClusterAutoScalerAddOn, new blueprints.SecretsStoreAddOn ) .teams(new team.TeamGeordi, new team.CorePlatformTeam); } } ================================================ FILE: lib/common/construct-utils.ts ================================================ import { utils } from "@aws-quickstart/eks-blueprints"; import { HelmAddOn } from '@aws-quickstart/eks-blueprints'; import * as cdk from 'aws-cdk-lib'; export const logger = utils.logger; export function errorHandler(app: cdk.App, message: string, error?: Error) { logger.info(message); if(error){ logger.error(error.name, error.message, error.stack); } new EmptyStack(app); } export function configureApp(logLevel? : number): cdk.App { logger.settings.minLevel = logLevel ?? 2; // debug., 3 info logger.settings.hideLogPositionForProduction = true; utils.userLog.info("=== Run make compile before each run, if any code modification was made. 
=== \n\n"); const account = process.env.CDK_DEFAULT_ACCOUNT!; const region = process.env.CDK_DEFAULT_REGION!; HelmAddOn.validateHelmVersions = true; return new cdk.App({context: { account, region }}); } export async function prevalidateSecrets(pattern: string, region?: string, ...secrets: string[]) { for(let secret of secrets) { try { await utils.validateSecret(secret, region ?? process.env.CDK_DEFAULT_REGION!); } catch(error) { throw new Error(`${secret} secret must be setup for the ${pattern} pattern to work`); } } } export function getJsonSecret(secretString: string, key?: string): string { const parsed = JSON.parse(secretString); return key ? parsed[key] : parsed; } export class EmptyStack extends cdk.Stack { constructor(scope: cdk.App, ...message: string[]) { super(scope, "empty-error-stack"); if(message) { message.forEach(m => logger.info(m)); } } } ================================================ FILE: lib/common/default-main.ts ================================================ import { EmptyStack, configureApp } from "./construct-utils"; const app = configureApp(); new EmptyStack(app, "To work with patterns use:", "$ make list # to list all patterns", "$ make pattern ", "Example:", "$ make pattern fargate deploy"); ================================================ FILE: lib/crossplane-argocd-gitops/custom-addons/crossplane-helm-provider-addon.ts ================================================ import 'source-map-support/register'; import * as blueprints from '@aws-quickstart/eks-blueprints'; import * as eks from "aws-cdk-lib/aws-eks"; import { Construct } from 'constructs'; import { dependable } from '@aws-quickstart/eks-blueprints/dist/utils'; import { UpboundCrossplaneAddOn } from './upbound-crossplane-addon'; export class CrossplaneHelmProviderAddon implements blueprints.ClusterAddOn { id?: string | undefined; readonly helmProviderVersion: string; constructor(helmProviderVersion: string) { this.helmProviderVersion = helmProviderVersion; } 
/**
 * Cluster add-on that installs the Crossplane `provider-kubernetes` package
 * at a caller-supplied version.
 *
 * Three Kubernetes objects are applied through two manifests:
 *  - a ClusterRoleBinding that binds the `kubernetes-provider` service account
 *    (namespace `upbound-system`) to the `cluster-admin` ClusterRole,
 *  - a DeploymentRuntimeConfig named `kubernetes-runtime-config` that makes the
 *    provider run under that service account,
 *  - the Provider object itself, which references the runtime config.
 */
export class CrossplaneK8sProviderAddon implements blueprints.ClusterAddOn {
    id?: string | undefined;
    readonly k8sProviderVersion: string;

    /**
     * @param k8sProviderVersion Tag appended to
     *        `xpkg.upbound.io/crossplane-contrib/provider-kubernetes:`.
     */
    constructor(k8sProviderVersion: string) {
        this.k8sProviderVersion = k8sProviderVersion;
    }

    /**
     * Creates the manifests described on the class.
     *
     * @param clusterInfo Blueprint cluster information; its cluster receives the manifests.
     * @returns A promise resolving to the runtime-config manifest (the first
     *          manifest in the chain; the Provider manifest depends on it).
     */
    @dependable(UpboundCrossplaneAddOn.name)
    deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
        const cluster = clusterInfo.cluster;

        const roleBinding = {
            apiVersion: "rbac.authorization.k8s.io/v1",
            kind: "ClusterRoleBinding",
            metadata: { name: "kubernetes-provider" },
            subjects: [
                {
                    kind: "ServiceAccount",
                    name: "kubernetes-provider",
                    namespace: "upbound-system"
                }
            ],
            roleRef: {
                kind: "ClusterRole",
                name: "cluster-admin",
                apiGroup: "rbac.authorization.k8s.io"
            }
        };

        const runtimeConfig = {
            apiVersion: "pkg.crossplane.io/v1beta1",
            kind: "DeploymentRuntimeConfig",
            metadata: { name: "kubernetes-runtime-config" },
            spec: {
                deploymentTemplate: {
                    spec: { replicas: 1, selector: {}, template: {} }
                },
                serviceAccountTemplate: {
                    metadata: { name: "kubernetes-provider" }
                }
            }
        };

        const providerK8sResource = {
            apiVersion: "pkg.crossplane.io/v1",
            kind: "Provider",
            metadata: { name: "kubernetes-provider" },
            spec: {
                package: 'xpkg.upbound.io/crossplane-contrib/provider-kubernetes:' + this.k8sProviderVersion,
                runtimeConfigRef: { name: "kubernetes-runtime-config" }
            }
        };

        const runtimeK8sConfig = new eks.KubernetesManifest(clusterInfo.cluster.stack, "runtimeK8sConfig", {
            cluster: cluster,
            manifest: [roleBinding, runtimeConfig]
        });

        const awsK8sProvider = new eks.KubernetesManifest(clusterInfo.cluster.stack, "awsK8sProvider", {
            cluster: cluster,
            manifest: [providerK8sResource]
        });

        // The Provider must only be applied once its runtime config exists.
        awsK8sProvider.node.addDependency(runtimeK8sConfig);

        // NOTE(review): the runtime-config manifest (not the Provider) is returned.
        // Presumably @dependable attaches the Crossplane chart dependency to the
        // returned construct, so returning the head of the chain orders both
        // manifests after the chart - confirm before changing the return value.
        return Promise.resolve(runtimeK8sConfig);
    }
}
/**
 * Helm add-on that installs Upbound Universal Crossplane and prepares the
 * IRSA service account (`provider-aws`) used by the Crossplane AWS provider.
 */
export class UpboundCrossplaneAddOn extends blueprints.HelmAddOn {

    readonly options: UpboundCrossplaneAddOnProps;

    /**
     * @param props Optional overrides; spread on top of the module's `defaultProps`.
     */
    constructor(props?: UpboundCrossplaneAddOnProps) {
        super({...defaultProps, ...props});
        this.options = this.props as UpboundCrossplaneAddOnProps;
    }

    /**
     * Creates the namespace, the `provider-aws` service account with its inline
     * IAM policy, a stack output exposing the role ARN, and the Helm chart.
     *
     * @param clusterInfo Blueprint cluster information for the target cluster.
     * @returns A promise resolving to the Helm chart construct.
     */
    deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
        const cluster = clusterInfo.cluster;

        // Create the add-on namespace (`upbound-system` by default).
        // NOTE(review): the `createNamespace` option declared on
        // UpboundCrossplaneAddOnProps is never read here; the namespace is
        // always created. Confirm whether the option should be honoured.
        const ns = createNamespace(this.options.namespace!, cluster, true);

        // Create the CrossPlane AWS Provider IRSA.
        const serviceAccountName = "provider-aws";
        const sa = cluster.addServiceAccount(serviceAccountName, {
            name: serviceAccountName,
            namespace: this.options.namespace!,
        });
        sa.node.addDependency(ns);

        sa.role.attachInlinePolicy(new Policy(cluster.stack, 'eks-connect-policy', {
            document: PolicyDocument.fromJson({
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Action": ["sts:AssumeRole"],
                        "Resource": `arn:aws:iam::${cluster.stack.account}:role/eks-workload-connector-role`
                    },
                    {
                        "Effect": "Allow",
                        "Action": ["eks:*"],
                        "Resource": "*"
                    }
                ]
            })
        }));

        // Publish the role ARN both to dependent add-ons and as a stack export.
        clusterInfo.addAddOnContext(UpboundCrossplaneAddOn.name, {
            arn: sa.role.roleArn
        });

        new cdk.CfnOutput(cluster.stack, 'providerawssaiamrole', {
            value: sa.role.roleArn,
            description: 'provider AWS IAM role',
            exportName: 'providerawssaiamrole'
        });

        // Layer the user-supplied values over the chart defaults. The previous
        // code merged the values object with itself, which never applied the
        // defaults and could duplicate array entries during the deep merge.
        const values: Values = merge(defaultProps.values ?? {}, this.options.values ?? {});

        // createNamespace=false: the namespace is created explicitly above.
        const chart = this.addHelmChart(clusterInfo, values, false, true);
        chart.node.addDependency(sa);

        return Promise.resolve(chart);
    }
}
/**
 * Installs the Upbound `provider-aws-eks` Crossplane provider.
 *
 * Creates the `provider-aws-eks` IRSA service account in `upbound-system`,
 * attaches an inline policy allowing `sts:AssumeRole` on the account's
 * `eks-workload-connector-role` and `eks:*` on all resources, then applies a
 * DeploymentRuntimeConfig and the Provider object that references it.
 *
 * @param clusterInfo Blueprint cluster information for the target cluster.
 * @returns A promise resolving to the runtime-config manifest; the Provider
 *          manifest depends on it.
 */
@dependable(UpboundCrossplaneAddOn.name)
deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
    const cluster = clusterInfo.cluster;

    // Create the CrossPlane EKS Provider IRSA.
    const serviceAccountName = "provider-aws-eks";
    const upboundNamespace = "upbound-system";
    const sa = cluster.addServiceAccount(serviceAccountName, {
        name: serviceAccountName,
        namespace: upboundNamespace,
    });

    sa.role.attachInlinePolicy(new Policy(cluster.stack, 'eks-workload-connector-policy', {
        document: PolicyDocument.fromJson({
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Action": ["sts:AssumeRole"],
                    "Resource": `arn:aws:iam::${cluster.stack.account}:role/eks-workload-connector-role`
                },
                {
                    "Effect": "Allow",
                    "Action": ["eks:*"],
                    "Resource": "*"
                }
            ]
        })
    }));

    const runtimeConfig = new eks.KubernetesManifest(clusterInfo.cluster.stack, "runtimeConfig", {
        cluster: cluster,
        manifest: [
            {
                apiVersion: "pkg.crossplane.io/v1beta1",
                kind: "DeploymentRuntimeConfig",
                metadata: { name: "aws-eks-runtime-config" },
                spec: {
                    deploymentTemplate: {
                        spec: { replicas: 1, selector: {}, template: {} }
                    },
                    serviceAccountTemplate: {
                        metadata: { name: "provider-aws-eks" }
                    }
                }
            },
        ],
    });

    const awsEksProvider = new eks.KubernetesManifest(clusterInfo.cluster.stack, "EKSProvider", {
        cluster: cluster,
        manifest: [
            {
                apiVersion: "pkg.crossplane.io/v1",
                kind: "Provider",
                metadata: { name: "provider-aws-eks" },
                spec: {
                    package: 'xpkg.upbound.io/upbound/provider-aws-eks:' + this.UpboundEKSProviderVersion,
                    runtimeConfigRef: { name: "aws-eks-runtime-config" }
                },
            },
        ],
    });

    // NOTE(review): no explicit dependency ties the manifests to the service
    // account created above (a `runtimeConfig -> sa` dependency was previously
    // commented out). Confirm whether that ordering is needed.
    awsEksProvider.node.addDependency(runtimeConfig);

    return Promise.resolve(runtimeConfig);
}
/**
 * Builds the blueprint for the Crossplane/ArgoCD management cluster.
 */
export default class ManagementClusterBuilder {
    readonly account: string;
    readonly region: string;

    /**
     * @param account AWS account id (stored on the instance).
     * @param region AWS region (stored on the instance).
     */
    constructor(account: string, region: string) {
        this.account = account;
        this.region = region;
    }

    /**
     * Assembles the management-cluster blueprint builder.
     *
     * @param scope Construct scope (not used by this method).
     * @param id Used as the EKS cluster name.
     * @param mngProps Managed node group cluster provider settings.
     * @returns An observability blueprint builder with the cluster provider,
     *          Kubernetes 1.29, control-plane logging and all add-ons set.
     */
    create(scope: Construct, id: string, mngProps: blueprints.MngClusterProviderProps) {
        // Static flag on HelmAddOn: this affects every Helm add-on created
        // afterwards in the same process, not only the ones listed below.
        blueprints.HelmAddOn.validateHelmVersions = false;

        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.addons.ExternalsSecretsAddOn,
            new UpboundCrossplaneAddOn,
            new UpboundCrossplaneEKSProviderAddOn(UpboundEKSProviderVersion),
            new CrossplaneK8sProviderAddon(k8sProviderVersion),
            new CrossplaneHelmProviderAddon(helmProviderVersion),
            new blueprints.SecretsStoreAddOn,
            new blueprints.ArgoCDAddOn({
                bootstrapRepo: {
                    repoUrl: gitUrl,
                    path: `./crossplane-argocd-gitops/envs/dev`,
                    targetRevision: 'main'
                },
                bootstrapValues: {
                    clusterA: {
                        clusterName: 'workload-amd-1-29-blueprint'
                    },
                    clusterB: {
                        clusterName: 'workload-arm-1-29-blueprint'
                    },
                    common: {
                        providerConfigAWSName: 'common-provider-config-aws',
                        eksConnectorRoleName: 'eks-workload-connector-role',
                        accountId: `${process.env.CDK_DEFAULT_ACCOUNT}`,
                        region: `${process.env.CDK_DEFAULT_REGION}`,
                        crossplaneNamespace: 'upbound-system'
                    }
                },
            }),
        ];

        const clusterProvider = new blueprints.MngClusterProvider({
            ...mngProps,
            clusterName: id
        });

        return ObservabilityBuilder.builder()
            .clusterProvider(clusterProvider)
            .version(eks.KubernetesVersion.V1_29)
            .enableNativePatternAddOns()
            .enableControlPlaneLogging()
            .addOns(...addOns);
    }
}
/**
 * Builds the CodePipeline that deploys the management cluster followed by one
 * AMD and one ARM workload cluster per configured Kubernetes version.
 */
export default class MultiClusterPipelineConstruct {

    /**
     * @param scope Scope in which the pipeline stack is created.
     * @param id Pipeline name and stack id.
     * @throws Error when the GitHub credentials secret cannot be validated.
     */
    async buildAsync(scope: Construct, id: string) {
        const k8sVersions = props.k8sVersions ?? K8S_VERSIONS_DEV;
        const region: string = props.region;
        const account: string = props.account;

        const gitProps = {
            owner: 'aws-samples',
            secretName: props.gitHubSecret ?? 'cdk_blueprints_github_secret',
            repoName: 'cdk-eks-blueprints-patterns',
            revision: 'main' // use this to target a certain branch for deployment
        };

        // prevalidateSecrets(pattern, region, ...secrets): the secret name must
        // be passed in the variadic tail. Passing it as the first argument (as
        // before) supplied no secrets at all, so nothing was ever validated.
        await prevalidateSecrets(MultiClusterPipelineConstruct.name, region, gitProps.secretName);

        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.ExternalsSecretsAddOn({
                namespace: "external-secrets",
                values: { webhook: { port: 9443 } }
            })
        ];

        const clusterProps: blueprints.MngClusterProviderProps = {
            minSize: props.minSize,
            maxSize: props.maxSize,
            desiredSize: props.desiredSize,
            nodeGroupCapacityType: props.nodeGroupCapacityType,
        };

        const stages: blueprints.StackStage[] = [];
        const vpcProvider = new blueprints.VpcProvider();

        const eksConnectorRole = new CreateNamedRoleProvider(
            "eks-workload-connector-role",
            "eks-workload-connector-role",
            new iam.AccountPrincipal(account),
            [
                iam.ManagedPolicy.fromAwsManagedPolicyName("AdministratorAccess")
            ]);

        // The ARM blueprint creates the connector role; the AMD blueprint looks
        // the same role up by name.
        const baseBlueprintARM = blueprints.EksBlueprint.builder()
            .resourceProvider(blueprints.GlobalResources.Vpc, vpcProvider)
            .resourceProvider('eks-workload-connector-role', eksConnectorRole)
            .account(account)
            .addOns(...addOns)
            .useDefaultSecretEncryption(true);

        const baseBlueprintAMD = blueprints.EksBlueprint.builder()
            .resourceProvider(blueprints.GlobalResources.Vpc, vpcProvider)
            .resourceProvider('eks-workload-connector-role', new LookupRoleProvider('eks-workload-connector-role'))
            .account(account)
            .addOns(...addOns)
            .useDefaultSecretEncryption(true);

        const mgmtCluster = new ManagementClusterBuilder(account, region)
            .create(scope, 'eks-mgmt-cluster', mngProps)
            .account(account)
            .region(region)
            .resourceProvider(blueprints.GlobalResources.Vpc, vpcProvider);

        const mgmtStage = [{ id: `mgmt-cluster-stage`, stackBuilder: mgmtCluster }];

        for (const k8sVersion of k8sVersions) {
            baseBlueprintARM.version(k8sVersion);

            const blueprintAMD = baseBlueprintAMD
                .clusterProvider(
                    new GenericClusterProvider({
                        version: k8sVersion,
                        mastersRole: blueprints.getNamedResource('eks-workload-connector-role') as IRole,
                        managedNodeGroups: [addManagedNodeGroup('amd-tst-ng', {
                            ...clusterProps,
                            amiType: NodegroupAmiType.AL2_X86_64,
                            instanceTypes: [ec2.InstanceType.of(ec2.InstanceClass.M5, ec2.InstanceSize.XLARGE)]
                        })]
                    })
                );

            stages.push({
                id: `workload-amd-` + k8sVersion.version.replace(".", "-"),
                stackBuilder: blueprintAMD.clone(props.region).id(`amd-` + k8sVersion.version.replace(".", "-"))
            });

            const blueprintARM = baseBlueprintARM
                .clusterProvider(
                    new GenericClusterProvider({
                        version: k8sVersion,
                        mastersRole: blueprints.getNamedResource('eks-workload-connector-role') as IRole,
                        managedNodeGroups: [addManagedNodeGroup('arm-tst-ng', {
                            ...clusterProps,
                            amiType: NodegroupAmiType.AL2_ARM_64,
                            instanceTypes: [ec2.InstanceType.of(ec2.InstanceClass.M7G, ec2.InstanceSize.XLARGE)]
                        })]
                    })
                );

            stages.push({
                id: `workload-arm-` + k8sVersion.version.replace(".", "-"),
                stackBuilder: blueprintARM.clone(props.region).id(`arm-` + k8sVersion.version.replace(".", "-"))
            });
        }

        blueprints.CodePipelineStack.builder()
            .application('npx ts-node bin/crossplane-argocd-gitops.ts')
            .name(id)
            .owner(gitProps.owner)
            .codeBuildPolicies([
                new iam.PolicyStatement({
                    resources: ["*"],
                    actions: [
                        "codebuild:*",
                        "sts:AssumeRole",
                        "secretsmanager:GetSecretValue",
                        "secretsmanager:ListSecrets",
                        "secretsmanager:DescribeSecret",
                        "cloudformation:*"
                    ]
                })
            ])
            .repository({
                targetRevision: gitProps.revision,
                credentialsSecretName: gitProps.secretName,
                repoUrl: gitProps.repoName,
                trigger: blueprints.GitHubTrigger.POLL
            })
            // The management cluster wave is declared before the workload wave.
            .wave({ id: `mgmt-cluster-stage`, stages: mgmtStage })
            .wave({ id: `${id}-wave`, stages })
            .build(scope, id, { env: { account, region } });
    }
}

/**
 * Maps cluster-provider settings onto a managed node group definition.
 *
 * @param id Node group id.
 * @param clusterProps Source of sizing, AMI type, instance types and capacity type.
 * @returns The managed node group definition.
 */
function addManagedNodeGroup(id: string, clusterProps: blueprints.MngClusterProviderProps): ManagedNodeGroup {
    return {
        id,
        minSize: clusterProps.minSize,
        maxSize: clusterProps.maxSize,
        amiType: clusterProps.amiType,
        instanceTypes: clusterProps.instanceTypes,
        desiredSize: clusterProps.desiredSize,
        // Previously dropped, so the configured capacity type (e.g. SPOT)
        // never reached the node group.
        nodeGroupCapacityType: clusterProps.nodeGroupCapacityType
    };
}
/**
 * Blueprint pattern showing VPC CNI custom networking on IPv4: the VPC gets a
 * secondary CIDR with three secondary subnets, and the VPC CNI add-on is
 * configured to use those subnets.
 */
export default class CustomNetworkingIPv4Construct {
    /**
     * @param scope Scope in which the blueprint stack is built.
     * @param id Prefix of the stack id (`<id>-blueprint`).
     */
    constructor(scope: Construct, id: string) {
        const stackId = `${id}-blueprint`;

        // Managed node group cluster: two desired nodes (max three) in public subnets.
        const clusterProvider = new blueprints.MngClusterProvider({
            version: KubernetesVersion.V1_25,
            endpointAccess: eks.EndpointAccess.PUBLIC_AND_PRIVATE,
            instanceTypes: [new ec2.InstanceType('m5.large')],
            amiType: NodegroupAmiType.AL2_X86_64,
            desiredSize: 2,
            maxSize: 3,
            vpcSubnets: [{ subnetType: ec2.SubnetType.PUBLIC }]
        });

        // VPC CNI configured with the three named secondary-CIDR subnets.
        const secondarySubnets = [
            blueprints.getNamedResource("secondary-cidr-subnet-0"),
            blueprints.getNamedResource("secondary-cidr-subnet-1"),
            blueprints.getNamedResource("secondary-cidr-subnet-2"),
        ];
        const vpcCni = new blueprints.VpcCniAddOn({
            customNetworkingConfig: { subnets: secondarySubnets },
            awsVpcK8sCniCustomNetworkCfg: true,
            eniConfigLabelDef: 'topology.kubernetes.io/zone'
        });
        const loadBalancerController = new blueprints.AwsLoadBalancerControllerAddOn();
        const coreDns = new blueprints.CoreDnsAddOn();
        const kubeProxy = new blueprints.KubeProxyAddOn();

        // VPC with a primary CIDR plus the secondary CIDR and its subnets.
        const vpcProvider = new blueprints.VpcProvider(undefined, {
            primaryCidr: "10.2.0.0/16",
            secondaryCidr: "100.64.0.0/16",
            secondarySubnetCidrs: ["100.64.0.0/24", "100.64.1.0/24", "100.64.2.0/24"]
        });

        blueprints.EksBlueprint.builder()
            .account(process.env.CDK_DEFAULT_ACCOUNT!)
            .region(process.env.CDK_DEFAULT_REGION)
            .addOns(vpcCni, loadBalancerController, coreDns, kubeProxy)
            .resourceProvider(blueprints.GlobalResources.Vpc, vpcProvider)
            .clusterProvider(clusterProvider)
            .build(scope, stackId);
    }
}
/**
 * Blueprint pattern that deploys an EKS cluster with the Datadog add-on.
 */
export default class DatadogConstruct {
    /**
     * Validates the Datadog API key secret, then builds the blueprint stack.
     *
     * @param scope Scope in which the blueprint stack is built.
     * @param id Prefix of the stack id (`<id>-blueprint`).
     * @throws Error when the `datadog-api-key` secret fails validation.
     */
    async buildAsync(scope: Construct, id: string) {
        // Fail early if the API key secret is not available in the target region.
        await prevalidateSecrets(DatadogConstruct.name, process.env.CDK_DEFAULT_REGION!, SECRET_API_KEY);

        const stackID = `${id}-blueprint`;

        const addOns: Array<blueprints.ClusterAddOn> = [
            new DatadogAddOn({
                apiKeyAWSSecret: SECRET_API_KEY
            })
        ];

        blueprints.EksBlueprint.builder()
            .account(process.env.CDK_DEFAULT_ACCOUNT!)
            .region(process.env.CDK_DEFAULT_REGION!)
            .version('auto')
            .addOns(...addOns)
            .build(scope, stackID);
    }
}
/**
 * Blueprint pattern for running EMR on EKS.
 */
export default class EmrEksConstruct {
    /**
     * Builds the blueprint stack with the EMR-on-EKS add-on and its companions.
     *
     * @param scope CDK app in which the stack is built.
     * @param id Prefix of the stack id (`<id>-blueprint`).
     * @param teams EMR teams registered with the blueprint.
     */
    build(scope: cdk.App, id: string, teams: EmrEksTeam[]) {
        const stackId = `${id}-blueprint`;

        const addOns = [
            new AwsLoadBalancerControllerAddOn,
            new VpcCniAddOn(),
            new CoreDnsAddOn(),
            new MetricsServerAddOn,
            new ClusterAutoScalerAddOn,
            new CertManagerAddOn,
            new EbsCsiDriverAddOn,
            new KubeProxyAddOn,
            new EmrEksAddOn
        ];

        const builder = EksBlueprint.builder()
            .addOns(...addOns)
            .teams(...teams)
            .version('auto');

        builder.build(scope, stackId);
    }
}
/**
 * Demonstrates how to use Fargate cluster provider.
 * Along with the specified profiles, Fargate cluster automatically creates
 * a default profile with selectors for the default namespace.
 */
export default class FargateConstruct {
    /**
     * @param scope Scope in which the blueprint stack is built.
     * @param id Prefix of the stack id (`<id>-blueprint`).
     */
    constructor(scope: Construct, id: string) {
        // Setup platform team
        const accountID = process.env.CDK_DEFAULT_ACCOUNT!;
        const platformTeam = new team.TeamPlatform(accountID);

        // One Fargate profile, selecting pods in the `team1` namespace.
        const fargateProfiles: Map<string, eks.FargateProfileOptions> = new Map([
            ["team1", { selectors: [{ namespace: "team1" }] }]
        ]);

        const stackID = `${id}-blueprint`;
        const clusterProvider = new blueprints.FargateClusterProvider({
            fargateProfiles,
            version: eks.KubernetesVersion.V1_25
        });

        blueprints.EksBlueprint.builder()
            .account(accountID)
            .clusterProvider(clusterProvider)
            .teams(platformTeam)
            .addOns(
                new blueprints.VpcCniAddOn(),
                new blueprints.AwsLoadBalancerControllerAddOn,
                new blueprints.AppMeshAddOn,
                new blueprints.NginxAddOn,
                new blueprints.ArgoCDAddOn,
                new blueprints.MetricsServerAddOn
            )
            .build(scope, stackID);
    }
}
/**
 * Blueprint pattern that deploys the Bedrock showcase application: a Bedrock
 * team (namespace + service account) whose extension hook applies the
 * showcase Deployment/Service/Ingress manifest.
 */
export default class GenAIShowcase {
    /**
     * @param scope Scope in which the blueprint stack is built; also the
     *              source of the `bedrock.pattern.*` context values.
     * @param id Prefix of the stack id (`<id>-blueprint`).
     */
    constructor(scope: Construct, id: string) {
        const account = process.env.CDK_DEFAULT_ACCOUNT!;
        const region = process.env.CDK_DEFAULT_REGION!;
        const stackID = `${id}-blueprint`;

        // Team name and namespace come from CDK context, with defaults.
        const teamName = blueprints.utils.valueFromContext(scope, "bedrock.pattern.name", "showcase");
        const teamNamespace = blueprints.utils.valueFromContext(scope, "bedrock.pattern.namespace", "bedrock");

        const bedrockTeamProps: blueprints.teams.BedrockTeamProps = {
            name: teamName,
            namespace: teamNamespace,
            createNamespace: true,
            serviceAccountName: 'bedrock-service-account',
            extensionFunction: extensionFunction
        };

        const builder = BedrockBuilder.builder()
            .account(account)
            .region(region)
            .version('auto')
            .addBedrockTeam(bedrockTeamProps);

        builder.build(scope, stackID);
    }
}

/**
 * Team extension hook: renders the showcase manifest template and applies it
 * to the cluster after the team's service account.
 *
 * @param team Team whose name, namespace and service account are used.
 * @param clusterInfo Cluster that receives the manifest; also the source of
 *                    the image name/tag context values and the region.
 */
function extensionFunction(team: ApplicationTeam, clusterInfo: ClusterInfo) {
    const { cluster } = clusterInfo;
    const { teamProps } = team;

    // Placeholder values substituted into the `{{...}}` markers of the template.
    const values: spi.Values = {
        namespace: teamProps.namespace,
        imageName: blueprints.utils.valueFromContext(cluster, "bedrock.pattern.image.name", undefined),
        imageTag: blueprints.utils.valueFromContext(cluster, "bedrock.pattern.image.tag", undefined),
        region: cluster.stack.region
    };

    // The template holds several YAML documents separated by `---`.
    const template = readYamlDocument(__dirname + '/deployment/showcase-deployment.ytpl');
    const manifest = template.split("---").map((e: any) => loadYaml(e));

    const deployment: ManifestDeployment = {
        name: teamProps.name,
        namespace: teamProps.namespace!,
        manifest,
        values
    };

    const applied = new KubectlProvider(clusterInfo).addManifest(deployment);
    applied.node.addDependency(team.serviceAccount);
}
selected_prompt_template_text = examples.prompts[prompt_selection] prompt_text = st.text_area("Prompt template text:", value=selected_prompt_template_text, height=350) with col2: st.subheader("User input") inputs_keys = list(examples.inputs) input_selection = st.selectbox("Select an input example:", inputs_keys) selected_input_template_text = examples.inputs[input_selection] input_text = st.text_area("Input text:", value=selected_input_template_text, height=350) process_button = st.button("Run", type="primary") with col3: st.subheader("Result") if process_button: with st.spinner("Running..."): response_content = glib.get_text_response(user_input=input_text, template=prompt_text) st.write(response_content) ================================================ FILE: lib/generative-ai-showcase/python/showcase_examples.py ================================================ ################################################################################################################################# prompts = {} #pre-defined prompt templatess, include "{user_input}" to merge input content inputs = {} #used to merge into prompt templates, merged into the "{user_input}" placeholder defaults = {} #used for default values in simple examples ################################################################################################################################# # PROMPTS ################################################################################################################################# prompts["Reply Template"] = """ {user_input} Please write a reply to the above text: """ ################################################################################################################################# prompts["Summarize"] = """ {user_input} Please summarize the above content: """ ################################################################################################################################# prompts["Sentiment"] = """ {user_input} Sentiment of 
the above content (Positive or negative): """ ################################################################################################################################# prompts["Recommendation"] = """ {user_input} Recommended next step based on the above content: """ ################################################################################################################################# # INPUTS ################################################################################################################################# inputs["Complementary Customer Email"] = """ Dear Acme Investments, I am writing to compliment one of your customer service representatives, Shirley Scarry. I recently had the pleasure of speaking with Shirley regarding my loan. Shirley was extremely helpful and knowledgeable, and went above and beyond to ensure that all of my questions were answered. Shirley also had Robert Herbford join the call, who wasn't quite as helpful. My wife, Clara Bradford, didn't like him at all. Shirley's professionalism and expertise were greatly appreciated, and I would be happy to recommend Acme Investments to others based on my experience. Sincerely, Carson Bradford """ ################################################################################################################################# inputs["Ethics Complaint Email"] = """ Dear Acme Investments, I am writing to bring to your attention a situation that I believe to be unethical on the part of one of your account managers, Roger Longbottom. I recently met with Roger to discuss my investment portfolio and was deeply concerned to hear that he suggested I invest in a certain stock. When I asked him why he thought this was a good investment, he stated that the stock was currently undervalued and was likely to increase in value in the near future. However, upon further research, I have discovered that the stock in question has a questionable reputation. 
def get_llm():
    """Build the LangChain Bedrock LLM used by the showcase app.

    The region and endpoint are read from the BWB_REGION_NAME and
    BWB_ENDPOINT_URL environment variables; an unset variable is passed
    through as None.
    """
    # Inference parameters forwarded to the model unchanged.
    model_kwargs = {
        "maxTokenCount": 1024,
        "stopSequences": [],
        "temperature": 0,
        "topP": 0.9
    }

    llm = Bedrock(
        # credentials_profile_name=os.environ.get("BWB_PROFILE_NAME"), #sets the profile name to use for AWS credentials (if not the default)
        region_name=os.environ.get("BWB_REGION_NAME"), #sets the region name (if not the default)
        endpoint_url=os.environ.get("BWB_ENDPOINT_URL"), #sets the endpoint URL (if necessary)
        model_id="amazon.titan-tg1-large", #use the Amazon Titan text model
        model_kwargs=model_kwargs) #configure the inference properties for the Titan model

    return llm
'@aws-quickstart/eks-blueprints'; // Team implementations import * as team from '../teams'; /** * Demonstrates how to leverage more than one node group along with Fargate profiles. */ export default class GenericClusterConstruct { build(scope: Construct, id: string) { // Setup platform team const accountID = process.env.CDK_DEFAULT_ACCOUNT!; const platformTeam = new team.TeamPlatform(accountID); const stackID = `${id}-blueprint`; const clusterProvider = new blueprints.GenericClusterProvider({ version: eks.KubernetesVersion.V1_25, managedNodeGroups: [ { id: "mng-ondemand", amiType: eks.NodegroupAmiType.AL2_X86_64, instanceTypes: [new ec2.InstanceType('m5.2xlarge')] }, { id: "mng2-spot", instanceTypes: [ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3, ec2.InstanceSize.MEDIUM)], nodeGroupCapacityType: eks.CapacityType.SPOT } ], fargateProfiles: { "fp1": { fargateProfileName: "fp1", selectors: [{ namespace: "serverless1" }] }, "fp2": { fargateProfileName: "fp2", selectors: [{ namespace: "serverless2" }] } } }); blueprints.EksBlueprint.builder() .account(accountID) .region(process.env.CDK_DEFAULT_REGION!) 
/**
 * gMaestro pattern: builds an EKS blueprint with Metrics Server, Cluster Autoscaler
 * and the Granulate gMaestro add-on.
 */
export default class GmaestroConstruct {
    /**
     * Validates the required inputs and builds the `<id>-blueprint` stack.
     *
     * @param scope CDK app; `clusterName` and `namespace` are read from its context.
     * @param id prefix used to derive the blueprint stack id.
     * @throws Error when the `MAESTRO_SECRET_NAME` environment variable is not set, or when
     *         either the `clusterName` or `namespace` context value is missing.
     */
    async buildAsync(scope: cdk.App, id: string): Promise<void> {
        const clientIdSecretName = process.env.MAESTRO_SECRET_NAME;
        if (clientIdSecretName === undefined) {
            throw new Error("secret must be setup for the gMaestro pattern pattern to work");
        }

        // Check the secret before synthesizing anything.
        await prevalidateSecrets(GmaestroConstruct.name, process.env.CDK_DEFAULT_REGION!, clientIdSecretName);

        const clusterName = blueprints.utils.valueFromContext(scope, "clusterName", undefined);
        const namespace = blueprints.utils.valueFromContext(scope, "namespace", undefined);
        if (clusterName === undefined || namespace === undefined) {
            throw new Error("clusterName and namespace must be setup for the gMaestro pattern pattern to work");
        }

        const stackId = `${id}-blueprint`;

        const gmaestroAddOnProps = {
            clientIdSecretName: clientIdSecretName,
            clusterName: clusterName,
            createNamespace: true,
            namespace: namespace,
        } as gmaestroAddOn.GmaestroAddOnProps;

        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.MetricsServerAddOn(),
            new blueprints.addons.ClusterAutoScalerAddOn(),
            new gmaestroAddOn.GmaestroAddOn(gmaestroAddOnProps)
        ];

        blueprints.EksBlueprint.builder()
            .account(process.env.CDK_DEFAULT_ACCOUNT!)
            .region(process.env.CDK_DEFAULT_REGION)
            .addOns(...addOns)
            .build(scope, stackId);
    }
}
export class ImportClusterConstruct {
    /**
     * Create a blueprint that imports an existing cluster.
     *
     * The cluster is described and the blueprint is built in the same region
     * (`CDK_DEFAULT_REGION`), so the stack always targets the region the
     * cluster was looked up in.
     *
     * @param scope stack scope
     */
    async build(scope: Construct): Promise<void> {
        /*
         * Modify these constants for your use case.
         */
        const clusterName = "quickstart-cluster";
        const kubectlRoleName = "awsqs-kubernetes-helm";
        const region = process.env.CDK_DEFAULT_REGION!;

        const sdkCluster = await blueprints.describeCluster(clusterName, region);

        /**
         * Assumes the supplied role is registered in the target cluster for kubectl access.
         */
        const importClusterProvider = blueprints.ImportClusterProvider.fromClusterAttributes(
            sdkCluster,
            blueprints.getResource(context => new blueprints.LookupRoleProvider(kubectlRoleName).provide(context))
        );

        const vpcId = sdkCluster.resourcesVpcConfig?.vpcId;

        blueprints.EksBlueprint.builder()
            .clusterProvider(importClusterProvider)
            .resourceProvider(GlobalResources.Vpc, new blueprints.VpcProvider(vpcId)) // Important! register cluster VPC
            .addOns(new blueprints.AppMeshAddOn())
            .teams(new TeamRikerSetup(scope, "./lib/teams/team-riker/"))
            .teams(new TeamScan())
            .account(process.env.CDK_DEFAULT_ACCOUNT!)
            // Use the region the cluster was described in, not a hard-coded one.
            .region(region)
            .build(scope, "imported-cluster");
    }
}
/**
 * IPv6 pattern: builds an EKS blueprint whose cluster uses the IPv6 IP family.
 */
export default class IpV6Construct {
    /**
     * Builds the `<id>-blueprint` stack in the CDK default account and region.
     *
     * @param scope parent construct for the blueprint stack.
     * @param id prefix used to derive the blueprint stack id.
     */
    build(scope: Construct, id: string) {
        const account = process.env.CDK_DEFAULT_ACCOUNT!;
        const region = process.env.CDK_DEFAULT_REGION!;
        const stackID = `${id}-blueprint`;

        const ipFamily = IpFamily.IP_V6; //IpFamily.IP_V6 is equivalent to "ipv6"

        // AddOns for the cluster. For ipv6 cluster, we haven't tested with all the addons except for the below addons.
        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.addons.VpcCniAddOn(),
            new blueprints.addons.KarpenterAddOn(),
            new blueprints.addons.SecretsStoreAddOn()
        ];

        blueprints.EksBlueprint.builder()
            .account(account)
            .region(region)
            .version('auto')
            .ipFamily(ipFamily)
            .addOns(...addOns)
            .build(scope, stackID);
    }
}
/**
 * Karpenter pattern: an EKS blueprint with core add-ons plus a Karpenter add-on
 * configured with an on-demand `m5.large` node pool across zones a, b and c of the region.
 */
export default class KarpenterConstruct {
    /**
     * @param scope parent construct for the blueprint stack.
     * @param id prefix used to derive the blueprint stack id (`<id>-blueprint`).
     */
    constructor(scope: Construct, id: string) {
        const targetAccount = process.env.CDK_DEFAULT_ACCOUNT!;
        const targetRegion = process.env.CDK_DEFAULT_REGION!;
        const blueprintId = `${id}-blueprint`;

        // Availability zones a, b and c of the target region.
        const zones = ['a', 'b', 'c'].map(suffix => `${targetRegion}${suffix}`);
        // Tag values used to select the subnets and security groups of this blueprint.
        const subnetNameTag = `${blueprintId}/${blueprintId}-vpc/*`;
        const clusterOwnershipTag = `kubernetes.io/cluster/${blueprintId}`;

        const karpenter = new blueprints.addons.KarpenterAddOn({
            version: 'v0.33.1',
            nodePoolSpec: {
                requirements: [
                    { key: 'node.kubernetes.io/instance-type', operator: 'In', values: ['m5.large'] },
                    { key: 'topology.kubernetes.io/zone', operator: 'In', values: zones },
                    { key: 'kubernetes.io/arch', operator: 'In', values: ['amd64', 'arm64'] },
                    { key: 'karpenter.sh/capacity-type', operator: 'In', values: ['on-demand'] },
                ],
                disruption: {
                    consolidationPolicy: "WhenUnderutilized",
                    expireAfter: "259200s"
                },
                weight: 20,
            },
            ec2NodeClassSpec: {
                subnetSelectorTerms: [{ tags: { "Name": subnetNameTag } }],
                securityGroupSelectorTerms: [{ tags: { [clusterOwnershipTag]: "owned" } }],
                amiFamily: "AL2"
            },
            interruptionHandling: true,
        });

        const clusterAddOns: blueprints.ClusterAddOn[] = [
            new blueprints.addons.AwsLoadBalancerControllerAddOn(),
            new blueprints.addons.VpcCniAddOn(),
            new blueprints.addons.CoreDnsAddOn(),
            new blueprints.addons.KubeProxyAddOn(),
            new blueprints.addons.CertManagerAddOn(),
            new blueprints.addons.KubeStateMetricsAddOn(),
            new blueprints.addons.SSMAgentAddOn(),
            new blueprints.addons.MetricsServerAddOn(),
            karpenter,
        ];

        EksBlueprint.builder()
            .account(targetAccount)
            .region(targetRegion)
            .version('auto')
            .addOns(...clusterAddOns)
            .build(scope, blueprintId);
    }
}
/**
 * Keptn pattern: an EKS blueprint with the Keptn control plane add-on.
 */
export default class KeptnControlPlaneConstruct {
    /**
     * @param scope parent construct for the blueprint stack.
     * @param id prefix used to derive the blueprint stack id (`<id>-blueprint`).
     */
    constructor(scope: Construct, id: string) {
        const { CDK_DEFAULT_ACCOUNT: account, CDK_DEFAULT_REGION: region } = process.env;
        const blueprintId = `${id}-blueprint`;

        // The only add-on for this cluster.
        const controlPlaneAddOn = new KeptnControlPlaneAddOn({
            // uncomment after you setup the ssm secret keptn-secrets.
            // ssmSecretName: 'keptn-secrets'
        });

        const builder = EksBlueprint.builder()
            .account(account!)
            .region(region)
            .addOns(controlPlaneAddOn)
            .version('auto');

        builder.build(scope, blueprintId);
    }
}
/**
 * Konveyor pattern: an EKS blueprint with the Konveyor add-on exposed on
 * `<konveyorLabel>.<parentDomain>` using a certificate created for that subdomain.
 */
export class KonveyorConstruct extends Construct {
    /**
     * @param scope parent construct; add-on settings are read from its context
     *              (`konveyor.*` keys), falling back to the defaults given below.
     * @param id construct id.
     */
    constructor(scope: Construct, id: string) {
        super(scope, id);

        // Definition of the add-on's properties
        const props = {
            account: process.env.CDK_DEFAULT_ACCOUNT,
            region: process.env.CDK_DEFAULT_REGION,
            namespace: blueprints.utils.valueFromContext(
                scope,
                "konveyor.namespace.name",
                "konveyor"
            ),
            parentDomain: blueprints.utils.valueFromContext(
                scope,
                "konveyor.parent.domain.name",
                "example.com"
            ),
            konveyorLabel: blueprints.utils.valueFromContext(
                scope,
                "konveyor.subdomain.label",
                "konveyor"
            ),
            hostedZoneId: blueprints.utils.valueFromContext(
                scope,
                "konveyor.hosted.zone.id",
                "1234567890"
            ),
            certificateResourceName: blueprints.utils.valueFromContext(
                scope,
                "konveyor.certificate.resource.name",
                "konveyor-certificate"
            ),
        };

        const subdomain = props.konveyorLabel + "." + props.parentDomain;

        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.AwsLoadBalancerControllerAddOn(),
            new blueprints.VpcCniAddOn(),
            new blueprints.CoreDnsAddOn(),
            new blueprints.KubeProxyAddOn(),
            new blueprints.ExternalDnsAddOn({
                hostedZoneResources: [blueprints.GlobalResources.HostedZone],
            }),
            new blueprints.EbsCsiDriverAddOn(),
            new OlmAddOn(),
            new KonveyorAddOn({
                certificateResourceName: props.certificateResourceName,
                subdomain,
                featureAuthRequired: "true",
            }),
        ];

        blueprints.EksBlueprint.builder()
            .account(props.account)
            .region(props.region)
            .resourceProvider(
                blueprints.GlobalResources.HostedZone,
                new blueprints.ImportHostedZoneProvider(
                    props.hostedZoneId,
                    props.parentDomain
                )
            )
            // Registered under the same name the Konveyor add-on looks the certificate up by.
            .resourceProvider(
                props.certificateResourceName,
                new blueprints.CreateCertificateProvider(
                    "elb-certificate",
                    subdomain,
                    blueprints.GlobalResources.HostedZone
                )
            )
            .version('auto')
            .addOns(...addOns)
            .build(scope, props.konveyorLabel + "-cluster");
    }
}
/**
 * Kubeflow pattern: an EKS blueprint on a managed node group with Istio, ADOT/AMP
 * monitoring and the Kubeflow add-on in the `kubeflow-pipelines` namespace.
 */
export default class KubeflowConstruct {
    /**
     * @param scope parent construct for the blueprint stack.
     * @param id prefix used to derive the blueprint stack id (`<id>-blueprint`).
     */
    constructor(scope: Construct, id: string) {
        const blueprintId = `${id}-blueprint`;

        // The AMP workspace is registered below under this name and referenced by the AMP add-on.
        const workspaceName = "kubeflow-monitoring";
        const workspace = blueprints.getNamedResource(workspaceName) as unknown as amp.CfnWorkspace;
        const prometheusEndpoint = workspace.attrPrometheusEndpoint;

        const nodeGroupProps: blueprints.MngClusterProviderProps = {
            version: eks.KubernetesVersion.V1_29,
            instanceTypes: [new ec2.InstanceType("m5.2xlarge")],
            amiType: eks.NodegroupAmiType.AL2_X86_64,
            desiredSize: 2,
            maxSize: 3,
        };

        // Every Istio add-on is pinned to the same version; each gets its own props object.
        const istioProps = () => ({ version: "1.18.2" });

        const clusterAddOns: blueprints.ClusterAddOn[] = [
            new blueprints.AwsLoadBalancerControllerAddOn(),
            new blueprints.ClusterAutoScalerAddOn(),
            new blueprints.VpcCniAddOn(),
            new blueprints.CoreDnsAddOn(),
            new blueprints.KubeProxyAddOn(),
            new blueprints.EbsCsiDriverAddOn(),
            new blueprints.CertManagerAddOn(),
            new blueprints.KubeStateMetricsAddOn(),
            new blueprints.MetricsServerAddOn(),
            new blueprints.PrometheusNodeExporterAddOn(),
            new blueprints.addons.IstioBaseAddOn(istioProps()),
            new blueprints.addons.IstioControlPlaneAddOn(istioProps()),
            new blueprints.addons.IstioIngressGatewayAddon(istioProps()),
            new blueprints.addons.IstioCniAddon(istioProps()),
            new blueprints.AdotCollectorAddOn(),
            new blueprints.addons.AmpAddOn({
                ampPrometheusEndpoint: prometheusEndpoint,
            }),
            new KubeflowAddOn({
                namespace: 'kubeflow-pipelines'
            }),
        ];

        blueprints.EksBlueprint.builder()
            .account(process.env.CDK_DEFAULT_ACCOUNT!)
            .region(process.env.CDK_DEFAULT_REGION)
            .resourceProvider(workspaceName, new blueprints.CreateAmpProvider(workspaceName, workspaceName))
            .addOns(...clusterAddOns)
            .clusterProvider(new blueprints.MngClusterProvider(nodeGroupProps))
            .version('auto')
            .build(scope, blueprintId);
    }
}
/**
 * Stack that provisions the Amazon Managed Grafana service role in the monitoring account.
 *
 * The role is assumable by `grafana.amazonaws.com` and is allowed to assume the
 * `ampPrometheusDataSourceRole` and `cloudwatchDataSourceRole` roles in every
 * monitored account listed in `props.accounts`. The role ARN is exported as the
 * `AMGRole` stack output.
 */
export class AmgIamSetupStack extends cdk.Stack {

    /**
     * @param scope parent construct.
     * @param id stack id.
     * @param props stack properties, including the role name and the monitored account ids.
     */
    constructor(scope: Construct, id: string, props: AmgIamSetupStackProps) {
        super(scope, id, props);

        const role = new iam.Role(this, 'amg-iam-role', {
            roleName: props.roleName,
            assumedBy: new iam.ServicePrincipal('grafana.amazonaws.com'),
            description: 'Service Role for Amazon Managed Grafana',
        });

        // One statement per monitored account, covering both data source roles.
        for (const account of props.accounts) {
            role.addToPolicy(new iam.PolicyStatement({
                actions: [
                    "sts:AssumeRole"
                ],
                resources: [
                    `arn:aws:iam::${account}:role/ampPrometheusDataSourceRole`,
                    `arn:aws:iam::${account}:role/cloudwatchDataSourceRole`
                ],
            }));
        }

        // The role was just constructed above, so its ARN is always available.
        new cdk.CfnOutput(this, 'AMGRole', { value: role.roleArn });
    }
}
role.roleArn : "none" }); } } ================================================ FILE: lib/multi-account-monitoring/cloudwatch-iam-setup.ts ================================================ import { NestedStack, NestedStackProps } from 'aws-cdk-lib'; import * as blueprints from '@aws-quickstart/eks-blueprints'; import { Construct } from 'constructs'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as cdk from 'aws-cdk-lib'; /** * Stack the creates the role with trust relationship to the monitoring account to * get CloudWatch metrics. */ export class CloudWatchIamSetupStack extends NestedStack { public static builder(roleName: string, trustAccount: string): blueprints.NestedStackBuilder { return { build(scope: Construct, id: string, props: NestedStackProps) { return new CloudWatchIamSetupStack(scope, id, props, roleName, trustAccount); } }; } constructor(scope: Construct, id: string, props: NestedStackProps, roleName: string, trustAccount: string) { super(scope, id, props); const role = new iam.Role(this, 'cloudwatch-iam-trust-role', { roleName: roleName, assumedBy: new iam.AccountPrincipal(trustAccount), description: 'CloudWatch role to assume from central account', }); role.addToPolicy(new iam.PolicyStatement({ actions: [ "cloudwatch:DescribeAlarmsForMetric", "cloudwatch:DescribeAlarmHistory", "cloudwatch:DescribeAlarms", "cloudwatch:ListMetrics", "cloudwatch:GetMetricStatistics", "cloudwatch:GetMetricData", "logs:DescribeLogGroups", "logs:GetLogGroupFields", "logs:StartQuery", "logs:StopQuery", "logs:GetQueryResults", "logs:GetLogEvents", "ec2:DescribeTags", "ec2:DescribeInstances", "ec2:DescribeRegions", "tag:GetResources", "xray:PutTraceSegments", "xray:PutTelemetryRecords", "xray:GetSamplingRules", "xray:GetSamplingTargets", "xray:GetSamplingStatisticSummaries", "xray:BatchGetTraces", "xray:GetServiceGraph", "xray:GetTraceGraph", "xray:GetTraceSummaries", "xray:GetGroups", "xray:GetGroup", "xray:ListTagsForResource", "xray:GetTimeSeriesServiceStatistics", 
/**
 * Loads the pipeline environments from a Secrets Manager secret.
 *
 * The secret value must be a JSON document with a top-level `context` object
 * shaped like {@link PipelineMultiEnvMonitoringProps}. The secret must exist in
 * the given region of the account the code runs in (the pipeline account).
 *
 * @param secretName name of the secret to read; defaults to `cdk-context`.
 * @param region region the secret is stored in; defaults to `us-east-1`.
 * @returns the `context` object of the secret.
 * @throws Error when the secret value has no `context` entry.
 */
export async function populateAccountWithContextDefaults(
    secretName = 'cdk-context',
    region = 'us-east-1'
): Promise<PipelineMultiEnvMonitoringProps> {
    // Populate Context Defaults for all the accounts
    const secretValue = await blueprints.utils.getSecretValue(secretName, region);
    const parsed = JSON.parse(secretValue) as { context?: PipelineMultiEnvMonitoringProps } | null;
    const cdkContext = parsed?.context;
    if (cdkContext == null) {
        // Fail here rather than with an opaque TypeError when the caller dereferences the result.
        throw new Error(`Secret "${secretName}" in ${region} does not contain a "context" object`);
    }
    logger.debug(`Retrieved CDK context ${JSON.stringify(cdkContext)}`);
    return cdkContext;
}
(account/region) where monitoring dashboards will be configured.
     */
    monitoringEnv: cdk.Environment;
}

/**
 * Main multi-account monitoring pipeline.
 *
 * Builds one CodePipeline stack with a single wave ("prod-test") of three
 * stages: two workload blueprints and one stack for the monitoring environment.
 */
export class PipelineMultiEnvMonitoring {

    /**
     * Loads the environment definitions through populateAccountWithContextDefaults()
     * and synthesizes the pipeline stack "multi-account-central-pipeline" into `scope`.
     *
     * @param scope parent construct that the blueprints and the pipeline stack are built into
     */
    async buildAsync(scope: Construct) {
        const context = await populateAccountWithContextDefaults();

        // Stage ids, each suffixed with the region of the environment it deploys to.
        const PROD1_ENV_ID = `eks-prod1-${context.prodEnv1.region}`;
        const PROD2_ENV_ID = `eks-prod2-${context.prodEnv2.region}`;
        const MON_ENV_ID = `central-monitoring-${context.monitoringEnv.region}`;

        // One blueprint per workload environment: AMP for prodEnv1, CloudWatch for prodEnv2.
        const blueprintAmp = new AmpMonitoringConstruct().create(scope, context.prodEnv1.account, context.prodEnv1.region);
        const blueprintCloudWatch = new CloudWatchMonitoringConstruct().create(scope, context.prodEnv2.account, context.prodEnv2.region);

        // Argo configuration per environment (the same 'prod' add-on is attached to both workload stages).
        const prodArgoAddonConfig = createArgoAddonConfig('prod', 'https://github.com/aws-samples/eks-blueprints-workloads.git');

        // Repository owner and name are hard-coded; the dynamic lookup below is disabled.
        // const { gitOwner, gitRepositoryName } = await getRepositoryData();
        const gitOwner = 'aws-samples';
        const gitRepositoryName = 'cdk-eks-blueprints-patterns';

        // Props for the stack deployed to the monitoring environment; both workload
        // accounts are passed in `accounts`.
        const amgIamSetupStackProps: AmgIamSetupStackProps = {
            roleName: "amgWorkspaceIamRole",
            accounts: [context.prodEnv1.account!, context.prodEnv2.account!],
            env: {
                account: context.monitoringEnv.account!,
                region: context.monitoringEnv.region!
            }
        };

        blueprints.CodePipelineStack.builder()
            .application("npx ts-node bin/pipeline-multienv-monitoring.ts")
            .name("multi-account-central-pipeline")
            .owner(gitOwner)
            // Extra CodeBuild permissions: role assumption, secret reads and CloudFormation, on all resources.
            .codeBuildPolicies([
                new iam.PolicyStatement({
                    resources: ["*"],
                    actions: [
                        "sts:AssumeRole",
                        "secretsmanager:GetSecretValue",
                        "secretsmanager:DescribeSecret",
                        "cloudformation:*"
                    ]
                })
            ])
            .repository({
                repoUrl: gitRepositoryName,
                credentialsSecretName: 'github-token',
                targetRevision: 'main',
            })
            .enableCrossAccountKeys()
            .wave({
                id: "prod-test",
                stages: [
                    {
                        // prodEnv1: AMP blueprint plus a nested stack whose role is created
                        // with the monitoring account as the trust account.
                        id: PROD1_ENV_ID,
                        stackBuilder: blueprintAmp
                            .clone(context.prodEnv1.region, context.prodEnv1.account)
                            .addOns(new blueprints.NestedStackAddOn({
                                builder: AmpIamSetupStack.builder("ampPrometheusDataSourceRole", context.monitoringEnv.account!),
                                id: "amp-iam-nested-stack"
                            }))
                            .addOns(
                                prodArgoAddonConfig,
                            )
                    },
                    {
                        // prodEnv2: CloudWatch blueprint plus the CloudWatch trust-role nested stack.
                        id: PROD2_ENV_ID,
                        stackBuilder: blueprintCloudWatch
                            .clone(context.prodEnv2.region, context.prodEnv2.account)
                            .addOns(new blueprints.NestedStackAddOn({
                                builder: CloudWatchIamSetupStack.builder("cloudwatchDataSourceRole", context.monitoringEnv.account!),
                                id: "cloudwatch-iam-nested-stack"
                            }))
                            .addOns(
                                prodArgoAddonConfig,
                            )
                    },
                    {
                        // Monitoring environment: a plain stack built inline rather than a blueprint.
                        id: MON_ENV_ID,
                        stackBuilder: {
                            build(scope: Construct): cdk.Stack {
                                return new AmgIamSetupStack(scope, "amg-iam-setup", amgIamSetupStackProps);
                            }
                        }
                    },
                ],
            })
            .build(scope, "multi-account-central-pipeline", {
                env: context.pipelineEnv
            });
    }
}

/**
 * Creates the ArgoCD add-on for one environment.
 *
 * @param environment environment name; selects the bootstrap path `envs/<environment>`
 * @param repoUrl URL of the bootstrap repository
 * @returns an ArgoCDAddOn bootstrapped from the 'main' revision of `repoUrl`
 */
function createArgoAddonConfig(environment: string, repoUrl: string): blueprints.ArgoCDAddOn {
    return new blueprints.ArgoCDAddOn(
        {
            bootstrapRepo: {
                repoUrl: repoUrl,
                path: `envs/${environment}`,
                targetRevision: 'main',
            },
            bootstrapValues: {
                spec: {
                    ingress: {
                        host: 'teamblueprints.com',
                    }
                },
            },
        }
    );
}

================================================
FILE: lib/multi-cluster-construct/cluster-secret-store-addon.ts
================================================
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as eks from "aws-cdk-lib/aws-eks";
import { Construct }
from 'constructs';
import { dependable } from '@aws-quickstart/eks-blueprints/dist/utils';

/**
 * Builds the manifest of a ClusterSecretStore backed by an AWS service and
 * authenticated through the "external-secrets-sa" service account.
 *
 * @param name metadata name of the store
 * @param service value of `spec.provider.aws.service`
 * @param region value of `spec.provider.aws.region`
 */
function clusterSecretStoreManifest(name: string, service: string, region: string): Record<string, unknown> {
    return {
        apiVersion: "external-secrets.io/v1beta1",
        kind: "ClusterSecretStore",
        metadata: {name: name},
        spec: {
            provider: {
                aws: {
                    service: service,
                    region: region,
                    auth: {
                        jwt: {
                            serviceAccountRef: {
                                name: "external-secrets-sa",
                                namespace: "external-secrets",
                            },
                        },
                    },
                },
            },
        },
    };
}

/**
 * Cluster add-on that registers two ClusterSecretStore resources: one backed
 * by Secrets Manager ("eksa-secret-store") and one backed by Parameter Store
 * ("eksa-configmap-store"). Declared as dependent on the External Secrets add-on.
 */
export class ClusterSecretStoreAddon implements blueprints.ClusterAddOn {
    id?: string | undefined;

    /**
     * Creates both manifests on the cluster.
     *
     * @param clusterInfo cluster the manifests are applied to; its stack region is used for both stores
     * @returns a promise resolving to the Secrets Manager store manifest
     */
    @dependable(blueprints.addons.ExternalsSecretsAddOn.name)
    deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
        const cluster = clusterInfo.cluster;
        const region = cluster.stack.region;

        const clusterSecretStore = new eks.KubernetesManifest(cluster, "ClusterSecretStore", {
            cluster: cluster,
            manifest: [clusterSecretStoreManifest("eksa-secret-store", "SecretsManager", region)],
        });

        const clusterConfigMapStore = new eks.KubernetesManifest(cluster, "ClusterConfigMap", {
            cluster: cluster,
            manifest: [clusterSecretStoreManifest("eksa-configmap-store", "ParameterStore", region)],
        });

        // The Parameter Store manifest is applied after the Secrets Manager one.
        clusterConfigMapStore.node.addDependency(clusterSecretStore);

        // NOTE(review): the returned construct is the first store, so anything that waits on this
        // add-on's result does not wait for "eksa-configmap-store" - confirm that is intended.
        return Promise.resolve(clusterSecretStore);
    }
}

================================================
FILE: lib/multi-cluster-construct/clusterMapping.ts
================================================
import * as eks from 'aws-cdk-lib/aws-eks';
import * as ec2 from 'aws-cdk-lib/aws-ec2';

/**
 * Instance mapping: the node group AMI type and EC2 instance type used for a cluster.
*/
export interface InstanceMapping {
    amiType: eks.NodegroupAmiType,
    instanceType: ec2.InstanceType,
}

/**
 * List of all clusters deployed by conformitron
 */
export enum ClusterName {
    ARM = "arm",
    X86 = "x86",
    BR_X86 = "br-x86",
    BR_ARM = "br-arm",
    MONITORING = "grafana-monitoring"
}

/**
 * AMI type and instance type per cluster name. Every ClusterName currently has
 * an entry, but the type marks entries as optional, so callers must handle `undefined`.
 */
export const clusterMappings : {[key in ClusterName]?: InstanceMapping } = {
    [ClusterName.ARM]: {
        amiType: eks.NodegroupAmiType.AL2_ARM_64,
        instanceType: ec2.InstanceType.of(ec2.InstanceClass.M7G, ec2.InstanceSize.XLARGE2)
    },
    [ClusterName.X86]: {
        amiType: eks.NodegroupAmiType.AL2_X86_64,
        instanceType: ec2.InstanceType.of(ec2.InstanceClass.M5, ec2.InstanceSize.XLARGE2)
    },
    [ClusterName.BR_ARM]: {
        amiType: eks.NodegroupAmiType.BOTTLEROCKET_ARM_64,
        instanceType: ec2.InstanceType.of(ec2.InstanceClass.M7G, ec2.InstanceSize.XLARGE2)
    },
    [ClusterName.BR_X86]: {
        amiType: eks.NodegroupAmiType.BOTTLEROCKET_X86_64,
        instanceType: ec2.InstanceType.of(ec2.InstanceClass.M5, ec2.InstanceSize.XLARGE2)
    },
    [ClusterName.MONITORING]: {
        amiType: eks.NodegroupAmiType.AL2_X86_64,
        instanceType: ec2.InstanceType.of(ec2.InstanceClass.M5, ec2.InstanceSize.LARGE)
    }
};

================================================
FILE: lib/multi-cluster-construct/grafana-monitor-builder.ts
================================================
import { Construct } from 'constructs';
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as eks from 'aws-cdk-lib/aws-eks';
import { GrafanaOperatorSecretAddon } from './grafana-operator-secret-addon';
import * as fs from 'fs';

/**
 * Builds the blueprint of the Grafana monitoring cluster: AMP add-on with
 * alerting/recording rules, an OpenTelemetry collector manifest generated from
 * a template, FluxCD, the Grafana operator secret add-on and the SSM agent.
 */
export class GrafanaMonitoringConstruct {

    /**
     * Creates the blueprint and builds it as stack `<id>-grafana-monitor`.
     *
     * @param scope parent construct
     * @param id prefix of the stack id
     * @param contextAccount target account; resolved by create() when omitted
     * @param contextRegion target region; resolved by create() when omitted
     */
    build(scope: Construct, id: string, contextAccount?: string, contextRegion?: string ) {
        const stackId = `${id}-grafana-monitor`;

        // create() applies the environment-variable fallbacks, so the raw values are passed through.
        this.create(scope, contextAccount, contextRegion)
            .build(scope, stackId);
    }

    /**
     * Assembles the observability blueprint builder without building a stack.
     *
     * Side effect: writes `resources/otel-collector-config-new.yml` next to this
     * file, derived from `resources/otel-collector-config.yml`.
     *
     * @param scope construct used to read context values
     * @param contextAccount target account; falls back to ACCOUNT_ID, then CDK_DEFAULT_ACCOUNT
     * @param contextRegion target region; falls back to AWS_REGION, then CDK_DEFAULT_REGION
     * @returns the configured blueprint builder
     * @throws Error when the "fluxRepository" context value is not set
     */
    create(scope: Construct, contextAccount?: string, contextRegion?: string ) {

        const account = contextAccount! || process.env.ACCOUNT_ID! || process.env.CDK_DEFAULT_ACCOUNT!;
        const region = contextRegion! || process.env.AWS_REGION! || process.env.CDK_DEFAULT_REGION!;

        // TODO: CFN import https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.Fn.html#static-importwbrvaluesharedvaluetoimport
        // The defaults below are non-functional placeholders; real values come from CDK context.
        const ampWorkspaceName = "conformitronWorkspace";
        const ampEndpoint = blueprints.utils.valueFromContext(scope, "conformitron.amp.endpoint", "https://aps-workspaces..amazonaws.com/workspaces//");
        const ampWorkspaceArn = blueprints.utils.valueFromContext(scope, "conformitron.amp.arn", "arn:aws:aps:::workspace/");

        const ampAddOnProps: blueprints.AmpAddOnProps = {
            ampPrometheusEndpoint: ampEndpoint,
            ampRules: {
                ampWorkspaceArn: ampWorkspaceArn,
                ruleFilePaths: [
                    __dirname + '/resources/amp-config/alerting-rules.yml',
                    __dirname + '/resources/amp-config/recording-rules.yml'
                ]
            }
        };

        // Template sections as [start token, stop token, flag], applied in this order.
        // The flag is passed as the last argument of changeTextBetweenTokens - presumably
        // "keep this section"; confirm against the blueprints utility.
        // NOTE(review): the start token of enableAdotMetricsCollectionJob has no space before
        // "}}", unlike every other token - verify it matches the template file.
        const collectorSections: Array<[string, string, boolean]> = [
            ["{{ start enableJavaMonJob }}", "{{ stop enableJavaMonJob }}", false],
            ["{{ start enableNginxMonJob }}", "{{ stop enableNginxMonJob }}", false],
            ["{{ start enableIstioMonJob }}", "{{ stop enableIstioMonJob }}", false],
            ["{{ start enableAPIserverJob }}", "{{ stop enableAPIserverJob }}", false],
            ["{{ start enableAdotMetricsCollectionJob}}", "{{ stop enableAdotMetricsCollectionJob }}", false],
            ["{{ start enableAdotMetricsCollectionTelemetry }}", "{{ stop enableAdotMetricsCollectionTelemetry }}", true],
            ["{{ start enableAdotContainerLogsReceiver }}", "{{ stop enableAdotContainerLogsReceiver }}", true],
            ["{{ start enableAdotContainerLogsExporter }}", "{{ stop enableAdotContainerLogsExporter }}", true],
        ];

        let doc = blueprints.utils.readYamlDocument(__dirname + '/resources/otel-collector-config.yml');
        for (const [startToken, stopToken, flag] of collectorSections) {
            doc = blueprints.utils.changeTextBetweenTokens(doc, startToken, stopToken, flag);
        }

        // The processed template is written to disk and referenced by path below.
        fs.writeFileSync(__dirname + '/resources/otel-collector-config-new.yml', doc);

        ampAddOnProps.openTelemetryCollector = {
            manifestPath: __dirname + '/resources/otel-collector-config-new.yml',
            manifestParameterMap: {
                logGroupName: `/aws/eks/conformitron/workspace`,
                logStreamName: `$NODE_NAME`,
                logRetentionDays: 30,
                awsRegion: region
            }
        };

        const fluxRepository: blueprints.FluxGitRepo | undefined = blueprints.utils.valueFromContext(scope, "fluxRepository", undefined);
        if (!fluxRepository) {
            // The context lookup defaults to undefined; fail with a clear message instead of a TypeError below.
            throw new Error('CDK context value "fluxRepository" is required to build the Grafana monitoring blueprint');
        }
        // `values` may be absent from the context entry; create it before adding the AMG settings.
        fluxRepository.values = fluxRepository.values ?? {};
        fluxRepository.values.AMG_AWS_REGION = region;
        fluxRepository.values.AMG_ENDPOINT_URL = blueprints.utils.valueFromContext(scope, "conformitron.amg.endpoint","https://.grafana-workspace..amazonaws.com");

        Reflect.defineMetadata("ordered", true, blueprints.addons.GrafanaOperatorAddon); //sets metadata ordered to true for GrafanaOperatorAddon

        const addOns: Array<blueprints.ClusterAddOn> = [
            new blueprints.addons.FluxCDAddOn({"repositories": [fluxRepository]}),
            new GrafanaOperatorSecretAddon(),
            new blueprints.addons.SSMAgentAddOn()
        ];

        return blueprints.ObservabilityBuilder.builder()
            .account(account)
            .region(region)
            .version(eks.KubernetesVersion.V1_27)
            .resourceProvider(ampWorkspaceName, new blueprints.CreateAmpProvider(ampWorkspaceName, ampWorkspaceName))
            .withAmpProps(ampAddOnProps)
            .enableOpenSourcePatternAddOns()
            .addOns(
                ...addOns
            );
    }
}

================================================
FILE: lib/multi-cluster-construct/grafana-operator-secret-addon.ts
================================================
import 'source-map-support/register'; // to get better stack traces and debugging
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as eks from "aws-cdk-lib/aws-eks";
import { Construct } from 'constructs';
import { dependable }
from '@aws-quickstart/eks-blueprints/dist/utils';

/**
 * Cluster add-on that creates a Parameter Store backed ClusterSecretStore
 * ("ssm-parameter-store") and an ExternalSecret that copies the parameter
 * "/grafana-api-key" into the secret "grafana-admin-credentials" in the
 * "grafana-operator" namespace. Declared as dependent on the External Secrets
 * and Grafana Operator add-ons.
 */
export class GrafanaOperatorSecretAddon implements blueprints.ClusterAddOn {
    id?: string | undefined;

    /**
     * Creates both manifests in the cluster's stack.
     *
     * @param clusterInfo cluster the manifests are applied to; its stack region is used for the store
     * @returns a promise resolving to the ClusterSecretStore manifest
     */
    @dependable(blueprints.addons.ExternalsSecretsAddOn.name, blueprints.addons.GrafanaOperatorAddon.name)
    deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
        const cluster = clusterInfo.cluster;

        const secretStore = new eks.KubernetesManifest(clusterInfo.cluster.stack, "ClusterSecretStore", {
            cluster: cluster,
            manifest: [
                {
                    apiVersion: "external-secrets.io/v1beta1",
                    kind: "ClusterSecretStore",
                    metadata: {
                        name: "ssm-parameter-store",
                        namespace: "default"
                    },
                    spec: {
                        provider: {
                            aws: {
                                service: "ParameterStore",
                                region: clusterInfo.cluster.stack.region,
                                auth: {
                                    jwt: {
                                        serviceAccountRef: {
                                            name: "external-secrets-sa",
                                            namespace: "external-secrets",
                                        },
                                    },
                                },
                            },
                        },
                    },
                },
            ],
        });

        const externalSecret = new eks.KubernetesManifest(clusterInfo.cluster.stack, "ExternalSecret", {
            cluster: cluster,
            manifest: [
                {
                    apiVersion: "external-secrets.io/v1beta1",
                    kind: "ExternalSecret",
                    metadata: {
                        name: "external-grafana-admin-credentials",
                        namespace: "grafana-operator"
                    },
                    spec: {
                        secretStoreRef: {
                            name: "ssm-parameter-store",
                            kind: "ClusterSecretStore",
                        },
                        target: {
                            name: "grafana-admin-credentials"
                        },
                        data: [
                            {
                                secretKey: "GF_SECURITY_ADMIN_APIKEY",
                                remoteRef: {
                                    key: "/grafana-api-key"
                                },
                            },
                        ],
                    },
                },
            ],
        });

        // The ExternalSecret references the store by name, so it is applied after it.
        externalSecret.node.addDependency(secretStore);
        return Promise.resolve(secretStore);
    }
}

================================================
FILE: lib/multi-cluster-construct/multi-cluster-builder.ts
================================================
import { Construct } from 'constructs';

// Blueprints Lib
import * as blueprints from '@aws-quickstart/eks-blueprints';
import { ClusterSecretStoreAddon } from './cluster-secret-store-addon';

/**
 * Builds the blueprint shared by the conformance test clusters: AMP add-on
 * with an OpenTelemetry collector, FluxCD pointed at the eks-anywhere-addons
 * repository, the cluster secret stores, the EBS CSI driver and the cluster autoscaler.
 */
export default class MultiClusterBuilderConstruct {

    /**
     * Creates the blueprint and builds it as stack `<id>-blueprint`.
     *
     * @param scope parent construct
     * @param id prefix of the stack id
     * @param account target account; resolved by create() when omitted
     * @param region target region; resolved by create() when omitted
     */
    build(scope: Construct, id: string, account?: string, region?: string ) {
        const stackID = `${id}-blueprint`;

        // create() applies the environment-variable fallbacks, so the raw values are passed through.
        this.create(scope, account, region)
            .build(scope, stackID);
    }

    /**
     * Assembles the blueprint builder without building a stack.
     *
     * The collector manifest path points at `resources/otel-collector-config-new.yml`.
     * NOTE(review): this method does not create that file - confirm it is produced
     * before the blueprint is built.
     *
     * @param scope construct used to read context values
     * @param account target account; falls back to CDK_DEFAULT_ACCOUNT
     * @param region target region; falls back to CDK_DEFAULT_REGION
     * @returns the configured blueprint builder
     */
    create(scope: Construct, account?: string, region?: string ) {
        // Resolve the target environment, falling back to the CDK defaults.
        const accountID = account ?? process.env.CDK_DEFAULT_ACCOUNT! ;
        const awsRegion = region ?? process.env.CDK_DEFAULT_REGION! ;

        // The default endpoint is a non-functional placeholder; the real value comes from CDK context.
        const ampEndpoint = blueprints.utils.valueFromContext(scope, "conformitron.amp.endpoint", "https://aps-workspaces..amazonaws.com/workspaces//");

        const ampAddOnProps: blueprints.AmpAddOnProps = {
            ampPrometheusEndpoint: ampEndpoint,
        };

        ampAddOnProps.openTelemetryCollector = {
            manifestPath: __dirname + '/resources/otel-collector-config-new.yml',
            manifestParameterMap: {
                logGroupName: `/aws/eks/conformitron/cluster`,
                logStreamName: `$NODE_NAME`,
                logRetentionDays: 30,
                // Use the resolved region: the raw `region` parameter is undefined
                // whenever the caller relies on the CDK_DEFAULT_REGION fallback.
                awsRegion: awsRegion
            }
        };

        return blueprints.ObservabilityBuilder.builder()
            .account(accountID)
            .region(awsRegion)
            .withAmpProps(ampAddOnProps)
            .enableOpenSourcePatternAddOns()
            .addOns(
                new blueprints.addons.FluxCDAddOn({
                    repositories:[{
                        name: "eks-cloud-addons-conformance",
                        namespace: "flux-system",
                        repository: {
                            repoUrl: 'https://github.com/aws-samples/eks-anywhere-addons',
                            targetRevision: "main",
                        },
                        values: {
                        },
                        kustomizations: [
                            {kustomizationPath: "./eks-anywhere-common/Addons/Core/Botkube"},
                            {kustomizationPath: "./eks-anywhere-common/Addons/Core/Kube-Observer"},
                            {kustomizationPath: "./eks-anywhere-common/Testers/"},
                            {kustomizationPath: "./eks-cloud/Testers"},
                            {kustomizationPath: "./eks-anywhere-common/Addons/Partner"},
                            {kustomizationPath: "./eks-cloud/Partner"},
                        ],
                    }],
                }),
                new ClusterSecretStoreAddon(),
                new blueprints.addons.EbsCsiDriverAddOn(),
                new blueprints.addons.ClusterAutoScalerAddOn()
            );
    }
}

================================================
FILE: lib/multi-cluster-construct/pipeline.ts
================================================
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as eks from 'aws-cdk-lib/aws-eks';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import { Construct } from 'constructs';
import MultiClusterBuilderConstruct from './multi-cluster-builder';
import { GrafanaMonitoringConstruct } from './grafana-monitor-builder';
import { ClusterName, clusterMappings } from './clusterMapping';

/**
 * Main multi-cluster deployment pipeline.
 *
 * Deploys, in one wave, the Grafana monitoring cluster, an x86 and an ARM
 * cluster for every configured Kubernetes version, and Bottlerocket x86/ARM
 * clusters for the last configured version.
 */
export class PipelineMultiCluster {

    /**
     * Builds the pipeline stack "multi-cluster-central-pipeline" into `scope`.
     *
     * Account and region come from ACCOUNT_ID / AWS_REGION, falling back to the
     * CDK defaults. Kubernetes versions come from the context value
     * "conformitron.versions" (default 1.28, 1.29, 1.30).
     *
     * @param scope parent construct
     * @throws Error when the configured version list is empty
     */
    async buildAsync(scope: Construct) {
        const accountID = process.env.ACCOUNT_ID! || process.env.CDK_DEFAULT_ACCOUNT! ;
        const region = process.env.AWS_REGION! || process.env.CDK_DEFAULT_REGION!;

        const versions = blueprints.utils.valueFromContext(scope, "conformitron.versions", ["1.28","1.29","1.30"]);
        const clusterVersions: eks.KubernetesVersion[] = versions.map((v: string) => eks.KubernetesVersion.of(v));
        if (clusterVersions.length === 0) {
            // Without at least one version there is no "latest" version for the Bottlerocket clusters.
            throw new Error('Context value "conformitron.versions" must contain at least one Kubernetes version');
        }

        // Stages in codepipeline
        const stages : blueprints.StackStage[] = [];

        // The monitoring blueprint is created first, before any workload cluster blueprint.
        const blueprintGrafana = new GrafanaMonitoringConstruct().create(scope, accountID, region);
        stages.push({
            id: ClusterName.MONITORING,
            stackBuilder: blueprintGrafana
                .clone(region, accountID)
        });

        /*
            TODO: Separate region for clusters than infra account region,
            trust policy is created when pipeline is bootstrapped.
            It will be helpful for enterprise customers.
            Similar to approach in multi-region-construct pattern
        */
        for (const version of clusterVersions) {
            stages.push(this.buildClusterStage(scope, ClusterName.X86, version, accountID, region));
            stages.push(this.buildClusterStage(scope, ClusterName.ARM, version, accountID, region));
        }

        // Only deploy latest kube version on BR Clusters
        const latestVersion = clusterVersions[clusterVersions.length - 1];
        stages.push(this.buildClusterStage(scope, ClusterName.BR_X86, latestVersion, accountID, region));
        stages.push(this.buildClusterStage(scope, ClusterName.BR_ARM, latestVersion, accountID, region));

        // NOTE(review): owner and revision point at a personal fork/branch - confirm before reuse.
        const gitOwner = 'Howlla';
        const gitRepositoryName = 'cdk-eks-blueprints-patterns';

        blueprints.CodePipelineStack.builder()
            .application('npx ts-node bin/multi-cluster-conformitron.ts')
            .name('multi-cluster-central-pipeline')
            .owner(gitOwner)
            .codeBuildPolicies(blueprints.DEFAULT_BUILD_POLICIES)
            .repository({
                repoUrl: gitRepositoryName,
                credentialsSecretName: 'github-token',
                targetRevision: 'conformitronPipeline',
                trigger: blueprints.GitHubTrigger.POLL
            })
            .wave({
                id: "prod-test",
                stages
            })
            .build(scope, "multi-cluster-central-pipeline", {
                env: {
                    // NOTE(review): the pipeline stack uses CDK_DEFAULT_ACCOUNT while the stages use
                    // `accountID` (which prefers ACCOUNT_ID) - confirm the difference is intended.
                    account: process.env.CDK_DEFAULT_ACCOUNT,
                    region: region,
                }
            });
    }

    /**
     * Builds the pipeline stage for one cluster flavour and Kubernetes version.
     * The stage id is `<clusterName>-<version>` with the first "." of the version replaced by "-".
     *
     * @param scope construct passed to the blueprint builder
     * @param clusterName cluster flavour; selects the AMI and instance type from clusterMappings
     * @param version Kubernetes version of the cluster
     * @param accountID target account of the blueprint
     * @param region target region of the blueprint
     */
    private buildClusterStage(scope: Construct, clusterName: ClusterName, version: eks.KubernetesVersion,
        accountID: string, region: string): blueprints.StackStage {
        const builder = new MultiClusterBuilderConstruct().create(scope, accountID, region);
        const mapping = clusterMappings[clusterName]!;
        const clusterProps = this.buildClusterProps(mapping.amiType, mapping.instanceType);

        const blueprint = builder
            .version(version)
            .clusterProvider(new blueprints.MngClusterProvider(clusterProps))
            .useDefaultSecretEncryption(true);

        return {
            id: clusterName + "-" + version.version.replace(".", "-"),
            stackBuilder : blueprint.clone(region)
        };
    }

    /**
     * Managed node group settings shared by all clusters: 1 to 2 nodes
     * (1 desired) with 100 GiB disks.
     *
     * @param amiType AMI type of the node group
     * @param instanceType the single instance type of the node group
     */
    buildClusterProps(amiType:eks.NodegroupAmiType,instanceType:ec2.InstanceType) : blueprints.MngClusterProviderProps{
        const clusterProps : blueprints.MngClusterProviderProps = {
            maxSize : 2,
            minSize : 1,
            desiredSize: 1,
            diskSize: 100,
            amiType: amiType,
            instanceTypes:[instanceType]
        };
        return clusterProps;
    }
}

================================================
FILE: lib/multi-cluster-construct/resources/amp-config/alerting-rules.yml
================================================
groups:
  - name: infra-alerts-01
    rules:
      - alert: NodeNetworkInterfaceFlapping
        expr: changes(node_network_up{device!~"veth.+",job="node-exporter"}[2m]) > 2
        for: 2m
        labels:
          severity: warning
        annotations:
          description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
          summary: Network interface is often changing its status
      - alert: NodeFilesystemSpaceFillingUp
        expr:
(node_filesystem_avail_bytes{fstype!="",job="node-exporter"} / node_filesystem_size_bytes{fstype!="",job="node-exporter"} * 100 < 15 and predict_linear(node_filesystem_avail_bytes{fstype!="",job="node-exporter"}[6h], 24 * 60 * 60) < 0 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 1h labels: severity: warning annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. summary: Filesystem is predicted to run out of space within the next 24 hours. - alert: NodeFilesystemSpaceFillingUp expr: (node_filesystem_avail_bytes{fstype!="",job="node-exporter"} / node_filesystem_size_bytes{fstype!="",job="node-exporter"} * 100 < 10 and predict_linear(node_filesystem_avail_bytes{fstype!="",job="node-exporter"}[6h], 4 * 60 * 60) < 0 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 1h labels: severity: critical annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. summary: Filesystem is predicted to run out of space within the next 4 hours. - alert: NodeFilesystemAlmostOutOfSpace expr: (node_filesystem_avail_bytes{fstype!="",job="node-exporter"} / node_filesystem_size_bytes{fstype!="",job="node-exporter"} * 100 < 3 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 30m labels: severity: warning annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. summary: Filesystem has less than 3% space left. 
- alert: NodeFilesystemAlmostOutOfSpace expr: (node_filesystem_avail_bytes{fstype!="",job="node-exporter"} / node_filesystem_size_bytes{fstype!="",job="node-exporter"} * 100 < 5 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 30m labels: severity: critical annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. summary: Filesystem has less than 5% space left. - alert: NodeFilesystemFilesFillingUp expr: (node_filesystem_files_free{fstype!="",job="node-exporter"} / node_filesystem_files{fstype!="",job="node-exporter"} * 100 < 40 and predict_linear(node_filesystem_files_free{fstype!="",job="node-exporter"}[6h], 24 * 60 * 60) < 0 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 1h labels: severity: warning annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. summary: Filesystem is predicted to run out of inodes within the next 24 hours. - alert: NodeFilesystemFilesFillingUp expr: (node_filesystem_files_free{fstype!="",job="node-exporter"} / node_filesystem_files{fstype!="",job="node-exporter"} * 100 < 20 and predict_linear(node_filesystem_files_free{fstype!="",job="node-exporter"}[6h], 4 * 60 * 60) < 0 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 1h labels: severity: critical annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. summary: Filesystem is predicted to run out of inodes within the next 4 hours. 
- alert: NodeFilesystemAlmostOutOfFiles expr: (node_filesystem_files_free{fstype!="",job="node-exporter"} / node_filesystem_files{fstype!="",job="node-exporter"} * 100 < 5 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 1h labels: severity: warning annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. summary: Filesystem has less than 5% inodes left. - alert: NodeFilesystemAlmostOutOfFiles expr: (node_filesystem_files_free{fstype!="",job="node-exporter"} / node_filesystem_files{fstype!="",job="node-exporter"} * 100 < 3 and node_filesystem_readonly{fstype!="",job="node-exporter"} == 0) for: 1h labels: severity: critical annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. summary: Filesystem has less than 3% inodes left. - alert: NodeNetworkReceiveErrs expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01 for: 1h labels: severity: warning annotations: description: The {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes. summary: Network interface is reporting many receive errors. - alert: NodeNetworkTransmitErrs expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01 for: 1h labels: severity: warning annotations: description: The {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes. summary: Network interface is reporting many transmit errors. - alert: NodeHighNumberConntrackEntriesUsed expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 labels: severity: warning annotations: description: The {{ $value | humanizePercentage }} of conntrack entries are used. 
summary: Number of conntrack are getting close to the limit. - alert: NodeTextFileCollectorScrapeError expr: node_textfile_scrape_error{job="node-exporter"} == 1 labels: severity: warning annotations: description: Node Exporter text file collector failed to scrape. summary: Node Exporter text file collector failed to scrape. - alert: NodeClockSkewDetected expr: (node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0) for: 10m labels: severity: warning annotations: description: Clock on {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. summary: Clock skew detected. - alert: NodeClockNotSynchronising expr: min_over_time(node_timex_sync_status[5m]) == 0 and node_timex_maxerror_seconds >= 16 for: 10m labels: severity: warning annotations: description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. summary: Clock not synchronising. - alert: NodeRAIDDegraded expr: node_md_disks_required - ignoring(state) (node_md_disks{state="active"}) > 0 for: 15m labels: severity: critical annotations: description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. summary: RAID Array is degraded - alert: NodeRAIDDiskFailure expr: node_md_disks{state="failed"} > 0 labels: severity: warning annotations: description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
summary: Failed device in RAID array - alert: NodeFileDescriptorLimit expr: (node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70) for: 15m labels: severity: warning annotations: description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. summary: Kernel is predicted to exhaust file descriptors limit soon. - alert: NodeFileDescriptorLimit expr: (node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90) for: 15m labels: severity: critical annotations: description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. summary: Kernel is predicted to exhaust file descriptors limit soon. - name: infra-alerts-02 rules: - alert: KubeNodeNotReady expr: kube_node_status_condition{condition="Ready",job="kube-state-metrics",status="true"} == 0 for: 15m labels: severity: warning annotations: description: The {{ $labels.node }} has been unready for more than 15 minutes. summary: Node is not ready. - alert: KubeNodeUnreachable expr: (kube_node_spec_taint{effect="NoSchedule",job="kube-state-metrics",key="node.kubernetes.io/unreachable"} unless ignoring(key, value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 for: 15m labels: severity: warning annotations: description: The {{ $labels.node }} is unreachable and some workloads may be rescheduled. summary: Node is unreachable. 
- alert: KubeletTooManyPods expr: count by(cluster, node) ((kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on(instance, pod, namespace, cluster) group_left(node) topk by(instance, pod, namespace, cluster) (1, kube_pod_info{job="kube-state-metrics"})) / max by(cluster, node) (kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1) > 0.95 for: 15m labels: severity: info annotations: description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. summary: Kubelet is running at capacity. - alert: KubeNodeReadinessFlapping expr: sum by(cluster, node) (changes(kube_node_status_condition{condition="Ready",status="true"}[15m])) > 2 for: 15m labels: severity: warning annotations: description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. summary: Node readiness status is flapping. - alert: KubeletPlegDurationHigh expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: severity: warning annotations: description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. - alert: KubeletPodStartUpLatencyHigh expr: histogram_quantile(0.99, sum by(cluster, instance, le) (rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m]))) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet"} > 60 for: 15m labels: severity: warning annotations: description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. summary: Kubelet Pod startup latency is too high. 
- alert: KubeletClientCertificateExpiration expr: kubelet_certificate_manager_client_ttl_seconds < 604800 labels: severity: warning annotations: description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. summary: Kubelet client certificate is about to expire. - alert: KubeletClientCertificateExpiration expr: kubelet_certificate_manager_client_ttl_seconds < 86400 labels: severity: critical annotations: description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. summary: Kubelet client certificate is about to expire. - alert: KubeletServerCertificateExpiration expr: kubelet_certificate_manager_server_ttl_seconds < 604800 labels: severity: warning annotations: description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. summary: Kubelet server certificate is about to expire. - alert: KubeletServerCertificateExpiration expr: kubelet_certificate_manager_server_ttl_seconds < 86400 labels: severity: critical annotations: description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. summary: Kubelet server certificate is about to expire. - alert: KubeletClientCertificateRenewalErrors expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 for: 15m labels: severity: warning annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes). summary: Kubelet has failed to renew its client certificate. - alert: KubeletServerCertificateRenewalErrors expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 for: 15m labels: severity: warning annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes). 
summary: Kubelet has failed to renew its server certificate. - alert: KubeletDown expr: absent(up{job="kubelet"} == 1) for: 15m labels: severity: critical annotations: description: Kubelet has disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery. - alert: KubeVersionMismatch expr: count by(cluster) (count by(git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"}, "git_version", "$1", "git_version", "(v[0-9]*.[0-9]*).*"))) > 1 for: 15m labels: severity: warning annotations: description: There are {{ $value }} different semantic versions of Kubernetes components running. summary: Different semantic versions of Kubernetes components running. - alert: KubeClientErrors expr: (sum by(cluster, instance, job, namespace) (rate(rest_client_requests_total{code=~"5.."}[5m])) / sum by(cluster, instance, job, namespace) (rate(rest_client_requests_total[5m]))) > 0.01 for: 15m labels: severity: warning annotations: description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors. summary: Kubernetes API server client is experiencing errors. - alert: KubeClientCertificateExpiration expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 labels: severity: warning annotations: description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days. summary: Client certificate is about to expire.
- alert: KubeClientCertificateExpiration expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 labels: severity: critical annotations: description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours. summary: Client certificate is about to expire. - alert: KubeAggregatedAPIErrors expr: sum by(name, namespace, cluster) (increase(aggregator_unavailable_apiservice_total[10m])) > 4 labels: severity: warning annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m. summary: Kubernetes aggregated API has reported errors. - name: infra-alerts-03 rules: - alert: KubeAggregatedAPIDown expr: (1 - max by(name, namespace, cluster) (avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85 for: 5m labels: severity: warning annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m. summary: Kubernetes aggregated API is down. - alert: KubeAPIDown expr: absent(up{job="kube-admin"} == 1) for: 15m labels: severity: critical annotations: description: KubeAPI has disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery. - alert: KubeAPITerminatedRequests expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / (sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m]))) > 0.2 for: 5m labels: severity: warning annotations: description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. 
summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. - alert: KubePersistentVolumeFillingUp expr: (kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"} / kubelet_volume_stats_capacity_bytes{job="kubelet",namespace=~".*"}) < 0.03 and kubelet_volume_stats_used_bytes{job="kubelet",namespace=~".*"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1m labels: severity: critical annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free. summary: PersistentVolume is filling up. - alert: KubePersistentVolumeFillingUp expr: (kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"} / kubelet_volume_stats_capacity_bytes{job="kubelet",namespace=~".*"}) < 0.15 and kubelet_volume_stats_used_bytes{job="kubelet",namespace=~".*"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1h labels: severity: warning annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. summary: PersistentVolume is filling up. 
- alert: KubePersistentVolumeInodesFillingUp expr: (kubelet_volume_stats_inodes_free{job="kubelet",namespace=~".*"} / kubelet_volume_stats_inodes{job="kubelet",namespace=~".*"}) < 0.03 and kubelet_volume_stats_inodes_used{job="kubelet",namespace=~".*"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1m labels: severity: critical annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes. summary: PersistentVolumeInodes is filling up. - alert: KubePersistentVolumeInodesFillingUp expr: (kubelet_volume_stats_inodes_free{job="kubelet",namespace=~".*"} / kubelet_volume_stats_inodes{job="kubelet",namespace=~".*"}) < 0.15 and kubelet_volume_stats_inodes_used{job="kubelet",namespace=~".*"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{job="kubelet",namespace=~".*"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1h labels: severity: warning annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free. summary: PersistentVolumeInodes are filling up. - alert: KubePersistentVolumeErrors expr: kube_persistentvolume_status_phase{job="kube-state-metrics",phase=~"Failed|Pending"} > 0 for: 5m labels: severity: critical annotations: description: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}. 
summary: PersistentVolume is having issues with provisioning. - alert: KubeCPUOvercommit expr: sum(namespace_cpu:kube_pod_container_resource_requests:sum) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 and (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 for: 10m labels: severity: warning annotations: description: Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure. summary: Cluster has overcommitted CPU resource requests. - alert: KubeMemoryOvercommit expr: sum(namespace_memory:kube_pod_container_resource_requests:sum) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 and (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 for: 10m labels: severity: warning annotations: description: Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure. summary: Cluster has overcommitted memory resource requests. - alert: KubeCPUQuotaOvercommit expr: sum(min without(resource) (kube_resourcequota{job="kube-state-metrics",resource=~"(cpu|requests.cpu)",type="hard"})) / sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) > 1.5 for: 5m labels: severity: warning annotations: description: Cluster has overcommitted CPU resource requests for Namespaces. summary: Cluster has overcommitted CPU resource requests. - alert: KubeMemoryQuotaOvercommit expr: sum(min without(resource) (kube_resourcequota{job="kube-state-metrics",resource=~"(memory|requests.memory)",type="hard"})) / sum(kube_node_status_allocatable{job="kube-state-metrics",resource="memory"}) > 1.5 for: 5m labels: severity: warning annotations: description: Cluster has overcommitted memory resource requests for Namespaces. 
summary: Cluster has overcommitted memory resource requests. - alert: KubeQuotaAlmostFull expr: kube_resourcequota{job="kube-state-metrics",type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics",type="hard"} > 0) > 0.9 < 1 for: 15m labels: severity: info annotations: description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. summary: Namespace quota is going to be full. - alert: KubeQuotaFullyUsed expr: kube_resourcequota{job="kube-state-metrics",type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics",type="hard"} > 0) == 1 for: 15m labels: severity: info annotations: description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. summary: Namespace quota is fully used. - alert: KubeQuotaExceeded expr: kube_resourcequota{job="kube-state-metrics",type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics",type="hard"} > 0) > 1 for: 15m labels: severity: warning annotations: description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. summary: Namespace quota has exceeded the limits. - alert: CPUThrottlingHigh expr: sum by(container, pod, namespace) (increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) / sum by(container, pod, namespace) (increase(container_cpu_cfs_periods_total[5m])) > (25 / 100) for: 15m labels: severity: info annotations: description: The {{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}. summary: Processes experience elevated CPU throttling. 
- alert: KubePodCrashLooping expr: max_over_time(kube_pod_container_status_waiting_reason{job="kube-state-metrics",namespace=~".*",reason="CrashLoopBackOff"}[5m]) >= 1 for: 15m labels: severity: warning annotations: description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason:"CrashLoopBackOff"). summary: Pod is crash looping. - alert: KubePodNotReady expr: sum by(namespace, pod, cluster) (max by(namespace, pod, cluster) (kube_pod_status_phase{job="kube-state-metrics",namespace=~".*",phase=~"Pending|Unknown"}) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"}))) > 0 for: 15m labels: severity: warning annotations: description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) has been in a non-ready state for longer than 15 minutes. summary: Pod has been in a non-ready state for more than 15 minutes. - alert: KubeDeploymentGenerationMismatch expr: kube_deployment_status_observed_generation{job="kube-state-metrics",namespace=~".*"} != kube_deployment_metadata_generation{job="kube-state-metrics",namespace=~".*"} for: 15m labels: severity: warning annotations: description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. 
summary: Deployment generation mismatch due to possible roll-back - alert: KubeDeploymentReplicasMismatch expr: (kube_deployment_spec_replicas{job="kube-state-metrics",namespace=~".*"} > kube_deployment_status_replicas_available{job="kube-state-metrics",namespace=~".*"}) and (changes(kube_deployment_status_replicas_updated{job="kube-state-metrics",namespace=~".*"}[10m]) == 0) for: 15m labels: severity: warning annotations: description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. summary: Deployment has not matched the expected number of replicas. - name: infra-alerts-04 rules: - alert: KubeStatefulSetReplicasMismatch expr: (kube_statefulset_status_replicas_ready{job="kube-state-metrics",namespace=~".*"} != kube_statefulset_status_replicas{job="kube-state-metrics",namespace=~".*"}) and (changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics",namespace=~".*"}[10m]) == 0) for: 15m labels: severity: warning annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. summary: StatefulSet has not matched the expected number of replicas. - alert: KubeStatefulSetGenerationMismatch expr: kube_statefulset_status_observed_generation{job="kube-state-metrics",namespace=~".*"} != kube_statefulset_metadata_generation{job="kube-state-metrics",namespace=~".*"} for: 15m labels: severity: warning annotations: description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
summary: StatefulSet generation mismatch due to possible roll-back - alert: KubeStatefulSetUpdateNotRolledOut expr: (max without(revision) (kube_statefulset_status_current_revision{job="kube-state-metrics",namespace=~".*"} unless kube_statefulset_status_update_revision{job="kube-state-metrics",namespace=~".*"}) * (kube_statefulset_replicas{job="kube-state-metrics",namespace=~".*"} != kube_statefulset_status_replicas_updated{job="kube-state-metrics",namespace=~".*"})) and (changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics",namespace=~".*"}[5m]) == 0) for: 15m labels: severity: warning annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. summary: StatefulSet update has not been rolled out. - alert: KubeDaemonSetRolloutStuck expr: ((kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace=~".*"} != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace=~".*"}) or (kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace=~".*"} != 0) or (kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace=~".*"} != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace=~".*"}) or (kube_daemonset_status_number_available{job="kube-state-metrics",namespace=~".*"} != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace=~".*"})) and (changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace=~".*"}[5m]) == 0) for: 15m labels: severity: warning annotations: description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes. summary: DaemonSet rollout is stuck. 
- alert: KubeContainerWaiting expr: sum by(namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",namespace=~".*"}) > 0 for: 1h labels: severity: warning annotations: description: Pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. summary: Pod container waiting longer than 1 hour - alert: KubeDaemonSetNotScheduled expr: kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace=~".*"} - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace=~".*"} > 0 for: 10m labels: severity: warning annotations: description: The {{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled. summary: DaemonSet pods are not scheduled. - alert: KubeDaemonSetMisScheduled expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace=~".*"} > 0 for: 15m labels: severity: warning annotations: description: The {{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run. summary: DaemonSet pods are misscheduled. - alert: KubeJobNotCompleted expr: time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics",namespace=~".*"} and kube_job_status_active{job="kube-state-metrics",namespace=~".*"} > 0) > 43200 labels: severity: warning annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" | humanizeDuration }} to complete. summary: Job did not complete in time - alert: KubeJobFailed expr: kube_job_failed{job="kube-state-metrics",namespace=~".*"} > 0 for: 15m labels: severity: warning annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert. summary: Job failed to complete. 
- alert: KubeHpaReplicasMismatch expr: (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics",namespace=~".*"} != kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics",namespace=~".*"}) and (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics",namespace=~".*"} > kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics",namespace=~".*"}) and (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics",namespace=~".*"} < kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics",namespace=~".*"}) and changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics",namespace=~".*"}[15m]) == 0 for: 15m labels: severity: warning annotations: description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes. summary: HPA has not matched desired number of replicas. - alert: KubeHpaMaxedOut expr: kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics",namespace=~".*"} == kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics",namespace=~".*"} for: 15m labels: severity: warning annotations: description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes. summary: HPA is running at max replicas - alert: KubeStateMetricsListErrors expr: (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) / sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m]))) > 0.01 for: 15m labels: severity: critical annotations: description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects or at all. summary: kube-state-metrics is experiencing errors in list operations.
- alert: KubeStateMetricsWatchErrors expr: (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) / sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m]))) > 0.01 for: 15m labels: severity: critical annotations: description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects or at all. summary: kube-state-metrics is experiencing errors in watch operations. - alert: KubeStateMetricsShardingMismatch expr: stdvar(kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0 for: 15m labels: severity: critical annotations: description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. summary: kube-state-metrics sharding is misconfigured. - alert: KubeStateMetricsShardsMissing expr: 2 ^ max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1 - sum(2 ^ max by(shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"})) != 0 for: 15m labels: severity: critical annotations: description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. summary: kube-state-metrics shards are missing. - alert: KubeAPIErrorBudgetBurn expr: sum(apiserver_request:burnrate1h) > (14.4 * 0.01) and sum(apiserver_request:burnrate5m) > (14.4 * 0.01) for: 2m labels: long: 1h severity: critical short: 5m annotations: description: The API server is burning too much error budget. summary: The API server is burning too much error budget. - alert: KubeAPIErrorBudgetBurn expr: sum(apiserver_request:burnrate6h) > (6 * 0.01) and sum(apiserver_request:burnrate30m) > (6 * 0.01) for: 15m labels: long: 6h severity: critical short: 30m annotations: description: The API server is burning too much error budget. summary: The API server is burning too much error budget.
- alert: KubeAPIErrorBudgetBurn expr: sum(apiserver_request:burnrate1d) > (3 * 0.01) and sum(apiserver_request:burnrate2h) > (3 * 0.01) for: 1d labels: long: 1d severity: warning short: 2h annotations: description: The API server is burning too much error budget. summary: The API server is burning too much error budget. - alert: KubeAPIErrorBudgetBurn expr: sum(apiserver_request:burnrate3d) > (1 * 0.01) and sum(apiserver_request:burnrate6h) > (1 * 0.01) for: 3h labels: long: 3d severity: warning short: 6h annotations: description: The API server is burning too much error budget. summary: The API server is burning too much error budget. - alert: TargetDown expr: 100 * (count by(job, namespace, service) (up == 0) / count by(job, namespace, service) (up)) > 10 for: 10m labels: severity: warning annotations: description: The {{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down. - name: infra-alerts-05 rules: - alert: Watchdog expr: vector(1) labels: severity: none annotations: description: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty. - alert: InfoInhibitor expr: ALERTS{severity="info"} == 1 unless on(namespace) ALERTS{alertname!="InfoInhibitor",alertstate="firing",severity=~"warning|critical"} == 1 labels: severity: none annotations: description: This is an alert that is used to inhibit info alerts. By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with other alerts. 
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a severity of 'warning' or 'critical' starts firing on the same namespace. This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". - alert: etcdInsufficientMembers expr: sum by(job) (up{job=~".*etcd.*"} == bool 1) < ((count by(job) (up{job=~".*etcd.*"}) + 1) / 2) for: 3m labels: severity: critical annotations: message: etcd cluster "{{ $labels.job }}":insufficient members ({{ $value }}). - alert: etcdHighNumberOfLeaderChanges expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3 for: 15m labels: severity: warning annotations: message: etcd cluster "{{ $labels.job }}":instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour. - alert: etcdNoLeader expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 for: 1m labels: severity: critical annotations: message: etcd cluster "{{ $labels.job }}":member {{ $labels.instance }} has no leader. - alert: etcdHighNumberOfFailedGRPCRequests expr: 100 * sum by(job, instance, grpc_service, grpc_method) (rate(grpc_server_handled_total{grpc_code!="OK",job=~".*etcd.*"}[5m])) / sum by(job, instance, grpc_service, grpc_method) (rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) > 1 for: 10m labels: severity: warning annotations: message: etcd cluster "{{ $labels.job }}":{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}. - alert: etcdGRPCRequestsSlow expr: histogram_quantile(0.99, sum by(job, instance, grpc_service, grpc_method, le) (rate(grpc_server_handling_seconds_bucket{grpc_type="unary",job=~".*etcd.*"}[5m]))) > 0.15 for: 10m labels: severity: critical annotations: message: etcd cluster "{{ $labels.job }}":gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.
- alert: etcdMemberCommunicationSlow expr: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) > 0.15 for: 10m labels: severity: warning annotations: message: etcd cluster "{{ $labels.job }}":member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}. - alert: etcdHighNumberOfFailedProposals expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 for: 15m labels: severity: warning annotations: message: etcd cluster "{{ $labels.job }}":{{ $value }} proposal failures within the last hour on etcd instance {{ $labels.instance }}. - alert: etcdHighFsyncDurations expr: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) > 0.5 for: 10m labels: severity: warning annotations: message: etcd cluster "{{ $labels.job }}":99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}. - alert: etcdHighCommitDurations expr: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) > 0.25 for: 10m labels: severity: warning annotations: message: etcd cluster "{{ $labels.job }}":99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.
- alert: etcdHighNumberOfFailedHTTPRequests expr: sum by(method) (rate(etcd_http_failed_total{code!="404",job=~".*etcd.*"}[5m])) / sum by(method) (rate(etcd_http_received_total{job=~".*etcd.*"}[5m])) > 0.01 for: 10m labels: severity: warning annotations: message: The {{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }} - alert: etcdHighNumberOfFailedHTTPRequests expr: sum by(method) (rate(etcd_http_failed_total{code!="404",job=~".*etcd.*"}[5m])) / sum by(method) (rate(etcd_http_received_total{job=~".*etcd.*"}[5m])) > 0.05 for: 10m labels: severity: critical annotations: message: The {{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}. - alert: etcdHTTPRequestsSlow expr: histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15 for: 10m labels: severity: warning annotations: message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow. ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/apiserver/recording-rules.yml ================================================ groups: - name: apiserver-monitoring rules: - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) record:
code_verb:apiserver_request_total:increase1h - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read record: code_resource:apiserver_request_total:rate5m - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write record: code_resource:apiserver_request_total:rate5m - expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h])) record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h - expr: sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30) record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d - expr: |- 1 - ( ( # write too slow sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) ) + ( # read too slow sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - ( ( sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) or vector(0) ) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) ) ) + # errors sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) ) / sum by (cluster) (code:apiserver_request_total:increase30d) labels: verb: all record: apiserver_request:availability30d - expr: |- 1 - ( sum by (cluster) 
(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - ( # too slow ( sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) or vector(0) ) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) ) + # errors sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) ) / sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"}) labels: verb: read record: apiserver_request:availability30d - expr: |- 1 - ( ( # too slow sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) ) + # errors sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) ) / sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"}) labels: verb: write record: apiserver_request:availability30d - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 labels: quantile: "0.99" verb: read record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 labels: quantile: "0.99" verb: write record: 
cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile - expr: | histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) labels: quantile: "0.9" record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/istio/alerting-rules.yml ================================================ groups: - name: "istio.basic.alerting-rules" rules: - alert: IngressTrafficMissing annotations: summary: 'ingress gateway traffic missing' description: '[Critical]: ingress gateway traffic missing, likely other monitors are misleading, check client logs' expr: > absent(istio_requests_total{destination_service_namespace=~"service-graph.*",reporter="source",source_workload="istio-ingressgateway"})==1 for: 5m - alert: IstioMetricsMissing annotations: summary: 'Istio Metrics missing' description: '[Critical]: Check prometheus deployment or whether the prometheus filters are applied correctly' expr: > absent(istio_requests_total)==1 or absent(istio_request_duration_milliseconds_bucket)==1 for: 5m - name: "istio.workload.alerting-rules" rules: - alert: HTTP5xxRateHigh annotations: summary: '5xx rate too high' description: 'The HTTP 5xx errors rate higher than 0.05 in 5 mins' expr: > sum(irate(istio_requests_total{reporter="destination", response_code=~"5.*"}[5m])) / sum(irate(istio_requests_total{reporter="destination"}[5m])) > 0.05 for: 5m - alert: WorkloadLatencyP99High expr: histogram_quantile(0.99, sum(irate(istio_request_duration_milliseconds_bucket{source_workload=~"svc.*"}[5m])) by (source_workload,namespace, le)) > 160 for: 10m annotations: description: 'The workload request latency P99 > 160ms ' message: "Request duration has slowed down for workload: {{`{{$labels.source_workload}}`}} in namespace: 
{{`{{$labels.namespace}}`}}. Response duration is {{`{{$value}}`}} milliseconds" - alert: IngressLatencyP99High expr: histogram_quantile(0.99, sum(irate(istio_request_duration_milliseconds_bucket{source_workload=~"istio.*"}[5m])) by (source_workload,namespace, le)) > 250 for: 10m annotations: description: 'The ingress latency P99 > 250ms ' message: "Request duration has slowed down for ingress: {{`{{$labels.source_workload}}`}} in namespace: {{`{{$labels.namespace}}`}}. Response duration is {{`{{$value}}`}} milliseconds" - name: "istio.infra.alerting-rules" rules: - alert: ProxyContainerCPUUsageHigh expr: (sum(rate(container_cpu_usage_seconds_total{namespace!="kube-system", container=~"istio-proxy", namespace!=""}[5m])) BY (namespace, pod, container) * 100) > 80 for: 5m annotations: summary: "Proxy Container CPU usage (namespace {{ $labels.namespace }}) (pod {{ $labels.pod }}) (container {{ $labels.container }}) VALUE = {{ $value }}\n" description: "Proxy Container CPU usage is above 80%" - alert: ProxyContainerMemoryUsageHigh expr: (sum(container_memory_working_set_bytes{namespace!="kube-system", container=~"istio-proxy", namespace!=""}) BY (container, pod, namespace) / (sum(container_spec_memory_limit_bytes{namespace!="kube-system", container!="POD"}) BY (container, pod, namespace) > 0)* 100) > 80 for: 5m annotations: summary: "Proxy Container Memory usage (namespace {{ $labels.namespace }}) (pod {{ $labels.pod }}) (container {{ $labels.container }}) VALUE = {{ $value }}\n" description: "Proxy Container Memory usage is above 80%" - alert: IngressMemoryUsageIncreaseRateHigh expr: avg(deriv(container_memory_working_set_bytes{container=~"istio-proxy",namespace="istio-system"}[60m])) > 200 for: 180m annotations: summary: "Ingress proxy Memory change rate, VALUE = {{ $value }}\n" description: "Ingress proxy Memory Usage increases more than 200 Bytes/sec" - alert: IstiodContainerCPUUsageHigh expr: (sum(rate(container_cpu_usage_seconds_total{namespace="istio-system", 
container="discovery"}[5m])) BY (pod) * 100) > 80 for: 5m annotations: summary: "Istiod Container CPU usage (namespace {{ $labels.namespace }}) (pod {{ $labels.pod }}) (container {{ $labels.container }}) VALUE = {{ $value }}\n" description: "Istiod Container CPU usage is above 80%" - alert: IstiodMemoryUsageHigh expr: (sum(container_memory_working_set_bytes{namespace="istio-system", container="discovery"}) BY (pod) / (sum(container_spec_memory_limit_bytes{namespace="istio-system", container="discovery"}) BY (pod) > 0)* 100) > 80 for: 5m annotations: summary: "Istiod Container Memory usage (namespace {{ $labels.namespace }}) (pod {{ $labels.pod }}) (container {{ $labels.container }}) VALUE = {{ $value }}\n" description: "Istiod Container Memory usage is above 80%" - alert: IstiodMemoryUsageIncreaseRateHigh expr: sum(deriv(container_memory_working_set_bytes{namespace="istio-system",pod=~"istiod-.*"}[60m])) > 1000 for: 300m annotations: summary: "Istiod Container Memory usage increase rate high, VALUE = {{ $value }}\n" description: "Istiod Container Memory usage increases more than 1k Bytes/sec" - name: "istio.controlplane.alerting-rules" rules: - alert: IstiodxdsPushErrorsHigh annotations: summary: 'istiod push errors is too high' description: 'istiod push error rate is higher than 0.05' expr: > sum(irate(pilot_xds_push_errors{app="istiod"}[5m])) / sum(irate(pilot_xds_pushes{app="istiod"}[5m])) > 0.05 for: 5m - alert: IstiodxdsRejectHigh annotations: summary: 'istiod rejects rate is too high' description: 'istiod rejects rate is higher than 0.05' expr: > sum(irate(pilot_total_xds_rejects{app="istiod"}[5m])) / sum(irate(pilot_xds_pushes{app="istiod"}[5m])) > 0.05 for: 5m - alert: IstiodContainerNotReady annotations: summary: 'istiod container not ready' description: 'container: discovery not running' expr: > kube_pod_container_status_running{namespace="istio-system", container="discovery", component=""} == 0 for: 5m - alert: IstiodUnavailableReplica annotations: 
summary: 'Istiod unavailable pod' description: 'Istiod unavailable replica > 0' expr: > kube_deployment_status_replicas_unavailable{deployment="istiod", component=""} > 0 for: 5m - alert: Ingress200RateLow annotations: summary: 'ingress gateway 200 rate drops' description: 'The expected rate is 100 per ns, the limit is set based on 15ns' expr: > sum(rate(istio_requests_total{reporter="source", source_workload="istio-ingressgateway",response_code="200",destination_service_namespace=~"service-graph.*"}[5m])) < 1490 for: 30m ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/istio/recording-rules.yml ================================================ groups: - name: "istio.recording-rules" interval: 5s rules: - record: "workload:istio_requests_total" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_requests_total) - record: "workload:istio_request_duration_milliseconds_count" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_request_duration_milliseconds_count) - record: "workload:istio_request_duration_milliseconds_sum" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_request_duration_milliseconds_sum) - record: "workload:istio_request_duration_milliseconds_bucket" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_request_duration_milliseconds_bucket) - record: "workload:istio_request_bytes_count" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_request_bytes_count) - record: "workload:istio_request_bytes_sum" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_request_bytes_sum) - record: "workload:istio_request_bytes_bucket" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_request_bytes_bucket) - record: "workload:istio_response_bytes_count" expr: | sum without(instance, kubernetes_namespace, 
kubernetes_pod_name) (istio_response_bytes_count) - record: "workload:istio_response_bytes_sum" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_response_bytes_sum) - record: "workload:istio_response_bytes_bucket" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_response_bytes_bucket) - record: "workload:istio_tcp_sent_bytes_total" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_tcp_sent_bytes_total) - record: "workload:istio_tcp_received_bytes_total" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_tcp_received_bytes_total) - record: "workload:istio_tcp_connections_opened_total" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_tcp_connections_opened_total) - record: "workload:istio_tcp_connections_closed_total" expr: | sum without(instance, kubernetes_namespace, kubernetes_pod_name) (istio_tcp_connections_closed_total) ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/java/alerting-rules.yml ================================================ groups: - name: default-alert rules: - alert: metric:alerting_rule expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 80 for: 1m labels: severity: warning annotations: summary: "JVM heap warning" description: "JVM heap of instance `{{$labels.instance}}` from application `{{$labels.application}}` is above 80% for one minute. 
(current=`{{$value}}%`)" ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/java/recording-rules.yml ================================================ groups: - name: default-metric rules: - record: metric:recording_rule expr: avg(rate(container_cpu_usage_seconds_total[5m])) ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/nginx/alerting-rules.yml ================================================ groups: - name: Nginx-HTTP-4xx-error-rate rules: - alert: metric:alerting_rule expr: sum(rate(nginx_http_requests_total{status=~"^4.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5 for: 1m labels: severity: critical annotations: summary: Nginx high HTTP 4xx error rate (instance {{ $labels.instance }}) description: "Too many HTTP requests with status 4xx (> 5%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - name: Nginx-HTTP-5xx-error-rate rules: - alert: metric:alerting_rule expr: sum(rate(nginx_http_requests_total{status=~"^5.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5 for: 1m labels: severity: critical annotations: summary: Nginx high HTTP 5xx error rate (instance {{ $labels.instance }}) description: "Too many HTTP requests with status 5xx (> 5%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - name: Nginx-high-latency rules: - alert: metric:alerting_rule expr: histogram_quantile(0.99, sum(rate(nginx_http_request_duration_seconds_bucket[2m])) by (host, node, le)) > 3 for: 2m labels: severity: warning annotations: summary: Nginx latency high (instance {{ $labels.instance }}) description: "Nginx p99 latency is higher than 3 seconds\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" ================================================ FILE: lib/multi-cluster-construct/resources/amp-config/recording-rules.yml ================================================ groups: - name: infra-rules-01 rules: - record: "node_namespace_pod:kube_pod_info:" 
expr: topk by(cluster, namespace, pod) (1, max by(cluster, node, namespace, pod) (label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)"))) - record: node:node_num_cpu:sum expr: count by(cluster, node) (sum by(node, cpu) (node_cpu_seconds_total{job="node-exporter"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:))) - record: :node_memory_MemAvailable_bytes:sum expr: sum by(cluster) (node_memory_MemAvailable_bytes{job="node-exporter"} or (node_memory_Buffers_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Slab_bytes{job="node-exporter"})) - record: cluster:node_cpu:ratio_rate5m expr: sum by (cluster) (rate(node_cpu_seconds_total{job="node-exporter",mode!="idle",mode!="iowait",mode!="steal"}[5m])) / count by (cluster) (sum by(cluster, instance, cpu) (node_cpu_seconds_total{job="node-exporter"})) - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile expr: histogram_quantile(0.99, sum by(cluster, instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet"}) labels: quantile: 0.99 - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile expr: histogram_quantile(0.9, sum by(cluster, instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet"}) labels: quantile: 0.9 - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile expr: histogram_quantile(0.5, sum by(cluster, instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet"}) labels: quantile: 0.5 - record: instance:node_num_cpu:sum expr: count without(cpu, mode) (node_cpu_seconds_total{job="node-exporter",mode="idle"}) - record: 
instance:node_cpu_utilisation:rate5m expr: 1 - avg without(cpu) (sum without(mode) (rate(node_cpu_seconds_total{job="node-exporter",mode=~"idle|iowait|steal"}[5m]))) - record: instance:node_load1_per_cpu:ratio expr: (node_load1{job="node-exporter"} / instance:node_num_cpu:sum{job="node-exporter"}) - record: instance:node_memory_utilisation:ratio expr: 1 - ((node_memory_MemAvailable_bytes{job="node-exporter"} or (node_memory_Buffers_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Slab_bytes{job="node-exporter"})) / node_memory_MemTotal_bytes{job="node-exporter"}) - record: instance:node_vmstat_pgmajfault:rate5m expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) - record: instance_device:node_disk_io_time_seconds:rate5m expr: rate(node_disk_io_time_seconds_total{device=~"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+",job="node-exporter"}[5m]) - record: instance_device:node_disk_io_time_weighted_seconds:rate5m expr: rate(node_disk_io_time_weighted_seconds_total{device=~"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+",job="node-exporter"}[5m]) - record: instance:node_network_receive_bytes_excluding_lo:rate5m expr: sum without(device) (rate(node_network_receive_bytes_total{device!="lo",job="node-exporter"}[5m])) - record: instance:node_network_transmit_bytes_excluding_lo:rate5m expr: sum without(device) (rate(node_network_transmit_bytes_total{device!="lo",job="node-exporter"}[5m])) - record: instance:node_network_receive_drop_excluding_lo:rate5m expr: sum without(device) (rate(node_network_receive_drop_total{device!="lo",job="node-exporter"}[5m])) - record: instance:node_network_transmit_drop_excluding_lo:rate5m expr: sum without(device) (rate(node_network_transmit_drop_total{device!="lo",job="node-exporter"}[5m])) - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile expr: histogram_quantile(0.99, sum without(instance, 
pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.99 - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile expr: histogram_quantile(0.99, sum without(instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.99 - name: infra-rules-02 rules: - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile expr: histogram_quantile(0.99, sum without(instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.99 - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile expr: histogram_quantile(0.9, sum without(instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.9 - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile expr: histogram_quantile(0.9, sum without(instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.9 - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile expr: histogram_quantile(0.9, sum without(instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.9 - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile expr: histogram_quantile(0.5, sum without(instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.5 - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile expr: histogram_quantile(0.5, sum without(instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.5 - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile expr: 
histogram_quantile(0.5, sum without(instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m]))) labels: quantile: 0.5 - record: instance:node_cpu:rate:sum expr: sum by(instance) (rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) - record: instance:node_network_receive_bytes:rate:sum expr: sum by(instance) (rate(node_network_receive_bytes_total[3m])) - record: instance:node_network_transmit_bytes:rate:sum expr: sum by(instance) (rate(node_network_transmit_bytes_total[3m])) - record: instance:node_cpu:ratio expr: sum without(cpu, mode) (rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) / on(instance) group_left() count by(instance) (sum by(instance, cpu) (node_cpu_seconds_total)) - record: cluster:node_cpu:sum_rate5m expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) - record: cluster:node_cpu:ratio expr: cluster:node_cpu:sum_rate5m / count(sum by(instance, cpu) (node_cpu_seconds_total)) - record: count:up1 expr: count without(instance, pod, node) (up == 1) - record: count:up0 expr: count without(instance, pod, node) (up == 0) - record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile expr: histogram_quantile(0.99, sum by(cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[5m]))) > 0 labels: quantile: 0.99 verb: read - record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile expr: histogram_quantile(0.99, sum by(cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 labels: quantile: 0.99 verb: write - record: apiserver_request:burnrate1d expr: ((sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[1d]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) labels: verb: read - record: apiserver_request:burnrate1h expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1h])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1h])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1h])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1h])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[1h]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) labels: verb: read - record: apiserver_request:burnrate2h expr: ((sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[2h])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[2h])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[2h])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[2h])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[2h]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) labels: verb: read - name: infra-rules-03 rules: - record: apiserver_request:burnrate30m expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[30m])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[30m])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[30m])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[30m])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[30m]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) labels: verb: read - record: apiserver_request:burnrate3d expr: ((sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[3d])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[3d])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[3d])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[3d])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[3d]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) labels: verb: read - record: apiserver_request:burnrate5m expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[5m])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[5m])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[5m])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[5m])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[5m]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read - record: apiserver_request:burnrate6h expr: ((sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[6h])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[6h])) or vector(0)) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[6h])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[6h])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[6h]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) labels: verb: read - record: apiserver_request:burnrate1d expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[1d])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[1d]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) labels: verb: write - record: apiserver_request:burnrate1d expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])) - ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",scope=~"resource|",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])) or vector(0)) + sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="5",scope="namespace",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])) + sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="30",scope="cluster",subresource!~"proxy|attach|log|exec|portforward",verb=~"LIST|GET"}[1d])))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"LIST|GET"}[1d]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) labels: verb: read - record: apiserver_request:burnrate1h expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[1h])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[1h]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) labels: verb: write - record: apiserver_request:burnrate2h expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[2h])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[2h]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) labels: verb: write - record: apiserver_request:burnrate30m expr: ((sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[30m])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[30m]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) labels: verb: write - record: apiserver_request:burnrate3d expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[3d])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[3d]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) labels: verb: write - record: apiserver_request:burnrate5m expr: ((sum by(cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write - record: apiserver_request:burnrate6h expr: ((sum by(cluster) 
(rate(apiserver_request_slo_duration_seconds_count{job="apiserver",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[6h])) - sum by(cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",le="1",subresource!~"proxy|attach|log|exec|portforward",verb=~"POST|PUT|PATCH|DELETE"}[6h]))) + sum by(cluster) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))) / sum by(cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) labels: verb: write - record: code_verb:apiserver_request_total:increase30d expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 - record: code:apiserver_request_total:increase30d expr: sum by(cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: verb: read - record: code:apiserver_request_total:increase30d expr: sum by(cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) labels: verb: write - record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h expr: sum by(cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h])) - record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d expr: sum by(cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30) - record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate expr: sum by(cluster, namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!="",job="kubelet"}[5m])) * on(cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})) - record: node_namespace_pod_container:container_memory_working_set_bytes expr: container_memory_working_set_bytes{image!="",job="kubelet"} * on(namespace, pod) group_left(node) topk 
by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""})) - record: node_namespace_pod_container:container_memory_rss expr: container_memory_rss{image!="",job="kubelet"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""})) - name: infra-rules-04 rules: - record: node_namespace_pod_container:container_memory_cache expr: container_memory_cache{image!="",job="kubelet"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""})) - record: node_namespace_pod_container:container_memory_swap expr: container_memory_swap{image!="",job="kubelet"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""})) - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests expr: kube_pod_container_resource_requests{job="kube-state-metrics",resource="memory"} * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) ((kube_pod_status_phase{phase=~"Pending|Running"} == 1)) - record: namespace_memory:kube_pod_container_resource_requests:sum expr: sum by(namespace, cluster) (sum by(namespace, pod, cluster) (max by(namespace, pod, container, cluster) (kube_pod_container_resource_requests{job="kube-state-metrics",resource="memory"}) * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1))) - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests expr: kube_pod_container_resource_requests{job="kube-state-metrics",resource="cpu"} * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) ((kube_pod_status_phase{phase=~"Pending|Running"} == 1)) - record: namespace_cpu:kube_pod_container_resource_requests:sum expr: sum by(namespace, cluster) (sum by(namespace, pod, cluster) (max by(namespace, pod, container, cluster) 
(kube_pod_container_resource_requests{job="kube-state-metrics",resource="cpu"}) * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1))) - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits expr: kube_pod_container_resource_limits{job="kube-state-metrics",resource="memory"} * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) ((kube_pod_status_phase{phase=~"Pending|Running"} == 1)) - record: namespace_memory:kube_pod_container_resource_limits:sum expr: sum by(namespace, cluster) (sum by(namespace, pod, cluster) (max by(namespace, pod, container, cluster) (kube_pod_container_resource_limits{job="kube-state-metrics",resource="memory"}) * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1))) - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits expr: kube_pod_container_resource_limits{job="kube-state-metrics",resource="cpu"} * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) ((kube_pod_status_phase{phase=~"Pending|Running"} == 1)) - record: namespace_cpu:kube_pod_container_resource_limits:sum expr: sum by(namespace, cluster) (sum by(namespace, pod, cluster) (max by(namespace, pod, container, cluster) (kube_pod_container_resource_limits{job="kube-state-metrics",resource="cpu"}) * on(namespace, pod, cluster) group_left() max by(namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1))) - record: namespace_workload_pod:kube_pod_owner:relabel expr: max by(cluster, namespace, workload, pod) (label_replace(label_replace(kube_pod_owner{job="kube-state-metrics",owner_kind="ReplicaSet"}, "replicaset", "$1", "owner_name", "(.*)") * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (1, max by(replicaset, namespace, owner_name) (kube_replicaset_owner{job="kube-state-metrics"})), 
"workload", "$1", "owner_name", "(.*)")) labels: workload_type: deployment - record: namespace_workload_pod:kube_pod_owner:relabel expr: max by(cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job="kube-state-metrics",owner_kind="DaemonSet"}, "workload", "$1", "owner_name", "(.*)")) labels: workload_type: daemonset - record: namespace_workload_pod:kube_pod_owner:relabel expr: max by(cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job="kube-state-metrics",owner_kind="StatefulSet"}, "workload", "$1", "owner_name", "(.*)")) labels: workload_type: statefulset - record: namespace_workload_pod:kube_pod_owner:relabel expr: max by(cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job="kube-state-metrics",owner_kind="Job"}, "workload", "$1", "owner_name", "(.*)")) labels: workload_type: job ================================================ FILE: lib/multi-cluster-construct/resources/cost-optimization/scaleDownEksToZero.yml ================================================ schemaVersion: '0.3' description: |- --- # Scale down all conformitron EKS cluster to 0 assumeRole: arn:aws:iam::ACCOUNT_ID:role/SsmEksRole mainSteps: - name: scaleEKSClusterToZero action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_1 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: arm-1-26-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: scaleEKSClusterToZero_1 action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_2 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: arm-1-27-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: scaleEKSClusterToZero_2 action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_3 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: arm-1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: 
scaleEKSClusterToZero_3 action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_4 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: br-ARM1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: scaleEKSClusterToZero_4 action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_5 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: br-X861-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: scaleEKSClusterToZero_5 action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_6 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: x86-1-26-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: scaleEKSClusterToZero_6 action: aws:executeAwsApi nextStep: scaleEKSClusterToZero_7 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: x86-1-27-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 - name: scaleEKSClusterToZero_7 action: aws:executeAwsApi isEnd: true inputs: Service: eks Api: UpdateNodegroupConfig clusterName: x86-1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 0 maxSize: 1 desiredSize: 0 ================================================ FILE: lib/multi-cluster-construct/resources/cost-optimization/scaleUpEksToOne.yml ================================================ schemaVersion: '0.3' description: |- --- # Scale up all conformitron EKS clusters to 1 assumeRole: arn:aws:iam::ACCOUNT_ID:role/SsmEksRole mainSteps: - name: scaleEKSClusterToOne action: aws:executeAwsApi nextStep: scaleEKSClusterToOne_1 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: arm-1-26-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_1 action: aws:executeAwsApi nextStep:
scaleEKSClusterToOne_2 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: arm-1-27-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_2 action: aws:executeAwsApi nextStep: scaleEKSClusterToOne_3 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: arm-1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_3 action: aws:executeAwsApi nextStep: scaleEKSClusterToOne_4 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: br-arm-1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_4 action: aws:executeAwsApi nextStep: scaleEKSClusterToOne_5 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: br-x86-1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_5 action: aws:executeAwsApi nextStep: scaleEKSClusterToOne_6 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: x86-1-26-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_6 action: aws:executeAwsApi nextStep: scaleEKSClusterToOne_7 isEnd: false inputs: Service: eks Api: UpdateNodegroupConfig clusterName: x86-1-27-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 - name: scaleEKSClusterToOne_7 action: aws:executeAwsApi isEnd: true inputs: Service: eks Api: UpdateNodegroupConfig clusterName: x86-1-28-blueprint nodegroupName: eks-blueprints-mng scalingConfig: minSize: 1 maxSize: 1 desiredSize: 1 ================================================ FILE: lib/multi-cluster-construct/resources/otel-collector-config.yml ================================================ # # OpenTelemetry Collector configuration # 
Metrics pipeline with Prometheus Receiver and AWS Remote Write Exporter sending metrics to Amazon Managed Prometheus # apiVersion: opentelemetry.io/v1alpha1 kind: OpenTelemetryCollector metadata: name: otel-collector-amp namespace: "{{namespace}}" spec: mode: "{{deploymentMode}}" image: public.ecr.aws/aws-observability/aws-otel-collector:v0.37.0 resources: limits: cpu: "1" memory: "2Gi" requests: cpu: "1" memory: "2Gi" serviceAccount: adot-collector podSecurityContext: runAsGroup: 0 runAsUser: 0 volumeMounts: - name: varlogpods mountPath: /var/log/pods readOnly: true volumes: - name: varlogpods hostPath: path: /var/log/pods env: - name: NODE_NAME valueFrom: fieldRef: fieldPath: spec.nodeName config: | receivers: prometheus: config: global: scrape_interval: 15s scrape_timeout: 10s external_labels: cluster: "{{clusterName}}" scrape_configs: {{ start enableAdotMetricsCollectionJob}} - job_name: otel-collector-metrics scrape_interval: 10s static_configs: - targets: ['localhost:8888'] {{ stop enableAdotMetricsCollectionJob }} - job_name: 'kubernetes-kubelet' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc.cluster.local:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/$${1}/proxy/metrics - job_name: 'kubelet' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc.cluster.local:443 - 
source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor {{ start enableAPIserverJob }} - job_name: 'apiserver' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [ __meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name, ] action: keep regex: default;kubernetes;https metric_relabel_configs: - action: keep source_labels: [__name__] - source_labels: [__name__, le] separator: ; regex: apiserver_request_duration_seconds_bucket;(0.15|0.2|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2|3|3.5|4|4.5|6|7|8|9|15|25|40|50) replacement: $$1 action: drop {{ stop enableAPIserverJob }} - job_name: serviceMonitor/default/kube-prometheus-stack-prometheus-node-exporter/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (prometheus-node-exporter);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels:
[__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_jobLabel] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-metrics action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - default - job_name: serviceMonitor/default/kube-prometheus-stack-prometheus/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-prometheus);true replacement: $$1 action: keep - source_labels: 
[__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_self_monitor, __meta_kubernetes_service_labelpresent_self_monitor] separator: ; regex: (true);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-web replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-web action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - default - job_name: 
serviceMonitor/default/kube-prometheus-stack-operator/0 honor_labels: true honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-operator);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: https replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - separator: ; regex: 
(.*) target_label: endpoint replacement: https action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - default - job_name: serviceMonitor/default/kube-prometheus-stack-kubelet/2 honor_labels: true honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics/probes scheme: https authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name, __meta_kubernetes_service_labelpresent_app_kubernetes_io_name] separator: ; regex: (kubelet);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_k8s_app, __meta_kubernetes_service_labelpresent_k8s_app] separator: ; regex: (kubelet);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: https-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 
action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_k8s_app] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: https-metrics action: replace - source_labels: [__metrics_path__] separator: ; regex: (.*) target_label: metrics_path replacement: $$1 action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-kubelet/1 honor_labels: true honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics/cadvisor scheme: https authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name, __meta_kubernetes_service_labelpresent_app_kubernetes_io_name] separator: ; regex: (kubelet);true 
replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_k8s_app, __meta_kubernetes_service_labelpresent_k8s_app] separator: ; regex: (kubelet);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: https-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_k8s_app] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: https-metrics action: replace - source_labels: [__metrics_path__] separator: ; regex: (.*) target_label: metrics_path replacement: $$1 action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true 
enable_http2: true namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-kubelet/0 honor_labels: true honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: https authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name, __meta_kubernetes_service_labelpresent_app_kubernetes_io_name] separator: ; regex: (kubelet);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_k8s_app, __meta_kubernetes_service_labelpresent_k8s_app] separator: ; regex: (kubelet);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: https-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: 
container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_k8s_app] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: https-metrics action: replace - source_labels: [__metrics_path__] separator: ; regex: (.*) target_label: metrics_path replacement: $$1 action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-kube-state-metrics/0 honor_labels: true honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_instance, __meta_kubernetes_service_labelpresent_app_kubernetes_io_instance] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name, __meta_kubernetes_service_labelpresent_app_kubernetes_io_name] separator: ; regex: (kube-state-metrics);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: 
replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - default - job_name: serviceMonitor/default/kube-prometheus-stack-kube-scheduler/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] 
separator: ; regex: (kube-prometheus-stack-kube-scheduler);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_jobLabel] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-metrics action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: 
own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-kube-proxy/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-kube-proxy);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: 
$$1 action: replace - source_labels: [__meta_kubernetes_service_label_jobLabel] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-metrics action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-kube-etcd/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-kube-etcd);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] 
separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_jobLabel] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-metrics action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-kube-controller-manager/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-kube-controller-manager);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: 
(kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_jobLabel] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-metrics action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-coredns/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http authorization: type: 
Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-coredns);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-metrics replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_jobLabel] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-metrics 
action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" namespaces: own_namespace: false names: - kube-system - job_name: serviceMonitor/default/kube-prometheus-stack-apiserver/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: https authorization: type: Bearer credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt server_name: kubernetes follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_component, __meta_kubernetes_service_labelpresent_component] separator: ; regex: (kubernetes);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: https replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: 
[__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_component] separator: ; regex: (.+) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: https action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - default - job_name: serviceMonitor/default/kube-prometheus-stack-alertmanager/0 honor_timestamps: true scrape_interval: 30s scrape_timeout: 10s metrics_path: /metrics scheme: http follow_redirects: true enable_http2: true relabel_configs: - source_labels: [job] separator: ; regex: (.*) target_label: __tmp_prometheus_job_name replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app] separator: ; regex: (kube-prometheus-stack-alertmanager);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release] separator: ; regex: (kube-prometheus-stack);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_service_label_self_monitor, __meta_kubernetes_service_labelpresent_self_monitor] separator: ; regex: (true);true replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] separator: ; regex: http-web replacement: $$1 action: keep - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Node;(.*) 
target_label: node replacement: $$1 action: replace - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] separator: ; regex: Pod;(.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_namespace] separator: ; regex: (.*) target_label: namespace replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: service replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_name] separator: ; regex: (.*) target_label: pod replacement: $$1 action: replace - source_labels: [__meta_kubernetes_pod_container_name] separator: ; regex: (.*) target_label: container replacement: $$1 action: replace - source_labels: [__meta_kubernetes_service_name] separator: ; regex: (.*) target_label: job replacement: $$1 action: replace - separator: ; regex: (.*) target_label: endpoint replacement: http-web action: replace - source_labels: [__address__] separator: ; regex: (.*) modulus: 1 target_label: __tmp_hash replacement: $$1 action: hashmod - source_labels: [__tmp_hash] separator: ; regex: "0" replacement: $$1 action: keep kubernetes_sd_configs: - role: endpoints kubeconfig_file: "" follow_redirects: true enable_http2: true namespaces: own_namespace: false names: - default - job_name: 'kube-state-metrics' static_configs: - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] - job_name: 'node-exporter' kubernetes_sd_configs: - role: endpoints ec2_sd_configs: relabel_configs: - source_labels: [ __address__ ] action: keep regex: '.*:9100$' - action: replace source_labels: [__meta_kubernetes_endpoint_node_name] target_label: nodename {{ start enableJavaMonJob }} - job_name: 'kubernetes-java-jmx' sample_limit: {{javaScrapeSampleLimit}} metrics_path: {{javaPrometheusMetricsEndpoint}} kubernetes_sd_configs: - role: pod relabel_configs: - source_labels: [ __address__ ] action: keep regex: '.*:9404$' - action: 
labelmap regex: __meta_kubernetes_pod_label_(.+) - action: replace source_labels: [ __meta_kubernetes_namespace ] target_label: Namespace - source_labels: [ __meta_kubernetes_pod_name ] action: replace target_label: pod_name - action: replace source_labels: [ __meta_kubernetes_pod_container_name ] target_label: container_name - action: replace source_labels: [ __meta_kubernetes_pod_controller_kind ] target_label: pod_controller_kind - action: replace source_labels: [ __meta_kubernetes_pod_phase ] target_label: pod_controller_phase metric_relabel_configs: - source_labels: [ __name__ ] regex: 'jvm_gc_collection_seconds.*' action: drop {{ stop enableJavaMonJob }} {{ start enableNginxMonJob }} - job_name: 'kubernetes-nginx' sample_limit: {{nginxScrapeSampleLimit}} metrics_path: {{nginxPrometheusMetricsEndpoint}} kubernetes_sd_configs: - role: pod relabel_configs: - source_labels: [ __address__ ] action: keep regex: '.*:10254$' - source_labels: [__meta_kubernetes_pod_container_name] target_label: container action: replace - source_labels: [__meta_kubernetes_pod_node_name] target_label: host action: replace - source_labels: [__meta_kubernetes_namespace] target_label: namespace action: replace metric_relabel_configs: - source_labels: [__name__] regex: 'go_memstats.*' action: drop - source_labels: [__name__] regex: 'go_gc.*' action: drop - source_labels: [__name__] regex: 'go_threads' action: drop - regex: exported_host action: labeldrop {{ stop enableNginxMonJob }} {{ start enableIstioMonJob }} - honor_labels: true job_name: kubernetes-istio kubernetes_sd_configs: - role: pod relabel_configs: - action: keep regex: true source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: drop regex: true source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow - action: replace regex: (https?) 
source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_scheme target_label: __scheme__ - action: replace regex: (.+) source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_path target_label: __metrics_path__ - action: replace regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) replacement: '[$$2]:$$1' source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_port - __meta_kubernetes_pod_ip target_label: __address__ - action: replace regex: (\d+);((([0-9]+?)(\.|$)){4}) replacement: $$2:$$1 source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_port - __meta_kubernetes_pod_ip target_label: __address__ - action: labelmap regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) replacement: __param_$$1 - action: labelmap regex: __meta_kubernetes_pod_label_(.+) - action: replace source_labels: - __meta_kubernetes_namespace target_label: namespace - action: replace source_labels: - __meta_kubernetes_pod_name target_label: pod - action: keep source_labels: [ __address__ ] regex: '.*:15020$$' - action: drop regex: Pending|Succeeded|Failed|Completed source_labels: - __meta_kubernetes_pod_phase {{ stop enableIstioMonJob }} {{ start enableAdotContainerLogsReceiver }} filelog: include: [ /var/log/pods/*/*/*.log ] include_file_name: false include_file_path: true start_at: end operators: # Find out which format is used by kubernetes - type: router id: get-format routes: - output: parser-docker expr: 'body matches "^\\{"' - output: parser-crio expr: 'body matches "^[^ Z]+ "' - output: parser-containerd expr: 'body matches "^[^ Z]+Z"' # Parse CRI-O format - type: regex_parser id: parser-crio regex: '^(?P