Repository: ivx/yet-another-cloudwatch-exporter Branch: master Commit: cbafcecb8da1 Files: 202 Total size: 836.9 KB Directory structure: gitextract_8d8x638_/ ├── .dockerignore ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug.yml │ │ └── feature.yml │ ├── dependabot.yml │ └── workflows/ │ ├── ci.yml │ ├── container_description.yml │ └── golangci-lint.yml ├── .gitignore ├── .golangci.yml ├── .promu.yml ├── .yamllint ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTE.md ├── Dockerfile ├── LICENSE ├── MAINTAINERS.md ├── Makefile ├── Makefile.common ├── NOTICE ├── README.md ├── SECURITY.md ├── VERSION ├── cmd/ │ └── yace/ │ ├── main.go │ ├── main_test.go │ └── scraper.go ├── docker-compose/ │ ├── README.md │ ├── docker-compose.yaml │ ├── grafana/ │ │ └── datasource.yaml │ ├── prometheus.yaml │ └── yace-config.yaml ├── docs/ │ ├── configuration.md │ ├── embedding.md │ ├── feature_flags.md │ └── installation.md ├── examples/ │ ├── alb.yml │ ├── apigw.yml │ ├── apprunner.yaml │ ├── appstream.yml │ ├── backup.yml │ ├── cwagent.yml │ ├── ds.yml │ ├── dx.yml │ ├── ebs.yml │ ├── ec.yml │ ├── ec2.yml │ ├── ecs.yml │ ├── elb.yml │ ├── es.yml │ ├── historic-data.yml │ ├── kafka.yml │ ├── kinesis.yml │ ├── kms.yml │ ├── lambda.yml │ ├── lambda_edge.yml │ ├── logs.yml │ ├── mq.yml │ ├── networkmanager.yml │ ├── ngw.yml │ ├── nlb.yml │ ├── private-link-endpoints.yaml │ ├── private-link-services.yaml │ ├── qldb.yml │ ├── quicksight.yml │ ├── rds.yml │ ├── redshift-serverless.yml │ ├── s3.yml │ ├── ses.yaml │ ├── sns.yml │ ├── sqs.yml │ ├── usage.yml │ └── vpn.yml ├── go.mod ├── go.sum ├── mixin/ │ ├── README.md │ ├── config.libsonnet │ ├── dashboards/ │ │ ├── all.libsonnet │ │ ├── common.libsonnet │ │ ├── ebs.libsonnet │ │ ├── ec2.libsonnet │ │ ├── lambda.libsonnet │ │ ├── rds.libsonnet │ │ └── s3.libsonnet │ ├── jsonnetfile.json │ ├── jsonnetfile.lock.json │ ├── mixin.libsonnet │ └── util.libsonnet └── pkg/ ├── clients/ │ ├── README.md │ ├── account/ │ │ └── 
client.go │ ├── cloudwatch/ │ │ ├── client.go │ │ ├── client_test.go │ │ ├── concurrency_client.go │ │ └── input.go │ ├── factory.go │ ├── factory_test.go │ └── tagging/ │ ├── client.go │ ├── concurrency_client.go │ ├── filters.go │ └── filters_test.go ├── config/ │ ├── config.go │ ├── config_test.go │ ├── feature_flags.go │ ├── feature_flags_test.go │ ├── services.go │ ├── services_test.go │ └── testdata/ │ ├── config_test.yml │ ├── custom_namespace.ok.yml │ ├── custom_namespace_without_name.bad.yml │ ├── custom_namespace_without_namespace.bad.yml │ ├── custom_namespace_without_region.bad.yml │ ├── discovery_job_exported_tags_alias.bad.yml │ ├── discovery_job_exported_tags_mismatch.bad.yml │ ├── discovery_job_type_alias.bad.yml │ ├── discovery_job_type_unknown.bad.yml │ ├── empty_rolearn.ok.yml │ ├── externalid_with_empty_rolearn.bad.yml │ ├── externalid_without_rolearn.bad.yml │ ├── multiple_roles.ok.yml │ ├── sts_region.ok.yml │ └── unknown_version.bad.yml ├── exporter.go ├── exporter_enhancedmetrics_test.go ├── exporter_test.go ├── internal/ │ └── enhancedmetrics/ │ ├── config/ │ │ └── provider.go │ ├── registry.go │ ├── registry_test.go │ ├── service/ │ │ ├── dynamodb/ │ │ │ ├── client.go │ │ │ ├── client_test.go │ │ │ ├── service.go │ │ │ └── service_test.go │ │ ├── elasticache/ │ │ │ ├── client.go │ │ │ ├── client_test.go │ │ │ ├── service.go │ │ │ └── service_test.go │ │ ├── lambda/ │ │ │ ├── client.go │ │ │ ├── client_test.go │ │ │ ├── service.go │ │ │ └── service_test.go │ │ ├── rds/ │ │ │ ├── client.go │ │ │ ├── client_test.go │ │ │ ├── service.go │ │ │ └── service_test.go │ │ └── services.go │ ├── service.go │ └── service_test.go ├── job/ │ ├── cloudwatchrunner/ │ │ ├── customnamespace.go │ │ ├── discovery.go │ │ └── runner.go │ ├── custom.go │ ├── discovery.go │ ├── discovery_test.go │ ├── getmetricdata/ │ │ ├── compact.go │ │ ├── compact_test.go │ │ ├── iterator.go │ │ ├── iterator_test.go │ │ ├── processor.go │ │ ├── processor_test.go │ │ ├── 
windowcalculator.go │ │ └── windowcalculator_test.go │ ├── listmetrics/ │ │ └── processor.go │ ├── maxdimassociator/ │ │ ├── associator.go │ │ ├── associator_api_gateway_test.go │ │ ├── associator_client_vpn_test.go │ │ ├── associator_ddosprotection_test.go │ │ ├── associator_directoryservice_test.go │ │ ├── associator_dx_test.go │ │ ├── associator_ec2_test.go │ │ ├── associator_ec_test.go │ │ ├── associator_ecs_test.go │ │ ├── associator_event_roles_test.go │ │ ├── associator_globalaccelerator_test.go │ │ ├── associator_gwlb_test.go │ │ ├── associator_ipam_test.go │ │ ├── associator_kms_test.go │ │ ├── associator_lambda_test.go │ │ ├── associator_logging_test.go │ │ ├── associator_logs_test.go │ │ ├── associator_mediaconvert_test.go │ │ ├── associator_memorydb_test.go │ │ ├── associator_mq_test.go │ │ ├── associator_qldb_test.go │ │ ├── associator_redshift_serverless_test.go │ │ ├── associator_sagemaker_endpoint_test.go │ │ ├── associator_sagemaker_inf_component_test.go │ │ ├── associator_sagemaker_inf_rec_test.go │ │ ├── associator_sagemaker_pipeline_test.go │ │ ├── associator_sagemaker_processing_test.go │ │ ├── associator_sagemaker_test.go │ │ ├── associator_sagemaker_training_test.go │ │ └── associator_sagemaker_transform_test.go │ ├── resourcemetadata/ │ │ └── resource.go │ ├── scrape.go │ ├── scraper.go │ ├── scraper_test.go │ └── static.go ├── model/ │ ├── model.go │ └── model_test.go └── promutil/ ├── migrate.go ├── migrate_test.go ├── prometheus.go └── prometheus_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ data/ .build/ .tarballs/ !.build/linux-amd64/ !.build/linux-arm64/ !.build/linux-armv7/ !.build/linux-ppc64le/ !.build/linux-riscv64/ !.build/linux-s390x/ ================================================ FILE: .github/FUNDING.yml 
================================================ # These are supported funding model platforms patreon: thomaspeitz ================================================ FILE: .github/ISSUE_TEMPLATE/bug.yml ================================================ name: 🐞 Bug description: File a bug report title: "[BUG] " labels: [bug] body: - type: checkboxes attributes: label: Is there an existing issue for this? description: Please search to see if an issue already exists for the bug you encountered. options: - label: I have searched the existing issues required: true - type: textarea attributes: label: YACE version description: The output of running `yace version`. validations: required: false - type: textarea attributes: label: Config file description: The config file passed to the `--config.file` option. validations: required: false - type: textarea attributes: label: Current Behavior description: A concise description of what you're experiencing. validations: required: false - type: textarea attributes: label: Expected Behavior description: A concise description of what you expected to happen. validations: required: false - type: textarea attributes: label: Steps To Reproduce description: Steps to reproduce the behavior. placeholder: | 1. In this environment... 2. With this config... 3. Run '...' 4. See error... validations: required: false - type: textarea attributes: label: Anything else? description: | Links? References? Anything that will give us more context about the issue you are encountering! Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. validations: required: false ================================================ FILE: .github/ISSUE_TEMPLATE/feature.yml ================================================ name: 🆕 Feature description: Request a new feature title: "[FEATURE] <title>" labels: [enhancement] body: - type: checkboxes attributes: label: Is there an existing issue for this? 
description: Please search to see if an issue already exists for the feature you are requesting. options: - label: I have searched the existing issues required: true - type: textarea attributes: label: Feature description description: A concise description of what you're expecting. validations: required: true - type: textarea attributes: label: What might the configuration look like? description: Example configuration (useful as a baseline during development). placeholder: | ```yml discovery: jobs: - type: <name of service> period: 30 length: 600 metrics: - name: SomeExportedMetric statistics: [Minimum, Maximum] ``` validations: required: false - type: textarea attributes: label: Anything else? description: | Links? References? Anything that will give us more context about the issue you are encountering! Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. validations: required: false ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: gomod directory: / schedule: interval: monthly open-pull-requests-limit: 10 groups: aws-sdk-v2: patterns: - "github.com/aws/aws-sdk-go-v2*" - package-ecosystem: github-actions directory: / schedule: interval: monthly open-pull-requests-limit: 10 ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: tags: - 'v*' branches: - master pull_request: workflow_call: jobs: test_go: name: Go tests runs-on: ubuntu-latest container: image: quay.io/prometheus/golang-builder:1.26-base steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: prometheus/promci-setup@5af30ba8c199a91d6c04ebdc3c48e630e355f62d # v0.1.0 - run: make test build: name: Build for common architectures runs-on: ubuntu-latest if: | !(github.event_name == 'push' && github.event.ref == 
'refs/heads/master') && !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) strategy: matrix: thread: [ 0, 1, 2 ] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: prometheus/promci/build@769ee18070cd21cfc2a24fa912349fd3e48dee58 # v0.6.0 with: promu_opts: "-p linux/amd64 -p windows/amd64 -p darwin/amd64 -p linux/arm64 -p windows/arm64 -p darwin/arm64" parallelism: 3 thread: ${{ matrix.thread }} build_all: name: Build for all architectures runs-on: ubuntu-latest if: | (github.event_name == 'push' && github.event.ref == 'refs/heads/master') || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) strategy: matrix: thread: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: prometheus/promci/build@769ee18070cd21cfc2a24fa912349fd3e48dee58 # v0.6.0 with: parallelism: 12 thread: ${{ matrix.thread }} verify-example-configs: name: Verify runs-on: ubuntu-latest container: image: quay.io/prometheus/golang-builder:1.26-base steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - run: make build - name: Verify example configs run: find ./examples -name "*.yml" -print0 | xargs -0 -I % ./yace verify-config -config.file % publish_master: name: Publish master branch artifacts runs-on: ubuntu-latest needs: [test_go, build_all, verify-example-configs] if: | (github.repository == 'prometheus-community/yet-another-cloudwatch-exporter') && (github.event_name == 'push' && github.event.ref == 'refs/heads/master') steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: prometheus/promci/publish_main@769ee18070cd21cfc2a24fa912349fd3e48dee58 # v0.6.0 with: docker_hub_organization: prometheuscommunity docker_hub_login: ${{ secrets.docker_hub_login }} docker_hub_password: ${{ secrets.docker_hub_password }} quay_io_organization: prometheuscommunity quay_io_login: ${{ 
secrets.quay_io_login }} quay_io_password: ${{ secrets.quay_io_password }} publish_release: name: Publish release artifacts runs-on: ubuntu-latest needs: [test_go, build_all, verify-example-configs] if: | (github.repository == 'prometheus-community/yet-another-cloudwatch-exporter') && (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v0.')) steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: prometheus/promci/publish_release@769ee18070cd21cfc2a24fa912349fd3e48dee58 # v0.6.0 with: docker_hub_organization: prometheuscommunity docker_hub_login: ${{ secrets.docker_hub_login }} docker_hub_password: ${{ secrets.docker_hub_password }} quay_io_organization: prometheuscommunity quay_io_login: ${{ secrets.quay_io_login }} quay_io_password: ${{ secrets.quay_io_password }} github_token: ${{ secrets.PROMBOT_GITHUB_TOKEN }} ================================================ FILE: .github/workflows/container_description.yml ================================================ --- name: Push README to Docker Hub on: push: paths: - "README.md" - "README-containers.md" - ".github/workflows/container_description.yml" branches: [ main, master ] permissions: contents: read jobs: PushDockerHubReadme: runs-on: ubuntu-latest name: Push README to Docker Hub if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. 
steps: - name: git checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 env: DOCKER_USER: ${{ secrets.DOCKER_HUB_LOGIN }} DOCKER_PASS: ${{ secrets.DOCKER_HUB_PASSWORD }} with: destination_container_repo: ${{ env.DOCKER_REPO_NAME }} provider: dockerhub short_description: ${{ env.DOCKER_REPO_NAME }} # Empty string results in README-containers.md being pushed if it # exists. Otherwise, README.md is pushed. readme_file: '' PushQuayIoReadme: runs-on: ubuntu-latest name: Push README to quay.io if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to quay.io uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 env: DOCKER_APIKEY: ${{ secrets.QUAY_IO_API_TOKEN }} with: destination_container_repo: ${{ env.DOCKER_REPO_NAME }} provider: quay # Empty string results in README-containers.md being pushed if it # exists. Otherwise, README.md is pushed. 
readme_file: '' ================================================ FILE: .github/workflows/golangci-lint.yml ================================================ --- # This action is synced from https://github.com/prometheus/prometheus name: golangci-lint on: push: branches: [main, master, 'release-*'] paths: - "go.sum" - "go.mod" - "**.go" - "scripts/errcheck_excludes.txt" - ".github/workflows/golangci-lint.yml" - ".golangci.yml" tags: ['v*'] pull_request: permissions: # added using https://github.com/step-security/secure-repo contents: read jobs: golangci: permissions: contents: read # for actions/checkout to fetch code pull-requests: read # for golangci/golangci-lint-action to fetch pull requests name: lint runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Install Go uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version: 1.26.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Get golangci-lint version id: golangci-lint-version run: echo "version=$(make print-golangci-lint-version)" >> $GITHUB_OUTPUT - name: Lint uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0 with: args: --verbose version: ${{ steps.golangci-lint-version.outputs.version }} ================================================ FILE: .gitignore ================================================ .build yet-another-cloudwatch-exporter !charts/yet-another-cloudwatch-exporter vendor dist /yace *.tar.gz ================================================ FILE: .golangci.yml ================================================ version: "2" output: formats: text: path: stderr colors: false linters: default: none enable: - asasalint - bodyclose - copyloopvar - errcheck - errorlint - exhaustive - govet - 
ineffassign - misspell - nilerr - nolintlint - nonamedreturns - predeclared - revive - sloglint - staticcheck - unconvert - unused exclusions: generated: lax presets: - comments - common-false-positives - legacy - std-error-handling paths: - third_party$ - builtin$ - examples$ formatters: enable: - gofmt - gofumpt - goimports settings: goimports: local-prefixes: - github.com/prometheus-community/yet-another-cloudwatch-exporter exclusions: generated: lax paths: - third_party$ - builtin$ - examples$ ================================================ FILE: .promu.yml ================================================ go: # This must match .circle/config.yml. version: 1.26 repository: path: github.com/prometheus-community/yet-another-cloudwatch-exporter build: binaries: - name: yace path: ./cmd/yace ldflags: | -X github.com/prometheus/common/version.Version={{.Version}} -X github.com/prometheus/common/version.Revision={{.Revision}} -X github.com/prometheus/common/version.Branch={{.Branch}} -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}} -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}} tarball: files: - LICENSE - NOTICE ================================================ FILE: .yamllint ================================================ --- extends: default ignore: | **/node_modules web/api/v1/testdata/openapi_*_golden.yaml rules: braces: max-spaces-inside: 1 level: error brackets: max-spaces-inside: 1 level: error commas: disable comments: disable comments-indentation: disable document-start: disable indentation: spaces: consistent indent-sequences: consistent key-duplicates: ignore: | config/testdata/section_key_dup.bad.yml line-length: disable truthy: check-keys: false ================================================ FILE: CHANGELOG.md ================================================ ## main / (unreleased) ## 0.64.0 / 2026-03-27 **Important news and breaking changes** - BREAKING CHANGE: AWS SDK v1 support has been removed. 
The `aws-sdk-v1` feature flag is now a no-op and will be silently ignored. AWS SDK v1 reached end-of-support on July 31, 2025. SDK v2 has been the default since v0.63.0 (September 2025). Users who were passing `--enable-feature aws-sdk-v1` should remove the flag, as it no longer has any effect. If you use YACE as a library, the `v1` and `v2` sub-packages under `pkg/clients/` have been removed. All client implementations now live directly in their parent packages (e.g. `pkg/clients/cloudwatch`, `pkg/clients/tagging`, `pkg/clients/account`). Import paths like `pkg/clients/v1`, `pkg/clients/cloudwatch/v2`, etc. must be updated accordingly. * [CHANGE] Remove AWS SDK v1 support and deprecate `aws-sdk-v1` feature flag by @tristanburgess. #1825 * [CHANGE] Add Andrii Kushch and Tristan Burgess as maintainers by @cristiangreco. #1788 * [FEATURE] Implement Enhanced Metrics framework and initial set of metrics by @andriikushch. #1795 * [FEATURE] Add support for `AWS/EKS` namespace by @LS80. #1760 * [FEATURE] Split out Bedrock metrics into all needed namespaces by @tristanburgess. #1766 * [FEATURE] Separate aliases for Bedrock namespaces by @tristanburgess. #1767 * [ENHANCEMENT] Update Go build to 1.26, replace `gopkg.in/yaml.v2` with supported fork, sync upstream Prometheus files and migrate PromCI tooling by @SuperQ. #1831 * [ENHANCEMENT] Add AWS/Bedrock GuardrailArn dimension-based resource tagging by @tristanburgess. #1761 * [ENHANCEMENT] Add DimensionRegexps support for AWS Backup service by @amitshl. #1775 * [ENHANCEMENT] Add DimensionRegexps for AWS/Cassandra by @bdeore. #1693 * [ENHANCEMENT] Add DimensionRegexp to ElasticBeanstalk by @benbridts. #1690 * [ENHANCEMENT] Test exporter with mocked clients by @jeschkies. #1791 * [ENHANCEMENT] Add privatelink examples to docs by @cuscal-brad. #1765 * [BUGFIX] Fix AWS SageMaker dimension name handling for case sensitivity by @andriikushch. #1793 * [BUGFIX] Fix Docker configuration paths for AWS credentials by @andriikushch. 
#1804 ## 0.63.0 / 2025-09-25 **Important news and breaking changes** - NOTE: As of Prometheus 3.0, UTF-8 strings are valid for metric names and label names. However, for backward compatibility, this release of YACE still uses the old, stricter legacy validation scheme. UTF-8 validation will be enabled in a future version of YACE, thus requiring that your remote destination is compatible with UTF-8 support. - BREAKING CHANGE: the AWS SDK v2 is now the default in YACE. Use the flag `aws-sdk-v1` to switch back to SDK v1. Flag `aws-sdk-v2` has been removed. - NEW FEATURE: `exportAllDataPoints`, enables the inclusion of past metric data points from the CloudWatch response if available. * [CHANGE] Make aws sdk v2 the default choice by @cristiangreco * [FEATURE] Support history data export by @woehrl01 * [FEATURE] Add AWS/Transfer as available service by @thepalbi * [FEATURE] Add auto-discovery for Directory Services(MicrosoftAD) by @RuslanMustaev * [FEATURE] Add support for Redshift-Serverless by @nickbazinet * [FEATURE] Add db connections avg panel to RDS dashboard by @yduartep * [FEATURE] Add example for lambda_edge by @tyagian * [FEATURE] sagemaker: additional InferenceComponent support by @tristanburgess * [ENHANCEMENT] Update Go version by @SuperQ * [ENHANCEMENT] Use Prometheus common version library by @SuperQ * [ENHANCEMENT] Update container repositories by @SuperQ * [ENHANCEMENT] Speed up build metric name by @jeschkies * [ENHANCEMENT] Add guard to hot logging location in associator by @thepalbi * [ENHANCEMENT] Update resource association logic to try both with and without dimension fixes by @tristanburgess * [ENHANCEMENT] Change discovery runtime model field from Type -> Namespace by @kgeckhart * [BUGFIX] Fix `CachingFactory` concurrent usage issues by @andriikushch * [BUGFIX] Correctly run tests in CI and fix failing tests by @jeschkies * [BUGFIX] Fix doc about non-existing `debug` flag by @zipkid * [BUGFIX] Update URL to Helm Chart in docs by @koralowiec *
[BUGFIX] Add missing license header to `associator_logging_test.go` by @cristiangreco * [BUGFIX] Dashboards: replace `scrape_job` label with `job` by @yduartep * [BUGFIX] RDS dashboard: use average for cpu utilization to align with AWS best practices by @yduartep ## 0.62.1 / 2025-01-03 **Important news and breaking changes** Bugfix release to address artifacts build error. The most important news is the same as 0.62.0: as of November 2024, YACE is part of prometheus-community. Read more about it in these announcement posts: - https://prometheus.io/blog/2024/11/19/yace-joining-prometheus-community/ - https://grafana.com/blog/2024/11/19/yace-moves-to-prometheus-community/ * [ENHANCEMENT] Adopt log/slog, drop custom logging pkg by @tjhop * [ENHANCEMENT] Bump github.com/prometheus/common from 0.60.1 to 0.61.0 * [ENHANCEMENT] Bump golang.org/x/sync from 0.9.0 to 0.10.0 * [ENHANCEMENT] Bump the aws-sdk-v2 group * [ENHANCEMENT] Synchronize common files from prometheus/prometheus * [ENHANCEMENT] Update CHANGELOG format by @SuperQ * [BUGFIX] Fix artifact publishing by @SuperQ ## 0.62.0 / 2024-12-19 **Important news and breaking changes** * As of November 2024, YACE is part of prometheus-community. 
Read more about it in these announcement posts: - https://prometheus.io/blog/2024/11/19/yace-joining-prometheus-community/ - https://grafana.com/blog/2024/11/19/yace-moves-to-prometheus-community/ **Bugfixes and features** Features: * Add ContainerInsights service by @JetSquirrel * Add AWS/Scheduler and AWS/ECR services by @andriikushch * Add AWS/VpcLattice service by @greymd * Add AWS/QuickSight service by @choppedpork * Add AWS/Timestream service by @andriikushch * Add Network Manager / Cloud WAN support by @kylehodgetts * RDS: include RDS Proxy metrics within the RDS namespace by @vitaliyf * Mediapackage: include mediapackagev2 namespace by @henrylaiBrightcove Bugs: * Add parentheses to sanitize list to prevent invalid metric name generation by @nixargh Docs: * Review and update supported services in README by @cristiangreco * Mention support for AWS/MediaPackage by @prathamesh-sonpatki * Update README and MAINTAINERS files to mention the move to prometheus-community by @cristiangreco Refactoring: * Start a unified scraper by @kgeckhart * Refactor prom metric creation by @kgeckhart * Update for Prometheus Community by @SuperQ * Update Docker build by @SuperQ * Fix linting issues detected by golangci-lint 1.60.3 by @cristiangreco * Update build tools and CI to Go 1.23 by @cristiangreco **Dependencies** * Bump actions/checkout from 4.2.0 to 4.2.2 * Bump alpine from 3.20.1 to 3.20.3 * Bump github.com/aws/aws-sdk-go from 1.54.7 to 1.55.5 * Bump github.com/aws/smithy-go from 1.22.0 to 1.22.1 * Bump github.com/prometheus/client_golang from 1.19.1 to 1.20.5 * Bump github.com/prometheus/common from 0.54.0 to 0.60.1 * Bump github.com/stretchr/testify from 1.9.0 to 1.10.0 * Bump github.com/urfave/cli/v2 from 2.27.2 to 2.27.5 * Bump golang from 1.22 to 1.23 * Bump golang.org/x/sync from 0.7.0 to 0.9.0 * Bump golangci/golangci-lint-action from 6.0.1 to 6.1.1 * Bump grafana/regexp to `20240607082908-2cb410fa05da` * Bump the aws-sdk-v2 group **New contributors** * 
@prathamesh-sonpatki made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1465 * @JetSquirrel made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1463 * @greymd made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1506 * @choppedpork made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1477 * @SuperQ made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1568 * @prombot made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1570 * @nixargh made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1563 * @kylehodgetts made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1580 * @vitaliyf made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1501 * @henrylaiBrightcove made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1544 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.61.2...v0.62.0 ## 0.61.2 / 2024-06-25 Bugfix release to update the `goreleaser` configuration (again!), please refer to the release notes for `0.61.0` for actual code changes. https://github.com/prometheus-community/yet-another-cloudwatch-exporter/releases/tag/v0.61.0 ## 0.61.1 / 2024-06-25 Bugfix release to update the `goreleaser` configuration, please refer to the release notes for `0.61.0` for actual code changes. 
https://github.com/prometheus-community/yet-another-cloudwatch-exporter/releases/tag/v0.61.0 ## 0.61.0 / 2024-06-25 **Important news and breaking changes** * This release adds support for AWS account aliases (by @thepalbi). If the role used by YACE has `"iam:ListAccountAliases"` permission, the account alias (if any) is added as a label to the `aws_account_info` metric. **Bugfixes and features** Features: * Add AWS/EC2CapacityReservations to the services list by @luismy * Add support for MediaPackage metrics by @theunissenne * Add AWS/AppRunner as supported service by @fabiiw05 Bugs: * Fix association with gwlb by @vainiusd Refactoring: * Add support for batching by time params by @kgeckhart **Dependencies** * Bump alpine from 3.19.1 to 3.20.1 * Bump github.com/aws/aws-sdk-go from 1.53.1 to 1.54.7 * Bump github.com/aws/aws-sdk-go-v2/service/ec2 from 1.161.4 to 1.162.0 in the aws-sdk-v2 group * Bump github.com/prometheus/common from 0.53.0 to 0.54.0 * Bump golangci/golangci-lint-action from 5.3.0 to 6.0.1 * Bump goreleaser/goreleaser-action from 5 to 6 * Bump the aws-sdk-v2 group **New contributors** * @luismy made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1341 * @fabiiw05 made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1433 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.60.0...v0.61.0 ## 0.60.0 / 2024-05-14 **Bugfixes and features** Features: * add cloudwatch log metric support by @vainiusd * feat: add AWS/RUM as supported service by @hexionas Bugs: * Fix all value for function_name variable in lambda dashboard by @thepalbi * Fix rounding period deprecation notice by @cristiangreco Docs: * README: update config example by @cristiangreco * Fix ElastiCache metric namespace typo on README by @Roberdvs Refactoring: * getmetricdata: Move batching to an iterator by @kgeckhart **Dependencies** 
* Bump github.com/aws/aws-sdk-go from 1.51.21 to 1.53.1 * Bump github.com/aws/aws-sdk-go-v2/service/ec2 from 1.156.0 to 1.160.0 * Bump github.com/prometheus/client_golang from 1.19.0 to 1.19.1 * Bump github.com/prometheus/common from 0.52.3 to 0.53.0 * Bump github.com/urfave/cli/v2 from 2.27.1 to 2.27.2 * Bump golangci/golangci-lint-action from 4.0.0 to 5.3.0 * Bump the aws-sdk-v2 group with 13 updates **New contributors** * @Roberdvs made their first contribution * @hexionas made their first contribution **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.59.0...v0.60.0 ## 0.59.0 / 2024-04-18 **Important news and breaking changes** This release brings a bunch of breaking changes: * Setting `roundingPeriod` for discovery jobs is deprecated, a warning will be logged at startup. This is being deprecated in favor of always using the metric period. The implementation for `roundingPeriod` can result in inconsistent Start and EndTime between batches. This negates its intent to ensure Start and EndTimes align with the metric period for [CloudWatch best practices](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_GetMetricData.html). This has the potential to produce data which will look inaccurate when compared against CloudWatch itself driving a lot of confusion. See https://github.com/prometheus-community/yet-another-cloudwatch-exporter/issues/1290 for further context. * Setting `delay` at the metric level is deprecated, a warning will be logged at startup. This `delay` configuration has existed for a long time but was never utilized. Deprecating it and eventually removing it was chosen to simplify the configuration. See https://github.com/prometheus-community/yet-another-cloudwatch-exporter/issues/1290#issuecomment-1948904375 for further context. 
* For discovery jobs, the `type` field and the keys of `exportedTagsOnMetrics` must be the AWS namespace rather than the alias (the README contains an up-to-date list of namespaces). Aliases are not allowed anymore. An error will be thrown at startup if an invalid namespace or an alias is used. * Some metric names have been changed to avoid duplicating the namespace. This includes: - `aws_es_esreporting_failed_request_sys_err_count` is `aws_es_reporting_failed_request_sys_err_count` - `aws_es_esreporting_failed_request_user_err_count` is `aws_es_reporting_failed_request_user_err_count` - `aws_es_esreporting_request_count` is `aws_es_reporting_request_count` - `aws_es_esreporting_success_count` is `aws_es_reporting_success_count` - `aws_kafka_kafka_app_logs_disk_used` is `aws_kafka_app_logs_disk_used` - `aws_kafka_kafka_data_logs_disk_used` is `aws_kafka_data_logs_disk_used` - `aws_rds_rdsto_aurora_postgre_sqlreplica_lag` is `aws_rds_to_aurora_postgre_sqlreplica_lag` - `aws_glue_glue_.*` is `aws_glue_.*` These breaking changes will allow making the configuration easier to understand and less error prone, and also to build better documentation around supported services.
**Bugfixes and features** Features: * Add AWS/SecretsManager to the services list by @taraspos * Support partner events buses by @HristoStoyanovYotpo * `discovery.exportedTagsOnMetrics`: validate that keys match one of the job types defined by @cristiangreco Refactoring: * Update comment in factory.go by @andriikushch * getmetricdata: move window calculator to processor by @kgeckhart * promutil: clean up prom metric names that duplicate parts of the namespace by @tristanburgess * promutil: rewrite sanitisation funcs for memory optimisation by @cristiangreco * Do not allow using aliases as job types in discovery jobs by @cristiangreco **Dependencies** * Bump github.com/aws/aws-sdk-go from 1.51.16 to 1.51.21 * Bump github.com/aws/aws-sdk-go-v2 group * Bump github.com/prometheus/common from 0.52.2 to 0.52.3 **New contributors** * @taraspos made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1330 * @HristoStoyanovYotpo made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1359 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.58.0...v0.59.0 ## 0.58.0 / 2024-04-06 **Bugfixes and features** Features: * Simplify CloudWatch API call counters by @kgeckhart Bugs: * Fixed issue with generated Prometheus metric name when working with AWS namespaces which have a leading special character, like `/aws/sagemaker/TrainingJobs` by @tristanburgess Refactoring: * Add abstraction for `GetMetricsData` processing by @kgeckhart * `GetMetricData`: refactor QueryID generation and result mapping by @kgeckhart * Refactored out the name-building part of `promutil.BuildNamespaceInfoMetrics()` and `promutil.BuildMetrics()` into `promutil.BuildMetricName()` by @tristanburgess * Set initial maps size in promutil/migrate by @cristiangreco **Dependencies** * Bump github.com/aws/aws-sdk-go from 1.50.30 to 1.51.16 * Bump 
github.com/prometheus/common from 0.49.0 to 0.52.2 * Bump golang.org/x/sync from 0.6.0 to 0.7.0 * Bump the aws-sdk-v2 group with 14 updates **New contributors** * @tristanburgess made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1351 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.57.1...v0.58.0 ## 0.57.1 / 2024-03-07 **Important news and breaking changes** * Reverted a change from 0.57.0 to fix scraping of ApiGateway resources. **Bugfixes and features** Bugs: * ApiGateway: bugfix to restore FilterFunc for correct mapping of resources by @cristiangreco **Dependencies** ## What's Changed * Bump github.com/aws/aws-sdk-go from 1.50.26 to 1.50.30 * Bump github.com/prometheus/client_golang from 1.18.0 to 1.19.0 * Bump github.com/prometheus/common from 0.48.0 to 0.49.0 * Bump github.com/stretchr/testify from 1.8.4 to 1.9.0 * Bump the aws-sdk-v2 group **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.57.0...v0.57.1 # v0.57.0 **Important news and breaking changes** * New job setting `includeContextOnInfoMetrics` can be used to include contextual information (account_id, region, and customTags) on "info" metrics and cloudwatch metrics. This can be particularly useful when cloudwatch metrics might not be present or when using "info" metrics to understand where your resources exist. * No more need to add the `apigateway:GET` permissions for ApiGateway discovery jobs, as that API is not being used anymore. 
**Bugfixes and features** Features: * Add serverless ElastiCache support by @pkubicsek-sb * Add GWLB support by @vainiusd * Add support for KMS metrics by @daharon * Optionally include context labels (account, region, customTags) on info metrics with `includeContextOnInfoMetrics` by @kgeckhart * Improve usability and performance of searchTags by @kgeckhart * Add metric yace_cloudwatch_getmetricdata_metrics_total by @keyolk Bugs: * Fix race condition in scraper registry usage by @cristiangreco * Restore default behaviour of returning nil/absent metrics as NaN by @nhinds * Remove filtering of ApiGateway namespace resources by @cristiangreco Refactoring: * Refactor dimensions regexp usage for discovery jobs by @cristiangreco * Simplify associator usage by @kgeckhart * Update build tools and CI to go 1.22 by @cristiangreco * Restructure fields on CloudwatchData by @kgeckhart **Dependencies** * Bump alpine from 3.19.0 to 3.19.1 * Bump github.com/aws/aws-sdk-go from 1.49.19 to 1.50.26 * Bump github.com/aws/smithy-go from 1.19.0 to 1.20.1 * Bump github.com/prometheus/common from 0.45.0 to 0.48.0 * Bump golang from 1.21 to 1.22 * Bump golangci/golangci-lint-action from 3.7.0 to 4.0.0 * Bump the aws-sdk-v2 group **New contributors** * @vainiusd made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1093 * @daharon made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/1306 * @keyolk made their first contribution in https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pull/939 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.56.0...v0.57.0 # v0.56.0 **Important news and breaking changes** * Release v0.55.0 didn't include binaries artifact due to an issue with the release pipeline. 
* The `list-metrics-callback` and `max-dimensions-associator` feature flags have been removed: their behaviour is now the new default. **Bugfixes and features** Features: * Add new CloudWatch API concurrency limiter by @thepalbi * Remove feature flag `list-metrics-callback` by @cristiangreco * Remove feature flag `max-dimensions-associator` by @cristiangreco * Add support for AWS/Bedrock metrics by @thepalbi * Add support for AWS/Events by @raanand-dig * Add support for AWS/DataSync by @wkneewalden * Add support for AWS/IPAM by @pkubicsek-sb Bugs: * Remove unsupported MWAA resource filter by @matej-g * DDoSProtection: Include regionless protectedResources in us-east-1 by @kgeckhart * aws sdk v2: ensure region is respected for all aws clients by @kgeckhart * SageMaker: Associator buildLabelsMap to lower case EndpointName to match ARN by @GGonzalezGomez * Update goreleaser action by @cristiangreco Refactoring: * Decouple config models from internal models by @cristiangreco * Change config Validate() signature to include model conversion by @cristiangreco **Dependencies** * Bump actions/setup-go from 4 to 5 * Bump alpine from 3.18.3 to 3.19.0 * Bump docker/setup-buildx-action from 2 to 3 * Bump docker/setup-qemu-action from 2 to 3 * Bump github.com/aws/aws-sdk-go from 1.45.24 to 1.49.19 * Bump github.com/aws/smithy-go from 1.17.0 to 1.19.0 * Bump github.com/prometheus/client_golang from 1.16.0 to 1.18.0 * Bump github.com/prometheus/common from 0.44.0 to 0.45.0 * Bump github.com/urfave/cli/v2 from 2.25.7 to 2.27.1 * Bump golang.org/x/sync from 0.3.0 to 0.6.0 * Bump goreleaser/goreleaser-action from 4 to 5 * Bump the aws-sdk-v2 group dependencies **New contributors** * @GGonzalezGomez * @wkneewalden * @pkubicsek-sb **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.55.0...v0.56.0 # v0.55.0 **Important news and breaking changes** * jobs of type `customNamespace`, which were deprecated in `v0.51.0`, are now 
**un-deprecated** due to customers' feedback * new feature flag `always-return-info-metrics`: return info metrics even if there are no CloudWatch metrics for the resource. This is useful if you want to get a complete picture of your estate, for example if you have some resources which have not yet been used. **Bugfixes and features** Features: * Un-deprecate custom namespace jobs by @cristiangreco * scrape: Return resources even if there are no metrics by @iainlane * kinesisanalytics application: add tags support by @raanand-dig * Add support for AWS/ClientVPN by @hc2p * Add support for QLDB by @alexandre-alvarengazh Bugs: * main: Initialise logger when exiting if needed by @iainlane Docs: * Create sqs.yml example file by @dverzolla Refactoring: * Update code to go 1.21 by @cristiangreco * aws sdk v2 use EndpointResolverV2 by @kgeckhart * move duplicated fields from CloudwatchData to a new JobContext by @kgeckhart **Dependencies** * Bump github.com/aws/aws-sdk-go from 1.44.328 to 1.45.7 * Bump the aws-sdk-v2 group with 2 updates * Bump actions/checkout from 3 to 4 by **New Contributors** * @raanand-dig * @dverzolla * @iainlane * @hc2p * @alexandre-alvarengazh **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.54.1...v0.55.0 # v0.54.1 Bugs: * sdk v2: Set RetryMaxAttempts on root config instead client options by @kgeckhart * Match FIPS implementation between sdk v1 and sdk v2 by @kgeckhart * Fix regex for vpc-endpoint-service by @cristiangreco **Dependencies** * Bump golangci/golangci-lint-action from 3.6.0 to 3.7.0 * Bump github.com/aws/aws-sdk-go from 1.44.327 to 1.44.328 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.54.0...v0.54.1 # v0.54.0 **Bugfixes and features** Features: * Log features enabled at startup by @cristiangreco * Use go-kit logger and add `log.format` flag by @cristiangreco Bugs: * Remove tagged resource requirement from TrustedAdvisor by 
@kgeckhart * Fix: RDS dashboard filtering by job value by @andriikushch * Review dimensions regexps for APIGateway by @cristiangreco * Fix syntax in rds.libsonnet by @andriikushch * Fix the `FilterId` label value selection for s3 dashboard by @andriikushch * MaxDimAssociator: loop through all mappings by @cristiangreco * MaxDimAssociator: wrap some expensive debug logs by @cristiangreco * MaxDimAssociator: compile AmazonMQ broker suffix regex once by @cristiangreco * Limit number of goroutines for GetMetricData calls by @cristiangreco * Reduce unnecessary pointer usage in getmetricdata code path by @kgeckhart * Improve perf in discovery jobs metrics to data lookup by @thepalbi * Improve FIPS endpoints resolve logic for sdk v1 by @thepalbi Docs: * Add more config examples (ApiGW, SES, SNS, ECS) by @cristiangreco Refactoring: * Refactor clients.Cache -> clients.Factory by @kgeckhart * dependabot: use group updates for aws sdk v2 by @cristiangreco * Add debug logging to maxdimassociator by @cristiangreco **Dependencies** New dependencies: * github.com/go-kit/log v0.2.1 Updates: * Docker image: bump alpine from 3.18.2 to 3.18.3 * Docker image: bump golang from 1.20 to 1.21 * Bump github.com/aws/smithy-go from 1.13.5 to 1.14.2 * Bump github.com/aws/aws-sdk-go and aws-sdk-go-v2 to latest versions **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.53.0...v0.54.0 # v0.53.0 **Bugfixes and features** Services: * Add Auto Discovery Support For Sagemaker by @charleschangdp * Add support for AWS/TrustedAdvisor by @cristiangreco Bugs: * fix(kafkaconnect): update resource filter by @cgowthaman * Validate should fail when no roles are configured by @thepalbi * Fix default value for nilToZero and addCloudwatchTimestamp in static job by @cristiangreco * ddos protection: Discover resources outside us-east-1 **Dependencies** * Bump github.com/aws/aws-sdk-go from 1.44.284 to 1.44.290 * Bump github.com/aws/aws-sdk-go-v2/service/amp from 
1.16.12 to 1.16.13 * Bump github.com/aws/aws-sdk-go-v2/service/apigatewayv2 from 1.13.12 to 1.13.13 * Bump github.com/aws/aws-sdk-go-v2/service/cloudwatch from 1.26.1 to 1.26.2 * Bump github.com/aws/aws-sdk-go-v2/service/ec2 from 1.100.0 to 1.102.0 * Bump github.com/prometheus/client_golang from 1.15.1 to 1.16.0 * Bump github.com/prometheus/common from 0.43.0 to 0.44.0 * Bump github.com/urfave/cli/v2 from 2.25.6 to 2.25.7 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.52.0...v0.53.0 # v0.52.0 **Important news and breaking changes** This releases introduces the feature flag `aws-sdk-v2` (by @kgeckhart), which changes YACE networking layer to use the AWS sdk v2 package. Read on for more details and considerations. * The main benefit of sdk v2 is deserialization/serialization is done via code generation vs reflection which drastically lowers memory/cpu usage for large scrape jobs * Considerations before enabling sdk v2: 1. FIPS is not supported in v2 as v2 delegates all URL resolution to the sdk and AWS does not have FIPS compliant endpoints for AutoScaling API and Tagging API. The v1 implementation worked around this by hard coding FIPS URLs where they existed and using non-FIPS URLs otherwise. This work around was not ported to v2 and is unlikely to be ported. 2. sdk v2 uses regional sts endpoints by default vs global sts which is [considered legacy by aws](https://docs.aws.amazon.com/sdkref/latest/guide/feature-sts-regionalized-endpoints.html). The `sts-region` job configuration is still respected when setting the region for sts and will be used if provided. If you still require global sts instead of regional set the `sts-region` to `aws-global`. 
**Bugfixes and features** Features: * Discovery jobs support `recentlyActiveOnly` parameter to reduce number of old metrics returned by CloudWatch API by @PerGon * Feature flag `aws-sdk-v2`: use the more performant AWS sdk v2 (see above section) by @kgeckhart Services: * Add support for API Gateway V2 by @matej-g * Add support for MediaConvert by @theunissenne * Add support for CWAgent by @cristiangreco * Add support for memorydb by @glebpom Docs: * ALB example: use Average for ConsumedLCUs by @cristiangreco * Update configuration.md: deprecated custom namespace jobs by @wimsymons * Update permissions examples and docs in readme by @kgeckhart * Add example for ElastiCache by @cristiangreco * Update mixin readme by @cristiangreco Bugs: * Fix AmazonMQ Broker name dimension match by @cristiangreco * Fix invalid GH action file and broken test case by @cristiangreco * Fix namespace case in metrics conversion by @cristiangreco * Make exporter options a non-global type by @kgeckhart * Fix debug logging in discovery jobs by @cristiangreco Refactoring: * Refactor AWS sdk client usage to hide behind new ClientCache by @kgeckhart * Introduce model types to replace sdk types in cloudwatch client by @kgeckhart **Dependencies** New dependencies: * github.com/aws/aws-sdk-go-v2/config 1.18.27 * github.com/aws/aws-sdk-go-v2/service/amp 1.16.11 * github.com/aws/aws-sdk-go-v2/service/apigateway 1.13.13 * github.com/aws/aws-sdk-go-v2/service/autoscaling 1.28.9 * github.com/aws/aws-sdk-go-v2/service/cloudwatch 1.26.1 * github.com/aws/aws-sdk-go-v2/service/databasemigrationservice 1.25.7 * github.com/aws/aws-sdk-go-v2/service/ec2 1.100.0 * github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi 1.14.14 * github.com/aws/aws-sdk-go-v2/service/storagegateway 1.18.14 Updates: * Bump alpine from 3.17.3 to 3.18.2 * Bump github.com/aws/aws-sdk-go from 1.44.249 to 1.44.284 * Bump github.com/prometheus/common from 0.42.0 to 0.43.0 * Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3 * 
Bump github.com/stretchr/testify from 1.8.2 to 1.8.4 * Bump github.com/urfave/cli/v2 from 2.25.1 to 2.25.6 * Bump golang.org/x/sync from 0.1.0 to 0.3.0 * Bump golangci/golangci-lint-action from 3.4.0 to 3.6.0 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.51.0...v0.52.0 # v0.51.0 **Important breaking changes** * Jobs of type `customNamespace` are **deprecated** and might be removed in a future release (please reach out if you're still using this feature) **Bugfixes and features** Features: * Add feature flags support by @thepalbi * Feature flag `max-dimensions-associator`: new resource-matching algorithm for discovery jobs. It fixes metrics attribution for ECS. Please test it out and report any issue! * Feature flag `list-metrics-callback`: reduce memory usage of ListMetrics API requests Services: * Add support for AWS/Usage namespace by @cristiangreco * Fix ECS regexes by @cristiangreco Docs: * Add docker compose support for easier development by @thepalbi * Add more config examples by @cristiangreco * Review docs about embedding yace by @cristiangreco Bugs: * Fix for Dockerfile smell DL3007 by @grosa1 Refactoring: * Refactor Tagging/CloudWatch clients by @cristiangreco * CloudWatch client: split out input builders into separate file by @cristiangreco * Refactor promutils migrate functions by @cristiangreco * Use grafana/regexp by @cristiangreco * Refactor implementation of getFilteredMetricDatas by @cristiangreco * Remove uneeded Describe implementation by @kgeckhart * Add counter to see if duplicate metrics are still a problem by @kgeckhart * Refactor label consistency and duplicates by @kgeckhart * Refactor GetMetricData calls in discovery jobs by @cristiangreco **Dependencies** * Bump github.com/aws/aws-sdk-go from 1.44.235 to 1.44.249 * Bump github.com/prometheus/common from 0.41.0 to 0.42.0 **Full Changelog**: 
https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.50.0...v0.51.0 # v0.50.0 **Important breaking changes** * Change `UpdateMetrics` signature to accept options and return error by @cristiangreco -- if you embed YACE as a Go library this is a breaking change. **Bugfixes and features** Features: * Refactor API clients concurrency handling by @cristiangreco * Add feature flags support by @thepalbi * Allow discovery jobs to return result even if there are no resources by @kgeckhart * Add flag to enable pprof profiling endpoints by @cristiangreco Services: * Add a ResourceFilter to ElasticBeanstalk by @benbridts Docs: * Update config docs format by @cristiangreco Refactoring: * Linting: fix revive issues by @cristiangreco * Remove extra error log when no resources are found by @kgeckhart * Wrap debug logging in FilterMetricData by @cristiangreco * Minor internal refactorings by @cristiangreco **Dependencies** * Bump actions/setup-go from 3 to 4 * Bump github.com/aws/aws-sdk-go from 1.44.215 to 1.44.235 * Bump github.com/urfave/cli/v2 from 2.25.0 to 2.25.1 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.49.2...v0.50.0 # v0.49.2 ## Bugfixes and features * Update release action to use goreleaser docker image v1.16.0 # v0.49.1 ## Bugfixes and features * Update release action to use Go 1.20 # v0.49.0 ## Important breaking changes * From now on we're dropping the `-alpha` suffix from the version number. YACE will be considered alpha quality until v1.0.0. * The helm chart is now hosted at https://github.com/nerdswords/helm-charts, please refer to the instructions in the new repo. 
## Bugfixes and features Helm chart: * Move helm chart out of this repo by @cristiangreco * Update helm repo link in README.md by @cristiangreco New services: * Add support for Container, queue, and database metrics for MWAA by @millin * Add support for acm-pca service by @jutley Docs updates: * Docs review: move "install" and "configuration" in separate docs by @cristiangreco * Docs: Fix example config link by @matej-g * Add example config files by @cristiangreco Internal refactoring: * Code refactoring: split out job and api code by @cristiangreco * Minor refactoring of pkg/apicloudwatch and pkg/apitagging by @cristiangreco * Refactor CW metrics to resource association logic and add tests by @thepalbi * Wrap service filter errors by @kgeckhart ## Dependencies * Bump github.com/aws/aws-sdk-go from 1.44.194 to 1.44.215 * Bump github.com/prometheus/common from 0.37.0 to 0.41.0 * Bump github.com/stretchr/testify from 1.8.1 to 1.8.2 * Bump github.com/urfave/cli/v2 from 2.24.3 to 2.25.0 * Bump golang.org/x/sync from 0.0.0-20220722155255-886fb9371eb4 to 0.1.0 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.48.0-alpha...v0.49.0 # v0.48.0-alpha **Bugfixes and features**: * Revert "Publish helm chart before releasing binaries". 
**Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.47.0-alpha...v0.48.0-alpha # v0.47.0-alpha **Bugfixes and features**: * Add Elemental MediaLive, MediaConnect to supported services by @davemt * Add support for OpenSearch Serverless by @Hussainoxious * Makefile: always add build version ldflags by @cristiangreco * Publish helm chart before releasing binaries by @cristiangreco * Build with Go 1.20 by @cristiangreco **Dependencies**: * Bump github.com/aws/aws-sdk-go from 1.44.192 to 1.44.194 * Bump github.com/urfave/cli/v2 from 2.24.2 to 2.24.3 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.46.0-alpha...v0.47.0-alpha ## 0.46.0-alpha / 2023-02-02 **Breaking changes**: - If you use Yace as a library: this release changes the package name `pkg/logger` to `pkg/logging`. **Bugfixes and features**: * Fix to set logging level correctly by @cristiangreco * ct: disable validate-maintainers by @cristiangreco **Dependencies**: * Bump github.com/aws/aws-sdk-go from 1.44.189 to 1.44.192 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/helm-chart-0.11.0...v0.46.0-alpha ## 0.45.0-alpha / 2023-01-30 **Breaking changes**: - Note if you use Yace as a library: this release changes the signature of `config.Load` method. 
**Bugfixes and features**: * Helm chart update to customize port name by @nikosmeds * Clear up docs and re-organize sections by @thepalbi * Helm: add README file template by @cristiangreco * Config parsing: emit warning messages for invalid configs by @cristiangreco * Pre-compile dimensions regexps for supported services by @cristiangreco * AWS/DX: add more dimension regexps by @cristiangreco **Dependencies**: * Bump github.com/aws/aws-sdk-go from 1.44.182 to 1.44.189 * Bump github.com/urfave/cli/v2 from 2.23.7 to 2.24.2 * Bump golangci/golangci-lint-action from 3.3.1 to 3.4.0 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.44.0-alpha...v0.45.0-alpha ## 0.44.0-alpha / 2023-01-23 **Breaking changes**: - Note if you use Yace as a library: this release changes the packages and funcs exported publicly, you will need to review the imports (although signatures are mostly unchanged) **Bugfixes and features**: * Refactor code into separate packages by @cristiangreco * Refactor list of supported services and filter funcs by @cristiangreco * Wrap debug logging to avoid expensive operations by @cristiangreco * Fix to use length of metrics level on customNamespace by @masshash * feat: bump helm chart by @rasta-rocket * feat: release helm chart when Chart.yml is updated by @rasta-rocket * Add test for configuration of services list by @cristiangreco * GolangCI: review linters settings by @cristiangreco **Dependencies**: * Bump azure/setup-helm from 1 to 3 * Bump docker/setup-buildx-action from 1 to 2 * Bump docker/setup-qemu-action from 1 to 2 * Bump github.com/aws/aws-sdk-go from 1.44.175 to 1.44.182 * Bump github.com/prometheus/client_golang from 1.13.0 to 1.14.0 * Bump helm/chart-releaser-action from 1.4.1 to 1.5.0 * Bump helm/kind-action from 1.2.0 to 1.5.0 **Full Changelog**: https://github.com/prometheus-community/yet-another-cloudwatch-exporter/compare/v0.43.0-alpha...v0.44.0-alpha ## 0.43.0-alpha / 2023-01-02 * add 
support to custom namespaces with their dimensions (by @arielly-parussulo) * Optimise support for custom namespaces to use GetMetricData API (by @code-haven) * GH workflows: run "publish" workflows only in this repo. (by @cristiangreco) * Bump Go version to 1.19 for CI and docker image. (by @cristiangreco) * Fix not to refer to loop variable in a goroutine (by @masshash) * Validate tags when converting to prometheus labels (by @cristiangreco) * Bump github.com/aws/aws-sdk-go from 1.44.127 to 1.44.167 * Bump golangci/golangci-lint-action from 3.3.0 to 3.3.1 * Bump github.com/urfave/cli/v2 from 2.23.0 to 2.23.7 ## 0.42.0-alpha / 2022-11-03 * Resolve logging issue (@datsabk) * MediaTailor - Correct dimension regex for MT (@scott-mccracken) * Helm chart update for optional test-connection pod (@nikosmeds) * Helm chart update to set priorityClassName (@nikosmeds) * Bump github.com/aws/aws-sdk-go from 1.44.122 to 1.44.127 * Bump github.com/urfave/cli/v2 from 2.20.3 to 2.23.0 ## 0.41.0-alpha / 2022-10-27 * Clean up unused variables. (@cristiangreco) * Fix typo: sts-endpoint should be sts-region. (@cristiangreco) * Enabled Managed prometheus metrics (@datsabk) * Add support for AWS Kafka Connect (@cgowthaman) * Import CloudWatch mixin. (@jeschkies) * main.go refactoring: define cmd action as a separate func. (@cristiangreco) * Add support for EMR Serverless (@cgowthaman) ## 0.40.0-alpha / 2022-09-15 * Fix typo in Charts.yml (@yasharne) * Subcommand `verify-config` actually validates the config file. (@cristiangreco) * Add dimensions regex for AmazonMQ. (@cristiangreco) * Fix metrics with additional dimensions not being scraped. (@cristiangreco) * Remove unused code, add test for RemoveDuplicateMetrics. 
(@cristiangreco) * Bump github.com/sirupsen/logrus * Bump github.com/urfave/cli/v2 * Bump github.com/aws/aws-sdk-go * Bump actions/setup-python ## 0.39.0-alpha / 2022-09-08 * Improve code quality and unblock this release (cristiangreco) * Add helm chart (vkobets) * Fix DX metrics (paulojmdias) * Fix searchTags and bad dimension name (femiagbabiaka) * Handle empty list in filter metric tests (mtt88) * Add AWS Elemental MediaTailor support (scott-mccracken) * Support storagegateway metrics (sedan07) * Filter api gateway resources to skip "stages" (ch4rms) * Bump aws-sdk, urfave/cli, prometheus/client_golang ## 0.38.0-alpha / 2022-07-13 * Set max page size for tagging API requests (#617) * Build with Go 1.18 ## 0.37.0-alpha / 2022-07-05 * New config `dimensionNameRequirements` allows autodiscovery jobs to only fetch metrics that include specified dimensions (jutley) * Update deps ## 0.36.2-alpha / 2022-06-29 * Cost Reduction - Use less API requests if no tagged resources are found (cristiangreco) * Update deps ## 0.36.1-alpha / 2022-06-22 * Use structured logs for logging interface (kgeckhart) ## 0.36.0-alpha / 2022-06-20 * *BREAKING CHANGE FOR LIBRARY USERS* Major refactoring of usage of logging library (kgeckhart) * Minor update of deps and security patches (urfave/cli/v2, golangci/golangci-lint-action, github.com/prometheus/client_golang, github.com/stretchr/testify, github.com/aws/aws-sdk-go * Updates of Readme (markwallsgrove) ## 0.35.0-alpha / 2022-04-26 * Update dependencies * Improve / Document way how to use the exporter as external library (kgeckhart) * Refactor label consistency (kgeckhart) * Add support for vpc-endpoint (AWS/PrivateLinkEndpoints) (aleslash) * Add support for vpc-endpoint-service (AWS/PrivateLinkServices) (aleslash) ## 0.34.0-alpha / 2022-03-26 * Update dependencies * Add weekly dependabot updates (jylitalo) * Add support for regional sts endpoints (matt-mercer) * Add multi-arch docker build (charlie-haley) New services * Add global 
accelerator support (charlie-haley) * Add AppStream support (jhuesemann) * Add Managed Apache Airflow support (sdenham) * Add KinesisAnalytics support (gumpt) Bug Fixes * Fix targetgroup arn lookup (domcyrus) * Fix WorkGroup Dimension are not showing in Athena Metrics (sahajavidya) * Improve regex performance (kgeckhart) * Fix prometheus reload causing a goroutine leak (gumpt / cristiangreco) Docs * Added help for new contributors (aleslash) ## 0.33.0-alpha / 2021-12-10 * Add /healthz route which allows to deploy more secure with helm (aleslash) * Read DMS replication instance identifier from the DMS API (nhinds) ## 0.32.0-alpha / 2021-11-19 * [BREAKING] Fix the calculation of start and end times for GetMetricData (csquire) ``` floating-time-window is now replaced with roundingPeriod Specifies how the current time is rounded before calculating start/end times for CloudWatch GetMetricData requests. This rounding is to optimize performance of the CloudWatch request. This setting only makes sense to use if, for example, you specify a very long period (such as 1 day) but want your times rounded to a shorter time (such as 5 minutes). to For example, a value of 300 will round the current time to the nearest 5 minutes. If not specified, the roundingPeriod defaults to the same value as the shortest period in the job. ``` * Improve testing / linting (cristiangreco) * Verify cli parameters and improve cli parsing (a0s) * Allow to configure yace cli parameters via env variables (a0s) * Improve error handling of cloudwatch (matthewnolf) * Add support for directconnect and route53 health checks * Improve throttling handling to AWS APIs (anilkun) * Add issue templates to improve support (NickLarsenNZ) * Allow setting default values for statistics (surminus) * Fix apigateway method and resource dimension bug (aleslash) Thanks a lot to all contributors! - Lovely to see so much efforts especially in testing to get this project more and more stable. 
- I know we are far away from a nice tested code base but we are improving in the right direction and I really love to see all of your efforts there. It is really appreciated from my side. I just contacted AWS to get some open source credits so we can build some kind of end to end tests. This should allow us to find tricky bugs earlier and not only when we ship things. Love to all of you, Thomas! ## 0.31.0-alpha / 2021-09-23 * [BREAKING] Decoupled scraping is now default. Removed code which allowed to use scraper without it. ``` # Those flags are just ignored -decoupled-scraping=false -decoupled-scraping=true ``` * [BREAKING] Small timeframes of scraping can be used again now. In the past yace decided the scraping interval based on config. This magic was removed for simplicity. ``` # In the past this would have in some cases still set --scraping-interval 600 --scraping-interval 10 # Now it really would scrape every 10 seconds which could introduce big API costs. So please watch # your API requests! --scraping-interval 10 ``` * Fix problems with start/endtime of scrapes (klarrio-dlamb) * Add support for Database Migration Service metrics * Allow to hotreload config via /reload (antoniomerlin) ## 0.30.1-alpha / 2021-09-13 * *SECURITY* Fix issue with building binaries. Please update to mitigate (https://nvd.nist.gov/vuln/detail/CVE-2020-14039) * Thanks jeason81 for reporting this security incident! 
## 0.30.0-alpha / 2021-09-07 * *BREAKING* Introduce new version field to config file (jylitalo) ``` # Before discovery: jobs: # After apiVersion: v1alpha1 discovery: jobs: ``` * [BUG] Fix issues with nilToZero (eminugurkenar) * [BUG] Fix race condition setting end time for discovery jobs (cristiangreco) * Simplify session creation code (jylitalo) * Major improvement of aws discovery code (jylitalo) * Major rewrite of the async scraping logic (rabunkosar-dd) * Add support for AWS/ElasticBeanstalk (andyzasl) * Upgrade golang to 1.17 * Upgrade golang libraries to newest versions ## 0.29.0-alpha / 2021-09-01 Okay, private things settled. We have a new organisation for the project. Lets boost it and get the open PRs merged! This version is like 0.28.0-alpha but docker images hosted on ghcr.io and published via new github organisation nerdswords. Find details [here](https://medium.com/@IT_Supertramp/reorganizing-yace-79d7149b9584). Thanks to all there waiting and using the product! :) - *BREAKING CHANGE* Using a new docker registry / organisation: ```yaml # Before quay.io/invisionag/yet-another-cloudwatch-exporter:v0.29.0-alpha # Now ghcr.io/nerdswords/yet-another-cloudwatch-exporter:v0.29.0-alpha ``` ## 0.28.0-alpha / 2021-07-09 Sorry folks, I currently struggle a little bit to get things merged fast due to a lot of private stuff. Really appreciate all your PRs and hope to get the bigger ones (which are sadly still not merged yet) into next release. Really appreciate any person working on this project! 
- Have a nice day :) - *BREAKING CHANGE* Added support for specifying an External ID with IAM role Arns (cristiangreco) ```yaml # Before discovery: jobs: - type: rds roleArns: - "arn:aws:iam::123456789012:role/Prometheus" # After discovery: jobs: - type: rds roles: - roleArn: "arn:aws:iam::123456789012:role/Prometheus" externalId: "shared-external-identifier" # optional ``` - Add alias for AWS/Cognito service (tohjustin) - Fix logic in dimensions for Transit Gateway Attachments (rhys-evans) - Fix bug with scraping intervals (boazreicher) - Support arm64 builds (alias-dev) - Fix IgnoreLength logic (dctrwatson) - Simplify code base (jylitalo) - Simplify k8s deployments for new users (mahmoud-abdelhafez) - Handle metrics with '%' in their name (darora) - Fix classic elb name (nhinds) - Skip metrics in edge cases (arvidsnet) Freshly shipped new integrations: - Certificate Manager (mksh) - WorkSpaces (kl4w) - DDoSProtection / Shield (arvidsnet) ## 0.27.0-alpha / 2021-05-07 - Make exporter a library. 
(jeschkies) - Add CLI option to validate config file (zswanson) - Fix multidimensional static metric (nmiculinic) - Fix scrapes running in EKS fail after first scrape (rrusso1982) - Fix Docker build (jeschkies) - Allow to use this project in China (insectme) - Fix error retrieving kafka metrics (friedrichg) Freshly integrated: - Add AWS/NetworkFirewall (rhys-evans) - Add AWS/Cassandra (bjhaid) - Add AWS/AmazonMQ (saez0pub) - Add AWS/Athena (haarchri) - Add AWS/Neptune (benjaminaaron) Thanks to doc fixes: calvinbui ## 0.26.3-alpha / 2021-03-15 ## 0.26.2-alpha / 2021-03-15 - Fix CI issue ## 0.26.0-alpha / 2021-03-15 - *BREAKING CHANGE* Removed a need to use static dimensions in dynamic jobs in cases, when they cannot be parsed from ARNs (AndrewChubatiuk) ``` # Before metrics: - name: NumberOfObjects statistics: - Average additionalDimensions: - name: StorageType value: AllStorageTypes # After metrics: - name: NumberOfObjects statistics: - Average ``` * *BREAKING CHANGE* Use small case for searchTags config option (AndrewChubatiuk) ``` # Before searchTags: - Key: type Value: public # After searchTags: - key: type value: public ``` * *BREAKING CHANGE* CloudFront renamed from `cf` to `cloudfront` ``` # Before - type: cf # After - type: cloudfront ``` - Added regular expressions to parse dimensions from resources (AndrewChubatiuk) - Added option to use floating time windows (zqad) - Added CLI option to validate config file (zswanson) - Added AWS network Firewall (rhys-evans) - Fixed multidimensional static metric (nmiculinic) - Tidy up code (jylitalo) ## 0.25.0-alpha / 2021-01-05 - *BREAKING CHANGE* Use NaN as default if AWS returns nil (arnitolog) - Add autodiscovery for AWS/EC2Spot (singhjagmohan1000) - Add autodiscovery for DocumentDB (haarchri) - Add autodiscovery for GameLift (jp) - Added support for fips compliant endpoints (smcavallo) - Update deps and build with golang 1.15 (smcavallo) ## 0.24.0-alpha / 2020-12-07 - Add API Gateway IAM info to README (Botono) - 
Fix sorting of datapoints, add test util functions (Botono) - Fix missing DataPoints and improve yace in various ways (vishalraina) - Added Github action file to basic validation of incoming PR (vishalraina) - Fix info metrics missing (goya) - Add rds db clusters (goya) - Fix missing labels (goya) ## 0.23.0-alpha / 2020-10-02 - Add sampleCount statistics (udhos) - Add WAFv2 support (mksh) ## 0.22.0-alpha / 2020-10-02 - Fix alb issues (reddoggad) - Add nlb support (reddoggad) ## 0.21.0-alpha / 2020-09-21 - Big tidy up of code, remove old methods and refactor used ones (jylitalo) - Fix crashes where labels are not collected correctly (rrusso1982) - Fix pointer bug causing metrics to be missing (jylitalo) - Allow more then 25 apigateways to be discovered (udhos) ## 0.20.0-alpha / 2020-07-31 - Add api-gateway support (smcavallo) - Improve metrics validation (jylitalo) - Fix metrics with '<', '>' chars ## 0.19.1-alpha / 2020-07-17 - Remove error during build ## 0.19.0-alpha / 2020-07-17 Wow what a release. Thanks to all contributors. This is our biggest release and it made me a lot of fun to see all those contributions. From small doc changes (love those) to major rewrites of big components or new complex features. Thanks! * *BREAKING CHANGE* Add support for multiple roleArns (jylitalo) ```yaml # Before --- discovery: jobs: - type: rds roleArn: "arn:aws:iam::123456789012:role/Prometheus" # After discovery: jobs: - type: rds roleArns: - "arn:aws:iam::123456789012:role/Prometheus" ``` * Upgrade golang from 1.12 to 1.14 * Major linting of code and improving global code quality. (jylitalo) * Improve logging (jylitalo) * Add config validation. 
(daviddetorres) * Added support for tags with '@' char included (afroschauer ) * Added Transit Gateway Attachment Metrics (rhys-evans) * Fix information gathering if no data is retrieved by cloudwatch (daviddetorres) * Improve docs (calvinbui) * Add redshift support (smcavallo) * Allow easier configuration through adding period / addCloudwatchTimestamp setting additionally to job level. (rrusso1982) * Add initial unit tests (smcavallo) * Add new configuration to allow snake case labels (rrusso1982) * Fix complex metric dimension bug (rrusso1982) * Upgrade golang packages (smcavallo) * Set up correct partition for ASG for AWS China and GovCloud Regions (smcavallo) * Add ability to set custom tags to discovery job metrics (goya) ## 0.18.0-alpha / 2020-06-15 * *BREAKING CHANGE* Add support for multiple regions (goya) ```yaml # Before --- discovery: jobs: - type: rds region: eu-west-1 # After discovery: jobs: - type: rds regions: - eu-west-1 ``` * Fix missing alb target group metrics (abhi4890 ) * Added support for step functions (smcavallo) ## 0.17.0-alpha / 2020-05-14 * Added support for sns / firehose (rhys-evans) * Added support for fsx / appsync (arnitolog) ## 0.16.0-alpha / 2020-04-06 * Hugh rewrite: Decouple scraping and serving metrics. Thanks so much daviddetorres! * *BREAKING CHANGE* Decoupled scraping and set scraping interval to 5 minutes. ``` The flag 'decoupled-scraping' makes the exporter to scrape Cloudwatch metrics in background in fixed intervals, in stead of each time that the '/metrics' endpoint is fetched. This protects from the abuse of API requests that can cause extra billing in AWS account. This flag is activated by default. If the flag 'decoupled-scraping' is activated, the flag 'scraping-interval' defines the seconds between scrapes. Its default value is 300. ``` * Hugh rewrite: Rewrite of metric gathering to reduce API Limit problems. Thanks so much daviddetorres! 
* Improvement of ALB data gathering and filtering (daviddetorres)
```yaml # Before command: - "yace" - "--config.file=/tmp/config.yml" # After args: - "--config.file=/tmp/config.yml" ``` * Add support for Elastic MapReduce (nhinds) * Add support for SQS - (alext) * Add support for ECS Services as ecs-svc * Add support for NLB * Add retries to cloudwatch api calls (Deepak1100) * Fix dimension labels for static jobs (alext) ## 0.13.7 / 2019-07-09 * Add region as exported label to metrics ## 0.13.6 / 2019-06-24 * Fix errors with "=" in tags (cdchris12) * Add curl to container for easy debugging (cdchris12) ## 0.13.5-alpha / 2019-06-09 * Limit concurrency of aws calls ## 0.13.4 / 2019-06-03 * Add Autoscaling group support (wjam) * Fix strange AWS namespace bug for static exports (AWS/EC2/API) * Add warning if metric length of less than 300s is configured / Interminent metrics ## 0.13.3 / 2019-04-26 * Fix ALB problems. Target Group metrics are now exported as aws_albtg ``` aws_albtg_request_count_sum{dimension_LoadBalancer="app/Test-ALB/fec38de4cf0cacb1",dimension_TargetGroup="targetgroup/Test/708ecba11979327b",name="arn:aws:elasticloadbalancing:eu-west-1:237935892384916:targetgroup/Test/708dcba119793234"} 0 ``` ## 0.13.2 / 2019-04-26 * CI problem ## 0.13.1-alpha / 2019-04-03 * **BREAKING** For some metrics `cloudwatch:ListMetrics` iam permissions are needed. Please update your role! * **BREAKING** Add 'v' to indicate it is a version number in docker tag / version output ``` # Before image: quay.io/invisionag/yet-another-cloudwatch-exporter:0.13.0 # After image: quay.io/invisionag/yet-another-cloudwatch-exporter:v0.13.0 ``` * Use golang 1.12.0 to build * Use goreleaser to release * Update aws dependencies * Use github actions as CI * Migrate dependency management to golang modules ## 0.13.0-alpha / 2019-03-18 * **BREAKING** For some metrics `cloudwatch:ListMetrics` iam permissions are needed. Please update your role! * **BREAKING** As adding cloudwatch timestamp breaks some metrics I decided to not set it as default anymore. 
This should make it easier for new users to have fun with this project. It fixes for some users `non-histogram and non-summary metrics should not have "_sum" suffix` bug. ```yaml # Before metrics: - name: FreeStorageSpace disableTimestamp: true # After metrics: - name: FreeStorageSpace # Before metrics: - name: FreeStorageSpace # After metrics: - name: FreeStorageSpace useCloudwatchTimestamp: true ``` * Add ability to specify additional dimensions on discovery jobs e.g. for BucketSizeBytes metrics on S3 (abuchananTW) * Fix incorrect dimension value in case of alb in discovery config (GeeksWine) * Add CLI command to debug output * Add DynamoDB support ## 0.12.0 / 2019-02-04 * **BREAKING** Add the exact timestamps from CloudWatch to the exporter Prometheus metrics (LeePorte) * Add a new option `disableTimestamp` to not include a timestamp for a specific metric (it can be useful for sparse metrics, e.g. from S3) (LeePorte) * Add support for kinesis (AndrewChubatiuk) ## 0.11.0 / 2018-12-28 * **BREAKING** Add snake_case to prometheus metrics (sanchezpaco) ```yaml # Before aws_elb_requestcount_sum # After aws_elb_request_count_sum ``` * Add optional delay setting to scraping (Deepak1100) ```yaml period: 60 length: 900 delay: 300 ``` ## 0.10.0 / 2018-12-03 * Reduce usage of listMetrics calls (nhinds) * Add support of iam roles (nhinds) * Add optional roleArn setting, which allows scraping with different roles e.g. pull data from mulitple AWS accounts using cross-acount roles (nhinds) ```yaml metrics: - name: FreeStorageSpace roleArn: xxx statistics: - 'Sum' period: 600 length: 60 ``` ## 0.9.0 / 2018-11-16 * Add lambda support (nhinds) * Fix support for listing multiple statistics per metric (nhinds) * Add tag labels on metrics for easy querying (nhinds) ``` # Before aws_ec2_cpuutilization_average + on (name) group_left(tag_Name) aws_ec2_info # After, now name tags are on metrics and no grouping needed aws_ec2_cpuutilization_average ``` * **BREAKING** Change config syntax. 
Now you can define tags which are exported as labels on metrics. Before: ```yaml discovery: - region: eu-west-1 type: "es" searchTags: - Key: type Value: ^(easteregg|k8s)$ metrics: - name: FreeStorageSpace statistics: - 'Sum' period: 600 length: 60 ``` New Syntax with optional exportedTagsOnMetrics: ```yaml discovery: exportedTagsOnMetrics: ec2: - Name jobs: - region: eu-west-1 type: "es" searchTags: - Key: type Value: ^(easteregg|k8s)$ metrics: - name: FreeStorageSpace statistics: - 'Sum' period: 600 length: 60 ``` ## 0.8.0 / 2018-11-02 * Added VPN connection metrics (AndrewChubatiuk) * Added ExtendedStatistics / percentiles (linefeedse) * Added Average Statistic (AndrewChubatiuk) ## 0.7.0-alpha / 2018-10-19 * ALB Support (linefeedse) * Custom lables for static metrics Example ```yaml static: - namespace: AWS/AutoScaling region: eu-west-1 dimensions: - name: AutoScalingGroupName value: Test customTags: - Key: CustomTag Value: CustomValue metrics: - name: GroupInServiceInstances statistics: - 'Minimum' period: 60 length: 300 ``` ## 0.6.1 / 2018-10-09 * Sanitize colons in tags (linefeedse) ## 0.6.0 / 2018-09-20 * **BREAKING**: Period/length uses now seconds instead of minutes * **BREAKING**: Config file uses new syntax to support static * Support of --debug flag which outputs some dev debug informations * Support of metrics who are not included in tags api (e.g. 
autoscaling metrics) Before ```yaml jobs: - discovery: region: eu-west-1 metrics: - name: HealthyHostCount statistics: - 'Minimum' period: 60 length: 300 ``` New Syntax: ```yaml discovery: - region: eu-west-1 type: elb searchTags: - Key: KubernetesCluster Value: production metrics: - name: HealthyHostCount statistics: - 'Minimum' period: 60 length: 300 static: - namespace: AWS/AutoScaling region: eu-west-1 dimensions: - name: AutoScalingGroupName value: Test metrics: - name: GroupInServiceInstances statistics: - 'Minimum' period: 60 length: 300 ``` ## 0.5.0 / 2018-08-07 * Support of EFS - Elastic File System * Support of EBS - Elastic Block Storage ## 0.4.0 / 2018-08-07 * **BREAKING**: Config file uses list as statistics config option, this should reduce api calls for more than one statistics. Before: ```yaml jobs: - discovery: metrics: statistics: 'Maximum' ``` After: ```yaml jobs: - discovery: metrics: statistics: - 'Maximum' ``` * Start to track changes in CHANGELOG.md * Better error handling (discordianfish) * Increase speed, not only each jobs threaded but now each metric * Add s3 support * Fix potential race condition during cloudwatch access * Fix bug ignoring period in cloudwatch config * Use interfaces for aws access and prepare code for unit tests * Implement minimum, average, maximum, sum for cloudwatch api * Implement way to handle multiple data returned by cloudwatch * Update go dependencies ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Prometheus Community Code of Conduct Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md). ================================================ FILE: CONTRIBUTE.md ================================================ # CONTRIBUTE ## Steps to Contribute * We use [golangci-lint](https://github.com/golangci/golangci-lint) for linting the code. Make it sure to install it first. 
* Check out repository running `git clone https://github.com/prometheus-community/yet-another-cloudwatch-exporter.git` * For linting, please run `make lint` * For building, please run `make build` * For running locally, please run `./yace` * Best practices: * commit should be as small as possible * branch from the *master* branch * add tests relevant to the fixed bug or new feature ## How to release * `git tag v0.13.1-alpha && git push --tags` ================================================ FILE: Dockerfile ================================================ ARG ARCH="amd64" ARG OS="linux" FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>" ARG ARCH="amd64" ARG OS="linux" COPY .build/${OS}-${ARCH}/yace /bin/yace COPY examples/ec2.yml /etc/yace/config.yml EXPOSE 5000 USER nobody ENTRYPOINT [ "/bin/yace" ] CMD [ "--config.file=/etc/yace/config.yml" ] ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2024 The Prometheus Authors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MAINTAINERS.md ================================================ # Maintainers - Thomas Peitz (info@thomas-peitz.de / @thomaspeitz) - Cristian Greco (cristian.greco@grafana.com / @cristiangreco) - Andrii Kushch (andrii.kushch@grafana.com / @andriikushch) - Tristan Burgess (tristan.burgess@grafana.com / @tristanburgess) ================================================ FILE: Makefile ================================================ # Copyright 2024 The Prometheus Authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# Needs to be defined before including Makefile.common to auto-generate targets DOCKER_ARCHS ?= amd64 armv7 arm64 DOCKER_REPO ?= prometheuscommunity include Makefile.common STATICCHECK_IGNORE = DOCKER_IMAGE_NAME ?= yet-another-cloudwatch-exporter ================================================ FILE: Makefile.common ================================================ # Copyright The Prometheus Authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # A common Makefile that includes rules to be reused in different prometheus projects. # !!! Open PRs only against the prometheus/prometheus/Makefile.common repository! # Example usage : # Create the main Makefile in the root project directory. # include Makefile.common # customTarget: # @echo ">> Running customTarget" # # Ensure GOBIN is not set during build so that promu is installed to the correct path unexport GOBIN GO ?= go GOFMT ?= $(GO)fmt FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH))) GOOPTS ?= GOHOSTOS ?= $(shell $(GO) env GOHOSTOS) GOHOSTARCH ?= $(shell $(GO) env GOHOSTARCH) GO_VERSION ?= $(shell $(GO) version) GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION)) PRE_GO_111 ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.') PROMU := $(FIRST_GOPATH)/bin/promu pkgs = ./... 
ifeq (arm, $(GOHOSTARCH)) GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM) GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM) else GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) endif GOTEST := $(GO) test GOTEST_DIR := ifneq ($(CIRCLE_JOB),) ifneq ($(shell command -v gotestsum 2> /dev/null),) GOTEST_DIR := test-results GOTEST := gotestsum --junitfile $(GOTEST_DIR)/unit-tests.xml -- endif endif PROMU_VERSION ?= 0.18.1 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= GOLANGCI_LINT_VERSION ?= v2.11.4 GOLANGCI_FMT_OPTS ?= # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386 arm64)) # If we're in CI and there is an Actions file, that means the linter # is being run in Actions, so we don't need to run it here. ifneq (,$(SKIP_GOLANGCI_LINT)) GOLANGCI_LINT := else ifeq (,$(CIRCLE_JOB)) GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint else ifeq (,$(wildcard .github/workflows/golangci-lint.yml)) GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint endif endif endif PREFIX ?= $(shell pwd) BIN_DIR ?= $(shell pwd) DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) DOCKERBUILD_CONTEXT ?= ./ DOCKER_REPO ?= prom # Check if deprecated DOCKERFILE_PATH is set ifdef DOCKERFILE_PATH $(error DOCKERFILE_PATH is deprecated. Use DOCKERFILE_VARIANTS ?= $(DOCKERFILE_PATH) in the Makefile) endif DOCKER_ARCHS ?= amd64 arm64 armv7 ppc64le riscv64 s390x DOCKERFILE_ARCH_EXCLUSIONS ?= DOCKER_REGISTRY_ARCH_EXCLUSIONS ?= quay.io:riscv64 DOCKERFILE_VARIANTS ?= $(wildcard Dockerfile Dockerfile.*) # Function to extract variant from Dockerfile label. 
# Returns the variant name from io.prometheus.image.variant label, or "default" if not found. define dockerfile_variant $(strip $(or $(shell sed -n 's/.*io\.prometheus\.image\.variant="\([^"]*\)".*/\1/p' $(1)),default)) endef # Check for duplicate variant names (including default for Dockerfiles without labels). DOCKERFILE_VARIANT_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df))) DOCKERFILE_VARIANT_NAMES_SORTED := $(sort $(DOCKERFILE_VARIANT_NAMES)) ifneq ($(words $(DOCKERFILE_VARIANT_NAMES)),$(words $(DOCKERFILE_VARIANT_NAMES_SORTED))) $(error Duplicate variant names found. Each Dockerfile must have a unique io.prometheus.image.variant label, and only one can be without a label (default)) endif # Build variant:dockerfile pairs for shell iteration. DOCKERFILE_VARIANTS_WITH_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)):$(df)) # Shell helper to check whether a dockerfile/arch pair is excluded. define dockerfile_arch_is_excluded case " $(DOCKERFILE_ARCH_EXCLUSIONS) " in \ *" $$dockerfile:$(1) "*) true ;; \ *) false ;; \ esac endef # Shell helper to check whether a registry/arch pair is excluded. # Extracts registry from DOCKER_REPO (e.g., quay.io/prometheus -> quay.io) define registry_arch_is_excluded registry=$$(echo "$(DOCKER_REPO)" | cut -d'/' -f1); \ case " $(DOCKER_REGISTRY_ARCH_EXCLUSIONS) " in \ *" $$registry:$(1) "*) true ;; \ *) false ;; \ esac endef BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS)) PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS)) TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS)) SANITIZED_DOCKER_IMAGE_TAG := $(subst +,-,$(DOCKER_IMAGE_TAG)) ifeq ($(GOHOSTARCH),amd64) ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows)) # Only supported on amd64 test-flags := -race endif endif # This rule is used to forward a target like "build" to "common-build". 
This # allows a new "build" target to be defined in a Makefile which includes this # one and override "common-build" without override warnings. %: common-% ; .PHONY: common-all common-all: precheck style check_license lint yamllint unused build test .PHONY: common-style common-style: @echo ">> checking code style" @fmtRes=$$($(GOFMT) -d $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -name '*.go' -print)); \ if [ -n "$${fmtRes}" ]; then \ echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \ echo "Please ensure you are using $$($(GO) version) for formatting code."; \ exit 1; \ fi .PHONY: common-check_license common-check_license: @echo ">> checking license header" @licRes=$$(for file in $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -type f -iname '*.go' -print) ; do \ awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \ done); \ if [ -n "$${licRes}" ]; then \ echo "license header checking failed:"; echo "$${licRes}"; \ exit 1; \ fi @echo ">> checking for copyright years 2026 or later" @futureYearRes=$$(git grep -E 'Copyright (202[6-9]|20[3-9][0-9])' -- '*.go' ':!:vendor/*' || true); \ if [ -n "$${futureYearRes}" ]; then \ echo "Files with copyright year 2026 or later found (should use 'Copyright The Prometheus Authors'):"; echo "$${futureYearRes}"; \ exit 1; \ fi .PHONY: common-deps common-deps: @echo ">> getting dependencies" $(GO) mod download .PHONY: update-go-deps update-go-deps: @echo ">> updating Go dependencies" @for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \ $(GO) get $$m; \ done $(GO) mod tidy .PHONY: common-test-short common-test-short: $(GOTEST_DIR) @echo ">> running short tests" $(GOTEST) -short $(GOOPTS) $(pkgs) .PHONY: common-test common-test: $(GOTEST_DIR) @echo ">> running all tests" $(GOTEST) $(test-flags) $(GOOPTS) $(pkgs) $(GOTEST_DIR): @mkdir -p $@ .PHONY: common-format common-format: 
$(GOLANGCI_LINT) @echo ">> formatting code" $(GO) fmt $(pkgs) ifdef GOLANGCI_LINT @echo ">> formatting code with golangci-lint" $(GOLANGCI_LINT) fmt $(GOLANGCI_FMT_OPTS) endif .PHONY: common-vet common-vet: @echo ">> vetting code" $(GO) vet $(GOOPTS) $(pkgs) .PHONY: common-lint common-lint: $(GOLANGCI_LINT) ifdef GOLANGCI_LINT @echo ">> running golangci-lint" $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs) endif .PHONY: common-lint-fix common-lint-fix: $(GOLANGCI_LINT) ifdef GOLANGCI_LINT @echo ">> running golangci-lint fix" $(GOLANGCI_LINT) run --fix $(GOLANGCI_LINT_OPTS) $(pkgs) endif .PHONY: common-yamllint common-yamllint: @echo ">> running yamllint on all YAML files in the repository" ifeq (, $(shell command -v yamllint 2> /dev/null)) @echo "yamllint not installed so skipping" else yamllint . endif # For backward-compatibility. .PHONY: common-staticcheck common-staticcheck: lint .PHONY: common-unused common-unused: @echo ">> running check for unused/missing packages in go.mod" $(GO) mod tidy @git diff --exit-code -- go.sum go.mod .PHONY: common-build common-build: promu @echo ">> building binaries" $(PROMU) build --prefix $(PREFIX) $(PROMU_BINARIES) .PHONY: common-tarball common-tarball: promu @echo ">> building release tarball" $(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR) .PHONY: common-docker-repo-name common-docker-repo-name: @echo "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)" .PHONY: common-docker $(BUILD_DOCKER_ARCHS) common-docker: $(BUILD_DOCKER_ARCHS) $(BUILD_DOCKER_ARCHS): common-docker-%: @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ dockerfile=$${variant#*:}; \ variant_name=$${variant%%:*}; \ if $(call dockerfile_arch_is_excluded,$*); then \ echo "Skipping $$variant_name variant for linux-$* (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ distroless_arch="$*"; \ if [ "$*" = "armv7" ]; then \ distroless_arch="arm"; \ fi; \ if [ "$$dockerfile" = "Dockerfile" ]; then \ echo "Building default variant ($$variant_name) for linux-$* 
using $$dockerfile"; \ docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ -f $$dockerfile \ --build-arg ARCH="$*" \ --build-arg OS="linux" \ --build-arg DISTROLESS_ARCH="$$distroless_arch" \ $(DOCKERBUILD_CONTEXT); \ if [ "$$variant_name" != "default" ]; then \ echo "Tagging default variant with $$variant_name suffix"; \ docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ fi; \ else \ echo "Building $$variant_name variant for linux-$* using $$dockerfile"; \ docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" \ -f $$dockerfile \ --build-arg ARCH="$*" \ --build-arg OS="linux" \ --build-arg DISTROLESS_ARCH="$$distroless_arch" \ $(DOCKERBUILD_CONTEXT); \ fi; \ done .PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) common-docker-publish: $(PUBLISH_DOCKER_ARCHS) $(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ dockerfile=$${variant#*:}; \ variant_name=$${variant%%:*}; \ if $(call dockerfile_arch_is_excluded,$*); then \ echo "Skipping push for $$variant_name variant on linux-$* (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if $(call registry_arch_is_excluded,$*); then \ echo "Skipping push for $$variant_name variant on linux-$* to $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ echo "Pushing $$variant_name variant for linux-$*"; \ docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ fi; \ if [ "$$dockerfile" = "Dockerfile" ]; then \ echo "Pushing default variant ($$variant_name) for linux-$*"; \ docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ fi; \ if [ 
"$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ echo "Pushing $$variant_name variant version tags for linux-$*"; \ docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ fi; \ if [ "$$dockerfile" = "Dockerfile" ]; then \ echo "Pushing default variant version tag for linux-$*"; \ docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ fi; \ fi; \ done DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION))) .PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS) common-docker-tag-latest: $(TAG_DOCKER_ARCHS) $(TAG_DOCKER_ARCHS): common-docker-tag-latest-%: @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ dockerfile=$${variant#*:}; \ variant_name=$${variant%%:*}; \ if $(call dockerfile_arch_is_excluded,$*); then \ echo "Skipping tag for $$variant_name variant on linux-$* (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if $(call registry_arch_is_excluded,$*); then \ echo "Skipping tag for $$variant_name variant on linux-$* for $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ echo "Tagging $$variant_name variant for linux-$* as latest"; \ docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest-$$variant_name"; \ docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ fi; \ if [ "$$dockerfile" = "Dockerfile" ]; then \ echo "Tagging default variant ($$variant_name) for linux-$* as latest"; \ docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" 
"$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"; \ docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ fi; \ done .PHONY: common-docker-manifest common-docker-manifest: @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ dockerfile=$${variant#*:}; \ variant_name=$${variant%%:*}; \ if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ echo "Creating manifest for $$variant_name variant"; \ refs=""; \ for arch in $(DOCKER_ARCHS); do \ if $(call dockerfile_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for $$variant_name (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if $(call registry_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for $$variant_name on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ done; \ if [ -z "$$refs" ]; then \ echo "Skipping manifest for $$variant_name variant (no supported architectures)"; \ continue; \ fi; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" $$refs; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ fi; \ if [ "$$dockerfile" = "Dockerfile" ]; then \ echo "Creating default variant ($$variant_name) manifest"; \ refs=""; \ for arch in $(DOCKER_ARCHS); do \ if $(call dockerfile_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for default variant (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if $(call registry_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for default variant on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ refs="$$refs 
$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ done; \ if [ -z "$$refs" ]; then \ echo "Skipping default variant manifest (no supported architectures)"; \ continue; \ fi; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $$refs; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"; \ fi; \ if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ echo "Creating manifest for $$variant_name variant version tag"; \ refs=""; \ for arch in $(DOCKER_ARCHS); do \ if $(call dockerfile_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for $$variant_name version tag (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if $(call registry_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for $$variant_name version tag on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ done; \ if [ -z "$$refs" ]; then \ echo "Skipping version-tag manifest for $$variant_name variant (no supported architectures)"; \ continue; \ fi; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name" $$refs; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ fi; \ if [ "$$dockerfile" = "Dockerfile" ]; then \ echo "Creating default variant version tag manifest"; \ refs=""; \ for arch in $(DOCKER_ARCHS); do \ if $(call dockerfile_arch_is_excluded,$$arch); then \ echo " Skipping $$arch for default variant version tag (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ if $(call registry_arch_is_excluded,$$arch); 
then \ echo " Skipping $$arch for default variant version tag on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ continue; \ fi; \ refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:v$(DOCKER_MAJOR_VERSION_TAG)"; \ done; \ if [ -z "$$refs" ]; then \ echo "Skipping default variant version-tag manifest (no supported architectures)"; \ continue; \ fi; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)" $$refs; \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)"; \ fi; \ fi; \ done .PHONY: promu promu: $(PROMU) $(PROMU): $(eval PROMU_TMP := $(shell mktemp -d)) curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP) mkdir -p $(FIRST_GOPATH)/bin cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu rm -r $(PROMU_TMP) .PHONY: common-proto common-proto: @echo ">> generating code from proto files" @./scripts/genproto.sh ifdef GOLANGCI_LINT $(GOLANGCI_LINT): mkdir -p $(FIRST_GOPATH)/bin curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/$(GOLANGCI_LINT_VERSION)/install.sh \ | sed -e '/install -d/d' \ | sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION) endif .PHONY: common-print-golangci-lint-version common-print-golangci-lint-version: @echo $(GOLANGCI_LINT_VERSION) .PHONY: precheck precheck:: define PRECHECK_COMMAND_template = precheck:: $(1)_precheck PRECHECK_COMMAND_$(1) ?= $(1) $$(strip $$(PRECHECK_OPTIONS_$(1))) .PHONY: $(1)_precheck $(1)_precheck: @if ! $$(PRECHECK_COMMAND_$(1)) 1>/dev/null 2>&1; then \ echo "Execution of '$$(PRECHECK_COMMAND_$(1))' command failed. Is $(1) installed?"; \ exit 1; \ fi endef govulncheck: install-govulncheck govulncheck ./... 
install-govulncheck: command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest ================================================ FILE: NOTICE ================================================ Copyright 2018-2021 Invision AG Copyright 2021-2024 NERDSWORDS Copyright 2024 The Prometheus Authors ================================================ FILE: README.md ================================================ # YACE - yet another cloudwatch exporter [![Container on Quay](https://quay.io/repository/prometheuscommunity/yet-another-cloudwatch-exporter/status)][quay] [![Container on Docker Hub](https://img.shields.io/docker/pulls/prometheuscommunity/yet-another-cloudwatch-exporter.svg?maxAge=604800)][docker] YACE, or `yet another cloudwatch exporter`, is a [Prometheus exporter](https://prometheus.io/docs/instrumenting/exporters/#exporters-and-integrations) for [AWS CloudWatch](http://aws.amazon.com/cloudwatch/) metrics. It is written in Go and uses the official AWS SDK. ## News As of November 2024, YACE is part of [prometheus-community](https://github.com/prometheus-community). Read more about it in these announcement posts: * https://prometheus.io/blog/2024/11/19/yace-joining-prometheus-community/ * https://grafana.com/blog/2024/11/19/yace-moves-to-prometheus-community/ ## Alternatives Consider using the official [CloudWatch Exporter](https://github.com/prometheus/cloudwatch_exporter) if you prefer a Java implementation. ## Project Status While YACE is at version less than 1.0.0, expect that any new release might introduce breaking changes. We'll document changes in [CHANGELOG.md](CHANGELOG.md). Where feasible, features will be deprecated instead of being immediately changed or removed. This means that YACE will continue to work but might log warning messages. Expect deprecated features to be permanently changed/removed within the next 2/3 releases. ## Security Read more how to report a security vulnerability in [SECURITY.md](SECURITY.md). 
### Supported Versions Only the latest version gets security updates. We won't support older versions. ## Features * Stop worrying about your AWS IDs - Auto discovery of resources via tags * Structured logging (json and logfmt) * Filter monitored resources via regex * Automatic adding of tag labels to metrics * Automatic adding of dimension labels to metrics * Allows to export 0 even if CloudWatch returns nil * Allows exports metrics with CloudWatch timestamps (disabled by default) * Static metrics support for all cloudwatch metrics without auto discovery * Pull data from multiple AWS accounts using cross-account roles * Can be used as a library in an external application * Support the scraping of custom namespaces metrics with the CloudWatch Dimensions. * Supported services with auto discovery through tags: * `/aws/sagemaker/Endpoints` - Sagemaker Endpoints * `/aws/sagemaker/InferenceRecommendationsJobs` - Sagemaker Inference Recommender Jobs * `/aws/sagemaker/ProcessingJobs` - Sagemaker Processing Jobs * `/aws/sagemaker/TrainingJobs` - Sagemaker Training Jobs * `/aws/sagemaker/TransformJobs` - Sagemaker Batch Transform Jobs * `AmazonMWAA` - Managed Apache Airflow * `AWS/ACMPrivateCA` - ACM Private CA * `AWS/AmazonMQ` - Managed Message Broker Service * `AWS/AOSS` - OpenSearch Serverless * `AWS/ApiGateway` - ApiGateway (V1 and V2) * `AWS/ApplicationELB` - Application Load Balancer * `AWS/AppRunner` - Managed Container Apps Service * `AWS/AppStream` - AppStream * `AWS/AppSync` - AppSync * `AWS/Athena` - Athena * `AWS/AutoScaling` - Auto Scaling Group * `AWS/Backup` - Backup * `AWS/Bedrock` - GenerativeAI * `AWS/Billing` - Billing * `AWS/Cassandra` - Cassandra * `AWS/CertificateManager` - Certificate Manager * `AWS/ClientVPN` - Client-based VPN * `AWS/CloudFront` - Cloud Front * `AWS/Cognito` - Cognito * `AWS/DataSync` - DataSync * `AWS/DDoSProtection` - Distributed Denial of Service (DDoS) protection service * `AWS/DirectoryService` - Directory Services 
(MicrosoftAD) * `AWS/DMS` - Database Migration Service * `AWS/DocDB` - DocumentDB (with MongoDB compatibility) * `AWS/DX` - Direct Connect * `AWS/DynamoDB` - NoSQL Key-Value Database * `AWS/EBS` - Elastic Block Storage * `AWS/EC2` - Elastic Compute Cloud * `AWS/EC2Spot` - Elastic Compute Cloud for Spot Instances * `AWS/ECR` - Elastic Container Registry * `AWS/ECS` - Elastic Container Service (Service Metrics) * `AWS/EFS` - Elastic File System * `AWS/ElastiCache` - ElastiCache * `AWS/ElasticBeanstalk` - Elastic Beanstalk * `AWS/ElasticMapReduce` - Elastic MapReduce * `AWS/ELB` - Elastic Load Balancer * `AWS/EMRServerless` - Amazon EMR Serverless * `AWS/ES` - ElasticSearch * `AWS/Events` - EventBridge * `AWS/Firehose` - Managed Streaming Service * `AWS/FSx` - FSx File System * `AWS/GameLift` - GameLift * `AWS/GatewayELB` - Gateway Load Balancer * `AWS/GlobalAccelerator` - AWS Global Accelerator * `AWS/IoT` - IoT * `AWS/IPAM` - IP address manager * `AWS/Kafka` - Managed Apache Kafka * `AWS/KafkaConnect` - AWS MSK Connectors * `AWS/Kinesis` - Kinesis Data Stream * `AWS/KinesisAnalytics` - Kinesis Data Analytics for SQL Applications * `AWS/KMS` - Key Management Service * `AWS/Lambda` - Lambda Functions * `AWS/Logs` - CloudWatch Logs * `AWS/MediaConnect` - AWS Elemental MediaConnect * `AWS/MediaConvert` - AWS Elemental MediaConvert * `AWS/MediaLive` - AWS Elemental MediaLive * `AWS/MediaPackage` - AWS Elemental MediaPackage * `AWS/MediaTailor` - AWS Elemental MediaTailor * `AWS/MemoryDB` - AWS MemoryDB * `AWS/MWAA` - Managed Apache Airflow (Container, queue, and database metrics) * `AWS/NATGateway` - NAT Gateway * `AWS/Neptune` - Neptune * `AWS/NetworkELB` - Network Load Balancer * `AWS/NetworkFirewall` - Network Firewall * `AWS/Network Manager` - Network Manager * `AWS/PrivateLinkEndpoints` - VPC Endpoint * `AWS/PrivateLinkServices` - VPC Endpoint Service * `AWS/Prometheus` - Managed Service for Prometheus * `AWS/QLDB` - Quantum Ledger Database * `AWS/QuickSight` - 
QuickSight (Business Intelligence) * `AWS/RDS` - Relational Database Service * `AWS/Redshift` - Redshift Database * `AWS/Redshift-Serverless` - Redshift Serverless * `AWS/Route53` - Route53 Health Checks * `AWS/Route53Resolver` - Route53 Resolver * `AWS/RUM` - Real User Monitoring * `AWS/S3` - Object Storage * `AWS/Sagemaker/ModelBuildingPipeline` - Sagemaker Model Building Pipelines * `AWS/SageMaker` - Sagemaker invocations * `AWS/Scheduler` - EventBridge Scheduler * `AWS/SecretsManager` - Secrets Manager * `AWS/SES` - Simple Email Service * `AWS/SNS` - Simple Notification Service * `AWS/SQS` - Simple Queue Service * `AWS/States` - Step Functions * `AWS/StorageGateway` - On-premises access to cloud storage * `AWS/Timestream` - Time-series database service * `AWS/TransitGateway` - Transit Gateway * `AWS/TrustedAdvisor` - Trusted Advisor * `AWS/Usage` - Usage of some AWS resources and APIs * `AWS/VpcLattice` - VPC Lattice * `AWS/VPN` - VPN connection * `AWS/WAFV2` - Web Application Firewall v2 * `AWS/WorkSpaces` - Workspaces * `ContainerInsights` - EKS ContainerInsights (Dependency on Cloudwatch agent) * `CWAgent` - CloudWatch agent * `ECS/ContainerInsights` - ECS/ContainerInsights (Fargate metrics) * `Glue` - AWS Glue Jobs ## Feature flags To provide backwards compatibility, some of YACE's new features or breaking changes might be guarded under a feature flag. Refer to [docs/feature_flags.md](./docs/feature_flags.md) for details. ## Installing and running Refer to the [installation guide](docs/installation.md). ## Authentication The exporter will need to be running in an environment which has access to AWS. The exporter uses the [AWS SDK for Go](https://aws.github.io/aws-sdk-go-v2/docs/getting-started/) and supports providing authentication via [AWS's default credential chain](https://aws.github.io/aws-sdk-go-v2/docs/configuring-sdk/#specifying-credentials). 
Regardless of the method used to acquire the credentials, some permissions are needed for the exporter to work. As a quick start, the following IAM policy can be used to grant the all permissions required by YACE ```json { "Version": "2012-10-17", "Statement": [ { "Action": [ "tag:GetResources", "cloudwatch:GetMetricData", "cloudwatch:GetMetricStatistics", "cloudwatch:ListMetrics", "apigateway:GET", "aps:ListWorkspaces", "autoscaling:DescribeAutoScalingGroups", "dms:DescribeReplicationInstances", "dms:DescribeReplicationTasks", "ec2:DescribeTransitGatewayAttachments", "ec2:DescribeSpotFleetRequests", "shield:ListProtections", "storagegateway:ListGateways", "storagegateway:ListTagsForResource", "iam:ListAccountAliases" ], "Effect": "Allow", "Resource": "*" } ] } ``` If you would like to remove certain permissions based on your needs the policy can be adjusted based the CloudWatch namespaces you are scraping These are the bare minimum permissions required to run Static and Discovery Jobs ```json "tag:GetResources", "cloudwatch:GetMetricData", "cloudwatch:GetMetricStatistics", "cloudwatch:ListMetrics" ``` This permission is required to discover resources for the AWS/ApiGateway namespace ```json "apigateway:GET" ``` This permission is required to discover resources for the AWS/AutoScaling namespace ```json "autoscaling:DescribeAutoScalingGroups" ``` These permissions are required to discover resources for the AWS/DMS namespace ```json "dms:DescribeReplicationInstances", "dms:DescribeReplicationTasks" ``` This permission is required to discover resources for the AWS/EC2Spot namespace ```json "ec2:DescribeSpotFleetRequests" ``` This permission is required to discover resources for the AWS/Prometheus namespace ```json "aps:ListWorkspaces" ``` These permissions are required to discover resources for the AWS/StorageGateway namespace ```json "storagegateway:ListGateways", "storagegateway:ListTagsForResource" ``` This permission is required to discover resources for the 
AWS/TransitGateway namespace ```json "ec2:DescribeTransitGatewayAttachments" ``` This permission is required to discover protected resources for the AWS/DDoSProtection namespace ```json "shield:ListProtections" ``` The AWS IAM API supports creating account aliases, which are human-friendly names that can be used to easily identify accounts. An account can have at most a single alias, see ([docs](https://docs.aws.amazon.com/IAM/latest/APIReference/API_ListAccountAliases.html)). Each alias must be unique across an AWS network partition ([docs](https://docs.aws.amazon.com/IAM/latest/UserGuide/console_account-alias.html#AboutAccountAlias)). The following permission is required to get the alias for an account, which is exported as a label in the `aws_account_info` metric: ```json "iam:ListAccountAliases" ``` If running YACE inside an AWS EC2 instance, the exporter will automatically attempt to assume the associated IAM Role. If this is undesirable behavior turn off the use the metadata endpoint by setting the environment variable `AWS_EC2_METADATA_DISABLED=true`. ## Configuration Refer to the [configuration](docs/configuration.md) docs. 
## Metrics Examples ```text ### Metrics with exportedTagsOnMetrics aws_ec2_cpuutilization_maximum{dimension_InstanceId="i-someid", name="arn:aws:ec2:eu-west-1:472724724:instance/i-someid", tag_Name="jenkins"} 57.2916666666667 ### Info helper with tags aws_elb_info{name="arn:aws:elasticloadbalancing:eu-west-1:472724724:loadbalancer/a815b16g3417211e7738a02fcc13bbf9",tag_KubernetesCluster="production-19",tag_Name="",tag_kubernetes_io_cluster_production_19="owned",tag_kubernetes_io_service_name="nginx-ingress/private-ext",region="eu-west-1"} 0 aws_ec2_info{name="arn:aws:ec2:eu-west-1:472724724:instance/i-someid",tag_Name="jenkins"} 0 ### Track cloudwatch requests to calculate costs yace_cloudwatch_requests_total 168 ``` ## Query Examples without exportedTagsOnMetrics ```text # CPUUtilization + Name tag of the instance id - No more instance id needed for monitoring aws_ec2_cpuutilization_average + on (name) group_left(tag_Name) aws_ec2_info # Free Storage in Megabytes + tag Type of the elasticsearch cluster (aws_es_free_storage_space_sum + on (name) group_left(tag_Type) aws_es_info) / 1024 # Add kubernetes / kops tags on 4xx elb metrics (aws_elb_httpcode_backend_4_xx_sum + on (name) group_left(tag_KubernetesCluster,tag_kubernetes_io_service_name) aws_elb_info) # Availability Metric for ELBs (Successful requests / Total Requests) + k8s service name # Use nilToZero on all metrics else it won't work ((aws_elb_request_count_sum - on (name) group_left() aws_elb_httpcode_backend_4_xx_sum) - on (name) group_left() aws_elb_httpcode_backend_5_xx_sum) + on (name) group_left(tag_kubernetes_io_service_name) aws_elb_info # Forecast your elasticsearch disk size in 7 days and report metrics with tags type and version predict_linear(aws_es_free_storage_space_minimum[2d], 86400 * 7) + on (name) group_left(tag_type, tag_version) aws_es_info # Forecast your cloudwatch costs for next 32 days based on last 10 minutes # 1.000.000 Requests free # 0.01 Dollar for 1.000 GetMetricStatistics Api 
Requests (https://aws.amazon.com/cloudwatch/pricing/) ((increase(yace_cloudwatch_requests_total[10m]) * 6 * 24 * 32) - 100000) / 1000 * 0.01 ``` ## Override AWS endpoint urls to support local testing all AWS urls can be overridden with by setting an environment variable `AWS_ENDPOINT_URL` ```shell docker run -d --rm -v $PWD/credentials:/home/.aws/credentials -v $PWD/config.yml:/tmp/config.yml \ -e AWS_ENDPOINT_URL=http://localhost:4766 -p 5000:5000 --name yace quay.io/prometheuscommunity/yet-another-cloudwatch-exporter:latest ``` ## Options ### RoleArns Multiple roleArns are useful, when you are monitoring multi-account setup, where all accounts are using same AWS services. For example, you are running yace in monitoring account and you have number of accounts (for example newspapers, radio and television) running ECS clusters. Each account gives yace permissions to assume local IAM role, which has all the necessary permissions for Cloudwatch metrics. On this kind of setup, you could simply list: ```yaml apiVersion: v1alpha1 sts-region: eu-west-1 discovery: jobs: - type: AWS/ECS regions: - eu-north-1 roles: - roleArn: "arn:aws:iam::1111111111111:role/prometheus" # newspaper - roleArn: "arn:aws:iam::2222222222222:role/prometheus" # radio - roleArn: "arn:aws:iam::3333333333333:role/prometheus" # television metrics: - name: MemoryReservation statistics: - Average - Minimum - Maximum period: 600 length: 600 ``` Additionally, if the IAM role you want to assume requires an [External ID](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html?icmpid=docs_iam_console) you can specify it this way: ```yaml roles: - roleArn: "arn:aws:iam::1111111111111:role/prometheus" externalId: "shared-external-identifier" ``` ### Requests concurrency The flags 'cloudwatch-concurrency' and 'tag-concurrency' define the number of concurrent request to cloudwatch metrics and tags. Their default value is 5. 
Setting a higher value makes faster scraping times but can incur in throttling and the blocking of the API. ### Decoupled scraping The exporter scraped cloudwatch metrics in the background in fixed interval. This protects from the abuse of API requests that can cause extra billing in AWS account. The flag 'scraping-interval' defines the seconds between scrapes. The default value is 300. ## Embedding YACE in your application YACE can be used as a library and embedded into your application, see the [embedding guide](docs/embedding.md). ## Troubleshooting / Debugging ### Help my metrics are intermittent * Please, try out a bigger length e.g. for elb try out a length of 600 and a period of 600. Then test how low you can go without losing data. ELB metrics on AWS are written every 5 minutes (300) in default. ### My metrics only show new values after 5 minutes * Please, try to set a lower value for the 'scraping-interval' flag or set the 'decoupled-scraping' to false. ## Contribute [Development Setup / Guide](/CONTRIBUTE.md) ## Thank you * [Justin Santa Barbara](https://github.com/justinsb) - For telling me about AWS tags api which simplified a lot - Thanks! * [Brian Brazil](https://github.com/brian-brazil) - Who gave a lot of feedback regarding UX and prometheus lib - Thanks! 
[quay]: https://quay.io/repository/prometheuscommunity/yet-another-cloudwatch-exporter [docker]: https://hub.docker.com/r/prometheuscommunity/yet-another-cloudwatch-exporter ================================================ FILE: SECURITY.md ================================================ # Reporting a security issue The Prometheus security policy, including how to report vulnerabilities, can be found here: [https://prometheus.io/docs/operating/security/](https://prometheus.io/docs/operating/security/) ================================================ FILE: VERSION ================================================ 0.64.0 ================================================ FILE: cmd/yace/main.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package main import ( "context" "fmt" "log/slog" "net/http" "net/http/pprof" "os" "slices" "strings" "github.com/prometheus/common/promslog" promslogflag "github.com/prometheus/common/promslog/flag" "github.com/prometheus/common/version" "github.com/urfave/cli/v2" "golang.org/x/sync/semaphore" exporter "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" ) const ( enableFeatureFlag = "enable-feature" htmlVersion = `<html> <head><title>Yet Another CloudWatch Exporter

Thanks for using YACE :)

Version: %s

Metrics

%s ` htmlPprof = `

Pprof

` ) var sem = semaphore.NewWeighted(1) const ( defaultLogLevel = "info" defaultLogFormat = "json" ) var ( addr string configFile string logLevel string logFormat string fips bool cloudwatchConcurrency cloudwatch.ConcurrencyConfig tagConcurrency int scrapingInterval int metricsPerQuery int labelsSnakeCase bool profilingEnabled bool logger *slog.Logger ) func main() { app := NewYACEApp() if err := app.Run(os.Args); err != nil { // if we exit very early we'll not have set up the logger yet if logger == nil { jsonFmt := promslog.NewFormat() _ = jsonFmt.Set("json") logger = promslog.New(&promslog.Config{Format: jsonFmt}) } logger.Error("Error running yace", "err", err) os.Exit(1) } } // NewYACEApp creates a new cli.App implementing the YACE entrypoints and CLI arguments. func NewYACEApp() *cli.App { yace := cli.NewApp() yace.Name = "Yet Another CloudWatch Exporter" yace.Version = version.Version yace.Usage = "YACE configured to retrieve CloudWatch metrics through the AWS API" yace.Description = "" yace.Authors = []*cli.Author{ {Name: "", Email: ""}, } yace.Flags = []cli.Flag{ &cli.StringFlag{ Name: "listen-address", Value: ":5000", Usage: "The address to listen on", Destination: &addr, EnvVars: []string{"listen-address"}, }, &cli.StringFlag{ Name: "config.file", Value: "config.yml", Usage: "Path to configuration file", Destination: &configFile, EnvVars: []string{"config.file"}, }, &cli.StringFlag{ Name: "log.level", Value: defaultLogLevel, Usage: promslogflag.LevelFlagHelp, Destination: &logLevel, Action: func(_ *cli.Context, s string) error { if !slices.Contains(promslog.LevelFlagOptions, s) { return fmt.Errorf("unrecognized log format %q", s) } return nil }, }, &cli.StringFlag{ Name: "log.format", Value: defaultLogFormat, Usage: promslogflag.FormatFlagHelp, Destination: &logFormat, Action: func(_ *cli.Context, s string) error { if !slices.Contains(promslog.FormatFlagOptions, s) { return fmt.Errorf("unrecognized log format %q", s) } return nil }, }, &cli.BoolFlag{ 
Name: "fips", Value: false, Usage: "Use FIPS compliant AWS API endpoints", Destination: &fips, }, &cli.IntFlag{ Name: "cloudwatch-concurrency", Value: exporter.DefaultCloudwatchConcurrency.SingleLimit, Usage: "Maximum number of concurrent requests to CloudWatch API.", Destination: &cloudwatchConcurrency.SingleLimit, }, &cli.BoolFlag{ Name: "cloudwatch-concurrency.per-api-limit-enabled", Value: exporter.DefaultCloudwatchConcurrency.PerAPILimitEnabled, Usage: "Whether to enable the per API CloudWatch concurrency limiter. When enabled, the concurrency `-cloudwatch-concurrency` flag will be ignored.", Destination: &cloudwatchConcurrency.PerAPILimitEnabled, }, &cli.IntFlag{ Name: "cloudwatch-concurrency.list-metrics-limit", Value: exporter.DefaultCloudwatchConcurrency.ListMetrics, Usage: "Maximum number of concurrent requests to ListMetrics CloudWatch API. Used if the -cloudwatch-concurrency.per-api-limit-enabled concurrency limiter is enabled.", Destination: &cloudwatchConcurrency.ListMetrics, }, &cli.IntFlag{ Name: "cloudwatch-concurrency.get-metric-data-limit", Value: exporter.DefaultCloudwatchConcurrency.GetMetricData, Usage: "Maximum number of concurrent requests to GetMetricData CloudWatch API. Used if the -cloudwatch-concurrency.per-api-limit-enabled concurrency limiter is enabled.", Destination: &cloudwatchConcurrency.GetMetricData, }, &cli.IntFlag{ Name: "cloudwatch-concurrency.get-metric-statistics-limit", Value: exporter.DefaultCloudwatchConcurrency.GetMetricStatistics, Usage: "Maximum number of concurrent requests to GetMetricStatistics CloudWatch API. 
Used if the -cloudwatch-concurrency.per-api-limit-enabled concurrency limiter is enabled.", Destination: &cloudwatchConcurrency.GetMetricStatistics, }, &cli.IntFlag{ Name: "tag-concurrency", Value: exporter.DefaultTaggingAPIConcurrency, Usage: "Maximum number of concurrent requests to Resource Tagging API.", Destination: &tagConcurrency, }, &cli.IntFlag{ Name: "scraping-interval", Value: 300, Usage: "Seconds to wait between scraping the AWS metrics", Destination: &scrapingInterval, EnvVars: []string{"scraping-interval"}, }, &cli.IntFlag{ Name: "metrics-per-query", Value: exporter.DefaultMetricsPerQuery, Usage: "Number of metrics made in a single GetMetricsData request", Destination: &metricsPerQuery, EnvVars: []string{"metrics-per-query"}, }, &cli.BoolFlag{ Name: "labels-snake-case", Value: exporter.DefaultLabelsSnakeCase, Usage: "Whether labels should be output in snake case instead of camel case", Destination: &labelsSnakeCase, }, &cli.BoolFlag{ Name: "profiling.enabled", Value: false, Usage: "Enable pprof endpoints", Destination: &profilingEnabled, }, &cli.StringSliceFlag{ Name: enableFeatureFlag, Usage: "Comma-separated list of enabled features", }, } yace.Commands = []*cli.Command{ { Name: "verify-config", Aliases: []string{"vc"}, Usage: "Loads and attempts to parse config file, then exits. 
Useful for CI/CD validation", Flags: []cli.Flag{ &cli.StringFlag{Name: "config.file", Value: "config.yml", Usage: "Path to configuration file.", Destination: &configFile}, }, Action: func(_ *cli.Context) error { logger = newLogger(logFormat, logLevel).With("version", version.Version) logger.Info("Parsing config") cfg := config.ScrapeConf{} if _, err := cfg.Load(configFile, logger); err != nil { logger.Error("Couldn't read config file", "err", err, "path", configFile) os.Exit(1) } logger.Info("Config file is valid", "path", configFile) os.Exit(0) return nil }, }, { Name: "version", Aliases: []string{"v"}, Usage: "prints current yace version.", Action: func(_ *cli.Context) error { fmt.Println(version.Version) os.Exit(0) return nil }, }, } yace.Action = startScraper return yace } func startScraper(c *cli.Context) error { logger = newLogger(logFormat, logLevel).With("version", version.Version) // log warning if the two concurrency limiting methods are configured via CLI if c.IsSet("cloudwatch-concurrency") && c.IsSet("cloudwatch-concurrency.per-api-limit-enabled") { logger.Warn("Both `cloudwatch-concurrency` and `cloudwatch-concurrency.per-api-limit-enabled` are set. 
`cloudwatch-concurrency` will be ignored, and the per-api concurrency limiting strategy will be favoured.") } logger.Info("Parsing config") cfg := config.ScrapeConf{} jobsCfg, err := cfg.Load(configFile, logger) if err != nil { return fmt.Errorf("couldn't read %s: %w", configFile, err) } featureFlags := c.StringSlice(enableFeatureFlag) s := NewScraper(featureFlags) cachingFactory, err := clients.NewFactory(logger, jobsCfg, fips) if err != nil { return fmt.Errorf("failed to construct aws sdk v2 client cache: %w", err) } ctx, cancelRunningScrape := context.WithCancel(context.Background()) go s.decoupled(ctx, logger, jobsCfg, cachingFactory) mux := http.NewServeMux() if profilingEnabled { mux.HandleFunc("/debug/pprof/", pprof.Index) mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) mux.HandleFunc("/debug/pprof/profile", pprof.Profile) mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) mux.HandleFunc("/debug/pprof/trace", pprof.Trace) } mux.HandleFunc("/metrics", s.makeHandler()) mux.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) { pprofLink := "" if profilingEnabled { pprofLink = htmlPprof } _, _ = fmt.Fprintf(w, htmlVersion, version.Version, pprofLink) }) mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) _, _ = w.Write([]byte("ok")) }) mux.HandleFunc("/reload", func(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { w.WriteHeader(http.StatusNotFound) return } logger.Info("Parsing config") newCfg := config.ScrapeConf{} newJobsCfg, err := newCfg.Load(configFile, logger) if err != nil { logger.Error("Couldn't read config file", "err", err, "path", configFile) return } logger.Info("Reset clients cache") cache, err := clients.NewFactory(logger, newJobsCfg, fips) if err != nil { logger.Error("Failed to construct aws sdk v2 client cache", "err", err, "path", configFile) return } cancelRunningScrape() ctx, cancelRunningScrape = context.WithCancel(context.Background()) go 
s.decoupled(ctx, logger, newJobsCfg, cache) }) logger.Info("Yace startup completed", "build_info", version.Info(), "build_context", version.BuildContext(), "feature_flags", strings.Join(featureFlags, ",")) srv := &http.Server{Addr: addr, Handler: mux} return srv.ListenAndServe() } func newLogger(format, level string) *slog.Logger { // If flag parsing was successful, then we know that format and level // are both valid options; no need to error check their returns, just // set their values. f := promslog.NewFormat() _ = f.Set(format) lvl := promslog.NewLevel() _ = lvl.Set(level) return promslog.New(&promslog.Config{Format: f, Level: lvl}) } ================================================ FILE: cmd/yace/main_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package main

import (
	"testing"

	"github.com/stretchr/testify/require"
	"github.com/urfave/cli/v2"
)

// TestYACEApp_FeatureFlagsParsedCorrectly checks that the -enable-feature
// flag is split into individual feature names, and that the slice is empty
// when the flag is omitted.
func TestYACEApp_FeatureFlagsParsedCorrectly(t *testing.T) {
	app := NewYACEApp()

	// A comma-separated value yields one entry per feature.
	app.Action = func(c *cli.Context) error {
		require.Equal(t, []string{"feature1", "feature2"}, c.StringSlice(enableFeatureFlag))
		return nil
	}
	require.NoError(t, app.Run([]string{"yace", "-enable-feature=feature1,feature2"}), "error running test command")

	// Omitting the flag yields no features at all.
	app.Action = func(c *cli.Context) error {
		require.Len(t, c.StringSlice(enableFeatureFlag), 0)
		return nil
	}
	require.NoError(t, app.Run([]string{"yace"}), "error running test command")
}

================================================
FILE: cmd/yace/scraper.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
	"context"
	"log/slog"
	"net/http"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"

	exporter "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Scraper runs periodic scrapes and atomically swaps in a fresh Prometheus
// registry after each one, so /metrics always serves a consistent snapshot.
type Scraper struct {
	// registry holds the result of the most recent completed scrape.
	registry     atomic.Pointer[prometheus.Registry]
	featureFlags []string
}

// cachingFactory is the subset of client-factory behaviour the scraper
// needs: credential refresh before a scrape and clearing afterwards.
type cachingFactory interface {
	clients.Factory
	Refresh()
	Clear()
}

// NewScraper returns a Scraper with an empty registry so /metrics is
// servable before the first scrape completes.
func NewScraper(featureFlags []string) *Scraper {
	s := &Scraper{
		registry:     atomic.Pointer[prometheus.Registry]{},
		featureFlags: featureFlags,
	}
	s.registry.Store(prometheus.NewRegistry())
	return s
}

// makeHandler returns an http handler serving whatever registry was stored
// by the latest scrape at the time of the request.
func (s *Scraper) makeHandler() func(http.ResponseWriter, *http.Request) {
	return func(w http.ResponseWriter, r *http.Request) {
		handler := promhttp.HandlerFor(s.registry.Load(), promhttp.HandlerOpts{
			DisableCompression: false,
		})
		handler.ServeHTTP(w, r)
	}
}

// decoupled runs one immediate scrape and then re-scrapes on a fixed ticker
// until ctx is cancelled.
func (s *Scraper) decoupled(ctx context.Context, logger *slog.Logger, jobsCfg model.JobsConfig, cache cachingFactory) {
	logger.Debug("Starting scraping async")
	s.scrape(ctx, logger, jobsCfg, cache)

	scrapingDuration := time.Duration(scrapingInterval) * time.Second
	ticker := time.NewTicker(scrapingDuration)
	logger.Debug("Initial scrape completed", "scraping_interval", scrapingInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			logger.Debug("Starting scraping async")
			go s.scrape(ctx, logger, jobsCfg, cache)
		}
	}
}

// scrape performs a single scrape into a fresh registry and publishes it.
// The package-level semaphore guarantees a single scrape at a time; an
// overlapping attempt is logged and dropped.
func (s *Scraper) scrape(ctx context.Context, logger *slog.Logger, jobsCfg model.JobsConfig, cache cachingFactory) {
	if !sem.TryAcquire(1) {
		// This shouldn't happen under normal use, users should adjust their configuration when this occurs.
		// Let them know by logging a warning.
		logger.Warn("Another scrape is already in process, will not start a new one. " +
			"Adjust your configuration to ensure the previous scrape completes first.")
		return
	}
	defer sem.Release(1)

	newRegistry := prometheus.NewRegistry()
	for _, metric := range exporter.Metrics {
		if err := newRegistry.Register(metric); err != nil {
			// BUGFIX: previously the registration error was silently dropped;
			// include it so failures are diagnosable.
			logger.Warn("Could not register cloudwatch api metric", "err", err)
		}
	}

	// since we have called refresh, we have loaded all the credentials
	// into the clients and it is now safe to call concurrently. Defer the
	// clearing, so we always clear credentials before the next scrape
	cache.Refresh()
	defer cache.Clear()

	options := []exporter.OptionsFunc{
		exporter.MetricsPerQuery(metricsPerQuery),
		exporter.LabelsSnakeCase(labelsSnakeCase),
		exporter.EnableFeatureFlag(s.featureFlags...),
		exporter.TaggingAPIConcurrency(tagConcurrency),
	}

	// Pick the concurrency strategy selected on the command line.
	if cloudwatchConcurrency.PerAPILimitEnabled {
		options = append(options, exporter.CloudWatchPerAPILimitConcurrency(cloudwatchConcurrency.ListMetrics, cloudwatchConcurrency.GetMetricData, cloudwatchConcurrency.GetMetricStatistics))
	} else {
		options = append(options, exporter.CloudWatchAPIConcurrency(cloudwatchConcurrency.SingleLimit))
	}

	err := exporter.UpdateMetrics(
		ctx,
		logger,
		jobsCfg,
		newRegistry,
		cache,
		options...,
	)
	if err != nil {
		logger.Error("error updating metrics", "err", err)
	}

	s.registry.Store(newRegistry)
	logger.Debug("Metrics scraped")
}

================================================
FILE: docker-compose/README.md
================================================
## Setting up a local docker-compose environment

This folder contains a [docker-compose](./docker-compose.yaml) configuration file to start a local development environment.

This includes:
- YACE, using as config file [yace-config.yaml](./yace-config.yaml)
- Prometheus, with a scraping configuration targeting YACE
- Grafana, with no login required and the Prometheus datasource configured

Docker will mount the `~/.aws` directory in order to re-utilize the host's AWS credentials.
For selecting which region and AWS profile to use, fill in the `AWS_REGION` and `AWS_PROFILE` variables passed to the `docker-compose up` command, as shown below. ```bash # Build the YACE docker image docker-compose build # Start all docker-compose resource AWS_REGION=us-east-1 AWS_PROFILE=sandbox docker-compose up -d ``` After that, Prometheus will be exposed at [http://localhost:9090](http://localhost:9090), and Grafana in [http://localhost:3000](http://localhost:3000). ================================================ FILE: docker-compose/docker-compose.yaml ================================================ version: '3.8' networks: monitoring: driver: bridge volumes: prometheus_data: {} services: grafana: image: grafana/grafana:9.4.3 ports: - 3000:3000/tcp volumes: - ./grafana/datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yml environment: # configure no-login required access GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin" GF_AUTH_ANONYMOUS_ENABLED: "true" GF_AUTH_BASIC_ENABLED: "false" networks: - monitoring prometheus: image: prom/prometheus:latest container_name: prometheus restart: unless-stopped volumes: - ./prometheus.yaml:/etc/prometheus/prometheus.yaml - prometheus_data:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yaml' - '--storage.tsdb.path=/prometheus' - '--web.console.libraries=/etc/prometheus/console_libraries' - '--web.console.templates=/etc/prometheus/consoles' - '--web.enable-lifecycle' ports: - "9090:9090" expose: - 9090 networks: - monitoring yace: build: context: ../ dockerfile: Dockerfile restart: always environment: AWS_REGION: ${AWS_REGION} AWS_PROFILE: ${AWS_PROFILE} expose: - 8080 volumes: - ./yace-config.yaml:/tmp/config.yml - $HOME/.aws:/home/.aws:ro command: - -listen-address=:8080 - -config.file=/tmp/config.yml networks: - monitoring ================================================ FILE: docker-compose/grafana/datasource.yaml ================================================ apiVersion: 1 datasources: - name: 
Prometheus type: prometheus access: proxy orgId: 1 url: http://prometheus:9090 basicAuth: false isDefault: true editable: true ================================================ FILE: docker-compose/prometheus.yaml ================================================ global: scrape_interval: 1m scrape_configs: - job_name: prometheus scrape_interval: 1m static_configs: - targets: - localhost:9090 - job_name: yace static_configs: - targets: - yace:8080 ================================================ FILE: docker-compose/yace-config.yaml ================================================ apiVersion: v1alpha1 sts-region: us-east-1 discovery: jobs: - type: AWS/ECS regions: [us-east-1] period: 300 length: 300 metrics: - name: CPUReservation statistics: - Average - name: MemoryReservation statistics: - Average - name: CPUUtilization statistics: - Average - name: MemoryUtilization statistics: - Average - type: AWS/EC2 regions: [us-east-1] includeContextOnInfoMetrics: true metrics: - name: CPUUtilization statistics: - Average ================================================ FILE: docs/configuration.md ================================================ # Configuration YACE has two configuration mechanisms: - [command-line flags](#command-line-flags) - [yaml configuration file](#yaml-configuration-file) The command-line flags configure things which cannot change at runtime, such as the listen port for the HTTP server. The yaml file is used to configure scrape jobs and can be reloaded at runtime. The configuration file path is passed to YACE through the `-config.file` command line flag. ## Command-line flags Command-line flags are used to configure settings of the exporter which cannot be updated at runtime. All flags may be prefixed with either one hypen or two (i.e., both `-config.file` and `--config.file` are valid). 
| Flag | Description | Default value |
| --- | --- | --- |
| `-listen-address` | Network address to listen to | `:5000` |
| `-config.file` | Path to the configuration file | `config.yml` |
| `-log.format` | Output format of log messages. One of: [logfmt, json] | `json` |
| `-log.level` | Log at selected level. One of: [debug, info, warn, error] | `info` |
| `-fips` | Use FIPS compliant AWS API | `false` |
| `-cloudwatch-concurrency` | Maximum number of concurrent requests to CloudWatch API | `5` |
| `-cloudwatch-concurrency.per-api-limit-enabled` | Enables a concurrency limiter, that has a specific limit per CloudWatch API call. | `false` |
| `-cloudwatch-concurrency.list-metrics-limit` | Maximum number of concurrent requests to CloudWatch `ListMetrics` API. Only applicable if `per-api-limit-enabled` is `true`. | `5` |
| `-cloudwatch-concurrency.get-metric-data-limit` | Maximum number of concurrent requests to CloudWatch `GetMetricData` API. Only applicable if `per-api-limit-enabled` is `true`. | `5` |
| `-cloudwatch-concurrency.get-metric-statistics-limit` | Maximum number of concurrent requests to CloudWatch `GetMetricStatistics` API. Only applicable if `per-api-limit-enabled` is `true`. | `5` |
| `-tag-concurrency` | Maximum number of concurrent requests to Resource Tagging API | `5` |
| `-scraping-interval` | Seconds to wait between scraping the AWS metrics | `300` |
| `-metrics-per-query` | Number of metrics made in a single GetMetricData request | `500` |
| `-labels-snake-case` | Output labels on metrics in snake case instead of camel case | `false` |
| `-profiling.enabled` | Enable the /debug/pprof endpoints for profiling | `false` |

## YAML configuration file

To specify which configuration file to load, pass the `-config.file` flag at the command line.

The file is written in the YAML format, defined by the scheme below. Brackets indicate that a parameter is optional.
Below are the top level fields of the YAML configuration file: ```yaml # Configuration file version. Must be set to "v1alpha1" currently. apiVersion: v1alpha1 # STS regional endpoint (optional) [ sts-region: ] # Note that at least one of the following blocks must be defined. # Configurations for jobs of type "auto-discovery" discovery: # Configurations for jobs of type "static" static: [ - ... ] # Configurations for jobs of type "custom namespace" customNamespace: [ - ... ] ``` Note that while the `discovery`, `static` and `customNamespace` blocks are all optionals, at least one of them must be defined. ### `discovery_jobs_list_config` The `discovery_jobs_list_config` block configures jobs of type "auto-discovery". > Note: Only [tagged resources](https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html) are discovered. ```yaml # List of tags per service to export to all metrics [exportedTagsOnMetrics: ] # List of "auto-discovery" jobs jobs: [ - ... ] ``` ### `discovery_job_config` The `discovery_job_config` block specifies the details of a job of type "auto-discovery". ```yaml # List of AWS regions regions: [ - ... ] # Cloudwatch service alias ("alb", "ec2", etc) or namespace name ("AWS/EC2", "AWS/S3", etc) type: # List of IAM roles to assume (optional) roles: [ - ... ] # List of Key/Value pairs to use for tag filtering (all must match). # The key is the AWS Tag key and is case-sensitive # The value will be treated as a regex searchTags: [ - ... ] # Custom tags to be added as a list of Key/Value pairs customTags: [ - ... ] # List of metric dimensions to query. Before querying metric values, the total list of metrics will be filtered to only those that contain exactly this list of dimensions. An empty or undefined list results in all dimension combinations being included. dimensionNameRequirements: [ - ... ] # Specifies how the current time is rounded before calculating start/end times for CloudWatch GetMetricData requests. 
# This rounding is optimize performance of the CloudWatch request. # This setting only makes sense to use if, for example, you specify a very long period (such as 1 day) but want your times rounded to a shorter time (such as 5 minutes). For example, a value of 300 will round the current time to the nearest 5 minutes. If not specified, the roundingPeriod defaults to the same value as shortest period in the job. [ roundingPeriod: ] # Passes down the flag `--recently-active PT3H` to the CloudWatch API. This will only return metrics that have been active in the last 3 hours. # This is useful for reducing the number of metrics returned by CloudWatch, which can be very large for some services. See AWS Cloudwatch API docs for [ListMetrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html) for more details. [ recentlyActiveOnly: ] # Can be used to include contextual information (account_id, region, and customTags) on info metrics and cloudwatch metrics. This can be particularly # useful when cloudwatch metrics might not be present or when using info metrics to understand where your resources exist [ includeContextOnInfoMetrics: ] # (optional) This is an experimental feature that can be used to enable enhanced metrics for specific services within this discovery job. It might be subject to changes in future releases. enhancedMetrics: [ - ... ] # List of statistic types, e.g. "Minimum", "Maximum", etc (General Setting for all metrics in this job) statistics: [ - ... ] # Statistic period in seconds (General Setting for all metrics in this job) [ period: ] # How far back to request data for in seconds (General Setting for all metrics in this job) [ length: ] # If set it will request metrics up until `current_time - delay` (General Setting for all metrics in this job) [ delay: ] # Return 0 value if Cloudwatch returns no metrics at all. 
By default `NaN` will be reported (General Setting for all metrics in this job) [ nilToZero: ] # Export the metric with the original CloudWatch timestamp (General Setting for all metrics in this job) [ addCloudwatchTimestamp: ] # Enables the inclusion of past metric data points from the CloudWatch response if available. # This is useful when a metric is configured with a 60-second period and a 300-second duration, ensuring that all # five data points are exposed at the metrics endpoint instead of only the latest one. # Note: This option requires `addCloudwatchTimestamp` to be enabled. # The metric destination must support out of order timestamps, see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb # (General Setting for all metrics in this job) [ exportAllDataPoints: ] # List of metric definitions metrics: [ - ... ] ``` Example config file: ```yaml apiVersion: v1alpha1 sts-region: eu-west-1 discovery: exportedTagsOnMetrics: kafka: - Name jobs: - type: kafka regions: - eu-west-1 searchTags: - key: env value: dev metrics: - name: BytesOutPerSec statistics: - Average period: 600 length: 600 ``` ### `static_job_config` The `static_job_config` block configures jobs of type "static". ```yaml # Name of the job (required) name: # CloudWatch namespace namespace: # List of AWS regions regions: [ - ...] # List of IAM roles to assume (optional) roles: [ - ... ] # Custom tags to be added as a list of Key/Value pairs customTags: [ - ... ] # CloudWatch metric dimensions as a list of Name/Value pairs dimensions: [ ] # List of metric definitions metrics: [ - ... 
] ``` Example config file: ```yaml apiVersion: v1alpha1 sts-region: eu-west-1 static: - namespace: AWS/AutoScaling name: must_be_set regions: - eu-west-1 dimensions: - name: AutoScalingGroupName value: MyGroup customTags: - key: CustomTag value: CustomValue metrics: - name: GroupInServiceInstances statistics: - Minimum period: 60 length: 300 ``` ### `custom_namespace_job_config` The `custom_namespace_job_config` block configures jobs of type "custom namespace". ```yaml # Name of the job (required) name: # CloudWatch namespace namespace: # List of AWS regions regions: [ - ...] # List of IAM roles to assume (optional) roles: [ - ... ] # Custom tags to be added as a list of Key/Value pairs customTags: [ - ... ] # List of metric dimensions to query. Before querying metric values, the total list of metrics will be filtered to only those that contain exactly this list of dimensions. An empty or undefined list results in all dimension combinations being included. dimensionNameRequirements: [ - ... ] # Specifies how the current time is rounded before calculating start/end times for CloudWatch GetMetricData requests. # This rounding is optimize performance of the CloudWatch request. # This setting only makes sense to use if, for example, you specify a very long period (such as 1 day) but want your times rounded to a shorter time (such as 5 minutes). For example, a value of 300 will round the current time to the nearest 5 minutes. If not specified, the roundingPeriod defaults to the same value as shortest period in the job. [ roundingPeriod: ] # Passes down the flag `--recently-active PT3H` to the CloudWatch API. This will only return metrics that have been active in the last 3 hours. # This is useful for reducing the number of metrics returned by CloudWatch, which can be very large for some services. See AWS Cloudwatch API docs for [ListMetrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html) for more details. 
[ recentlyActiveOnly: ] # List of statistic types, e.g. "Minimum", "Maximum", etc (General Setting for all metrics in this job) statistics: [ - ... ] # Statistic period in seconds (General Setting for all metrics in this job) [ period: ] # How far back to request data for in seconds (General Setting for all metrics in this job) [ length: ] # If set it will request metrics up until `current_time - delay` (General Setting for all metrics in this job) [ delay: ] # Return 0 value if Cloudwatch returns no metrics at all. By default `NaN` will be reported (General Setting for all metrics in this job) [ nilToZero: ] # Export the metric with the original CloudWatch timestamp (General Setting for all metrics in this job) [ addCloudwatchTimestamp: ] # Enables the inclusion of past metric data points from the CloudWatch response if available. # This is useful when a metric is configured with a 60-second period and a 300-second duration, ensuring that all # five data points are exposed at the metrics endpoint instead of only the latest one. # Note: This option requires `addCloudwatchTimestamp` to be enabled. # The metric destination must support out of order timestamps, see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb # (General Setting for all metrics in this job) [ exportAllDataPoints: ] # List of metric definitions metrics: [ - ... ] ``` Example config file: ```yaml apiVersion: v1alpha1 sts-region: eu-west-1 customNamespace: - name: customEC2Metrics namespace: CustomEC2Metrics regions: - us-east-1 metrics: - name: cpu_usage_idle statistics: - Average period: 300 length: 300 nilToZero: true - name: disk_free statistics: - Average period: 300 length: 300 nilToZero: true ``` ### `metric_config` Some settings at the job level are overridden by settings at the metric level. This allows for a specific setting to override a general setting. ```yaml # CloudWatch metric name name: # List of statistic types, e.g. "Minimum", "Maximum", etc. 
(Overrides job level setting) statistics: [ - ... ] # Statistic period in seconds (Overrides job level setting) [ period: ] # How far back to request data for in seconds (Overrides job level setting) [ length: ] # If set it will request metrics up until `current_time - delay` (Overrides job level setting) [ delay: ] # Return 0 value if Cloudwatch returns no metrics at all. By default `NaN` will be reported (Overrides job level setting) [ nilToZero: ] # Export the metric with the original CloudWatch timestamp (Overrides job level setting) [ addCloudwatchTimestamp: ] # Enables the inclusion of past metric data points from the CloudWatch response if available. # This is useful when a metric is configured with a 60-second period and a 300-second duration, ensuring that all # five data points are exposed at the metrics endpoint instead of only the latest one. # Note: This option requires `addCloudwatchTimestamp` to be enabled. # The metric destination must support out of order timestamps, see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb # (General Setting for all metrics in this job) [ exportAllDataPoints: ] ``` Notes: - Available statistics: `Maximum`, `Minimum`, `Sum`, `SampleCount`, `Average`, `pXX` (e.g. `p90`). - Watch out using `addCloudwatchTimestamp` for sparse metrics, e.g from S3, since Prometheus won't scrape metrics containing timestamps older than 2-3 hours. Also the same applies when enabling `exportAllDataPoints` in any metric. 
### `exported_tags_config` This is an example of the `exported_tags_config` block: ```yaml exportedTagsOnMetrics: ebs: - VolumeId kafka: - Name ``` ### `role_config` This is an example of the `role_config` block: ```yaml roles: - roleArn: "arn:aws:iam::123456789012:role/Prometheus" externalId: "shared-external-identifier" # optional ``` ### `search_tags_config` This is an example of the `search_tags_config` block: ```yaml searchTags: - key: env value: production ``` ### `custom_tags_config` This is an example of the `custom_tags_config` block: ```yaml customTags: - key: CustomTag value: CustomValue ``` ### `dimensions_config` This is an example of the `dimensions_config` block: ```yaml dimensions: - name: AutoScalingGroupName value: MyGroup ``` ### `enhanced_metrics_config` The `enhanced_metrics_config` block allows enabling enhanced metrics for specific metrics within a discovery job. Currently supported enhanced metrics are: - AWS/Lambda (Timeout) - The maximum execution duration permitted for the function before termination. - AWS/DynamoDB (ItemCount) - The count of items in the table, updated approximately every six hours; may not reflect recent changes. - AWS/RDS (AllocatedStorage) - The storage capacity in bytes allocated for the DB instance. - AWS/ElastiCache (NumCacheNodes) - The count of cache nodes in the cluster; must be 1 for Valkey or Redis OSS clusters, or between 1 and 40 for Memcached clusters. ```yaml enhancedMetrics: - name: ItemCount ``` ================================================ FILE: docs/embedding.md ================================================ # Embedding YACE in your application It is possible to embed YACE into an external Go application. This mode might be useful to you if you would like to scrape on demand or run in a stateless manner. See [`exporter.UpdateMetrics()`](https://pkg.go.dev/github.com/prometheus-community/yet-another-cloudwatch-exporter@v0.50.0/pkg#UpdateMetrics) for the documentation of the exporter entrypoint. 
Applications embedding YACE: - [Grafana Agent](https://github.com/grafana/agent/tree/release-v0.33/pkg/integrations/cloudwatch_exporter) ================================================ FILE: docs/feature_flags.md ================================================ # Feature flags List of features or changes that are disabled by default since they are breaking changes or are considered experimental. Their behavior can change in future releases which will be communicated via the release changelog. You can enable them using the `-enable-feature` flag with a comma separated list of features. They may be enabled by default in future versions. ## Always return info metrics `-enable-feature=always-return-info-metrics` Return info metrics even if there are no CloudWatch metrics for the resource. This is useful if you want to get a complete picture of your estate, for example if you have some resources which have not yet been used. ================================================ FILE: docs/installation.md ================================================ # Installing and running YACE There are various way to run YACE. ## Binaries See the [Releases](https://github.com/prometheus-community/yet-another-cloudwatch-exporter/releases) page to download binaries for various OS and arch. ## Docker Docker images are available on GitHub Container Registry [here](https://github.com/prometheus-community/yet-another-cloudwatch-exporter/pkgs/container/yet-another-cloudwatch-exporter). The image name is `quay.io/prometheuscommunity/yet-another-cloudwatch-exporter` and we support tags of the form `vX.Y.Z`. To pull and run the image locally use: ```shell docker run -d --rm \ -v $PWD/credentials:/home/.aws/credentials \ -v $PWD/config.yml:/tmp/config.yml \ -p 5000:5000 \ --name yace quay.io/prometheuscommunity/yet-another-cloudwatch-exporter:latest ``` Do not forget the `v` prefix in the image version tag. ## Docker compose See the [docker-compose directory](../docker-compose/README.md). 
## Kubernetes ### Install with HELM The official [HELM chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-yet-another-cloudwatch-exporter) is the recommended way to install YACE in a Kubernetes cluster. ### Install with manifests Example: ```yaml --- apiVersion: v1 kind: ConfigMap metadata: name: yace data: config.yml: |- --- # Start of config file --- apiVersion: apps/v1 kind: Deployment metadata: name: yace spec: replicas: 1 selector: matchLabels: name: yace template: metadata: labels: name: yace spec: containers: - name: yace image: quay.io/prometheuscommunity/yet-another-cloudwatch-exporter:vX.Y.Z # release version as tag - Do not forget the version 'v' imagePullPolicy: IfNotPresent args: - "--config.file=/tmp/config.yml" ports: - name: app containerPort: 5000 volumeMounts: - name: config-volume mountPath: /tmp volumes: - name: config-volume configMap: name: yace ``` ================================================ FILE: examples/alb.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/ApplicationELB regions: - us-east-1 period: 300 length: 300 metrics: - name: HealthyHostCount statistics: [Sum] - name: UnHealthyHostCount statistics: [Sum] - name: RequestCount statistics: [Average] - name: TargetResponseTime statistics: [Average] - name: ActiveConnectionCount statistics: [Sum] - name: NewConnectionCount statistics: [Sum] - name: RejectedConnectionCount statistics: [Sum] - name: TargetConnectionErrorCount statistics: [Sum] - name: IPv6RequestCount statistics: [Sum] - name: RequestCountPerTarget statistics: [Sum] - name: NonStickyRequestCount statistics: [Sum] - name: HTTPCode_Target_2XX_Count statistics: [Sum] - name: HTTPCode_Target_3XX_Count statistics: [Sum] - name: HTTPCode_Target_4XX_Count statistics: [Sum] - name: HTTPCode_Target_5XX_Count statistics: [Sum] - name: HTTPCode_ELB_3XX_Count statistics: [Sum] - name: HTTPCode_ELB_4XX_Count statistics: [Sum] - name: 
HTTPCode_ELB_5XX_Count statistics: [Sum] - name: ProcessedBytes statistics: [Sum] - name: IPv6ProcessedBytes statistics: [Sum] - name: ConsumedLCUs statistics: [Average] - name: ClientTLSNegotiationErrorCount statistics: [Sum] - name: TargetTLSNegotiationErrorCount statistics: [Sum] - name: RuleEvaluations statistics: [Sum] ================================================ FILE: examples/apigw.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/ApiGateway regions: - us-east-1 period: 300 length: 300 metrics: - name: Latency statistics: [Average, Maximum, p95, p99] - name: Count statistics: [SampleCount, Sum] - name: 4xx statistics: [Sum] - name: 5xx statistics: [Sum] ================================================ FILE: examples/apprunner.yaml ================================================ apiVersion: v1alpha1 discovery: jobs: - regions: - us-east-1 period: 300 length: 300 type: AWS/AppRunner metrics: - name: MemoryUtilization statistics: - Average - Maximum - name: CPUUtilization statistics: - Average - Maximum - name: 2xxStatusResponses statistics: - Sum - name: Requests statistics: - Sum - name: RequestLatency statistics: - Average - name: ActiveInstances statistics: - Maximum - name: 4xxStatusResponses statistics: - Sum - name: Concurrency statistics: - Maximum ================================================ FILE: examples/appstream.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/AppStream regions: - us-east-1 period: 300 length: 300 metrics: - name: ActualCapacity statistics: [Average] - name: AvailableCapacity statistics: [Average] - name: CapacityUtilization statistics: [Average] - name: DesiredCapacity statistics: [Average] - name: InUseCapacity statistics: [Average] - name: PendingCapacity statistics: [Average] - name: RunningCapacity statistics: [Average] - name: InsufficientCapacityError statistics: [Average] 
================================================ FILE: examples/backup.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Backup regions: - us-east-1 period: 300 length: 300 metrics: - name: NumberOfBackupJobsCompleted statistics: [Average] - name: NumberOfBackupJobsCreated statistics: [Average] - name: NumberOfBackupJobsPending statistics: [Average] - name: NumberOfBackupJobsRunning statistics: [Average] - name: NumberOfBackupJobsAborted statistics: [Average] - name: NumberOfBackupJobsCompleted statistics: [Average] - name: NumberOfBackupJobsFailed statistics: [Average] - name: NumberOfBackupJobsExpired statistics: [Average] ================================================ FILE: examples/cwagent.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: CWAgent regions: - us-east-1 period: 300 length: 300 metrics: - name: mem_used_percent statistics: [Average] - name: disk_used_percent statistics: [Average] ================================================ FILE: examples/ds.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/DirectoryService regions: - us-east-1 period: 300 length: 300 metrics: - name: "Bytes Sent/sec" statistics: [Average] - name: "% Processor Time" statistics: [Average] - name: "DS Directory Searches/Sec" statistics: [Average] - name: "Database Cache % Hit" statistics: [Average] - name: "% Free Space" statistics: [Sum] ================================================ FILE: examples/dx.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/DX regions: - us-east-1 period: 300 length: 300 metrics: - name: ConnectionState statistics: [Maximum] - name: VirtualInterfaceBpsIngress statistics: [Average] - name: VirtualInterfaceBpsEgress statistics: [Average] - name: VirtualInterfacePpsIngress statistics: [Average] - name: VirtualInterfacePpsEgress statistics: 
[Average] - name: ConnectionErrorCount statistics: [Minimum, Maximum, Sum] ================================================ FILE: examples/ebs.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/EBS regions: - us-east-1 period: 300 length: 300 metrics: - name: VolumeReadBytes statistics: [Sum] - name: VolumeWriteBytes statistics: [Sum] - name: VolumeReadOps statistics: [Average] - name: VolumeWriteOps statistics: [Average] - name: VolumeTotalReadTime statistics: [Average] - name: VolumeTotalWriteTime statistics: [Average] - name: VolumeIdleTime statistics: [Average] - name: VolumeQueueLength statistics: [Average] - name: VolumeThroughputPercentage statistics: [Average] - name: VolumeConsumedReadWriteOps statistics: [Average] - name: BurstBalance statistics: [Minimum] ================================================ FILE: examples/ec.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/ElastiCache regions: - us-east-1 period: 300 length: 300 metrics: - name: CPUUtilization statistics: [Average] - name: FreeableMemory statistics: [Average] - name: NetworkBytesIn statistics: [Average] - name: NetworkBytesOut statistics: [Average] - name: NetworkPacketsIn statistics: [Average] - name: NetworkPacketsOut statistics: [Average] - name: SwapUsage statistics: [Average] - name: CPUCreditUsage statistics: [Average] ================================================ FILE: examples/ec2.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/EC2 regions: - us-east-1 period: 300 length: 300 metrics: - name: CPUUtilization statistics: [Average] - name: NetworkIn statistics: [Average, Sum] - name: NetworkOut statistics: [Average, Sum] - name: NetworkPacketsIn statistics: [Sum] - name: NetworkPacketsOut statistics: [Sum] - name: DiskReadBytes statistics: [Sum] - name: DiskWriteBytes statistics: [Sum] - name: DiskReadOps statistics: [Sum] - 
name: DiskWriteOps statistics: [Sum] - name: StatusCheckFailed statistics: [Sum] - name: StatusCheckFailed_Instance statistics: [Sum] - name: StatusCheckFailed_System statistics: [Sum] ================================================ FILE: examples/ecs.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/ECS regions: - us-east-1 period: 300 length: 300 metrics: - name: CPUReservation statistics: [Average, Maximum] - name: MemoryReservation statistics: [Average, Maximum] - name: CPUUtilization statistics: [Average, Maximum] - name: MemoryUtilization statistics: [Average, Maximum] ================================================ FILE: examples/elb.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/ELB regions: - us-east-1 period: 300 length: 300 metrics: - name: BackendConnectionErrors statistics: [Sum] - name: HTTPCode_Backend_2XX statistics: [Sum] - name: HTTPCode_Backend_3XX statistics: [Sum] - name: HTTPCode_Backend_4XX statistics: [Sum] - name: HTTPCode_Backend_5XX statistics: [Sum] - name: HTTPCode_ELB_4XX statistics: [Sum] - name: HTTPCode_ELB_5XX statistics: [Sum] - name: RequestCount statistics: [Sum] - name: Latency statistics: [Average] - name: SurgeQueueLength statistics: [Average] - name: SpilloverCount statistics: [Sum] - name: HealthyHostCount statistics: [Minimum, Maximum] - name: UnHealthyHostCount statistics: [Minimum, Maximum] ================================================ FILE: examples/es.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/ES regions: - us-east-1 period: 300 length: 300 metrics: - name: CPUUtilization statistics: [Average] - name: FreeStorageSpace statistics: [Sum] - name: ClusterStatus.green statistics: [Maximum] - name: ClusterStatus.yellow statistics: [Maximum] - name: ClusterStatus.red statistics: [Maximum] - name: Shards.active statistics: [Sum] - name: Shards.unassigned 
statistics: [Sum] - name: Shards.delayedUnassigned statistics: [Sum] - name: Shards.activePrimary statistics: [Sum] - name: Shards.initializing statistics: [Sum] - name: Shards.initializing statistics: [Sum] - name: Shards.relocating statistics: [Sum] - name: Nodes statistics: [Maximum] - name: SearchableDocuments statistics: [Maximum] - name: DeletedDocuments statistics: [Maximum] ================================================ FILE: examples/historic-data.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/SQS regions: - us-east-1 period: 60 length: 300 addCloudwatchTimestamp: true exportAllDataPoints: true metrics: - name: NumberOfMessagesSent statistics: [Sum] - name: NumberOfMessagesReceived statistics: [Sum] - name: NumberOfMessagesDeleted statistics: [Sum] - name: ApproximateAgeOfOldestMessage statistics: [Average] - name: NumberOfEmptyReceives statistics: [Sum] - name: SentMessageSize statistics: [Average] - name: ApproximateNumberOfMessagesNotVisible statistics: [Sum] - name: ApproximateNumberOfMessagesDelayed statistics: [Sum] - name: ApproximateNumberOfMessagesVisible statistics: [Sum] ================================================ FILE: examples/kafka.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Kafka regions: - us-east-1 period: 300 length: 300 metrics: - name: BytesInPerSec statistics: [Average] - name: BytesOutPerSec statistics: [Average] - name: RootDiskUsed statistics: [Average] - name: KafkaDataLogsDiskUsed statistics: [Average] - name: KafkaAppLogsDiskUsed statistics: [Average] - name: MemoryFree statistics: [Average] - name: MemoryUsed statistics: [Average] - name: NetworkRxPackets statistics: [Average] - name: NetworkTxPackets statistics: [Average] - name: SwapFree statistics: [Average] - name: SwapUsed statistics: [Average] - name: GlobalTopicCount statistics: [Maximum] - name: GlobalPartitionCount statistics: [Maximum] - name: 
CpuUser statistics: [Average] - name: CpuSystem statistics: [Average] - name: CpuIdle statistics: [Average] ================================================ FILE: examples/kinesis.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Kinesis regions: - us-east-1 period: 300 length: 300 metrics: - name: PutRecord.Latency statistics: [Average] - name: PutRecord.Success statistics: [Sum] - name: PutRecord.Bytes statistics: [Sum] - name: PutRecords.Latency statistics: [Average] - name: PutRecords.Records statistics: [Sum] - name: PutRecords.Success statistics: [Sum] - name: PutRecords.Bytes statistics: [Sum] - name: GetRecords.Latency statistics: [Average] - name: GetRecords.Records statistics: [Sum] - name: GetRecords.Success statistics: [Sum] - name: GetRecords.Bytes statistics: [Sum] - name: GetRecords.IteratorAgeMilliseconds statistics: [Average] - name: IncomingBytes statistics: [Sum] - name: IncomingRecords statistics: [Sum] - name: OutgoingBytes statistics: [Sum] - name: OutgoingRecords statistics: [Sum] - name: WriteProvisionedThroughputExceeded statistics: [Average] - name: ReadProvisionedThroughputExceeded statistics: [Average] ================================================ FILE: examples/kms.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/KMS regions: - us-east-1 period: 300 metrics: - name: SecondsUntilKeyMaterialExpiration statistics: [Maximum, Minimum] ================================================ FILE: examples/lambda.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Lambda regions: - us-east-1 period: 300 length: 300 metrics: - name: Invocations statistics: [Sum] - name: Errors statistics: [Sum] - name: Throttles statistics: [Sum] - name: Duration statistics: [Average, Maximum, Minimum, p90] ================================================ FILE: examples/lambda_edge.yml 
================================================ # We can't configure a discovery job for edge Lambda functions, but a static job works. The region is always us-east-1. # Other regions used as edge locations can be added apiVersion: v1alpha1 static: - name: us-east-1. namespace: AWS/Lambda regions: - eu-central-1 - us-east-1 - us-west-2 - ap-southeast-1 period: 600 length: 600 metrics: - name: Invocations statistics: [Sum] - name: Errors statistics: [Sum] - name: Throttles statistics: [Sum] - name: Duration statistics: [Average, Maximum, Minimum, p90] ================================================ FILE: examples/logs.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Logs regions: - us-east-1 period: 60 length: 60 delay: 120 statistics: [Sum] metrics: - name: DeliveryErrors - name: DeliveryThrottling - name: EMFParsingErrors - name: EMFValidationErrors - name: ForwardedBytes - name: ForwardedLogEvents - name: IncomingBytes - name: IncomingLogEvents ================================================ FILE: examples/mq.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/AmazonMQ regions: - us-east-1 period: 300 length: 300 metrics: - name: NetworkOut statistics: [Minimum, Maximum, Average] - name: NetworkIn statistics: [Minimum, Maximum, Average] - name: QueueSize statistics: [Minimum, Maximum, Average] - name: ConsumerCount statistics: [Minimum, Maximum, Average] - name: ProducerCount statistics: [Minimum, Maximum, Average] - name: EnqueueCount statistics: [Minimum, Maximum, Average] - name: DequeueCount statistics: [Minimum, Maximum, Average] - name: MemoryUsage statistics: [Minimum, Maximum, Average] - name: CpuUtilization statistics: [Minimum, Maximum, Average] ================================================ FILE: examples/networkmanager.yml ================================================ # 
https://docs.aws.amazon.com/network-manager/latest/cloudwan/cloudwan-metrics.html apiVersion: v1alpha1 discovery: jobs: - type: AWS/Network Manager regions: - us-west-2 period: 60 length: 300 metrics: - name: BytesDropCountBlackhole statistics: [Sum] - name: BytesDropCountNoRoute statistics: [Sum] - name: BytesIn statistics: [Sum] - name: BytesOut statistics: [Sum] - name: PacketsDropCountBlackhole statistics: [Sum] - name: PacketsDropCountNoRoute statistics: [Sum] - name: PacketDropCountTTLExpired statistics: [Sum] - name: PacketsIn statistics: [Sum] - name: PacketsOut statistics: [Sum] ================================================ FILE: examples/ngw.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/NATGateway regions: - us-east-1 period: 300 length: 300 metrics: - name: ActiveConnectionCount statistics: [Average, Minimum, Maximum, Sum] - name: BytesInFromDestination statistics: [Average, Minimum, Maximum, Sum] - name: BytesInFromSource statistics: [Average, Minimum, Maximum, Sum] - name: BytesOutToDestination statistics: [Average, Minimum, Maximum, Sum] - name: BytesOutToSource statistics: [Average, Minimum, Maximum, Sum] - name: ConnectionAttemptCount statistics: [Average, Minimum, Maximum, Sum] - name: ConnectionEstablishedCount statistics: [Average, Minimum, Maximum, Sum] - name: ErrorPortAllocation statistics: [Average, Minimum, Maximum, Sum] - name: IdleTimeoutCount statistics: [Average, Minimum, Maximum, Sum] - name: PacketsDropCount statistics: [Average, Minimum, Maximum, Sum] - name: PacketsInFromDestination statistics: [Average, Minimum, Maximum, Sum] - name: PacketsInFromSource statistics: [Average, Minimum, Maximum, Sum] - name: PacketsOutToDestination statistics: [Average, Minimum, Maximum, Sum] - name: PacketsOutToSource statistics: [Average, Minimum, Maximum, Sum] ================================================ FILE: examples/nlb.yml ================================================ apiVersion: 
v1alpha1 discovery: jobs: - type: AWS/NetworkELB regions: - us-east-1 period: 300 length: 300 metrics: - name: ActiveFlowCount statistics: [Average, Minimum, Maximum] - name: ActiveFlowCount_TLS statistics: [Average, Minimum, Maximum] - name: ActiveFlowCount_UDP statistics: [Average, Minimum, Maximum] - name: PortAllocationErrorCount statistics: [Minimum, Maximum, Sum] - name: ProcessedBytes statistics: [Minimum, Maximum, Sum] - name: ProcessedPackets statistics: [Minimum, Maximum, Sum] ================================================ FILE: examples/private-link-endpoints.yaml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/PrivateLinkEndpoints regions: - us-east-1 period: 300 length: 300 metrics: - name: ActiveConnections statistics: [Average] - name: NewConnections statistics: [Average, Sum] - name: PacketsDropped statistics: [Average, Sum] - name: BytesProcessed statistics: [Sum] ================================================ FILE: examples/private-link-services.yaml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/PrivateLinkServices regions: - us-east-1 period: 300 length: 300 metrics: - name: ActiveConnections statistics: [Average] - name: NewConnections statistics: [Average, Sum] - name: PacketsDropped statistics: [Average, Sum] - name: BytesProcessed statistics: [Sum] ================================================ FILE: examples/qldb.yml ================================================ apiVersion: v1alpha1 discovery: exportedTagsOnMetrics: AWS/QLDB: - Name jobs: - type: AWS/QLDB regions: - us-east-2 period: 300 length: 300 metrics: - name: JournalStorage statistics: - Average - name: IndexedStorage statistics: - Average - name: ReadIOs statistics: - Sum - name: WriteIOs statistics: - Sum - name: CommandLatency statistics: - Average - name: OccConflictExceptions statistics: - Sum - name: Session4xxExceptions statistics: - Sum - name: Session5xxExceptions 
statistics: - Sum - name: SessionRateExceededExceptions statistics: - Sum ================================================ FILE: examples/quicksight.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/QuickSight regions: - eu-west-2 period: 30000 length: 30000 metrics: - name: IngestionErrorCount statistics: [Sum] - name: IngestionRowCount statistics: [Sum] - name: IngestionInvocationCount statistics: [Sum] ================================================ FILE: examples/rds.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/RDS regions: - us-east-1 period: 300 length: 300 metrics: - name: CPUUtilization statistics: [Maximum] - name: DatabaseConnections statistics: [Sum] - name: FreeableMemory statistics: [Average] - name: FreeStorageSpace statistics: [Average] - name: ReadThroughput statistics: [Average] - name: WriteThroughput statistics: [Average] - name: ReadLatency statistics: [Maximum] - name: WriteLatency statistics: [Maximum] - name: ReadIOPS statistics: [Average] - name: WriteIOPS statistics: [Average] ================================================ FILE: examples/redshift-serverless.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Redshift-Serverless regions: - us-east-1 period: 300 length: 300 metrics: - name: DatabaseConnections statistics: [Average] - name: ComputeCapacity statistics: [Average] - name: QueryRuntimeBreakdown statistics: [Average] - name: QueriesRunning statistics: [Average] - name: QueriesQueued statistics: [Average] - name: QueryDuration statistics: [Average] ================================================ FILE: examples/s3.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/S3 regions: - us-east-1 period: 86400 length: 86400 metrics: - name: NumberOfObjects statistics: [Average] - name: BucketSizeBytes statistics: [Average] 
================================================ FILE: examples/ses.yaml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/SES regions: - us-east-1 period: 300 length: 300 metrics: - name: Send statistics: [Sum] - name: Delivery statistics: [Sum] - name: Bounce statistics: [Sum] - name: Reputation.ComplaintRate statistics: [Sum] - name: Reputation.BounceRate statistics: [Sum] ================================================ FILE: examples/sns.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/SNS regions: - us-east-1 period: 300 length: 300 metrics: - name: NumberOfMessagesPublished statistics: [Sum] - name: NumberOfNotificationsDelivered statistics: [Sum] - name: NumberOfNotificationsFailed statistics: [Sum] - name: NumberOfNotificationsFilteredOut statistics: [Sum] - name: PublishSize statistics: [Average] ================================================ FILE: examples/sqs.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/SQS regions: - us-east-1 period: 60 length: 60 metrics: - name: NumberOfMessagesSent statistics: [Sum] - name: NumberOfMessagesReceived statistics: [Sum] - name: NumberOfMessagesDeleted statistics: [Sum] - name: ApproximateAgeOfOldestMessage statistics: [Average] - name: NumberOfEmptyReceives statistics: [Sum] - name: SentMessageSize statistics: [Average] - name: ApproximateNumberOfMessagesNotVisible statistics: [Sum] - name: ApproximateNumberOfMessagesDelayed statistics: [Sum] - name: ApproximateNumberOfMessagesVisible statistics: [Sum] ================================================ FILE: examples/usage.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/Usage regions: - us-east-1 period: 300 length: 300 metrics: - name: CallCount statistics: [Sum] - name: ResourceCount statistics: [Sum] ================================================ FILE: 
examples/vpn.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/VPN regions: - us-east-1 period: 300 length: 300 metrics: - name: TunnelDataIn statistics: [Sum] - name: TunnelDataOut statistics: [Sum] - name: TunnelState statistics: [Maximum] ================================================ FILE: go.mod ================================================ module github.com/prometheus-community/yet-another-cloudwatch-exporter go 1.25.0 require ( github.com/aws/aws-sdk-go-v2 v1.41.1 github.com/aws/aws-sdk-go-v2/config v1.32.7 github.com/aws/aws-sdk-go-v2/credentials v1.19.7 github.com/aws/aws-sdk-go-v2/service/amp v1.42.5 github.com/aws/aws-sdk-go-v2/service/apigateway v1.38.4 github.com/aws/aws-sdk-go-v2/service/apigatewayv2 v1.33.5 github.com/aws/aws-sdk-go-v2/service/autoscaling v1.63.0 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.53.1 github.com/aws/aws-sdk-go-v2/service/databasemigrationservice v1.61.5 github.com/aws/aws-sdk-go-v2/service/dynamodb v1.53.6 github.com/aws/aws-sdk-go-v2/service/ec2 v1.280.0 github.com/aws/aws-sdk-go-v2/service/elasticache v1.51.9 github.com/aws/aws-sdk-go-v2/service/iam v1.53.2 github.com/aws/aws-sdk-go-v2/service/lambda v1.87.1 github.com/aws/aws-sdk-go-v2/service/rds v1.114.0 github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.31.6 github.com/aws/aws-sdk-go-v2/service/shield v1.34.17 github.com/aws/aws-sdk-go-v2/service/storagegateway v1.43.10 github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 github.com/aws/smithy-go v1.24.2 github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_model v0.6.2 github.com/prometheus/common v0.67.5 github.com/r3labs/diff/v3 v3.0.1 github.com/stretchr/testify v1.11.1 github.com/urfave/cli/v2 v2.27.7 go.uber.org/atomic v1.11.0 go.yaml.in/yaml/v2 v2.4.4 golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 golang.org/x/sync v0.19.0 ) require ( 
github.com/alecthomas/kingpin/v2 v2.4.0 // indirect github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.17 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect golang.org/x/sys v0.39.0 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) ================================================ FILE: go.sum ================================================ github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= 
github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU= github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4= github.com/aws/aws-sdk-go-v2/config v1.32.7 h1:vxUyWGUwmkQ2g19n7JY/9YL8MfAIl7bTesIUykECXmY= github.com/aws/aws-sdk-go-v2/config v1.32.7/go.mod h1:2/Qm5vKUU/r7Y+zUk/Ptt2MDAEKAfUtKc1+3U1Mo3oY= github.com/aws/aws-sdk-go-v2/credentials v1.19.7 h1:tHK47VqqtJxOymRrNtUXN5SP/zUTvZKeLx4tH6PGQc8= github.com/aws/aws-sdk-go-v2/credentials v1.19.7/go.mod h1:qOZk8sPDrxhf+4Wf4oT2urYJrYt3RejHSzgAquYeppw= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA= github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U= github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17/go.mod h1:5M5CI3D12dNOtH3/mk6minaRwI2/37ifCURZISxA/IQ= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v3ISRNiv+3KdQoZ6JWyfcsyQik= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= 
github.com/aws/aws-sdk-go-v2/service/amp v1.42.5 h1:Pd07a2Tdhl3591h+hbJZCC+50NGraSyt/I6yLx4FDak= github.com/aws/aws-sdk-go-v2/service/amp v1.42.5/go.mod h1:6q5j2wH8o1tf4glByj2hBDIEiOAKDh0x5QpjLKmIi40= github.com/aws/aws-sdk-go-v2/service/apigateway v1.38.4 h1:V8gcFwJPP3eXZXpeui+p97JmO7WtCkQlEAHrE6Kyt0k= github.com/aws/aws-sdk-go-v2/service/apigateway v1.38.4/go.mod h1:iJF5UdwkFue/YuUGCFsCCdT3SBMUx0s+h5TNi0Sz+qg= github.com/aws/aws-sdk-go-v2/service/apigatewayv2 v1.33.5 h1:VUf8W+s2EQwajy6n+xCN9ctkhJsCJbpwPmzf49NtJM8= github.com/aws/aws-sdk-go-v2/service/apigatewayv2 v1.33.5/go.mod h1:0/7yOW11zIEYILivvAmnKbyvYG+34Zb/JrnywtskyLw= github.com/aws/aws-sdk-go-v2/service/autoscaling v1.63.0 h1:ffFts1+wfxmRrJ6tQJnhh6+p1TeQDplJ1iLrZopUM9w= github.com/aws/aws-sdk-go-v2/service/autoscaling v1.63.0/go.mod h1:8O5Pj92iNpfw/Fa7WdHbn6YiEjDoVdutz+9PGRNoP3Y= github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.53.1 h1:ElB5x0nrBHgQs+XcpQ1XJpSJzMFCq6fDTpT6WQCWOtQ= github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.53.1/go.mod h1:Cj+LUEvAU073qB2jInKV6Y0nvHX0k7bL7KAga9zZ3jw= github.com/aws/aws-sdk-go-v2/service/databasemigrationservice v1.61.5 h1:3d44lDPnuYJn1xSf7R4J2zEEL+CO5ooxci9OjI3xAh8= github.com/aws/aws-sdk-go-v2/service/databasemigrationservice v1.61.5/go.mod h1:XKPSi5JA8Wm59aLAmFoshAdBrY6YQnomNDbvYgNr/l8= github.com/aws/aws-sdk-go-v2/service/dynamodb v1.53.6 h1:LNmvkGzDO5PYXDW6m7igx+s2jKaPchpfbS0uDICywFc= github.com/aws/aws-sdk-go-v2/service/dynamodb v1.53.6/go.mod h1:ctEsEHY2vFQc6i4KU07q4n68v7BAmTbujv2Y+z8+hQY= github.com/aws/aws-sdk-go-v2/service/ec2 v1.280.0 h1:1KXSI/tWq+pdp3hz8Kfq2ngUcrBW28pIdoOhLWYHXW0= github.com/aws/aws-sdk-go-v2/service/ec2 v1.280.0/go.mod h1:Uy+C+Sc58jozdoL1McQr8bDsEvNFx+/nBY+vpO1HVUY= github.com/aws/aws-sdk-go-v2/service/elasticache v1.51.9 h1:hTgZLyNoDWphZUtTtcvQh0LP6TZO0mtdSfZK/GObDLk= github.com/aws/aws-sdk-go-v2/service/elasticache v1.51.9/go.mod h1:91RkIYy9ubykxB50XGYDsbljLZnrZ6rp/Urt4rZrbwQ= github.com/aws/aws-sdk-go-v2/service/iam v1.53.2 
h1:62G6btFUwAa5uR5iPlnlNVAM0zJSLbWgDfKOfUC7oW4= github.com/aws/aws-sdk-go-v2/service/iam v1.53.2/go.mod h1:av9clChrbZbJ5E21msSsiT2oghl2BJHfQGhCkXmhyu8= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow= github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.17 h1:Nhx/OYX+ukejm9t/MkWI8sucnsiroNYNGb5ddI9ungQ= github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.17/go.mod h1:AjmK8JWnlAevq1b1NBtv5oQVG4iqnYXUufdgol+q9wg= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU= github.com/aws/aws-sdk-go-v2/service/lambda v1.87.1 h1:QBdmTXWwqVgx0PueT/Xgp2+al5HR0gAV743pTzYeBRw= github.com/aws/aws-sdk-go-v2/service/lambda v1.87.1/go.mod h1:ogjbkxFgFOjG3dYFQ8irC92gQfpfMDcy1RDKNSZWXNU= github.com/aws/aws-sdk-go-v2/service/rds v1.114.0 h1:p9c6HDzx6sTf7uyc9xsQd693uzArsPrsVr9n0oRk7DU= github.com/aws/aws-sdk-go-v2/service/rds v1.114.0/go.mod h1:JBRYWpz5oXQtHgQC+X8LX9lh0FBCwRHJlWEIT+TTLaE= github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.31.6 h1:gd7YMnFZQGdy4lERF9ffz9kbc6K/IPhCu5CrJDJr8XY= github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.31.6/go.mod h1:lnTv81am9e2C2SjX3VKyUrKEzDADD9lKST9ou96UBoY= github.com/aws/aws-sdk-go-v2/service/shield v1.34.17 h1:XOqXVwczmfk6/GtGW7eee1RvCp7NhPKn8wYbZp+yTa8= github.com/aws/aws-sdk-go-v2/service/shield v1.34.17/go.mod h1:eQV3cCW6J6J+cpBitDt/tDvVTmBFTdlZdEGNKsB76O8= github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y= github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M= github.com/aws/aws-sdk-go-v2/service/sso 
v1.30.9 h1:v6EiMvhEYBoHABfbGB4alOYmCIrcgyPPiBE1wZAEbqk= github.com/aws/aws-sdk-go-v2/service/sso v1.30.9/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 h1:gd84Omyu9JLriJVCbGApcLzVR3XtmC4ZDPcAI6Ftvds= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo= github.com/aws/aws-sdk-go-v2/service/storagegateway v1.43.10 h1:E0WFFeaadVwljcYiyMLtpha8GSewQJg4n0xw49MXuds= github.com/aws/aws-sdk-go-v2/service/storagegateway v1.43.10/go.mod h1:QoprJo5GSv73ompRyJRq2sXmvodjOZc3eBfvbotVefw= github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ= github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ= github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng= github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 
github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da h1:BML5sNe+bw2uO8t8cQSwe5QhvoP04eHPF7bnaQma0Kw= github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod 
h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/r3labs/diff/v3 v3.0.1 h1:CBKqf3XmNRHXKmdU7mZP1w7TV0pDyVCis1AUHtA4Xtg= github.com/r3labs/diff/v3 v3.0.1/go.mod h1:f1S9bourRbiM66NskseyUdo0fTmEE0qKrikYJX63dgo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= github.com/vmihailenco/tagparser/v2 v2.0.0 
h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ= go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ= golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA= golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= ================================================ FILE: mixin/README.md ================================================ # CloudWatch Mixin This is a Prometheus [Monitoring Mixin](https://monitoring.mixins.dev/) that comes with pre-defined dashboards. It can be installed e.g. with [Grizzly](https://grafana.github.io/grizzly). First, install [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler) with ``` go install -a github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest ``` Then install all the dependencies of this mixin: ``` jb install ``` Finally, install `Grizzly` and apply the mixin to your Grafana instance: ``` go install github.com/grafana/grizzly/cmd/grr@latest grr apply mixin.libsonnet ``` ================================================ FILE: mixin/config.libsonnet ================================================ { // use to override the default configuration of base mixin _config+:: { }, } ================================================ FILE: mixin/dashboards/all.libsonnet ================================================ { 'ebs.json': import 'ebs.libsonnet', 'ec2.json': import 'ec2.libsonnet', 'lambda.json': import 'lambda.libsonnet', 'rds.json': import 'rds.libsonnet', 's3.json': import 's3.libsonnet', } ================================================ FILE: mixin/dashboards/common.libsonnet ================================================ { // Tooltip type // 0 = 'default': no shared crosshair or tooltip // 1 = 'shared_crosshair': shared tooltip // 2 = 'shared_tooltip': shared crosshair AND shared tooltip tooltipSharedCrosshair: 1, // Refresh // 1 = 'load': Queries the data source every time the dashboard loads // 2 = 'time': Queries 
the data source when the dashboard time range changes refreshOnPageLoad: 1, refreshOnTimeRangeChange: 2, // Sorting // 0: Without Sort, // 1: Alphabetical (asc) // 2: Alphabetical (desc) // 3: Numerical (asc) // 4: Numerical (desc) sortAlphabeticalAsc: 1, } ================================================ FILE: mixin/dashboards/ebs.libsonnet ================================================ local common = import 'common.libsonnet'; local grafana = import 'grafonnet-7.0/grafana.libsonnet'; local allLabels = 'job=~"$job", region=~"$region", dimension_VolumeId=~"$volume"'; grafana.dashboard.new( title='AWS EBS', description='Visualize Amazon EBS metrics', tags=['Amazon', 'AWS', 'CloudWatch', 'EBS'], graphTooltip=common.tooltipSharedCrosshair, ) .addTemplate( grafana.template.datasource.new( name='datasource', query='prometheus', label='Data Source', ) ) .addTemplate( grafana.template.query.new( name='job', label='job', datasource='$datasource', query='label_values(aws_ebs_volume_idle_time_average, job)', refresh=common.refreshOnPageLoad, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addTemplate( grafana.template.query.new( name='region', label='Region', datasource='$datasource', query='label_values(aws_ebs_volume_idle_time_average, region)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='volume', label='Volume', datasource='$datasource', query='label_values(aws_ebs_volume_idle_time_average{job=~"$job", region=~"$region"}, dimension_VolumeId)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addPanels( [ grafana.panel.text.new( title='Info', content=||| Showing metrics only for AWS resources that have tags assigned to them. For more information, see [Amazon CloudWatch Metrics for Amazon EBS](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using_cloudwatch_ebs.html). 
|||, ) .setGridPos(w=24, h=3), grafana.panel.graph.new( title='Volume read bandwidth (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_read_bytes_sum{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume write bandwidth (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_write_bytes_sum{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume read throughput (operations)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=8) .addYaxis(format='ops', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_read_ops_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume write throughput (operations)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=8) .addYaxis(format='ops', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_write_ops_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume idle time', datasource='$datasource', ) .setGridPos(w=8, h=8, x=0, y=16) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_idle_time_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume total read time', datasource='$datasource', ) .setGridPos(w=8, h=8, x=8, y=16) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( 
expr='aws_ebs_volume_total_read_time_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume total write time', datasource='$datasource', ) .setGridPos(w=8, h=8, x=16, y=16) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_total_write_time_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume queue length (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=24) .addYaxis(format='short', min=0, max=1) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_queue_length_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume throughput percentage', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=24) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_throughput_percentage_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Burst balance', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=32) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_burst_balance_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Volume consumed r/w operations', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=32) .addYaxis(format='short') .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ebs_volume_consumed_read_write_ops_average{%s}' % [allLabels], legendFormat='{{dimension_VolumeId}}', datasource='$datasource', ), ), ] ) ================================================ FILE: mixin/dashboards/ec2.libsonnet 
================================================ local common = import 'common.libsonnet'; local grafana = import 'grafonnet-7.0/grafana.libsonnet'; local allLabels = 'job=~"$job", region=~"$region", dimension_InstanceId=~"$instance"'; grafana.dashboard.new( title='AWS EC2', description='Visualize Amazon EC2 metrics', tags=['Amazon', 'AWS', 'CloudWatch', 'EC2'], graphTooltip=common.tooltipSharedCrosshair, ) .addTemplate( grafana.template.datasource.new( name='datasource', query='prometheus', label='Data Source', ) ) .addTemplate( grafana.template.query.new( name='job', label='job', datasource='$datasource', query='label_values(aws_ec2_cpuutilization_maximum, job)', refresh=common.refreshOnPageLoad, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addTemplate( grafana.template.query.new( name='region', label='Region', datasource='$datasource', query='label_values(aws_ec2_cpuutilization_maximum, region)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='instance', label='instance', datasource='$datasource', query='label_values(aws_ec2_cpuutilization_maximum{job=~"$job", region=~"$region"}, dimension_InstanceId)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addPanels( [ grafana.panel.text.new( title='Info', content=||| Showing metrics only for AWS resources that have tags assigned to them. For more information, see [Amazon CloudWatch Metrics for Amazon EC2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/viewing_metrics_with_cloudwatch.html). 
|||, ) .setGridPos(w=24, h=3), grafana.panel.graph.new( title='CPU utilization', datasource='$datasource', ) .setGridPos(w=24, h=8, x=0, y=3) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_cpuutilization_maximum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Average network traffic', datasource='$datasource', ) .setGridPos(w=24, h=8, x=0, y=11) .addYaxis( format='bps', label='bytes in (+) / out (-)' ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_network_in_average{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}} inbound', datasource='$datasource', ), ) .addTarget( grafana.target.prometheus.new( expr='aws_ec2_network_out_average{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}} outbound', datasource='$datasource', ), ) .addSeriesOverride(alias='/.*outbound/', transform='negative-Y'), grafana.panel.row.new( title='Network details', ) .setGridPos(w=12, h=16, x=0, y=19) .addPanel( grafana.panel.graph.new( title='Inbound network traffic', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=19) .addYaxis( format='bps', min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_network_in_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Outbound network traffic', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=19) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_network_out_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Inbound network packets', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=27) .addYaxis(format='pps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( 
expr='aws_ec2_network_packets_in_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Outbound network packets', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=27) .addYaxis(format='pps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_network_packets_out_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ), grafana.panel.row.new( title='Disk details', ) .setGridPos(w=24, h=18, x=0, y=35) .addPanel( grafana.panel.text.new( content='The following metrics are reported for EC2 Instance Store Volumes. For Amazon EBS volumes, see the EBS dashboard.', ) .setGridPos(w=24, h=2, x=0, y=35), ) .addPanel( grafana.panel.graph.new( title='Disk reads (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=37) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_disk_read_bytes_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Disk writes (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=37) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_disk_write_bytes_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Disk read (operations)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=45) .addYaxis(format='pps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_disk_read_ops_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Disk write (operations)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=45) .addYaxis(format='pps', min=0) .addYaxis() .addTarget( 
grafana.target.prometheus.new( expr='aws_ec2_disk_write_ops_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ), grafana.panel.row.new( title='Status checks', ) .setGridPos(w=24, h=8, x=0, y=53) .addPanel( grafana.panel.graph.new( title='Status check failed (system)', datasource='$datasource', ) .setGridPos(w=8, h=8, x=0, y=53) .addYaxis(min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_status_check_failed_system_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Status check failed (instance)', datasource='$datasource', ) .setGridPos(w=8, h=8, x=8, y=53) .addYaxis(min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_status_check_failed_instance_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Status check failed (all)', datasource='$datasource', ) .setGridPos(w=8, h=8, x=16, y=53) .addYaxis(min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_ec2_status_check_failed_sum{%s}' % [allLabels], legendFormat='{{dimension_InstanceId}}', datasource='$datasource', ), ), ), ], ) ================================================ FILE: mixin/dashboards/lambda.libsonnet ================================================ local common = import 'common.libsonnet'; local grafana = import 'grafonnet-7.0/grafana.libsonnet'; local allLabels = 'job=~"$job", region=~"$region", dimension_FunctionName=~"$function_name", dimension_Resource=~"$resource", dimension_ExecutedVersion=~"$executed_version"'; grafana.dashboard.new( title='AWS Lambda', description='Visualize Amazon Lambda metrics', tags=['Amazon', 'AWS', 'CloudWatch', 'Lambda'], graphTooltip=common.tooltipSharedCrosshair, ) .addTemplate( grafana.template.datasource.new( name='datasource', query='prometheus', label='Data Source', ) ) 
.addTemplate( grafana.template.query.new( name='job', label='job', datasource='$datasource', query='label_values(aws_lambda_invocations_sum, job)', refresh=common.refreshOnPageLoad, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addTemplate( grafana.template.query.new( name='region', label='Region', datasource='$datasource', query='label_values(aws_lambda_invocations_sum, region)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='function_name', label='Function name', datasource='$datasource', query='label_values(aws_lambda_invocations_sum{job=~"$job", region=~"$region"}, dimension_FunctionName)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addTemplate( grafana.template.query.new( name='resource', label='Resource', datasource='$datasource', query='label_values(aws_lambda_invocations_sum{job=~"$job", region=~"$region", dimension_FunctionName=~"$function_name"}, dimension_Resource)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='executed_version', label='Executed Version', datasource='$datasource', query='label_values(aws_lambda_invocations_sum{job=~"$job", region=~"$region", dimension_FunctionName=~"$function_name", dimension_Resource=~"$resource"}, dimension_ExecutedVersion)', refresh=common.refreshOnTimeRangeChange, allValue='.*', includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addPanels( [ grafana.panel.text.new( title='Info', content=||| Showing metrics only for AWS resources that have tags assigned to them. For more information, see [Amazon CloudWatch Metrics for Amazon Lambda](https://docs.aws.amazon.com/lambda/latest/dg/monitoring-metrics.html). 
|||, ) .setGridPos(w=24, h=3), grafana.panel.graph.new( title='Invocations', description='The number of times your function code is executed.', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(format='short', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='sum by (dimension_FunctionName) (aws_lambda_invocations_sum{%s})' % [allLabels], legendFormat='{{dimension_FunctionName}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Errors', description='The number of invocations that result in a function error.', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(format='short', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='sum by (dimension_FunctionName) (aws_lambda_errors_sum{%s})' % [allLabels], legendFormat='{{dimension_FunctionName}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Throttles', description='The number of invocation requests that are throttled.', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(format='short', min=0, decimals=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='sum by (dimension_FunctionName) (aws_lambda_throttles_sum{%s})' % [allLabels], legendFormat='{{dimension_FunctionName}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Duration', description='The time that your function code spends processing an event.', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(format='ms', min=0, decimals=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='sum by (dimension_FunctionName) (aws_lambda_duration_p90{%s})' % [allLabels], legendFormat='{{dimension_FunctionName}} (p90)', datasource='$datasource', ), ) .addTarget( grafana.target.prometheus.new( expr='sum by (dimension_FunctionName) (aws_lambda_duration_minimum{%s})' % [allLabels], legendFormat='{{dimension_FunctionName}} (min)', datasource='$datasource', ), ) .addTarget( grafana.target.prometheus.new( expr='sum by 
(dimension_FunctionName) (aws_lambda_duration_maximum{%s})' % [allLabels], legendFormat='{{dimension_FunctionName}} (max)', datasource='$datasource', ), ), ] ) ================================================ FILE: mixin/dashboards/rds.libsonnet ================================================ local common = import 'common.libsonnet'; local grafana = import 'grafonnet-7.0/grafana.libsonnet'; local allLabels = 'job=~"$job", region=~"$region", dimension_DBInstanceIdentifier=~"$instance"'; grafana.dashboard.new( title='AWS RDS', description='Visualize Amazon RDS metrics', tags=['Amazon', 'AWS', 'CloudWatch', 'RDS'], graphTooltip=common.tooltipSharedCrosshair, ) .addTemplate( grafana.template.datasource.new( name='datasource', query='prometheus', label='Data Source', ) ) .addTemplate( grafana.template.query.new( name='job', label='job', datasource='$datasource', query='label_values(aws_rds_database_connections_sum, job)', refresh=common.refreshOnPageLoad, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addTemplate( grafana.template.query.new( name='region', label='Region', datasource='$datasource', query='label_values(aws_rds_database_connections_sum, region)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='instance', label='instance', datasource='$datasource', query='label_values(aws_rds_database_connections_sum{job=~"$job", region=~"$region"}, dimension_DBInstanceIdentifier)', refresh=common.refreshOnTimeRangeChange, allValue='.+', includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addPanels( [ grafana.panel.text.new( title='Info', content=||| Showing metrics only for AWS resources that have tags assigned to them. For more information, see [Amazon CloudWatch Metrics for Amazon RDS](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/monitoring-cloudwatch.html). 
|||, ) .setGridPos(w=24, h=3), grafana.panel.graph.new( title='CPU utilization average', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_cpuutilization_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='CPU utilization maximum', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis( format='percent', max=100, min=0, ) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_cpuutilization_maximum{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Database connections average', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_database_connections_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Database connections count', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_database_connections_sum{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Free storage space', datasource='$datasource', ) .setGridPos(w=24, h=8) .addYaxis(format='bytes', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_free_storage_space_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Freeable memory', datasource='$datasource', ) .setGridPos(w=24, h=8) .addYaxis(format='bytes', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_freeable_memory_average{%s}' % [allLabels], 
legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Disk read throughput (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_read_throughput_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Disk write throughput (bytes)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(format='bps', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_write_throughput_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Disk read IOPS', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(format='ops', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_read_iops_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Disk write IOPS', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(format='ops', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_write_iops_average{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Disk read latency', datasource='$datasource', ) .setGridPos(w=12, h=8) .addYaxis(format='ms', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_read_latency_maximum{%s}' % [allLabels], legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Disk write latency', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12) .addYaxis(format='ms', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_rds_write_latency_maximum{%s}' % [allLabels], 
legendFormat='{{dimension_DBInstanceIdentifier}}', datasource='$datasource', ), ), ] ) ================================================ FILE: mixin/dashboards/s3.libsonnet ================================================ local common = import 'common.libsonnet'; local grafana = import 'grafonnet-7.0/grafana.libsonnet'; local allLabels = 'job=~"$job", region=~"$region", dimension_BucketName=~"$bucket"'; grafana.dashboard.new( title='AWS S3', description='Visualize Amazon S3 metrics', tags=['Amazon', 'AWS', 'CloudWatch', 'S3'], graphTooltip=common.tooltipSharedCrosshair, ) .addTemplate( grafana.template.datasource.new( name='datasource', query='prometheus', label='Data Source', ) ) .addTemplate( grafana.template.query.new( name='job', label='job', datasource='$datasource', query='label_values(aws_s3_number_of_objects_average, job)', refresh=common.refreshOnPageLoad, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, allValue='.+', ) ) .addTemplate( grafana.template.query.new( name='region', label='Region', datasource='$datasource', query='label_values(aws_s3_number_of_objects_average, region)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='bucket', label='Bucket', datasource='$datasource', query='label_values(aws_s3_number_of_objects_average, dimension_BucketName)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addTemplate( grafana.template.query.new( name='filter_id', label='FilterId', datasource='$datasource', query='label_values(aws_s3_all_requests_sum{dimension_BucketName=~"$bucket"}, dimension_FilterId)', refresh=common.refreshOnTimeRangeChange, includeAll=true, multi=true, sort=common.sortAlphabeticalAsc, ) ) .addPanels( [ grafana.panel.text.new( title='Info', content=||| Showing metrics only for AWS resources that have tags assigned to them. 
For more information, see [Amazon CloudWatch Metrics for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/metrics-dimensions.html). |||, ) .setGridPos(w=24, h=3), grafana.panel.stat.new( title='Total number of objects', datasource='$datasource', ) .setGridPos(w=12, h=4, x=0, y=3) .setFieldConfig(min=0) .setOptions(calcs=['lastNotNull'], colorMode='none') .addTarget( grafana.target.prometheus.new( expr='sum(last_over_time(aws_s3_number_of_objects_average{job=~"$job"}[1d]) > 0)', datasource='$datasource', ), ), grafana.panel.stat.new( title='Total buckets size', datasource='$datasource', ) .setGridPos(w=12, h=4, x=12, y=3) .setFieldConfig(unit='bytes', min=0) .setOptions(calcs=['lastNotNull'], colorMode='none') .addTarget( grafana.target.prometheus.new( expr='sum(last_over_time(aws_s3_bucket_size_bytes_average{job=~"$job"}[1d]) > 0)', datasource='$datasource', ), ), grafana.panel.graph.new( title='Number of objects', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=7) .addYaxis(format='short', min=0, decimals=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='last_over_time(aws_s3_number_of_objects_average{%s}[1d])' % [allLabels], legendFormat='{{dimension_BucketName}}', datasource='$datasource', ), ), grafana.panel.graph.new( title='Bucket size', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=7) .addYaxis(format='bytes', min=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='last_over_time(aws_s3_bucket_size_bytes_average{%s}[1d])' % [allLabels], legendFormat='{{dimension_BucketName}}', datasource='$datasource', ), ), grafana.panel.row.new( title='Request metrics', datasource='$datasource', ) .setGridPos(w=24, h=1, x=0, y=15) .addPanel( grafana.panel.text.new( title='Info', content=||| Enable [Requests metrics](https://docs.aws.amazon.com/AmazonS3/latest/userguide/cloudwatch-monitoring.html) from the AWS console and create a Filter to make sure your requests metrics are reported. 
|||, ) .setGridPos(w=24, h=2, x=0, y=16), ) .addPanel( grafana.panel.graph.new( title='Request latency (p95)', datasource='$datasource', ) .setGridPos(w=12, h=8, x=0, y=18) .addYaxis(format='ms', min=0, decimals=1) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='rate(aws_s3_total_request_latency_p95{%s, dimension_FilterId=~"$filter_id"}[2h]) * 1e3' % [allLabels], legendFormat='{{dimension_BucketName}}', datasource='$datasource', ), ), ) .addPanel( grafana.panel.graph.new( title='Errors count', datasource='$datasource', ) .setGridPos(w=12, h=8, x=12, y=18) .addYaxis(format='short', min=0, decimals=0) .addYaxis() .addTarget( grafana.target.prometheus.new( expr='aws_s3_4xx_errors_sum{%s, dimension_FilterId=~"$filter_id"}' % [allLabels], legendFormat='{{dimension_BucketName}}', datasource='$datasource', ), ), ), ] ) ================================================ FILE: mixin/jsonnetfile.json ================================================ { "version": 1, "dependencies": [ { "source": { "git": { "remote": "https://github.com/grafana/grafonnet-lib.git", "subdir": "grafonnet-7.0" } }, "version": "master" } ], "legacyImports": true } ================================================ FILE: mixin/jsonnetfile.lock.json ================================================ { "version": 1, "dependencies": [ { "source": { "git": { "remote": "https://github.com/grafana/grafonnet-lib.git", "subdir": "grafonnet-7.0" } }, "version": "30280196507e0fe6fa978a3e0eaca3a62844f817", "sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM=" } ], "legacyImports": false } ================================================ FILE: mixin/mixin.libsonnet ================================================ { local config = import './config.libsonnet', local util = import './util.libsonnet', local mixin = (import './dashboards/all.libsonnet') + config, grafanaDashboards+:: { [fname]: util.decorate_dashboard(mixin[fname], tags=['cloudwatch-integration']) + { uid: std.md5(fname) } for fname in 
std.objectFields(mixin) }, prometheusAlerts+:: if std.objectHasAll(mixin, 'prometheusAlerts') then mixin.prometheusAlerts else {}, prometheusRules+:: if std.objectHasAll(mixin, 'prometheusRules') then mixin.prometheusRules else {}, } ================================================ FILE: mixin/util.libsonnet ================================================ { decorate_dashboard(dashboard, tags, refresh='30s', timeFrom='now-30m'):: dashboard { editable: false, id: null, // If id is set the grafana client will try to update instead of create tags: tags, refresh: refresh, time: { from: timeFrom, to: 'now', }, templating: { list+: [ if std.objectHas(t, 'query') && t.query == 'prometheus' then t { regex: '(?!grafanacloud-usage|grafanacloud-ml-metrics).+' } else t for t in dashboard.templating.list ], }, }, } ================================================ FILE: pkg/clients/README.md ================================================ # Purpose of the clients package The goal of this package is to abstract away as much of the AWS SDK implementation details as possible. YACE uses [AWS SDK for Go v2](https://aws.github.io/aws-sdk-go-v2/docs/) exclusively (SDK v1 support was removed in v0.64.0). The folder structure isolates common interfaces from their implementations: ``` /clients: Factory interface and CachingFactory implementation /clients/account: account interface and implementation for looking up AWS account info /clients/cloudwatch: cloudwatch interface and implementation for gathering metrics data /clients/tagging: tagging interface and implementation for discovering resources, including service-specific filters ``` ## /clients/tagging/filters.go serviceFilters `serviceFilters` are extra definitions for how to lookup or filter resources for certain CloudWatch namespaces which cannot be done using only tag data alone. 
Changes to service filters include: * Adding a service filter implementation for a new service * Modifying the behavior of a `ResourceFunc` * Modifying the behavior of a `FilterFunc` ================================================ FILE: pkg/clients/account/client.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package account import ( "context" "errors" "log/slog" "github.com/aws/aws-sdk-go-v2/service/iam" "github.com/aws/aws-sdk-go-v2/service/sts" ) type Client interface { // GetAccount returns the AWS account ID for the configured authenticated client. GetAccount(ctx context.Context) (string, error) // GetAccountAlias returns the account alias if there's one set, otherwise an empty string. 
GetAccountAlias(ctx context.Context) (string, error) } type client struct { logger *slog.Logger stsClient *sts.Client iamClient *iam.Client } func NewClient(logger *slog.Logger, stsClient *sts.Client, iamClient *iam.Client) Client { return &client{ logger: logger, stsClient: stsClient, iamClient: iamClient, } } func (c client) GetAccount(ctx context.Context) (string, error) { result, err := c.stsClient.GetCallerIdentity(ctx, &sts.GetCallerIdentityInput{}) if err != nil { return "", err } if result.Account == nil { return "", errors.New("aws sts GetCallerIdentity returned no account") } return *result.Account, nil } func (c client) GetAccountAlias(ctx context.Context) (string, error) { acctAliasOut, err := c.iamClient.ListAccountAliases(ctx, &iam.ListAccountAliasesInput{}) if err != nil { return "", err } possibleAccountAlias := "" // Since a single account can only have one alias, and an authenticated SDK session corresponds to a single account, // the output can have at most one alias. // https://docs.aws.amazon.com/IAM/latest/APIReference/API_ListAccountAliases.html if len(acctAliasOut.AccountAliases) > 0 { possibleAccountAlias = acctAliasOut.AccountAliases[0] } return possibleAccountAlias, nil } ================================================ FILE: pkg/clients/cloudwatch/client.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cloudwatch

import (
	"context"
	"log/slog"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	aws_cloudwatch "github.com/aws/aws-sdk-go-v2/service/cloudwatch"
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/promutil"
)

// Client abstracts the subset of the CloudWatch API that YACE consumes.
type Client interface {
	// ListMetrics returns the list of metrics and dimensions for a given namespace
	// and metric name. Results pagination is handled automatically; the caller
	// must provide a non-nil handler func that will be invoked for each page of
	// results.
	ListMetrics(ctx context.Context, namespace string, metric *model.MetricConfig, recentlyActiveOnly bool, fn func(page []*model.Metric)) error

	// GetMetricData returns the output of the GetMetricData CloudWatch API.
	// Results pagination is handled automatically.
	GetMetricData(ctx context.Context, getMetricData []*model.CloudwatchData, namespace string, startTime time.Time, endTime time.Time) []MetricDataResult

	// GetMetricStatistics returns the output of the GetMetricStatistics CloudWatch API.
	GetMetricStatistics(ctx context.Context, logger *slog.Logger, dimensions []model.Dimension, namespace string, metric *model.MetricConfig) []*model.MetricStatisticsResult
}

// MetricDataResult is the internal projection of a single GetMetricData query result.
type MetricDataResult struct {
	ID         string
	DataPoints []DataPoint
}

// DataPoint is one (value, timestamp) observation of a metric.
type DataPoint struct {
	Value     *float64
	Timestamp time.Time
}

type client struct {
	logger        *slog.Logger
	cloudwatchAPI *aws_cloudwatch.Client
}

// NewClient wraps an AWS SDK v2 CloudWatch client into the Client interface.
func NewClient(logger *slog.Logger, cloudwatchAPI *aws_cloudwatch.Client) Client {
	return &client{
		logger:        logger,
		cloudwatchAPI: cloudwatchAPI,
	}
}

// ListMetrics pages through ListMetrics results for one namespace/metric name,
// invoking fn once per page. Returns the first pagination error encountered.
func (c client) ListMetrics(ctx context.Context, namespace string, metric *model.MetricConfig, recentlyActiveOnly bool, fn func(page []*model.Metric)) error {
	filter := &aws_cloudwatch.ListMetricsInput{
		MetricName: aws.String(metric.Name),
		Namespace:  aws.String(namespace),
	}
	if recentlyActiveOnly {
		// Restrict results to metrics with data in the last ~3 hours (API's only option).
		filter.RecentlyActive = types.RecentlyActivePt3h
	}

	c.logger.Debug("ListMetrics", "input", filter)

	paginator := aws_cloudwatch.NewListMetricsPaginator(c.cloudwatchAPI, filter, func(options *aws_cloudwatch.ListMetricsPaginatorOptions) {
		options.StopOnDuplicateToken = true
	})
	for paginator.HasMorePages() {
		promutil.CloudwatchAPICounter.WithLabelValues("ListMetrics").Inc()

		page, err := paginator.NextPage(ctx)
		if err != nil {
			promutil.CloudwatchAPIErrorCounter.WithLabelValues("ListMetrics").Inc()
			c.logger.Error("ListMetrics error", "err", err)
			return err
		}

		metricsPage := toModelMetric(page)
		c.logger.Debug("ListMetrics", "output", metricsPage)

		fn(metricsPage)
	}

	return nil
}

// toModelMetric converts one ListMetrics page into the internal metric model.
func toModelMetric(page *aws_cloudwatch.ListMetricsOutput) []*model.Metric {
	modelMetrics := make([]*model.Metric, 0, len(page.Metrics))
	for _, cloudwatchMetric := range page.Metrics {
		modelMetric := &model.Metric{
			MetricName: *cloudwatchMetric.MetricName,
			Namespace:  *cloudwatchMetric.Namespace,
			Dimensions: toModelDimensions(cloudwatchMetric.Dimensions),
		}
		modelMetrics = append(modelMetrics, modelMetric)
	}
	return modelMetrics
}

// toModelDimensions copies SDK dimensions into value-typed model dimensions.
func toModelDimensions(dimensions []types.Dimension) []model.Dimension {
	modelDimensions := make([]model.Dimension, 0, len(dimensions))
	for _, dimension := range dimensions {
		modelDimension := model.Dimension{
			Name:  *dimension.Name,
			Value: *dimension.Value,
		}
		modelDimensions = append(modelDimensions, modelDimension)
	}
	return modelDimensions
}

// GetMetricData issues one batched GetMetricData request (auto-paginated) for all
// queries in getMetricData. On a page error the partial results are discarded and
// nil is returned; the caller treats that batch as failed.
func (c client) GetMetricData(ctx context.Context, getMetricData []*model.CloudwatchData, namespace string, startTime time.Time, endTime time.Time) []MetricDataResult {
	metricDataQueries := make([]types.MetricDataQuery, 0, len(getMetricData))

	// If any query in the batch wants all datapoints, the whole response is mapped
	// with all datapoints; per-query filtering happens downstream by QueryID.
	exportAllDataPoints := false
	for _, data := range getMetricData {
		metricStat := &types.MetricStat{
			Metric: &types.Metric{
				Dimensions: toCloudWatchDimensions(data.Dimensions),
				MetricName: &data.MetricName,
				Namespace:  &namespace,
			},
			Period: aws.Int32(int32(data.GetMetricDataProcessingParams.Period)),
			Stat:   &data.GetMetricDataProcessingParams.Statistic,
		}
		metricDataQueries = append(metricDataQueries, types.MetricDataQuery{
			Id:         &data.GetMetricDataProcessingParams.QueryID,
			MetricStat: metricStat,
			ReturnData: aws.Bool(true),
		})
		exportAllDataPoints = exportAllDataPoints || data.MetricMigrationParams.ExportAllDataPoints
	}

	input := &aws_cloudwatch.GetMetricDataInput{
		EndTime:           &endTime,
		StartTime:         &startTime,
		MetricDataQueries: metricDataQueries,
		// Use the typed constant instead of a raw string literal (same wire value).
		ScanBy: types.ScanByTimestampDescending,
	}
	var resp aws_cloudwatch.GetMetricDataOutput
	promutil.CloudwatchGetMetricDataAPIMetricsCounter.Add(float64(len(input.MetricDataQueries)))
	c.logger.Debug("GetMetricData", "input", input)

	paginator := aws_cloudwatch.NewGetMetricDataPaginator(c.cloudwatchAPI, input, func(options *aws_cloudwatch.GetMetricDataPaginatorOptions) {
		options.StopOnDuplicateToken = true
	})
	for paginator.HasMorePages() {
		promutil.CloudwatchAPICounter.WithLabelValues("GetMetricData").Inc()
		promutil.CloudwatchGetMetricDataAPICounter.Inc()

		page, err := paginator.NextPage(ctx)
		if err != nil {
			promutil.CloudwatchAPIErrorCounter.WithLabelValues("GetMetricData").Inc()
			c.logger.Error("GetMetricData error", "err", err)
			return nil
		}
		resp.MetricDataResults = append(resp.MetricDataResults, page.MetricDataResults...)
	}

	c.logger.Debug("GetMetricData", "output", resp)
	return toMetricDataResult(resp, exportAllDataPoints)
}

// toMetricDataResult maps the SDK response to []MetricDataResult. Timestamps arrive
// newest-first (ScanBy above); unless exportAllDataPoints is set only the most
// recent datapoint per query is kept.
func toMetricDataResult(resp aws_cloudwatch.GetMetricDataOutput, exportAllDataPoints bool) []MetricDataResult {
	output := make([]MetricDataResult, 0, len(resp.MetricDataResults))
	for _, metricDataResult := range resp.MetricDataResults {
		mappedResult := MetricDataResult{
			ID:         *metricDataResult.Id,
			DataPoints: make([]DataPoint, 0, len(metricDataResult.Timestamps)),
		}
		for i := 0; i < len(metricDataResult.Timestamps); i++ {
			mappedResult.DataPoints = append(mappedResult.DataPoints, DataPoint{
				Value:     &metricDataResult.Values[i],
				Timestamp: metricDataResult.Timestamps[i],
			})
			if !exportAllDataPoints {
				break
			}
		}
		output = append(output, mappedResult)
	}
	return output
}

// GetMetricStatistics issues a single GetMetricStatistics call and maps the
// datapoints into the internal model. Returns nil on API error.
func (c client) GetMetricStatistics(ctx context.Context, logger *slog.Logger, dimensions []model.Dimension, namespace string, metric *model.MetricConfig) []*model.MetricStatisticsResult {
	filter := createGetMetricStatisticsInput(logger, dimensions, &namespace, metric)

	c.logger.Debug("GetMetricStatistics", "input", filter)

	resp, err := c.cloudwatchAPI.GetMetricStatistics(ctx, filter)

	c.logger.Debug("GetMetricStatistics", "output", resp)

	promutil.CloudwatchAPICounter.WithLabelValues("GetMetricStatistics").Inc()
	promutil.CloudwatchGetMetricStatisticsAPICounter.Inc()

	if err != nil {
		promutil.CloudwatchAPIErrorCounter.WithLabelValues("GetMetricStatistics").Inc()
		c.logger.Error("Failed to get metric statistics", "err", err)
		return nil
	}

	ptrs := make([]*types.Datapoint, 0, len(resp.Datapoints))
	for i := range resp.Datapoints {
		// Take the address of the slice element itself. The previous code appended
		// &datapoint from a range loop, which (pre-Go 1.22) aliased one shared loop
		// variable and otherwise copied every datapoint per iteration.
		ptrs = append(ptrs, &resp.Datapoints[i])
	}

	return toModelDataPoints(ptrs)
}

// toModelDataPoints converts SDK datapoints into model results, preserving
// extended (percentile) statistics.
func toModelDataPoints(cwDataPoints []*types.Datapoint) []*model.MetricStatisticsResult {
	modelDataPoints := make([]*model.MetricStatisticsResult, 0, len(cwDataPoints))

	for _, cwDatapoint := range cwDataPoints {
		extendedStats := make(map[string]*float64, len(cwDatapoint.ExtendedStatistics))
		for name, value := range cwDatapoint.ExtendedStatistics {
			// Copy into a fresh variable before taking its address so each map entry
			// points at distinct storage on every Go version (pre-1.22 the loop
			// variable was shared across iterations).
			value := value
			extendedStats[name] = &value
		}
		modelDataPoints = append(modelDataPoints, &model.MetricStatisticsResult{
			Average:            cwDatapoint.Average,
			ExtendedStatistics: extendedStats,
			Maximum:            cwDatapoint.Maximum,
			Minimum:            cwDatapoint.Minimum,
			SampleCount:        cwDatapoint.SampleCount,
			Sum:                cwDatapoint.Sum,
			Timestamp:          cwDatapoint.Timestamp,
		})
	}
	return modelDataPoints
}
package cloudwatch

import (
	"testing"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	aws_cloudwatch "github.com/aws/aws-sdk-go-v2/service/cloudwatch"
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
	"github.com/stretchr/testify/require"
)

// Test_toMetricDataResult covers the mapping from the SDK's GetMetricDataOutput
// to []MetricDataResult: by default only the first (most recent) datapoint per
// query is kept; with exportAllDataPoints every datapoint is mapped.
func Test_toMetricDataResult(t *testing.T) {
	ts := time.Date(2024, time.January, 1, 0, 0, 0, 0, time.UTC)
	type testCase struct {
		name                      string
		exportAllDataPoints       bool
		getMetricDataOutput       aws_cloudwatch.GetMetricDataOutput
		expectedMetricDataResults []MetricDataResult
	}
	testCases := []testCase{
		{
			// Default mode: only the newest datapoint of each series survives.
			name:                "all metrics present",
			exportAllDataPoints: false,
			getMetricDataOutput: aws_cloudwatch.GetMetricDataOutput{
				MetricDataResults: []types.MetricDataResult{
					{
						Id:         aws.String("metric-1"),
						Values:     []float64{1.0, 2.0, 3.0},
						Timestamps: []time.Time{ts.Add(10 * time.Minute), ts.Add(5 * time.Minute), ts},
					},
					{
						Id:         aws.String("metric-2"),
						Values:     []float64{2.0},
						Timestamps: []time.Time{ts},
					},
				},
			},
			expectedMetricDataResults: []MetricDataResult{
				{
					ID: "metric-1",
					DataPoints: []DataPoint{
						{Value: aws.Float64(1.0), Timestamp: ts.Add(10 * time.Minute)},
					},
				},
				{
					ID: "metric-2",
					DataPoints: []DataPoint{
						{Value: aws.Float64(2.0), Timestamp: ts},
					},
				},
			},
		},
		{
			// A series with no values must map to an empty (non-nil) DataPoints slice.
			name:                "metric with no values",
			exportAllDataPoints: false,
			getMetricDataOutput: aws_cloudwatch.GetMetricDataOutput{
				MetricDataResults: []types.MetricDataResult{
					{
						Id:         aws.String("metric-1"),
						Values:     []float64{1.0, 2.0, 3.0},
						Timestamps: []time.Time{ts.Add(10 * time.Minute), ts.Add(5 * time.Minute), ts},
					},
					{
						Id:         aws.String("metric-2"),
						Values:     []float64{},
						Timestamps: []time.Time{},
					},
				},
			},
			expectedMetricDataResults: []MetricDataResult{
				{
					ID: "metric-1",
					DataPoints: []DataPoint{
						{Value: aws.Float64(1.0), Timestamp: ts.Add(10 * time.Minute)},
					},
				},
				{
					ID:         "metric-2",
					DataPoints: []DataPoint{},
				},
			},
		},
		{
			// exportAllDataPoints keeps every datapoint, preserving API order (newest first).
			name:                "export all data points",
			exportAllDataPoints: true,
			getMetricDataOutput: aws_cloudwatch.GetMetricDataOutput{
				MetricDataResults: []types.MetricDataResult{
					{
						Id:         aws.String("metric-1"),
						Values:     []float64{1.0, 2.0, 3.0},
						Timestamps: []time.Time{ts.Add(10 * time.Minute), ts.Add(5 * time.Minute), ts},
					},
					{
						Id:         aws.String("metric-2"),
						Values:     []float64{2.0},
						Timestamps: []time.Time{ts},
					},
				},
			},
			expectedMetricDataResults: []MetricDataResult{
				{
					ID: "metric-1",
					DataPoints: []DataPoint{
						{Value: aws.Float64(1.0), Timestamp: ts.Add(10 * time.Minute)},
						{Value: aws.Float64(2.0), Timestamp: ts.Add(5 * time.Minute)},
						{Value: aws.Float64(3.0), Timestamp: ts},
					},
				},
				{
					ID: "metric-2",
					DataPoints: []DataPoint{
						{Value: aws.Float64(2.0), Timestamp: ts},
					},
				},
			},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			metricDataResults := toMetricDataResult(tc.getMetricDataOutput, tc.exportAllDataPoints)
			require.Equal(t, tc.expectedMetricDataResults, metricDataResults)
		})
	}
}

================================================
FILE: pkg/clients/cloudwatch/concurrency_client.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cloudwatch

import (
	"context"
	"log/slog"
	"time"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Operation identifiers used as keys by ConcurrencyLimiter implementations.
const (
	listMetricsCall         = "ListMetrics"
	getMetricDataCall       = "GetMetricData"
	getMetricStatisticsCall = "GetMetricStatistics"
)

// ConcurrencyLimiter limits the concurrency when calling AWS CloudWatch APIs.
The functions implemented // by this interface follow the same as a normal semaphore, but accept and operation identifier. Some // implementations might use this to keep a different semaphore, with different reentrance values, per // operation. type ConcurrencyLimiter interface { // Acquire takes one "ticket" from the concurrency limiter for op. If there's none available, the caller // routine will be blocked until there's room available. Acquire(op string) // Release gives back one "ticket" to the concurrency limiter identified by op. If there's one or more // routines waiting for one, one will be woken up. Release(op string) } type limitedConcurrencyClient struct { client Client limiter ConcurrencyLimiter } func NewLimitedConcurrencyClient(client Client, limiter ConcurrencyLimiter) Client { return &limitedConcurrencyClient{ client: client, limiter: limiter, } } func (c limitedConcurrencyClient) ListMetrics(ctx context.Context, namespace string, metric *model.MetricConfig, recentlyActiveOnly bool, fn func(page []*model.Metric)) error { c.limiter.Acquire(listMetricsCall) err := c.client.ListMetrics(ctx, namespace, metric, recentlyActiveOnly, fn) c.limiter.Release(listMetricsCall) return err } func (c limitedConcurrencyClient) GetMetricData(ctx context.Context, getMetricData []*model.CloudwatchData, namespace string, startTime time.Time, endTime time.Time) []MetricDataResult { c.limiter.Acquire(getMetricDataCall) res := c.client.GetMetricData(ctx, getMetricData, namespace, startTime, endTime) c.limiter.Release(getMetricDataCall) return res } func (c limitedConcurrencyClient) GetMetricStatistics(ctx context.Context, logger *slog.Logger, dimensions []model.Dimension, namespace string, metric *model.MetricConfig) []*model.MetricStatisticsResult { c.limiter.Acquire(getMetricStatisticsCall) res := c.client.GetMetricStatistics(ctx, logger, dimensions, namespace, metric) c.limiter.Release(getMetricStatisticsCall) return res } // ConcurrencyConfig configures how concurrency 
should be limited in a Cloudwatch API client. It allows // one to pick between different limiter implementations: a single limit limiter, or one with a different limit per // API call. type ConcurrencyConfig struct { // PerAPIEnabled configures whether to have a limit per API call. PerAPILimitEnabled bool // SingleLimit configures the concurrency limit when using a single limiter for api calls. SingleLimit int // ListMetrics limits the number for ListMetrics API concurrent API calls. ListMetrics int // GetMetricData limits the number for GetMetricData API concurrent API calls. GetMetricData int // GetMetricStatistics limits the number for GetMetricStatistics API concurrent API calls. GetMetricStatistics int } // semaphore implements a simple semaphore using a channel. type semaphore chan struct{} // newSemaphore creates a new semaphore with the given limit. func newSemaphore(limit int) semaphore { return make(semaphore, limit) } func (s semaphore) Acquire() { s <- struct{}{} } func (s semaphore) Release() { <-s } // NewLimiter creates a new ConcurrencyLimiter, according to the ConcurrencyConfig. func (cfg ConcurrencyConfig) NewLimiter() ConcurrencyLimiter { if cfg.PerAPILimitEnabled { return NewPerAPICallLimiter(cfg.ListMetrics, cfg.GetMetricData, cfg.GetMetricStatistics) } return NewSingleLimiter(cfg.SingleLimit) } // perAPICallLimiter is a ConcurrencyLimiter that keeps a different concurrency limiter per different API call. This allows // a more granular control of concurrency, allowing us to take advantage of different api limits. For example, ListMetrics // has a limit of 25 TPS, while GetMetricData has none. type perAPICallLimiter struct { listMetricsLimiter semaphore getMetricsDataLimiter semaphore getMetricsStatisticsLimiter semaphore } // NewPerAPICallLimiter creates a new PerAPICallLimiter. 
func NewPerAPICallLimiter(listMetrics, getMetricData, getMetricStatistics int) ConcurrencyLimiter { return &perAPICallLimiter{ listMetricsLimiter: newSemaphore(listMetrics), getMetricsDataLimiter: newSemaphore(getMetricData), getMetricsStatisticsLimiter: newSemaphore(getMetricStatistics), } } func (l *perAPICallLimiter) Acquire(op string) { switch op { case listMetricsCall: l.listMetricsLimiter.Acquire() case getMetricDataCall: l.getMetricsDataLimiter.Acquire() case getMetricStatisticsCall: l.getMetricsStatisticsLimiter.Acquire() } } func (l *perAPICallLimiter) Release(op string) { switch op { case listMetricsCall: l.listMetricsLimiter.Release() case getMetricDataCall: l.getMetricsDataLimiter.Release() case getMetricStatisticsCall: l.getMetricsStatisticsLimiter.Release() } } // singleLimiter is the current implementation of ConcurrencyLimiter, which has a single limit for all different API calls. type singleLimiter struct { s semaphore } // NewSingleLimiter creates a new SingleLimiter. func NewSingleLimiter(limit int) ConcurrencyLimiter { return &singleLimiter{ s: newSemaphore(limit), } } func (sl *singleLimiter) Acquire(_ string) { sl.s.Acquire() } func (sl *singleLimiter) Release(_ string) { sl.s.Release() } ================================================ FILE: pkg/clients/cloudwatch/input.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cloudwatch

import (
	"log/slog"
	"strconv"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	aws_cloudwatch "github.com/aws/aws-sdk-go-v2/service/cloudwatch"
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/promutil"
)

// toCloudWatchDimensions maps the internal dimension model to the AWS SDK dimension type.
func toCloudWatchDimensions(dimensions []model.Dimension) []types.Dimension {
	cwDim := make([]types.Dimension, 0, len(dimensions))
	for _, dim := range dimensions {
		// Don't take pointers directly to loop variables
		cDim := dim
		cwDim = append(cwDim, types.Dimension{
			Name:  &cDim.Name,
			Value: &cDim.Value,
		})
	}
	return cwDim
}

// createGetMetricStatisticsInput builds the GetMetricStatistics request for the given
// metric and dimensions. The query window is [now-delay-length, now-delay] (seconds).
// Statistics that match the percentile pattern (e.g. p99) are requested as extended
// statistics; everything else is requested as a regular statistic.
func createGetMetricStatisticsInput(logger *slog.Logger, dimensions []model.Dimension, namespace *string, metric *model.MetricConfig) *aws_cloudwatch.GetMetricStatisticsInput {
	period := metric.Period
	length := metric.Length
	delay := metric.Delay

	// Use one reference instant so the window is exactly `length` seconds wide;
	// the original called time.Now() twice, which could drift slightly between calls.
	now := time.Now()
	endTime := now.Add(-time.Duration(delay) * time.Second)
	startTime := now.Add(-(time.Duration(length) + time.Duration(delay)) * time.Second)

	var statistics []types.Statistic
	var extendedStatistics []string
	for _, statistic := range metric.Statistics {
		if promutil.Percentile.MatchString(statistic) {
			extendedStatistics = append(extendedStatistics, statistic)
		} else {
			statistics = append(statistics, types.Statistic(statistic))
		}
	}

	output := &aws_cloudwatch.GetMetricStatisticsInput{
		Dimensions:         toCloudWatchDimensions(dimensions),
		Namespace:          namespace,
		StartTime:          &startTime,
		EndTime:            &endTime,
		Period:             aws.Int32(int32(period)),
		MetricName:         &metric.Name,
		Statistics:         statistics,
		ExtendedStatistics: extendedStatistics,
	}

	// Bug fix: the original logged string(statistics[0]) unconditionally, which
	// panicked with an index-out-of-range when every configured statistic was a
	// percentile (then statistics stays empty and only extendedStatistics is set).
	cliStatistic := ""
	if len(statistics) > 0 {
		cliStatistic = string(statistics[0])
	} else if len(extendedStatistics) > 0 {
		cliStatistic = extendedStatistics[0]
	}
	logger.Debug("CLI helper - " +
		"aws cloudwatch get-metric-statistics" +
		" --metric-name " + metric.Name +
		" --dimensions " + dimensionsToCliString(dimensions) +
		" --namespace " + *namespace +
		" --statistics " + cliStatistic +
		" --period " + strconv.FormatInt(period, 10) +
		" --start-time " + startTime.Format(time.RFC3339) +
		" --end-time " + endTime.Format(time.RFC3339))
	logger.Debug("createGetMetricStatisticsInput", "output", *output)

	return output
}

// dimensionsToCliString renders dimensions in the `Name=...,Value=...` form used by
// the aws CLI, each pair followed by a single space (including a trailing space).
func dimensionsToCliString(dimensions []model.Dimension) string {
	out := strings.Builder{}
	for _, dim := range dimensions {
		out.WriteString("Name=")
		out.WriteString(dim.Name)
		out.WriteString(",Value=")
		out.WriteString(dim.Value)
		out.WriteString(" ")
	}
	return out.String()
}


================================================
FILE: pkg/clients/factory.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clients

import (
	"context"
	"fmt"
	"log/slog"
	"os"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/aws/retry"
	aws_config "github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
	"github.com/aws/aws-sdk-go-v2/service/amp"
	"github.com/aws/aws-sdk-go-v2/service/apigateway"
	"github.com/aws/aws-sdk-go-v2/service/apigatewayv2"
	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
	"github.com/aws/aws-sdk-go-v2/service/databasemigrationservice"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	"github.com/aws/aws-sdk-go-v2/service/iam"
	"github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi"
	"github.com/aws/aws-sdk-go-v2/service/shield"
	"github.com/aws/aws-sdk-go-v2/service/storagegateway"
	"github.com/aws/aws-sdk-go-v2/service/sts"
	aws_logging "github.com/aws/smithy-go/logging"
	"go.uber.org/atomic"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/account"
	cloudwatch_client "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/tagging"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Factory is an interface to abstract away all logic required to produce the different
// YACE specific clients which wrap AWS clients
type Factory interface {
	GetCloudwatchClient(region string, role model.Role, concurrency cloudwatch_client.ConcurrencyConfig) cloudwatch_client.Client
	GetTaggingClient(region string, role model.Role, concurrencyLimit int) tagging.Client
	GetAccountClient(region string, role model.Role) account.Client
}

type awsRegion = string

// CachingFactory is a Factory that caches the AWS config and the derived YACE clients
// per (role, region) pair. Clients are created lazily by the Get* methods or eagerly
// by Refresh, and dropped by Clear.
type CachingFactory struct {
	logger              *slog.Logger
	stsOptions          func(*sts.Options)
	clients             map[model.Role]map[awsRegion]*cachedClients
	mu                  sync.Mutex
	refreshed           *atomic.Bool
	cleared             *atomic.Bool
	fipsEnabled         bool
	endpointURLOverride string
}

// cachedClients holds the per (role, region) AWS config plus the lazily built clients.
type cachedClients struct {
	awsConfig *aws.Config
	// if we know that this job is only used for static
	// then we don't have to construct as many cached connections
	// later on
	onlyStatic bool
	cloudwatch cloudwatch_client.Client
	tagging    tagging.Client
	account    account.Client
}

// Ensure the struct properly implements the interface
var _ Factory = &CachingFactory{}

// NewFactory creates a new client factory to use when fetching data from AWS with sdk v2
func NewFactory(logger *slog.Logger, jobsCfg model.JobsConfig, fips bool) (*CachingFactory, error) {
	var options []func(*aws_config.LoadOptions) error
	options = append(options, aws_config.WithLogger(aws_logging.LoggerFunc(func(classification aws_logging.Classification, format string, v ...interface{}) {
		switch classification {
		case aws_logging.Debug:
			if logger.Enabled(context.Background(), slog.LevelDebug) {
				logger.Debug(fmt.Sprintf(format, v...))
			}
		case aws_logging.Warn:
			logger.Warn(fmt.Sprintf(format, v...))
		default:
			// AWS logging only supports debug or warn, log everything else as error
			logger.Error(fmt.Sprintf(format, v...), "err", "unexpected aws error classification", "classification", classification)
		}
	})))
	options = append(options, aws_config.WithLogConfigurationWarnings(true))

	endpointURLOverride := os.Getenv("AWS_ENDPOINT_URL")

	options = append(options, aws_config.WithRetryMaxAttempts(5))

	c, err := aws_config.LoadDefaultConfig(context.TODO(), options...)
	if err != nil {
		return nil, fmt.Errorf("failed to load default aws config: %w", err)
	}

	stsOptions := createStsOptions(jobsCfg.StsRegion, logger.Enabled(context.Background(), slog.LevelDebug), endpointURLOverride, fips)

	cache := map[model.Role]map[awsRegion]*cachedClients{}
	// addToCache consolidates the previously triplicated population loops. First write
	// wins, so earlier job kinds (discovery) take precedence over later ones
	// (static/custom namespace) — same outcome as the original precedence comments.
	addToCache := func(roles []model.Role, regions []string, onlyStatic bool) {
		for _, role := range roles {
			if _, ok := cache[role]; !ok {
				cache[role] = map[awsRegion]*cachedClients{}
			}
			for _, region := range regions {
				if _, exists := cache[role][region]; exists {
					continue
				}
				cache[role][region] = &cachedClients{
					awsConfig:  awsConfigForRegion(role, &c, region, stsOptions),
					onlyStatic: onlyStatic,
				}
			}
		}
	}
	for _, discoveryJob := range jobsCfg.DiscoveryJobs {
		addToCache(discoveryJob.Roles, discoveryJob.Regions, false)
	}
	// Discovery job client definitions have precedence
	for _, staticJob := range jobsCfg.StaticJobs {
		addToCache(staticJob.Roles, staticJob.Regions, true)
	}
	for _, customNamespaceJob := range jobsCfg.CustomNamespaceJobs {
		addToCache(customNamespaceJob.Roles, customNamespaceJob.Regions, true)
	}

	return &CachingFactory{
		logger:              logger,
		clients:             cache,
		fipsEnabled:         fips,
		stsOptions:          stsOptions,
		endpointURLOverride: endpointURLOverride,
		cleared:             atomic.NewBool(false),
		refreshed:           atomic.NewBool(false),
	}, nil
}

// GetCloudwatchClient returns a concurrency-limited CloudWatch client for the given
// role/region, creating and caching the underlying client on first use.
func (c *CachingFactory) GetCloudwatchClient(region string, role model.Role, concurrency cloudwatch_client.ConcurrencyConfig) cloudwatch_client.Client {
	if !c.refreshed.Load() {
		// if we have not refreshed then we need to lock in case we are accessing concurrently
		c.mu.Lock()
		defer c.mu.Unlock()
	}
	cached := c.clients[role][region]
	if cached.cloudwatch == nil {
		cached.cloudwatch = cloudwatch_client.NewClient(c.logger, c.createCloudwatchClient(cached.awsConfig))
	}
	return cloudwatch_client.NewLimitedConcurrencyClient(cached.cloudwatch, concurrency.NewLimiter())
}

// GetTaggingClient returns a concurrency-limited tagging client for the given
// role/region, creating and caching the underlying client on first use.
func (c *CachingFactory) GetTaggingClient(region string, role model.Role, concurrencyLimit int) tagging.Client {
	if !c.refreshed.Load() {
		// if we have not refreshed then we need to lock in case we are accessing concurrently
		c.mu.Lock()
		defer c.mu.Unlock()
	}
	cached := c.clients[role][region]
	if cached.tagging == nil {
		cached.tagging = c.newTaggingClient(cached.awsConfig)
	}
	return tagging.NewLimitedConcurrencyClient(cached.tagging, concurrencyLimit)
}

// GetAccountClient returns the account client for the given role/region, creating
// and caching it on first use.
func (c *CachingFactory) GetAccountClient(region string, role model.Role) account.Client {
	if !c.refreshed.Load() {
		// if we have not refreshed then we need to lock in case we are accessing concurrently
		c.mu.Lock()
		defer c.mu.Unlock()
	}
	cached := c.clients[role][region]
	if cached.account == nil {
		cached.account = c.newAccountClient(cached.awsConfig)
	}
	return cached.account
}

// Refresh eagerly (re)creates all cached clients. Tagging and account clients are
// skipped for caches that are only used by static jobs.
func (c *CachingFactory) Refresh() {
	if c.refreshed.Load() {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	// Avoid double refresh in the event Refresh() is called concurrently
	if c.refreshed.Load() {
		return
	}
	for _, regionClients := range c.clients {
		for _, cache := range regionClients {
			cache.cloudwatch = cloudwatch_client.NewClient(c.logger, c.createCloudwatchClient(cache.awsConfig))
			if cache.onlyStatic {
				continue
			}
			cache.tagging = c.newTaggingClient(cache.awsConfig)
			cache.account = c.newAccountClient(cache.awsConfig)
		}
	}
	c.refreshed.Store(true)
	c.cleared.Store(false)
}

// Clear drops all cached clients (but keeps the AWS configs), so they get rebuilt
// on the next Get*/Refresh call.
func (c *CachingFactory) Clear() {
	if c.cleared.Load() {
		return
	}
	// Prevent concurrent reads/write if clear is called during execution
	c.mu.Lock()
	defer c.mu.Unlock()
	// Avoid double clear in the event Clear() is called concurrently
	// (the original comment said Refresh() here, which was stale)
	if c.cleared.Load() {
		return
	}
	for _, regions := range c.clients {
		for _, cache := range regions {
			cache.cloudwatch = nil
			cache.account = nil
			cache.tagging = nil
		}
	}
	c.refreshed.Store(false)
	c.cleared.Store(true)
}

// newTaggingClient builds the aggregated tagging client; shared by GetTaggingClient
// and Refresh so the long constructor argument list exists in one place only.
func (c *CachingFactory) newTaggingClient(cfg *aws.Config) tagging.Client {
	return tagging.NewClient(
		c.logger,
		c.createTaggingClient(cfg),
		c.createAutoScalingClient(cfg),
		c.createAPIGatewayClient(cfg),
		c.createAPIGatewayV2Client(cfg),
		c.createEC2Client(cfg),
		c.createDMSClient(cfg),
		c.createPrometheusClient(cfg),
		c.createStorageGatewayClient(cfg),
		c.createShieldClient(cfg),
	)
}

// newAccountClient builds the account client; shared by GetAccountClient and Refresh.
func (c *CachingFactory) newAccountClient(cfg *aws.Config) account.Client {
	return account.NewClient(c.logger, c.createStsClient(cfg), c.createIAMClient(cfg))
}

// GetAWSRegionalConfig returns the aws.Config for the given region and role. It implements the RegionalConfigProvider interface.
func (c *CachingFactory) GetAWSRegionalConfig(region string, role model.Role) *aws.Config {
	return c.clients[role][region].awsConfig
}

func (c *CachingFactory) createCloudwatchClient(regionConfig *aws.Config) *cloudwatch.Client {
	return cloudwatch.NewFromConfig(*regionConfig, func(options *cloudwatch.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}

		// Setting an explicit retryer will override the default settings on the config
		options.Retryer = retry.NewStandard(func(options *retry.StandardOptions) {
			options.MaxAttempts = 5
			options.MaxBackoff = 3 * time.Second
		})

		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

func (c *CachingFactory) createTaggingClient(regionConfig *aws.Config) *resourcegroupstaggingapi.Client {
	return resourcegroupstaggingapi.NewFromConfig(*regionConfig, func(options *resourcegroupstaggingapi.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		// The FIPS setting is ignored because FIPS is not available for resource groups tagging apis
		// If enabled the SDK will try to use non-existent FIPS URLs, https://github.com/aws/aws-sdk-go-v2/issues/2138#issuecomment-1570791988
		// AWS FIPS Reference: https://aws.amazon.com/compliance/fips/
	})
}

func (c *CachingFactory) createAutoScalingClient(assumedConfig *aws.Config) *autoscaling.Client {
	return autoscaling.NewFromConfig(*assumedConfig, func(options *autoscaling.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		// The FIPS setting is ignored because FIPS is not available for EC2 autoscaling apis
		// If enabled the SDK will try to use non-existent FIPS URLs, https://github.com/aws/aws-sdk-go-v2/issues/2138#issuecomment-1570791988
		// AWS FIPS Reference: https://aws.amazon.com/compliance/fips/
		// EC2 autoscaling has FIPS compliant URLs for govcloud, but they do not use any FIPS prefixing, and should work
		// with sdk v2s EndpointResolverV2
	})
}

func (c *CachingFactory) createAPIGatewayClient(assumedConfig *aws.Config) *apigateway.Client {
	return apigateway.NewFromConfig(*assumedConfig, func(options *apigateway.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

func (c *CachingFactory) createAPIGatewayV2Client(assumedConfig *aws.Config) *apigatewayv2.Client {
	return apigatewayv2.NewFromConfig(*assumedConfig, func(options *apigatewayv2.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

func (c *CachingFactory) createEC2Client(assumedConfig *aws.Config) *ec2.Client {
	return ec2.NewFromConfig(*assumedConfig, func(options *ec2.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

func (c *CachingFactory) createDMSClient(assumedConfig *aws.Config) *databasemigrationservice.Client {
	return databasemigrationservice.NewFromConfig(*assumedConfig, func(options *databasemigrationservice.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

func (c *CachingFactory) createStorageGatewayClient(assumedConfig *aws.Config) *storagegateway.Client {
	return storagegateway.NewFromConfig(*assumedConfig, func(options *storagegateway.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

func (c *CachingFactory) createPrometheusClient(assumedConfig *aws.Config) *amp.Client {
	return amp.NewFromConfig(*assumedConfig, func(options *amp.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		// The FIPS setting is ignored because FIPS is not available for amp apis
		// If enabled the SDK will try to use non-existent FIPS URLs, https://github.com/aws/aws-sdk-go-v2/issues/2138#issuecomment-1570791988
		// AWS FIPS Reference: https://aws.amazon.com/compliance/fips/
	})
}

func (c *CachingFactory) createStsClient(awsConfig *aws.Config) *sts.Client {
	return sts.NewFromConfig(*awsConfig, c.stsOptions)
}

func (c *CachingFactory) createIAMClient(awsConfig *aws.Config) *iam.Client {
	return iam.NewFromConfig(*awsConfig)
}

func (c *CachingFactory) createShieldClient(awsConfig *aws.Config) *shield.Client {
	return shield.NewFromConfig(*awsConfig, func(options *shield.Options) {
		if c.logger != nil && c.logger.Enabled(context.Background(), slog.LevelDebug) {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if c.endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(c.endpointURLOverride)
		}
		if c.fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	})
}

// createStsOptions builds the option mutator applied to every STS client, honoring
// an optional override region, debug wire logging, an endpoint override and FIPS.
func createStsOptions(stsRegion string, isDebugLoggingEnabled bool, endpointURLOverride string, fipsEnabled bool) func(*sts.Options) {
	return func(options *sts.Options) {
		if stsRegion != "" {
			options.Region = stsRegion
		}
		if isDebugLoggingEnabled {
			options.ClientLogMode = aws.LogRequestWithBody | aws.LogResponseWithBody
		}
		if endpointURLOverride != "" {
			options.BaseEndpoint = aws.String(endpointURLOverride)
		}
		if fipsEnabled {
			options.EndpointOptions.UseFIPSEndpoint = aws.FIPSEndpointStateEnabled
		}
	}
}

var defaultRole = model.Role{}

// awsConfigForRegion returns a copy of the base config pinned to the given region,
// wiring up STS assume-role credentials when a non-default role is configured.
func awsConfigForRegion(r model.Role, c *aws.Config, region awsRegion, stsOptions func(*sts.Options)) *aws.Config {
	regionalConfig := c.Copy()
	regionalConfig.Region = region

	if r == defaultRole {
		return &regionalConfig
	}

	// based on https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/credentials/stscreds#hdr-Assume_Role
	// found via https://github.com/aws/aws-sdk-go-v2/issues/1382
	regionalSts := sts.NewFromConfig(*c, stsOptions)
	credentials := stscreds.NewAssumeRoleProvider(regionalSts, r.RoleArn, func(options *stscreds.AssumeRoleOptions) {
		if r.ExternalID != "" {
			options.ExternalID = aws.String(r.ExternalID)
		}
	})
	regionalConfig.Credentials = aws.NewCredentialsCache(credentials)
	return &regionalConfig
}


================================================
FILE: pkg/clients/factory_test.go
================================================
// Copyright The
Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package clients import ( "context" "log/slog" "reflect" "sync" "testing" "time" "unsafe" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/amp" "github.com/aws/aws-sdk-go-v2/service/apigateway" "github.com/aws/aws-sdk-go-v2/service/apigatewayv2" "github.com/aws/aws-sdk-go-v2/service/autoscaling" "github.com/aws/aws-sdk-go-v2/service/databasemigrationservice" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" "github.com/aws/aws-sdk-go-v2/service/storagegateway" "github.com/aws/aws-sdk-go-v2/service/sts" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/atomic" cloudwatch_client "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var jobsCfgWithDefaultRoleAndRegion1 = model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{ { Roles: []model.Role{{}}, Regions: []string{"region1"}, }, }, } func TestNewFactory_initializes_clients(t *testing.T) { role1 := model.Role{ RoleArn: "role1", ExternalID: "external1", } role2 := model.Role{ RoleArn: "role2", ExternalID: "external2", } role3 := model.Role{ RoleArn: "role3", ExternalID: "external3", } region1 := "region1" region2 := "region2" region3 := "region3" tests := []struct { name string jobsCfg 
model.JobsConfig onlyStatic *bool }{ { name: "from discovery config", jobsCfg: model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{{ Regions: []string{region1, region2, region3}, Roles: []model.Role{defaultRole, role1, role2, role3}, }}, }, onlyStatic: aws.Bool(false), }, { name: "from static config", jobsCfg: model.JobsConfig{ StaticJobs: []model.StaticJob{{ Regions: []string{region1, region2, region3}, Roles: []model.Role{defaultRole, role1, role2, role3}, }}, }, onlyStatic: aws.Bool(true), }, { name: "from custom config", jobsCfg: model.JobsConfig{ CustomNamespaceJobs: []model.CustomNamespaceJob{{ Regions: []string{region1, region2, region3}, Roles: []model.Role{defaultRole, role1, role2, role3}, }}, }, onlyStatic: aws.Bool(true), }, { name: "from all configs", jobsCfg: model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{{ Regions: []string{region1, region2}, Roles: []model.Role{defaultRole, role1, role2}, }}, StaticJobs: []model.StaticJob{{ Regions: []string{region2, region3}, Roles: []model.Role{defaultRole, role2, role3}, }}, CustomNamespaceJobs: []model.CustomNamespaceJob{{ Regions: []string{region1, region3}, Roles: []model.Role{defaultRole, role1, role3}, }}, }, onlyStatic: nil, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { output, err := NewFactory(promslog.NewNopLogger(), test.jobsCfg, false) require.NoError(t, err) assert.False(t, output.refreshed.Load()) assert.False(t, output.cleared.Load()) require.Len(t, output.clients, 4) assert.Contains(t, output.clients, defaultRole) assert.Contains(t, output.clients, role1) assert.Contains(t, output.clients, role2) assert.Contains(t, output.clients, role3) for role, regionalClients := range output.clients { require.Len(t, regionalClients, 3) assert.Contains(t, regionalClients, region1) assert.Contains(t, regionalClients, region2) assert.Contains(t, regionalClients, region3) for region, clients := range regionalClients { assert.NotNil(t, clients, "role %s region %s had nil clients", 
role, region) if test.onlyStatic != nil { assert.Equal(t, *test.onlyStatic, clients.onlyStatic, "role %s region %s had unexpected onlyStatic value", role, region) } assert.Equal(t, region, clients.awsConfig.Region) } } }) } } func TestNewFactory_respects_stsregion(t *testing.T) { stsRegion := "custom-sts-region" cfg := model.JobsConfig{ StsRegion: stsRegion, DiscoveryJobs: []model.DiscoveryJob{{ Regions: []string{"region1"}, Roles: []model.Role{defaultRole}, }}, } output, err := NewFactory(promslog.NewNopLogger(), cfg, false) require.NoError(t, err) require.Len(t, output.clients, 1) stsOptions := sts.Options{} output.stsOptions(&stsOptions) assert.Equal(t, stsRegion, stsOptions.Region) } func TestCachingFactory_Clear(t *testing.T) { cache := &CachingFactory{ logger: promslog.NewNopLogger(), clients: map[model.Role]map[awsRegion]*cachedClients{ defaultRole: { "region1": &cachedClients{ awsConfig: nil, cloudwatch: testClient{}, tagging: testClient{}, account: testClient{}, }, }, }, refreshed: atomic.NewBool(true), cleared: atomic.NewBool(false), } cache.Clear() assert.True(t, cache.cleared.Load()) assert.False(t, cache.refreshed.Load()) clients := cache.clients[defaultRole]["region1"] require.NotNil(t, clients) assert.Nil(t, clients.cloudwatch) assert.Nil(t, clients.account) assert.Nil(t, clients.tagging) } func TestCachingFactory_Refresh(t *testing.T) { t.Run("creates all clients when config contains only discovery jobs", func(t *testing.T) { output, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, false) require.NoError(t, err) output.Refresh() assert.False(t, output.cleared.Load()) assert.True(t, output.refreshed.Load()) clients := output.clients[defaultRole]["region1"] require.NotNil(t, clients) assert.NotNil(t, clients.cloudwatch) assert.NotNil(t, clients.account) assert.NotNil(t, clients.tagging) }) t.Run("creates only cloudwatch when config is only static jobs", func(t *testing.T) { jobsCfg := model.JobsConfig{ StaticJobs: 
[]model.StaticJob{{ Regions: []string{"region1"}, Roles: []model.Role{{}}, }}, CustomNamespaceJobs: []model.CustomNamespaceJob{{ Regions: []string{"region1"}, Roles: []model.Role{{}}, }}, } output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false) require.NoError(t, err) output.Refresh() assert.False(t, output.cleared.Load()) assert.True(t, output.refreshed.Load()) clients := output.clients[defaultRole]["region1"] require.NotNil(t, clients) assert.NotNil(t, clients.cloudwatch) assert.Nil(t, clients.account) assert.Nil(t, clients.tagging) }) } func TestCachingFactory_GetAccountClient(t *testing.T) { t.Run("refreshed cache does not create new client", func(t *testing.T) { jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{{ Roles: []model.Role{{}}, Regions: []string{"region1"}, }}, } output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false) require.NoError(t, err) output.Refresh() clients := output.clients[defaultRole]["region1"] require.NotNil(t, clients) assert.Equal(t, clients.account, output.GetAccountClient("region1", defaultRole)) }) t.Run("unrefreshed cache creates a new client", func(t *testing.T) { jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{{ Roles: []model.Role{{}}, Regions: []string{"region1"}, }}, } output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false) require.NoError(t, err) clients := output.clients[defaultRole]["region1"] require.NotNil(t, clients) require.Nil(t, clients.account) client := output.GetAccountClient("region1", defaultRole) assert.Equal(t, clients.account, client) }) } func TestCachingFactory_GetCloudwatchClient(t *testing.T) { t.Run("refreshed cache does not create new client", func(t *testing.T) { jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{{ Roles: []model.Role{{}}, Regions: []string{"region1"}, }}, } output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false) require.NoError(t, err) output.Refresh() clients := 
output.clients[defaultRole]["region1"]
		require.NotNil(t, clients)
		// Can't do equality comparison due to concurrency limiter
		assert.NotNil(t, output.GetCloudwatchClient("region1", defaultRole, cloudwatch_client.ConcurrencyConfig{SingleLimit: 1}))
	})
	t.Run("unrefreshed cache creates a new client", func(t *testing.T) {
		jobsCfg := model.JobsConfig{
			DiscoveryJobs: []model.DiscoveryJob{{
				Roles:   []model.Role{{}},
				Regions: []string{"region1"},
			}},
		}
		output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false)
		require.NoError(t, err)
		clients := output.clients[defaultRole]["region1"]
		require.NotNil(t, clients)
		// Without a Refresh() the cached entry starts empty ...
		require.Nil(t, clients.cloudwatch)
		output.GetCloudwatchClient("region1", defaultRole, cloudwatch_client.ConcurrencyConfig{SingleLimit: 1})
		// ... and the lazily created client is stored back into the cache.
		assert.NotNil(t, clients.cloudwatch)
	})
}

// TestCachingFactory_GetTaggingClient mirrors the cloudwatch client test:
// a refreshed cache serves the existing client, an unrefreshed cache creates
// one lazily on first use.
func TestCachingFactory_GetTaggingClient(t *testing.T) {
	t.Run("refreshed cache does not create new client", func(t *testing.T) {
		jobsCfg := model.JobsConfig{
			DiscoveryJobs: []model.DiscoveryJob{{
				Roles:   []model.Role{{}},
				Regions: []string{"region1"},
			}},
		}
		output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false)
		require.NoError(t, err)
		output.Refresh()
		clients := output.clients[defaultRole]["region1"]
		require.NotNil(t, clients)
		// Can't do equality comparison due to concurrency limiter
		assert.NotNil(t, output.GetTaggingClient("region1", defaultRole, 1))
	})
	t.Run("unrefreshed cache creates a new client", func(t *testing.T) {
		jobsCfg := model.JobsConfig{
			DiscoveryJobs: []model.DiscoveryJob{{
				Roles:   []model.Role{{}},
				Regions: []string{"region1"},
			}},
		}
		output, err := NewFactory(promslog.NewNopLogger(), jobsCfg, false)
		require.NoError(t, err)
		clients := output.clients[defaultRole]["region1"]
		require.NotNil(t, clients)
		require.Nil(t, clients.tagging)
		output.GetTaggingClient("region1", defaultRole, 1)
		assert.NotNil(t, clients.tagging)
	})
}

// The create*Client tests below pin down which AWS service clients opt in to
// FIPS endpoints when the factory is built with fips=true and which do not.
func TestCachingFactory_createTaggingClient_DoesNotEnableFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createTaggingClient(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[resourcegroupstaggingapi.Client, resourcegroupstaggingapi.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateUnset)
}

func TestCachingFactory_createAPIGatewayClient_EnablesFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createAPIGatewayClient(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[apigateway.Client, apigateway.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateEnabled)
}

func TestCachingFactory_createAPIGatewayV2Client_EnablesFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createAPIGatewayV2Client(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[apigatewayv2.Client, apigatewayv2.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateEnabled)
}

func TestCachingFactory_createAutoScalingClient_DoesNotEnableFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createAutoScalingClient(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[autoscaling.Client, autoscaling.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateUnset)
}

func TestCachingFactory_createEC2Client_EnablesFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createEC2Client(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[ec2.Client, ec2.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateEnabled)
}

func TestCachingFactory_createDMSClient_EnablesFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createDMSClient(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[databasemigrationservice.Client, databasemigrationservice.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateEnabled)
}

func TestCachingFactory_createStorageGatewayClient_EnablesFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createStorageGatewayClient(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[storagegateway.Client, storagegateway.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateEnabled)
}

func TestCachingFactory_createPrometheusClient_DoesNotEnableFIPS(t *testing.T) {
	factory, err := NewFactory(promslog.NewNopLogger(), jobsCfgWithDefaultRoleAndRegion1, true)
	require.NoError(t, err)
	client := factory.createPrometheusClient(factory.clients[defaultRole]["region1"].awsConfig)
	require.NotNil(t, client)
	options := getOptions[amp.Client, amp.Options](client)
	require.NotNil(t, options)
	assert.Equal(t, options.EndpointOptions.UseFIPSEndpoint, aws.FIPSEndpointStateUnset)
}

// TestRaceConditionRefreshClear hammers Refresh and Clear concurrently; it is
// expected to pass under -race and to finish well before the 60s deadline.
func TestRaceConditionRefreshClear(t *testing.T) {
	// Create a factory with the test config
	factory, err := NewFactory(promslog.NewNopLogger(), model.JobsConfig{}, false)
	require.NoError(t, err)
	// Number of concurrent operations to perform
	iterations := 100
	// Use WaitGroup to synchronize goroutines
	var wg sync.WaitGroup
	wg.Add(iterations) // For both Refresh and Clear calls
	// Start function to run concurrent operations
	for i := 0; i < iterations; i++ {
		// Launch goroutine to call Refresh
		go func() {
			defer wg.Done()
			factory.Refresh()
			factory.Clear()
		}()
	}
	// Create a channel to signal completion
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()
	// Wait for either completion or timeout
	select {
	case <-done:
		// Test completed successfully
	case <-time.After(60 * time.Second):
		require.Fail(t, "Test timed out after 60 seconds")
	}
}

// getOptions uses reflection to pull the unexported options field off of any AWS Client
// the options of the client carries around a lot of info about how the client will behave and is helpful for
// testing lower level sdk configuration
func getOptions[T any, V any](awsClient *T) V {
	field := reflect.ValueOf(awsClient).Elem().FieldByName("options")
	// reflect.NewAt + unsafe.Pointer bypasses the exported-field restriction to
	// read the unexported struct member.
	options := reflect.NewAt(field.Type(), unsafe.Pointer(field.UnsafeAddr())).Elem().Interface().(V)
	return options
}

// testClient is a no-op stub used by factory tests; every method returns the
// zero value.
type testClient struct{}

func (t testClient) GetResources(_ context.Context, _ model.DiscoveryJob, _ string) ([]*model.TaggedResource, error) {
	return nil, nil
}

func (t testClient) GetAccount(_ context.Context) (string, error) {
	return "", nil
}

func (t testClient) GetAccountAlias(_ context.Context) (string, error) {
	return "", nil
}

func (t testClient) ListMetrics(_ context.Context, _ string, _ *model.MetricConfig, _ bool, _ func(page []*model.Metric)) error {
	return nil
}

func (t testClient) GetMetricData(_ context.Context, _ []*model.CloudwatchData, _ string, _ time.Time, _ time.Time) []cloudwatch_client.MetricDataResult {
	return nil
}

func (t testClient) GetMetricStatistics(_ context.Context, _ *slog.Logger, _ []model.Dimension, _ string, _ *model.MetricConfig)
[]*model.MetricStatisticsResult {
	return nil
}


================================================
FILE: pkg/clients/tagging/client.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tagging

import (
	"context"
	"errors"
	"fmt"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/amp"
	"github.com/aws/aws-sdk-go-v2/service/apigateway"
	"github.com/aws/aws-sdk-go-v2/service/apigatewayv2"
	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
	"github.com/aws/aws-sdk-go-v2/service/databasemigrationservice"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	"github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi"
	"github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types"
	"github.com/aws/aws-sdk-go-v2/service/shield"
	"github.com/aws/aws-sdk-go-v2/service/storagegateway"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/promutil"
)

// Client discovers tagged AWS resources for a discovery job in a region.
type Client interface {
	GetResources(ctx context.Context, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error)
}

// ErrExpectedToFindResources is returned when discovery ran (resource filters
// or a ResourceFunc applied) but produced no resources at all.
var ErrExpectedToFindResources = errors.New("expected to discover resources but none were found")

// client bundles the per-service AWS API clients needed for discovery.
type client struct {
	logger            *slog.Logger
	taggingAPI        *resourcegroupstaggingapi.Client
	autoscalingAPI    *autoscaling.Client
	apiGatewayAPI     *apigateway.Client
	apiGatewayV2API   *apigatewayv2.Client
	ec2API            *ec2.Client
	dmsAPI            *databasemigrationservice.Client
	prometheusSvcAPI  *amp.Client
	storageGatewayAPI *storagegateway.Client
	shieldAPI         *shield.Client
}

// NewClient builds a tagging Client from the individual AWS service clients.
func NewClient(
	logger *slog.Logger,
	taggingAPI *resourcegroupstaggingapi.Client,
	autoscalingAPI *autoscaling.Client,
	apiGatewayAPI *apigateway.Client,
	apiGatewayV2API *apigatewayv2.Client,
	ec2API *ec2.Client,
	dmsClient *databasemigrationservice.Client,
	prometheusClient *amp.Client,
	storageGatewayAPI *storagegateway.Client,
	shieldAPI *shield.Client,
) Client {
	return &client{
		logger:            logger,
		taggingAPI:        taggingAPI,
		autoscalingAPI:    autoscalingAPI,
		apiGatewayAPI:     apiGatewayAPI,
		apiGatewayV2API:   apiGatewayV2API,
		ec2API:            ec2API,
		dmsAPI:            dmsClient,
		prometheusSvcAPI:  prometheusClient,
		storageGatewayAPI: storageGatewayAPI,
		shieldAPI:         shieldAPI,
	}
}

// GetResources discovers resources for the job's namespace: it queries the
// Resource Groups Tagging API when the service declares ResourceFilters, then
// applies any namespace-specific ResourceFunc/FilterFunc hooks from
// ServiceFilters. Returns ErrExpectedToFindResources when discovery was
// expected to yield resources but found none.
func (c client) GetResources(ctx context.Context, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
	svc := config.SupportedServices.GetService(job.Namespace)
	var resources []*model.TaggedResource
	shouldHaveDiscoveredResources := false
	if len(svc.ResourceFilters) > 0 {
		shouldHaveDiscoveredResources = true
		filters := make([]string, 0, len(svc.ResourceFilters))
		for _, filter := range svc.ResourceFilters {
			filters = append(filters, *filter)
		}
		var tagFilters []types.TagFilter
		if len(job.SearchTags) > 0 {
			for i := range job.SearchTags {
				// Because everything with the AWS APIs is pointers we need a pointer to the `Key` field from the SearchTag.
				// We can't take a pointer to any fields from loop variable or the pointer will always be the same and this logic will be broken.
				st := job.SearchTags[i]
				// AWS's GetResources has a TagFilter option which matches the semantics of our SearchTags where all filters must match
				// Their value matching implementation is different though so instead of mapping the Key and Value we only map the Keys.
				// Their API docs say, "If you don't specify a value for a key, the response returns all resources that are tagged with that key, with any or no value."
				// which makes this a safe way to reduce the amount of data we need to filter out.
				// https://docs.aws.amazon.com/resourcegroupstagging/latest/APIReference/API_GetResources.html#resourcegrouptagging-GetResources-request-TagFilters
				tagFilters = append(tagFilters, types.TagFilter{Key: &st.Key})
			}
		}
		inputparams := &resourcegroupstaggingapi.GetResourcesInput{
			ResourceTypeFilters: filters,
			ResourcesPerPage:    aws.Int32(int32(100)), // max allowed value according to API docs
			TagFilters:          tagFilters,
		}
		paginator := resourcegroupstaggingapi.NewGetResourcesPaginator(c.taggingAPI, inputparams, func(options *resourcegroupstaggingapi.GetResourcesPaginatorOptions) {
			options.StopOnDuplicateToken = true
		})
		for paginator.HasMorePages() {
			promutil.ResourceGroupTaggingAPICounter.Inc()
			page, err := paginator.NextPage(ctx)
			if err != nil {
				return nil, err
			}
			for _, resourceTagMapping := range page.ResourceTagMappingList {
				resource := model.TaggedResource{
					ARN:       *resourceTagMapping.ResourceARN,
					Namespace: job.Namespace,
					Region:    region,
					Tags:      make([]model.Tag, 0, len(resourceTagMapping.Tags)),
				}
				for _, t := range resourceTagMapping.Tags {
					resource.Tags = append(resource.Tags, model.Tag{Key: *t.Key, Value: *t.Value})
				}
				// Server-side TagFilters only match keys; full key/value
				// matching happens client-side here.
				if resource.FilterThroughTags(job.SearchTags) {
					resources = append(resources, &resource)
				} else {
					c.logger.Debug("Skipping resource because search tags do not match", "arn", resource.ARN)
				}
			}
		}
		c.logger.Debug("GetResourcesPages finished", "total", len(resources))
	}
	if ext, ok := ServiceFilters[svc.Namespace]; ok {
		if ext.ResourceFunc != nil {
			shouldHaveDiscoveredResources = true
			newResources, err := ext.ResourceFunc(ctx, c, job, region)
			if err != nil {
				return nil, fmt.Errorf("failed to apply ResourceFunc for %s, %w", svc.Namespace, err)
			}
			resources = append(resources, newResources...)
			c.logger.Debug("ResourceFunc finished", "total", len(resources))
		}
		if ext.FilterFunc != nil {
			filteredResources, err := ext.FilterFunc(ctx, c, resources)
			if err != nil {
				return nil, fmt.Errorf("failed to apply FilterFunc for %s, %w", svc.Namespace, err)
			}
			resources = filteredResources
			c.logger.Debug("FilterFunc finished", "total", len(resources))
		}
	}
	if shouldHaveDiscoveredResources && len(resources) == 0 {
		return nil, ErrExpectedToFindResources
	}
	return resources, nil
}


================================================
FILE: pkg/clients/tagging/concurrency_client.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tagging import ( "context" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) type limitedConcurrencyClient struct { client Client sem chan struct{} } func NewLimitedConcurrencyClient(client Client, maxConcurrency int) Client { return &limitedConcurrencyClient{ client: client, sem: make(chan struct{}, maxConcurrency), } } func (c limitedConcurrencyClient) GetResources(ctx context.Context, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) { c.sem <- struct{}{} res, err := c.client.GetResources(ctx, job, region) <-c.sem return res, err } ================================================ FILE: pkg/clients/tagging/filters.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package tagging

import (
	"context"
	"fmt"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/aws/arn"
	"github.com/aws/aws-sdk-go-v2/service/amp"
	"github.com/aws/aws-sdk-go-v2/service/apigateway"
	"github.com/aws/aws-sdk-go-v2/service/apigatewayv2"
	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
	"github.com/aws/aws-sdk-go-v2/service/databasemigrationservice"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	"github.com/aws/aws-sdk-go-v2/service/shield"
	"github.com/aws/aws-sdk-go-v2/service/storagegateway"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/promutil"
)

// ServiceFilter holds optional per-namespace hooks run by client.GetResources.
type ServiceFilter struct {
	// ResourceFunc can be used to fetch additional resources
	ResourceFunc func(context.Context, client, model.DiscoveryJob, string) ([]*model.TaggedResource, error)

	// FilterFunc can be used to modify the input resources or to drop based on some condition
	FilterFunc func(context.Context, client, []*model.TaggedResource) ([]*model.TaggedResource, error)
}

// ServiceFilters maps a service namespace to (optional) ServiceFilter
var ServiceFilters = map[string]ServiceFilter{
	"AWS/ApiGateway": {
		// ApiGateway ARNs use the Id (for v1 REST APIs) and ApiId (for v2 APIs) instead of
		// the ApiName (display name). See https://docs.aws.amazon.com/apigateway/latest/developerguide/arn-format-reference.html
		// However, in metrics, the ApiId dimension uses the ApiName as value.
		//
		// Here we use the ApiGateway API to map resource correctly. For backward compatibility,
		// in v1 REST APIs we change the ARN to replace the ApiId with ApiName, while for v2 APIs
		// we leave the ARN as-is.
		FilterFunc: func(ctx context.Context, client client, inputResources []*model.TaggedResource) ([]*model.TaggedResource, error) {
			var limit int32 = 500 // max number of results per page. default=25, max=500
			const maxPages = 10
			input := apigateway.GetRestApisInput{Limit: &limit}
			output := apigateway.GetRestApisOutput{}
			var pageNum int
			paginator := apigateway.NewGetRestApisPaginator(client.apiGatewayAPI, &input, func(options *apigateway.GetRestApisPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for paginator.HasMorePages() && pageNum <= maxPages {
				page, err := paginator.NextPage(ctx)
				promutil.APIGatewayAPICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling apiGatewayAPI.GetRestApis, %w", err)
				}
				pageNum++
				output.Items = append(output.Items, page.Items...)
			}
			outputV2, err := client.apiGatewayV2API.GetApis(ctx, &apigatewayv2.GetApisInput{})
			if err != nil {
				return nil, fmt.Errorf("error calling apigatewayv2.GetApis, %w", err)
			}
			var outputResources []*model.TaggedResource
			for _, resource := range inputResources {
				// Each matched gateway is removed from the candidate list so it
				// can only be consumed once (removal then break, so the
				// shrinking slice is not iterated further).
				for i, gw := range output.Items {
					if strings.HasSuffix(resource.ARN, "/restapis/"+*gw.Id) {
						// NOTE(review): r is a pointer copy, so the ARN rewrite
						// below mutates the shared TaggedResource in place.
						r := resource
						r.ARN = strings.ReplaceAll(resource.ARN, *gw.Id, *gw.Name)
						outputResources = append(outputResources, r)
						output.Items = append(output.Items[:i], output.Items[i+1:]...)
						break
					}
				}
				for i, gw := range outputV2.Items {
					if strings.HasSuffix(resource.ARN, "/apis/"+*gw.ApiId) {
						// v2 APIs keep their ARN unchanged.
						outputResources = append(outputResources, resource)
						outputV2.Items = append(outputV2.Items[:i], outputV2.Items[i+1:]...)
						break
					}
				}
			}
			return outputResources, nil
		},
	},
	"AWS/AutoScaling": {
		ResourceFunc: func(ctx context.Context, client client, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
			pageNum := 0
			var resources []*model.TaggedResource
			paginator := autoscaling.NewDescribeAutoScalingGroupsPaginator(client.autoscalingAPI, &autoscaling.DescribeAutoScalingGroupsInput{}, func(options *autoscaling.DescribeAutoScalingGroupsPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			// pageNum < 100 caps pagination defensively, as elsewhere in this file.
			for paginator.HasMorePages() && pageNum < 100 {
				page, err := paginator.NextPage(ctx)
				promutil.AutoScalingAPICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling autoscalingAPI.DescribeAutoScalingGroups, %w", err)
				}
				pageNum++
				for _, asg := range page.AutoScalingGroups {
					resource := model.TaggedResource{
						ARN:       *asg.AutoScalingGroupARN,
						Namespace: job.Namespace,
						Region:    region,
					}
					for _, t := range asg.Tags {
						resource.Tags = append(resource.Tags, model.Tag{Key: *t.Key, Value: *t.Value})
					}
					if resource.FilterThroughTags(job.SearchTags) {
						resources = append(resources, &resource)
					}
				}
			}
			return resources, nil
		},
	},
	"AWS/DMS": {
		// Append the replication instance identifier to DMS task and instance ARNs
		FilterFunc: func(ctx context.Context, client client, inputResources []*model.TaggedResource) ([]*model.TaggedResource, error) {
			if len(inputResources) == 0 {
				return inputResources, nil
			}
			// Maps instance ARN -> instance identifier; task ARNs are added
			// below so the single lookup works for both kinds of resources.
			replicationInstanceIdentifiers := make(map[string]string)
			pageNum := 0
			instancesPaginator := databasemigrationservice.NewDescribeReplicationInstancesPaginator(client.dmsAPI, &databasemigrationservice.DescribeReplicationInstancesInput{}, func(options *databasemigrationservice.DescribeReplicationInstancesPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for instancesPaginator.HasMorePages() && pageNum < 100 {
				page, err := instancesPaginator.NextPage(ctx)
				promutil.DmsAPICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling dmsAPI.DescribeReplicationInstances, %w", err)
				}
				pageNum++
				for _, instance := range page.ReplicationInstances {
					replicationInstanceIdentifiers[*instance.ReplicationInstanceArn] = *instance.ReplicationInstanceIdentifier
				}
			}
			pageNum = 0
			tasksPaginator := databasemigrationservice.NewDescribeReplicationTasksPaginator(client.dmsAPI, &databasemigrationservice.DescribeReplicationTasksInput{}, func(options *databasemigrationservice.DescribeReplicationTasksPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for tasksPaginator.HasMorePages() && pageNum < 100 {
				page, err := tasksPaginator.NextPage(ctx)
				promutil.DmsAPICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling dmsAPI.DescribeReplicationTasks, %w", err)
				}
				pageNum++
				for _, task := range page.ReplicationTasks {
					taskInstanceArn := *task.ReplicationInstanceArn
					if instanceIdentifier, ok := replicationInstanceIdentifiers[taskInstanceArn]; ok {
						replicationInstanceIdentifiers[*task.ReplicationTaskArn] = instanceIdentifier
					}
				}
			}
			var outputResources []*model.TaggedResource
			for _, resource := range inputResources {
				r := resource
				// Append the replication instance identifier to replication instance and task ARNs
				if instanceIdentifier, ok := replicationInstanceIdentifiers[r.ARN]; ok {
					r.ARN = fmt.Sprintf("%s/%s", r.ARN, instanceIdentifier)
				}
				outputResources = append(outputResources, r)
			}
			return outputResources, nil
		},
	},
	"AWS/EC2Spot": {
		ResourceFunc: func(ctx context.Context, client client, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
			pageNum := 0
			var resources []*model.TaggedResource
			paginator := ec2.NewDescribeSpotFleetRequestsPaginator(client.ec2API, &ec2.DescribeSpotFleetRequestsInput{}, func(options *ec2.DescribeSpotFleetRequestsPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for paginator.HasMorePages() && pageNum < 100 {
				page, err := paginator.NextPage(ctx)
				promutil.Ec2APICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling describing ec2API.DescribeSpotFleetRequests, %w", err)
				}
				pageNum++
				for
_, ec2Spot := range page.SpotFleetRequestConfigs {
					resource := model.TaggedResource{
						ARN:       *ec2Spot.SpotFleetRequestId,
						Namespace: job.Namespace,
						Region:    region,
					}
					for _, t := range ec2Spot.Tags {
						resource.Tags = append(resource.Tags, model.Tag{Key: *t.Key, Value: *t.Value})
					}
					if resource.FilterThroughTags(job.SearchTags) {
						resources = append(resources, &resource)
					}
				}
			}
			return resources, nil
		},
	},
	"AWS/Prometheus": {
		// Lists Amazon Managed Prometheus workspaces; the workspace ARN plus
		// its tags become the discovered resource.
		ResourceFunc: func(ctx context.Context, client client, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
			pageNum := 0
			var resources []*model.TaggedResource
			// The input struct must be package-qualified: `&amp.ListWorkspacesInput{}`
			// (the bare `&.ListWorkspacesInput{}` previously here was not valid Go).
			paginator := amp.NewListWorkspacesPaginator(client.prometheusSvcAPI, &amp.ListWorkspacesInput{}, func(options *amp.ListWorkspacesPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for paginator.HasMorePages() && pageNum < 100 {
				page, err := paginator.NextPage(ctx)
				promutil.ManagedPrometheusAPICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error while calling prometheusSvcAPI.ListWorkspaces, %w", err)
				}
				pageNum++
				for _, ws := range page.Workspaces {
					resource := model.TaggedResource{
						ARN:       *ws.Arn,
						Namespace: job.Namespace,
						Region:    region,
					}
					// AMP returns tags as a map rather than a list of Tag structs.
					for key, value := range ws.Tags {
						resource.Tags = append(resource.Tags, model.Tag{Key: key, Value: value})
					}
					if resource.FilterThroughTags(job.SearchTags) {
						resources = append(resources, &resource)
					}
				}
			}
			return resources, nil
		},
	},
	"AWS/StorageGateway": {
		ResourceFunc: func(ctx context.Context, client client, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
			pageNum := 0
			var resources []*model.TaggedResource
			paginator := storagegateway.NewListGatewaysPaginator(client.storageGatewayAPI, &storagegateway.ListGatewaysInput{}, func(options *storagegateway.ListGatewaysPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for paginator.HasMorePages() && pageNum < 100 {
				page, err := paginator.NextPage(ctx)
				promutil.StoragegatewayAPICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling
storageGatewayAPI.ListGateways, %w", err)
				}
				pageNum++
				for _, gwa := range page.Gateways {
					resource := model.TaggedResource{
						ARN:       fmt.Sprintf("%s/%s", *gwa.GatewayId, *gwa.GatewayName),
						Namespace: job.Namespace,
						Region:    region,
					}
					tagsRequest := &storagegateway.ListTagsForResourceInput{
						ResourceARN: gwa.GatewayARN,
					}
					tagsResponse, err := client.storageGatewayAPI.ListTagsForResource(ctx, tagsRequest)
					promutil.StoragegatewayAPICounter.Inc()
					// The error used to be discarded, which made the
					// tagsResponse.Tags dereference below panic on a nil
					// response. Surface it instead, matching the error
					// handling of the surrounding API calls.
					if err != nil {
						return nil, fmt.Errorf("error calling storageGatewayAPI.ListTagsForResource, %w", err)
					}
					for _, t := range tagsResponse.Tags {
						resource.Tags = append(resource.Tags, model.Tag{Key: *t.Key, Value: *t.Value})
					}
					if resource.FilterThroughTags(job.SearchTags) {
						resources = append(resources, &resource)
					}
				}
			}
			return resources, nil
		},
	},
	"AWS/TransitGateway": {
		ResourceFunc: func(ctx context.Context, client client, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
			pageNum := 0
			var resources []*model.TaggedResource
			paginator := ec2.NewDescribeTransitGatewayAttachmentsPaginator(client.ec2API, &ec2.DescribeTransitGatewayAttachmentsInput{}, func(options *ec2.DescribeTransitGatewayAttachmentsPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			for paginator.HasMorePages() && pageNum < 100 {
				page, err := paginator.NextPage(ctx)
				promutil.Ec2APICounter.Inc()
				if err != nil {
					return nil, fmt.Errorf("error calling ec2API.DescribeTransitGatewayAttachments, %w", err)
				}
				pageNum++
				for _, tgwa := range page.TransitGatewayAttachments {
					resource := model.TaggedResource{
						ARN:       fmt.Sprintf("%s/%s", *tgwa.TransitGatewayId, *tgwa.TransitGatewayAttachmentId),
						Namespace: job.Namespace,
						Region:    region,
					}
					for _, t := range tgwa.Tags {
						resource.Tags = append(resource.Tags, model.Tag{Key: *t.Key, Value: *t.Value})
					}
					if resource.FilterThroughTags(job.SearchTags) {
						resources = append(resources, &resource)
					}
				}
			}
			return resources, nil
		},
	},
	"AWS/DDoSProtection": {
		// Resource discovery only targets the protections, protections are global, so they will only be discoverable in us-east-1.
// Outside us-east-1 no resources are going to be found. We use the shield.ListProtections API to get the protections +
		// protected resources to add to the tagged resources. This data is eventually usable for joining with metrics.
		ResourceFunc: func(ctx context.Context, c client, job model.DiscoveryJob, region string) ([]*model.TaggedResource, error) {
			var output []*model.TaggedResource
			// Default page size is only 20 which can easily lead to throttling
			request := &shield.ListProtectionsInput{MaxResults: aws.Int32(1000)}
			paginator := shield.NewListProtectionsPaginator(c.shieldAPI, request, func(options *shield.ListProtectionsPaginatorOptions) {
				options.StopOnDuplicateToken = true
			})
			pageNum := 0
			// pageNum < 100 caps pagination defensively, as elsewhere in this file.
			for paginator.HasMorePages() && pageNum < 100 {
				promutil.ShieldAPICounter.Inc()
				page, err := paginator.NextPage(ctx)
				pageNum++
				if err != nil {
					return nil, fmt.Errorf("error calling shieldAPI.ListProtections, %w", err)
				}
				for _, protection := range page.Protections {
					protectedResourceArn := *protection.ResourceArn
					protectionArn := *protection.ProtectionArn
					protectedResource, err := arn.Parse(protectedResourceArn)
					if err != nil {
						return nil, fmt.Errorf("shieldAPI.ListProtections returned an invalid ProtectedResourceArn %s for Protection %s", protectedResourceArn, protectionArn)
					}
					// Shield covers regional services,
					// EC2 (arn:aws:ec2:::eip-allocation/*)
					// load balancers (arn:aws:elasticloadbalancing:::loadbalancer:*)
					// where the region of the protectedResource ARN should match the region for the job to prevent
					// duplicating resources across all regions
					// Shield also covers other global services,
					// global accelerator (arn:aws:globalaccelerator:::accelerator/*)
					// route53 (arn:aws:route53:::hostedzone/*)
					// where the protectedResource contains no region. Just like other global services the metrics for
					// these land in us-east-1 so any protected resource without a region should be added when the job
					// is for us-east-1
					if protectedResource.Region == region || (protectedResource.Region == "" && region == "us-east-1") {
						taggedResource := &model.TaggedResource{
							ARN:       protectedResourceArn,
							Namespace: job.Namespace,
							Region:    region,
							Tags:      []model.Tag{{Key: "ProtectionArn", Value: protectionArn}},
						}
						output = append(output, taggedResource)
					}
				}
			}
			return output, nil
		},
	},
}


================================================
FILE: pkg/clients/tagging/filters_test.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tagging import ( "context" "fmt" "testing" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/apigateway" apigtypes "github.com/aws/aws-sdk-go-v2/service/apigateway/types" "github.com/aws/aws-sdk-go-v2/service/apigatewayv2" apigv2types "github.com/aws/aws-sdk-go-v2/service/apigatewayv2/types" "github.com/aws/aws-sdk-go-v2/service/databasemigrationservice" dmstypes "github.com/aws/aws-sdk-go-v2/service/databasemigrationservice/types" "github.com/aws/smithy-go/middleware" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) func TestValidServiceFilterNames(t *testing.T) { for svc, filter := range ServiceFilters { if config.SupportedServices.GetService(svc) == nil { t.Errorf("invalid service name '%s' in ServiceFilters", svc) } if filter.FilterFunc == nil && filter.ResourceFunc == nil { t.Errorf("no filter functions defined for service name '%s'", svc) } } } // mockAPIOption returns middleware that intercepts AWS SDK v2 API calls and returns // mock responses keyed by operation name, short-circuiting before the HTTP call. 
func mockAPIOption(responses map[string]interface{}) func(*middleware.Stack) error {
	return func(stack *middleware.Stack) error {
		// Install at the Finalize step so the mock runs after serialization
		// but before the HTTP round trip.
		return stack.Finalize.Add(
			middleware.FinalizeMiddlewareFunc("mock",
				func(ctx context.Context, _ middleware.FinalizeInput, _ middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
					opName := middleware.GetOperationName(ctx)
					if resp, ok := responses[opName]; ok {
						return middleware.FinalizeOutput{Result: resp}, middleware.Metadata{}, nil
					}
					// An operation the test did not stub is a test bug: surface it loudly.
					return middleware.FinalizeOutput{}, middleware.Metadata{}, fmt.Errorf("unexpected operation: %s", opName)
				},
			),
			middleware.Before,
		)
	}
}

// TestApiGatewayFilterFunc verifies the AWS/ApiGateway FilterFunc: per-stage
// resources are dropped, and for v1 REST APIs the API id in the ARN is
// replaced by the API name returned from GetRestApis.
func TestApiGatewayFilterFunc(t *testing.T) {
	tests := []struct {
		name            string
		apiGatewayAPI   *apigateway.Client
		apiGatewayV2API *apigatewayv2.Client
		inputResources  []*model.TaggedResource
		outputResources []*model.TaggedResource
	}{
		{
			name: "API Gateway v1 REST API: stages are filtered and IDs replaced with names",
			apiGatewayAPI: apigateway.New(apigateway.Options{
				Region: "us-east-1",
				APIOptions: []func(*middleware.Stack) error{
					mockAPIOption(map[string]interface{}{
						"GetRestApis": &apigateway.GetRestApisOutput{
							Items: []apigtypes.RestApi{
								{
									Id:   aws.String("gwid1234"),
									Name: aws.String("apiname"),
								},
							},
						},
					}),
				},
			}),
			apiGatewayV2API: apigatewayv2.New(apigatewayv2.Options{
				Region: "us-east-1",
				APIOptions: []func(*middleware.Stack) error{
					mockAPIOption(map[string]interface{}{
						"GetApis": &apigatewayv2.GetApisOutput{
							Items: []apigv2types.Api{},
						},
					}),
				},
			}),
			inputResources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:apigateway:us-east-1::/restapis/gwid1234/stages/main",
					Namespace: "apigateway",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value"}},
				},
				{
					ARN:       "arn:aws:apigateway:us-east-1::/restapis/gwid1234",
					Namespace: "apigateway",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 2"}},
				},
			},
			// Only the non-stage resource survives, with the API id rewritten to its name.
			outputResources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:apigateway:us-east-1::/restapis/apiname",
					Namespace: "apigateway",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 2"}},
				},
			},
		},
		{
			name: "API Gateway v2 REST API: stages are filtered",
			apiGatewayAPI: apigateway.New(apigateway.Options{
				Region: "us-east-1",
				APIOptions: []func(*middleware.Stack) error{
					mockAPIOption(map[string]interface{}{
						"GetRestApis": &apigateway.GetRestApisOutput{
							Items: []apigtypes.RestApi{},
						},
					}),
				},
			}),
			apiGatewayV2API: apigatewayv2.New(apigatewayv2.Options{
				Region: "us-east-1",
				APIOptions: []func(*middleware.Stack) error{
					mockAPIOption(map[string]interface{}{
						"GetApis": &apigatewayv2.GetApisOutput{
							Items: []apigv2types.Api{
								{
									ApiId: aws.String("gwid9876"),
								},
							},
						},
					}),
				},
			}),
			inputResources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:apigateway:us-east-1::/apis/gwid9876/stages/$default",
					Namespace: "apigateway",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value"}},
				},
				{
					ARN:       "arn:aws:apigateway:us-east-1::/apis/gwid9876",
					Namespace: "apigateway",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 2"}},
				},
			},
			// v2 ARNs keep their API id; only the stage resource is removed.
			outputResources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:apigateway:us-east-1::/apis/gwid9876",
					Namespace: "apigateway",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 2"}},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			c := client{
				apiGatewayAPI:   tc.apiGatewayAPI,
				apiGatewayV2API: tc.apiGatewayV2API,
			}
			filter := ServiceFilters["AWS/ApiGateway"]
			require.NotNil(t, filter.FilterFunc)
			outputResources, err := filter.FilterFunc(context.Background(), c, tc.inputResources)
			require.NoError(t, err)
			require.Equal(t, tc.outputResources, outputResources)
		})
	}
}

// TestDMSFilterFunc verifies the AWS/DMS FilterFunc: replication-instance
// identifiers are appended ("/identifier") to the ARNs of known replication
// instances and tasks, while unknown or unrelated resource kinds
// (subgrp, endpoint) pass through unchanged.
func TestDMSFilterFunc(t *testing.T) {
	tests := []struct {
		name            string
		dmsAPI          *databasemigrationservice.Client
		inputResources  []*model.TaggedResource
		outputResources []*model.TaggedResource
	}{
		{
			name:            "empty input resources",
			inputResources:  []*model.TaggedResource{},
			outputResources: []*model.TaggedResource{},
		},
		{
			name: "replication instance identifiers appended to task and instance ARNs",
			dmsAPI: databasemigrationservice.New(databasemigrationservice.Options{
				Region: "us-east-1",
				APIOptions: []func(*middleware.Stack) error{
					mockAPIOption(map[string]interface{}{
						"DescribeReplicationInstances": &databasemigrationservice.DescribeReplicationInstancesOutput{
							ReplicationInstances: []dmstypes.ReplicationInstance{
								{
									ReplicationInstanceArn:        aws.String("arn:aws:dms:us-east-1:123123123123:rep:ABCDEFG1234567890"),
									ReplicationInstanceIdentifier: aws.String("repl-instance-identifier-1"),
								},
								{
									ReplicationInstanceArn:        aws.String("arn:aws:dms:us-east-1:123123123123:rep:ZZZZZZZZZZZZZZZZZ"),
									ReplicationInstanceIdentifier: aws.String("repl-instance-identifier-2"),
								},
								{
									ReplicationInstanceArn:        aws.String("arn:aws:dms:us-east-1:123123123123:rep:YYYYYYYYYYYYYYYYY"),
									ReplicationInstanceIdentifier: aws.String("repl-instance-identifier-3"),
								},
							},
						},
						"DescribeReplicationTasks": &databasemigrationservice.DescribeReplicationTasksOutput{
							ReplicationTasks: []dmstypes.ReplicationTask{
								{
									ReplicationTaskArn:     aws.String("arn:aws:dms:us-east-1:123123123123:task:9999999999999999"),
									ReplicationInstanceArn: aws.String("arn:aws:dms:us-east-1:123123123123:rep:ZZZZZZZZZZZZZZZZZ"),
								},
								{
									ReplicationTaskArn:     aws.String("arn:aws:dms:us-east-1:123123123123:task:2222222222222222"),
									ReplicationInstanceArn: aws.String("arn:aws:dms:us-east-1:123123123123:rep:ZZZZZZZZZZZZZZZZZ"),
								},
								{
									ReplicationTaskArn:     aws.String("arn:aws:dms:us-east-1:123123123123:task:3333333333333333"),
									ReplicationInstanceArn: aws.String("arn:aws:dms:us-east-1:123123123123:rep:WWWWWWWWWWWWWWWWW"),
								},
							},
						},
					}),
				},
			}),
			inputResources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:rep:ABCDEFG1234567890",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:rep:WXYZ987654321",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 2"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:task:9999999999999999",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 3"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:task:5555555555555555",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 4"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:subgrp:demo-subgrp",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 5"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:endpoint:1111111111111111",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 6"}},
				},
			},
			outputResources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:rep:ABCDEFG1234567890/repl-instance-identifier-1",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value"}},
				},
				{
					// Instance ARN unknown to DescribeReplicationInstances: left as-is.
					ARN:       "arn:aws:dms:us-east-1:123123123123:rep:WXYZ987654321",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 2"}},
				},
				{
					// Task maps to instance ZZZZ..., whose identifier is appended.
					ARN:       "arn:aws:dms:us-east-1:123123123123:task:9999999999999999/repl-instance-identifier-2",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 3"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:task:5555555555555555",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 4"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:subgrp:demo-subgrp",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 5"}},
				},
				{
					ARN:       "arn:aws:dms:us-east-1:123123123123:endpoint:1111111111111111",
					Namespace: "dms",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "Test", Value: "Value 6"}},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			c := client{
				dmsAPI: tc.dmsAPI,
			}
			filter := ServiceFilters["AWS/DMS"]
			require.NotNil(t, filter.FilterFunc)
			outputResources, err := filter.FilterFunc(context.Background(), c, tc.inputResources)
			require.NoError(t, err)
			require.Equal(t, tc.outputResources, outputResources)
		})
	}
}


================================================
FILE: pkg/config/config.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
	"errors"
	"fmt"
	"log/slog"
	"os"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/grafana/regexp"
	"go.yaml.in/yaml/v2"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// ScrapeConf is the root of the YAML configuration file.
type ScrapeConf struct {
	APIVersion      string             `yaml:"apiVersion"`
	StsRegion       string             `yaml:"sts-region"`
	Discovery       Discovery          `yaml:"discovery"`
	Static          []*Static          `yaml:"static"`
	CustomNamespace []*CustomNamespace `yaml:"customNamespace"`
}

// Discovery holds all resource-discovery jobs and the tags exported on their metrics.
type Discovery struct {
	ExportedTagsOnMetrics ExportedTagsOnMetrics `yaml:"exportedTagsOnMetrics"`
	Jobs                  []*Job                `yaml:"jobs"`
}

// ExportedTagsOnMetrics maps a namespace to the resource tag keys exported as labels.
type ExportedTagsOnMetrics map[string][]string

// Tag is a key/value pair used for search filters and custom labels.
type Tag struct {
	Key   string `yaml:"key"`
	Value string `yaml:"value"`
}

// JobLevelMetricFields are metric settings that can be set at job level and
// act as defaults for each metric of the job (see validateMetric).
type JobLevelMetricFields struct {
	Statistics             []string `yaml:"statistics"`
	Period                 int64    `yaml:"period"`
	Length                 int64    `yaml:"length"`
	Delay                  int64    `yaml:"delay"`
	NilToZero              *bool    `yaml:"nilToZero"`
	AddCloudwatchTimestamp *bool    `yaml:"addCloudwatchTimestamp"`
	ExportAllDataPoints    *bool    `yaml:"exportAllDataPoints"`
}

// Job is a discovery job for one AWS namespace.
type Job struct {
	Regions                     []string          `yaml:"regions"`
	Type                        string            `yaml:"type"`
	Roles                       []Role            `yaml:"roles"`
	SearchTags                  []Tag             `yaml:"searchTags"`
	CustomTags                  []Tag             `yaml:"customTags"`
	DimensionNameRequirements   []string          `yaml:"dimensionNameRequirements"`
	Metrics                     []*Metric         `yaml:"metrics"`
	RoundingPeriod              *int64            `yaml:"roundingPeriod"`
	RecentlyActiveOnly          bool              `yaml:"recentlyActiveOnly"`
	IncludeContextOnInfoMetrics bool              `yaml:"includeContextOnInfoMetrics"`
	EnhancedMetrics             []*EnhancedMetric `yaml:"enhancedMetrics"`
	JobLevelMetricFields        `yaml:",inline"`
}

// EnhancedMetric names a single enhanced metric requested for a discovery job.
type EnhancedMetric struct {
	Name string `yaml:"name"`
}

// Static describes a job scraping a fixed set of dimensions, without discovery.
type Static struct {
	Name       string      `yaml:"name"`
	Regions    []string    `yaml:"regions"`
	Roles      []Role      `yaml:"roles"`
	Namespace  string      `yaml:"namespace"`
	CustomTags []Tag       `yaml:"customTags"`
	Dimensions []Dimension `yaml:"dimensions"`
	Metrics    []*Metric   `yaml:"metrics"`
}

// CustomNamespace describes a job scraping a user-defined CloudWatch namespace.
type CustomNamespace struct {
	Regions                   []string  `yaml:"regions"`
	Name                      string    `yaml:"name"`
	Namespace                 string    `yaml:"namespace"`
	RecentlyActiveOnly        bool      `yaml:"recentlyActiveOnly"`
	Roles                     []Role    `yaml:"roles"`
	Metrics                   []*Metric `yaml:"metrics"`
	CustomTags                []Tag     `yaml:"customTags"`
	DimensionNameRequirements []string  `yaml:"dimensionNameRequirements"`
	RoundingPeriod            *int64    `yaml:"roundingPeriod"`
	JobLevelMetricFields      `yaml:",inline"`
}

// Metric configures one CloudWatch metric to scrape; unset fields fall back to
// job-level defaults (see validateMetric).
type Metric struct {
	Name                   string   `yaml:"name"`
	Statistics             []string `yaml:"statistics"`
	Period                 int64    `yaml:"period"`
	Length                 int64    `yaml:"length"`
	Delay                  int64    `yaml:"delay"`
	NilToZero              *bool    `yaml:"nilToZero"`
	AddCloudwatchTimestamp *bool    `yaml:"addCloudwatchTimestamp"`
	ExportAllDataPoints    *bool    `yaml:"exportAllDataPoints"`
}

// Dimension is a fixed CloudWatch dimension for static jobs.
type Dimension struct {
	Name  string `yaml:"name"`
	Value string `yaml:"value"`
}

// Role identifies the IAM role assumed for a job; empty RoleArn means the
// exporter's own role.
type Role struct {
	RoleArn    string `yaml:"roleArn"`
	ExternalID string `yaml:"externalId"`
}

// ValidateRole rejects an ExternalID without a RoleArn (an external id is only
// meaningful when assuming a role).
func (r *Role) ValidateRole(roleIdx int, parent string) error {
	if r.RoleArn == "" && r.ExternalID != "" {
		return fmt.Errorf("Role [%d] in %v: RoleArn should not be empty", roleIdx, parent)
	}
	return nil
}

// Load reads and unmarshals the YAML config file, defaults missing role lists
// to the current IAM role, and validates the result into a model.JobsConfig.
func (c *ScrapeConf) Load(file string, logger *slog.Logger) (model.JobsConfig, error) {
	yamlFile, err := os.ReadFile(file)
	if err != nil {
		return model.JobsConfig{}, err
	}
	err = yaml.Unmarshal(yamlFile, c)
	if err != nil {
		return model.JobsConfig{}, err
	}

	// Strict re-parse only to surface warnings; it never fails the load.
	logConfigErrors(yamlFile, logger)

	for _, job := range c.Discovery.Jobs {
		if len(job.Roles) == 0 {
			job.Roles = []Role{{}} // use current IAM role
		}
	}

	for _, job := range c.CustomNamespace {
		if len(job.Roles) == 0 {
			job.Roles = []Role{{}} // use current IAM role
		}
	}

	for _, job := range c.Static {
		if len(job.Roles) == 0 {
			job.Roles = []Role{{}} // use current IAM role
		}
	}

	return c.Validate(logger)
}

// Validate checks the whole configuration and, on success, converts it into
// the internal model representation.
func (c *ScrapeConf) Validate(logger *slog.Logger) (model.JobsConfig, error) {
	if c.Discovery.Jobs == nil && c.Static == nil && c.CustomNamespace == nil {
		return model.JobsConfig{}, fmt.Errorf("at least 1 Discovery job, 1 Static or one CustomNamespace must be defined")
	}

	if c.Discovery.Jobs != nil {
		for idx, job := range c.Discovery.Jobs {
			err := job.validateDiscoveryJob(logger, idx)
			if err != nil {
				return model.JobsConfig{}, err
			}
		}
		if len(c.Discovery.ExportedTagsOnMetrics) > 0 {
			for ns := range c.Discovery.ExportedTagsOnMetrics {
				// Keys must be real namespaces (not aliases) and must match a configured job type.
				if svc := SupportedServices.GetService(ns); svc == nil {
					if svc = SupportedServices.getServiceByAlias(ns); svc != nil {
						return model.JobsConfig{}, fmt.Errorf("Discovery jobs: Invalid key in 'exportedTagsOnMetrics', use namespace %q rather than alias %q", svc.Namespace, svc.Alias)
					}
					return model.JobsConfig{}, fmt.Errorf("Discovery jobs: 'exportedTagsOnMetrics' key is not a valid namespace: %s", ns)
				}
				jobTypeMatch := false
				for _, job := range c.Discovery.Jobs {
					if job.Type == ns {
						jobTypeMatch = true
						break
					}
				}
				if !jobTypeMatch {
					return model.JobsConfig{}, fmt.Errorf("Discovery jobs: 'exportedTagsOnMetrics' key %q does not match with any discovery job type", ns)
				}
			}
		}
	}

	if c.CustomNamespace != nil {
		for idx, job := range c.CustomNamespace {
			err := job.validateCustomNamespaceJob(logger, idx)
			if err != nil {
				return model.JobsConfig{}, err
			}
		}
	}

	if c.Static != nil {
		for idx, job := range c.Static {
			err := job.validateStaticJob(logger, idx)
			if err != nil {
				return model.JobsConfig{}, err
			}
		}
	}

	if c.APIVersion != "" && c.APIVersion != "v1alpha1" {
		return model.JobsConfig{}, fmt.Errorf("unknown apiVersion value '%s'", c.APIVersion)
	}

	return c.toModelConfig(), nil
}

// validateDiscoveryJob validates one discovery job: namespace type, roles,
// regions, metrics/enhanced-metrics presence, search-tag regexes, and
// enhanced-metric support for the namespace.
func (j *Job) validateDiscoveryJob(logger *slog.Logger, jobIdx int) error {
	if j.Type != "" {
		if svc := SupportedServices.GetService(j.Type); svc == nil {
			if svc = SupportedServices.getServiceByAlias(j.Type); svc != nil {
				return fmt.Errorf("Discovery job [%d]: Invalid 'type' field, use namespace %q rather than alias %q", jobIdx, svc.Namespace, svc.Alias)
			}
			return fmt.Errorf("Discovery job [%d]: Service is not in known list!: %s", jobIdx, j.Type)
		}
	} else {
		return fmt.Errorf("Discovery job [%d]: Type should not be empty", jobIdx)
	}
	parent := fmt.Sprintf("Discovery job [%s/%d]", j.Type, jobIdx)
	if len(j.Roles) > 0 {
		for roleIdx, role := range j.Roles {
			if err := role.ValidateRole(roleIdx, parent); err != nil {
				return err
			}
		}
	} else {
		return fmt.Errorf("no IAM roles configured. If the current IAM role is desired, an empty Role should be configured")
	}
	if len(j.Regions) == 0 {
		return fmt.Errorf("Discovery job [%s/%d]: Regions should not be empty", j.Type, jobIdx)
	}
	if len(j.Metrics) == 0 && len(j.EnhancedMetrics) == 0 {
		return fmt.Errorf("Discovery job [%s/%d]: Metrics and EnhancedMetrics should not both be empty", j.Type, jobIdx)
	}
	for metricIdx, metric := range j.Metrics {
		err := metric.validateMetric(logger, metricIdx, parent, &j.JobLevelMetricFields)
		if err != nil {
			return err
		}
	}
	for _, st := range j.SearchTags {
		// Search tag values are compiled as regexes later; fail fast on bad patterns.
		if _, err := regexp.Compile(st.Value); err != nil {
			return fmt.Errorf("Discovery job [%s/%d]: search tag value for %s has invalid regex value %s: %w", j.Type, jobIdx, st.Key, st.Value, err)
		}
	}
	if j.RoundingPeriod != nil {
		logger.Warn(fmt.Sprintf("Discovery job [%s/%d]: Setting a rounding period is deprecated. In a future release it will always be enabled and set to the value of the metric period.", j.Type, jobIdx))
	}
	if len(j.EnhancedMetrics) > 0 {
		svc, err := enhancedmetrics.DefaultEnhancedMetricServiceRegistry.GetEnhancedMetricsService(j.Type)
		if err != nil {
			return fmt.Errorf("Discovery job [%s/%d]: enhanced metrics are not supported for this namespace: %w", j.Type, jobIdx, err)
		}
		for _, em := range j.EnhancedMetrics {
			if !svc.IsMetricSupported(em.Name) {
				return fmt.Errorf("Discovery job [%s/%d]: enhanced metric %q is not supported for this namespace", j.Type, jobIdx, em.Name)
			}
		}
	}
	return nil
}

// validateCustomNamespaceJob validates one custom-namespace job: name,
// namespace, roles, regions, and its metrics.
func (j *CustomNamespace) validateCustomNamespaceJob(logger *slog.Logger, jobIdx int) error {
	if j.Name == "" {
		return fmt.Errorf("CustomNamespace job [%v]: Name should not be empty", jobIdx)
	}
	if j.Namespace == "" {
		return fmt.Errorf("CustomNamespace job [%v]: Namespace should not be empty", jobIdx)
	}
	parent := fmt.Sprintf("CustomNamespace job [%s/%d]", j.Namespace, jobIdx)
	if len(j.Roles) > 0 {
		for roleIdx, role := range j.Roles {
			if err := role.ValidateRole(roleIdx, parent); err != nil {
				return err
			}
		}
	} else {
		return fmt.Errorf("no IAM roles configured. If the current IAM role is desired, an empty Role should be configured")
	}
	if len(j.Regions) == 0 {
		return fmt.Errorf("CustomNamespace job [%s/%d]: Regions should not be empty", j.Name, jobIdx)
	}
	if len(j.Metrics) == 0 {
		return fmt.Errorf("CustomNamespace job [%s/%d]: Metrics should not be empty", j.Name, jobIdx)
	}
	for metricIdx, metric := range j.Metrics {
		err := metric.validateMetric(logger, metricIdx, parent, &j.JobLevelMetricFields)
		if err != nil {
			return err
		}
	}
	if j.RoundingPeriod != nil {
		logger.Warn(fmt.Sprintf("CustomNamespace job [%s/%d]: Setting a rounding period is deprecated. It is always enabled and set to the value of the metric period.", j.Name, jobIdx))
	}
	return nil
}

// validateStaticJob validates one static job: name, namespace, roles, regions,
// and its metrics (static jobs have no job-level metric defaults).
func (j *Static) validateStaticJob(logger *slog.Logger, jobIdx int) error {
	if j.Name == "" {
		return fmt.Errorf("Static job [%v]: Name should not be empty", jobIdx)
	}
	if j.Namespace == "" {
		return fmt.Errorf("Static job [%s/%d]: Namespace should not be empty", j.Name, jobIdx)
	}
	parent := fmt.Sprintf("Static job [%s/%d]", j.Name, jobIdx)
	if len(j.Roles) > 0 {
		for roleIdx, role := range j.Roles {
			if err := role.ValidateRole(roleIdx, parent); err != nil {
				return err
			}
		}
	} else {
		return fmt.Errorf("no IAM roles configured. If the current IAM role is desired, an empty Role should be configured")
	}
	if len(j.Regions) == 0 {
		return fmt.Errorf("Static job [%s/%d]: Regions should not be empty", j.Name, jobIdx)
	}
	for metricIdx, metric := range j.Metrics {
		err := metric.validateMetric(logger, metricIdx, parent, nil)
		if err != nil {
			return err
		}
	}
	return nil
}

// validateMetric validates a single metric and materializes its effective
// settings in place, resolving each field through the cascade:
// metric value -> job-level default ("discovery") -> global default.
func (m *Metric) validateMetric(logger *slog.Logger, metricIdx int, parent string, discovery *JobLevelMetricFields) error {
	if m.Name == "" {
		return fmt.Errorf("Metric [%s/%d] in %v: Name should not be empty", m.Name, metricIdx, parent)
	}

	mStatistics := m.Statistics
	if len(mStatistics) == 0 && discovery != nil {
		if len(discovery.Statistics) > 0 {
			mStatistics = discovery.Statistics
		} else {
			return fmt.Errorf("Metric [%s/%d] in %v: Statistics should not be empty", m.Name, metricIdx, parent)
		}
	}

	mPeriod := m.Period
	if mPeriod == 0 {
		if discovery != nil && discovery.Period != 0 {
			mPeriod = discovery.Period
		} else {
			mPeriod = model.DefaultPeriodSeconds
		}
	}
	if mPeriod < 1 {
		return fmt.Errorf("Metric [%s/%d] in %v: Period value should be a positive integer", m.Name, metricIdx, parent)
	}
	mLength := m.Length
	if mLength == 0 {
		if discovery != nil && discovery.Length != 0 {
			mLength = discovery.Length
		} else {
			mLength = model.DefaultLengthSeconds
		}
	}
	// Delay at the metric level has been ignored for an incredibly long time. If we started respecting metric delay
	// now a lot of configurations would break on release. This logs a warning for now
	if m.Delay != 0 {
		logger.Warn(fmt.Sprintf("Metric [%s/%d] in %v: Metric is configured with delay that has been being ignored. This behavior will change in the future, if your config works now remove this delay to prevent a future issue.", m.Name, metricIdx, parent))
	}
	var mDelay int64
	if discovery != nil && discovery.Delay != 0 {
		mDelay = discovery.Delay
	}

	mNilToZero := m.NilToZero
	if mNilToZero == nil {
		if discovery != nil && discovery.NilToZero != nil {
			mNilToZero = discovery.NilToZero
		} else {
			mNilToZero = aws.Bool(false)
		}
	}

	mAddCloudwatchTimestamp := m.AddCloudwatchTimestamp
	if mAddCloudwatchTimestamp == nil {
		if discovery != nil && discovery.AddCloudwatchTimestamp != nil {
			mAddCloudwatchTimestamp = discovery.AddCloudwatchTimestamp
		} else {
			mAddCloudwatchTimestamp = aws.Bool(false)
		}
	}

	mExportAllDataPoints := m.ExportAllDataPoints
	if mExportAllDataPoints == nil {
		if discovery != nil && discovery.ExportAllDataPoints != nil {
			mExportAllDataPoints = discovery.ExportAllDataPoints
		} else {
			mExportAllDataPoints = aws.Bool(false)
		}
	}

	if aws.ToBool(mExportAllDataPoints) && !aws.ToBool(mAddCloudwatchTimestamp) {
		return fmt.Errorf("Metric [%s/%d] in %v: ExportAllDataPoints can only be enabled if AddCloudwatchTimestamp is enabled", m.Name, metricIdx, parent)
	}

	if mLength < mPeriod {
		return fmt.Errorf(
			"Metric [%s/%d] in %v: length(%d) is smaller than period(%d). This can cause that the data requested is not ready and generate data gaps",
			m.Name, metricIdx, parent, mLength, mPeriod,
		)
	}
	// Persist the resolved values back onto the metric.
	m.Length = mLength
	m.Period = mPeriod
	m.Delay = mDelay
	m.NilToZero = mNilToZero
	m.AddCloudwatchTimestamp = mAddCloudwatchTimestamp
	m.ExportAllDataPoints = mExportAllDataPoints
	m.Statistics = mStatistics

	return nil
}

// toModelConfig converts the validated YAML config into the internal model.
func (c *ScrapeConf) toModelConfig() model.JobsConfig {
	jobsCfg := model.JobsConfig{}
	jobsCfg.StsRegion = c.StsRegion

	for _, discoveryJob := range c.Discovery.Jobs {
		svc := SupportedServices.GetService(discoveryJob.Type)
		job := model.DiscoveryJob{}
		job.Regions = discoveryJob.Regions
		job.Namespace = svc.Namespace
		job.DimensionNameRequirements = discoveryJob.DimensionNameRequirements
		job.RecentlyActiveOnly = discoveryJob.RecentlyActiveOnly
		job.RoundingPeriod = discoveryJob.RoundingPeriod
		job.Roles = toModelRoles(discoveryJob.Roles)
		job.SearchTags = toModelSearchTags(discoveryJob.SearchTags)
		job.CustomTags = toModelTags(discoveryJob.CustomTags)
		job.Metrics = toModelMetricConfig(discoveryJob.Metrics)
		job.IncludeContextOnInfoMetrics = discoveryJob.IncludeContextOnInfoMetrics
		job.DimensionsRegexps = svc.ToModelDimensionsRegexp()
		job.EnhancedMetrics = svc.toModelEnhancedMetricsConfig(discoveryJob.EnhancedMetrics)
		job.ExportedTagsOnMetrics = []string{}
		if len(c.Discovery.ExportedTagsOnMetrics) > 0 {
			if exportedTags, ok := c.Discovery.ExportedTagsOnMetrics[svc.Namespace]; ok {
				job.ExportedTagsOnMetrics = exportedTags
			}
		}
		jobsCfg.DiscoveryJobs = append(jobsCfg.DiscoveryJobs, job)
	}

	for _, staticJob := range c.Static {
		job := model.StaticJob{}
		job.Name = staticJob.Name
		job.Namespace = staticJob.Namespace
		job.Regions = staticJob.Regions
		job.Roles = toModelRoles(staticJob.Roles)
		job.CustomTags = toModelTags(staticJob.CustomTags)
		job.Dimensions = toModelDimensions(staticJob.Dimensions)
		job.Metrics = toModelMetricConfig(staticJob.Metrics)
		jobsCfg.StaticJobs = append(jobsCfg.StaticJobs, job)
	}

	for _, customNamespaceJob := range c.CustomNamespace {
		job := model.CustomNamespaceJob{}
		job.Regions = customNamespaceJob.Regions
		job.Name = customNamespaceJob.Name
		job.Namespace = customNamespaceJob.Namespace
		job.DimensionNameRequirements = customNamespaceJob.DimensionNameRequirements
		job.RoundingPeriod = customNamespaceJob.RoundingPeriod
		job.RecentlyActiveOnly = customNamespaceJob.RecentlyActiveOnly
		job.Roles = toModelRoles(customNamespaceJob.Roles)
		job.CustomTags = toModelTags(customNamespaceJob.CustomTags)
		job.Metrics = toModelMetricConfig(customNamespaceJob.Metrics)
		jobsCfg.CustomNamespaceJobs = append(jobsCfg.CustomNamespaceJobs, job)
	}

	return jobsCfg
}

// toModelTags converts YAML tags to model tags.
func toModelTags(tags []Tag) []model.Tag {
	ret := make([]model.Tag, 0, len(tags))
	for _, t := range tags {
		ret = append(ret, model.Tag{
			Key:   t.Key,
			Value: t.Value,
		})
	}
	return ret
}

// toModelSearchTags converts YAML search tags into model search tags with
// pre-compiled regexes.
func toModelSearchTags(tags []Tag) []model.SearchTag {
	ret := make([]model.SearchTag, 0, len(tags))
	for _, t := range tags {
		// This should never panic as long as regex validation continues to happen before model mapping
		r := regexp.MustCompile(t.Value)
		ret = append(ret, model.SearchTag{
			Key:   t.Key,
			Value: r,
		})
	}
	return ret
}

// toModelRoles converts YAML roles to model roles.
func toModelRoles(roles []Role) []model.Role {
	ret := make([]model.Role, 0, len(roles))
	for _, r := range roles {
		ret = append(ret, model.Role{
			RoleArn:    r.RoleArn,
			ExternalID: r.ExternalID,
		})
	}
	return ret
}

// toModelDimensions converts YAML dimensions to model dimensions.
func toModelDimensions(dimensions []Dimension) []model.Dimension {
	ret := make([]model.Dimension, 0, len(dimensions))
	for _, d := range dimensions {
		ret = append(ret, model.Dimension{
			Name:  d.Name,
			Value: d.Value,
		})
	}
	return ret
}

// toModelMetricConfig converts YAML metrics (with pointer booleans already
// resolved by validateMetric) into model metric configs.
func toModelMetricConfig(metrics []*Metric) []*model.MetricConfig {
	ret := make([]*model.MetricConfig, 0, len(metrics))
	for _, m := range metrics {
		ret = append(ret, &model.MetricConfig{
			Name:                   m.Name,
			Statistics:             m.Statistics,
			Period:                 m.Period,
			Length:                 m.Length,
			Delay:                  m.Delay,
			NilToZero:              aws.ToBool(m.NilToZero),
			AddCloudwatchTimestamp: aws.ToBool(m.AddCloudwatchTimestamp),
			ExportAllDataPoints:    aws.ToBool(m.ExportAllDataPoints),
		})
	}
	return ret
}

// logConfigErrors logs as warning any config unmarshalling error.
func logConfigErrors(cfg []byte, logger *slog.Logger) {
	var sc ScrapeConf
	var errMsgs []string
	if err := yaml.UnmarshalStrict(cfg, &sc); err != nil {
		terr := &yaml.TypeError{}
		if errors.As(err, &terr) {
			errMsgs = append(errMsgs, terr.Errors...)
		} else {
			errMsgs = append(errMsgs, err.Error())
		}
	}
	if sc.APIVersion == "" {
		errMsgs = append(errMsgs, "missing apiVersion")
	}
	if len(errMsgs) > 0 {
		for _, msg := range errMsgs {
			logger.Warn("config file syntax error", "err", msg)
		}
		logger.Warn(`Config file error(s) detected: Yace might not work as expected. Future versions of Yace might fail to run with an invalid config file.`)
	}
}


================================================
FILE: pkg/config/config_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config

import (
	"fmt"
	"strings"
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"
)

// TestConfLoad verifies that known-good config files in testdata load and
// validate without error.
func TestConfLoad(t *testing.T) {
	testCases := []struct {
		configFile string
	}{
		{configFile: "config_test.yml"},
		{configFile: "empty_rolearn.ok.yml"},
		{configFile: "sts_region.ok.yml"},
		{configFile: "multiple_roles.ok.yml"},
		{configFile: "custom_namespace.ok.yml"},
	}
	for _, tc := range testCases {
		config := ScrapeConf{}
		configFile := fmt.Sprintf("testdata/%s", tc.configFile)
		if _, err := config.Load(configFile, promslog.NewNopLogger()); err != nil {
			// Fail fast: a broken fixture makes the remaining cases meaningless.
			t.Fatal(err)
		}
	}
}

// TestBadConfigs verifies that each invalid config file in testdata is
// rejected with an error containing the expected message fragment.
func TestBadConfigs(t *testing.T) {
	testCases := []struct {
		configFile string
		errorMsg   string
	}{
		{
			configFile: "externalid_without_rolearn.bad.yml",
			errorMsg:   "RoleArn should not be empty",
		},
		{
			configFile: "externalid_with_empty_rolearn.bad.yml",
			errorMsg:   "RoleArn should not be empty",
		},
		{
			configFile: "unknown_version.bad.yml",
			errorMsg:   "unknown apiVersion value 'invalidVersion'",
		},
		{
			configFile: "custom_namespace_without_name.bad.yml",
			errorMsg:   "Name should not be empty",
		},
		{
			configFile: "custom_namespace_without_namespace.bad.yml",
			errorMsg:   "Namespace should not be empty",
		},
		{
			configFile: "custom_namespace_without_region.bad.yml",
			errorMsg:   "Regions should not be empty",
		},
		{
			configFile: "discovery_job_type_unknown.bad.yml",
			errorMsg:   "Discovery job [0]: Service is not in known list!: AWS/FancyNewNamespace",
		},
		{
			configFile: "discovery_job_type_alias.bad.yml",
			errorMsg:   "Discovery job [0]: Invalid 'type' field, use namespace \"AWS/S3\" rather than alias \"s3\"",
		},
		{
			configFile: "discovery_job_exported_tags_alias.bad.yml",
			errorMsg:   "Discovery jobs: Invalid key in 'exportedTagsOnMetrics', use namespace \"AWS/S3\" rather than alias \"s3\"",
		},
		{
			configFile: "discovery_job_exported_tags_mismatch.bad.yml",
			errorMsg:   "Discovery jobs: 'exportedTagsOnMetrics' key \"AWS/RDS\" does not match with any discovery job type",
		},
	}

	for _, tc := range testCases {
		config := ScrapeConf{}
		configFile := fmt.Sprintf("testdata/%s", tc.configFile)
		_, err := config.Load(configFile, promslog.NewNopLogger())
		if err == nil {
			t.Fatalf("expected validation error for config file %q", tc.configFile)
		}
		// Fixed typo: message previously read "expecter error".
		if !strings.Contains(err.Error(), tc.errorMsg) {
			t.Fatalf("expected error for config file %q to contain %q but got: %s", tc.configFile, tc.errorMsg, err)
		}
	}
}

// TestValidateConfigFailuresWhenUsingAsLibrary exercises Validate directly on
// in-memory ScrapeConf values (the embedding use case), pinning exact error
// messages.
func TestValidateConfigFailuresWhenUsingAsLibrary(t *testing.T) {
	type testcase struct {
		config   ScrapeConf
		errorMsg string
	}
	testCases := map[string]testcase{
		"empty role should be configured when environment role is desired": {
			config: ScrapeConf{
				APIVersion: "v1alpha1",
				StsRegion:  "us-east-2",
				Discovery: Discovery{
					Jobs: []*Job{{
						Regions: []string{"us-east-2"},
						Type:    "AWS/SQS",
						Metrics: []*Metric{{
							Name:       "NumberOfMessagesSent",
							Statistics: []string{"Average"},
						}},
					}},
				},
			},
			errorMsg: "no IAM roles configured. If the current IAM role is desired, an empty Role should be configured",
		},
		"enhanced metrics are not supported for the namespace": {
			config: ScrapeConf{
				Discovery: Discovery{
					Jobs: []*Job{{
						Regions: []string{"us-east-2"},
						Type:    "AWS/S3",
						Roles:   []Role{{RoleArn: "arn:aws:iam::123456789012:role/test"}},
						Metrics: []*Metric{{
							Name:       "BucketSizeBytes",
							Statistics: []string{"Average"},
						}},
						EnhancedMetrics: []*EnhancedMetric{{
							Name: "SomeEnhancedMetric",
						}},
					}},
				},
			},
			errorMsg: "Discovery job [AWS/S3/0]: enhanced metrics are not supported for this namespace: enhanced metrics service for namespace AWS/S3 not found",
		},
		"enhanced metrics are not supported for the enhanced metrics service": {
			config: ScrapeConf{
				Discovery: Discovery{
					Jobs: []*Job{{
						Regions: []string{"us-east-2"},
						Type:    "AWS/Lambda",
						Roles:   []Role{{RoleArn: "arn:aws:iam::123456789012:role/test"}},
						Metrics: []*Metric{{
							Name:       "BucketSizeBytes",
							Statistics: []string{"Average"},
						}},
						EnhancedMetrics: []*EnhancedMetric{{
							Name: "SomeEnhancedMetric",
						}},
					}},
				},
			},
			errorMsg: "Discovery job [AWS/Lambda/0]: enhanced metric \"SomeEnhancedMetric\" is not supported for this namespace",
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			_, err := tc.config.Validate(promslog.NewNopLogger())
			require.Error(t, err, "Expected config validation to fail")
			require.Equal(t, tc.errorMsg, err.Error())
		})
	}
}


================================================
FILE: pkg/config/feature_flags.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import "context"

// flagsCtxKey is the private context key under which a FeatureFlags is stored.
type flagsCtxKey struct{}

// AlwaysReturnInfoMetrics is a feature flag used to enable the return of info metrics even when there are no corresponding CloudWatch metrics
const AlwaysReturnInfoMetrics = "always-return-info-metrics"

// FeatureFlags is an interface all objects that can tell whether or not a feature flag is enabled can implement.
type FeatureFlags interface {
	// IsFeatureEnabled tells if the feature flag identified by flag is enabled.
	IsFeatureEnabled(flag string) bool
}

// CtxWithFlags injects a FeatureFlags inside a given context.Context, so that they are easily communicated through layers.
func CtxWithFlags(ctx context.Context, ctrl FeatureFlags) context.Context {
	return context.WithValue(ctx, flagsCtxKey{}, ctrl)
}

// FlagsFromCtx retrieves a FeatureFlags from a given context.Context, defaulting to one with all feature flags disabled if none is found.
func FlagsFromCtx(ctx context.Context) FeatureFlags {
	// The value is only ever stored under flagsCtxKey by CtxWithFlags, which
	// takes a FeatureFlags, so the unchecked type assertion below holds.
	if ctrl := ctx.Value(flagsCtxKey{}); ctrl != nil {
		return ctrl.(FeatureFlags)
	}
	// Nothing injected: behave as if every feature flag were disabled.
	return noFeatureFlags{}
}

// noFeatureFlags implements a no-op FeatureFlags
type noFeatureFlags struct{}

// IsFeatureEnabled always reports false: no feature is ever enabled by default.
func (nff noFeatureFlags) IsFeatureEnabled(_ string) bool {
	return false
}

================================================
FILE: pkg/config/feature_flags_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestFeatureFlagsInContext_DefaultsToNonEnabled checks that a context with no
// injected FeatureFlags yields the no-op default, where every flag is disabled.
func TestFeatureFlagsInContext_DefaultsToNonEnabled(t *testing.T) {
	flags := FlagsFromCtx(context.Background())
	require.False(t, flags.IsFeatureEnabled("some-feature"))
	require.False(t, flags.IsFeatureEnabled("some-other-feature"))
}

// flags is a test double whose IsFeatureEnabled always reports true.
type flags struct{}

func (f flags) IsFeatureEnabled(_ string) bool {
	return true
}

// TestFeatureFlagsInContext_RetrievesFlagsFromContext checks that the value
// stored with CtxWithFlags is the one FlagsFromCtx hands back.
func TestFeatureFlagsInContext_RetrievesFlagsFromContext(t *testing.T) {
	ctx := CtxWithFlags(context.Background(), flags{})
	require.True(t, FlagsFromCtx(ctx).IsFeatureEnabled("some-feature"))
	require.True(t, FlagsFromCtx(ctx).IsFeatureEnabled("some-other-feature"))
}

================================================
FILE: pkg/config/services.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/grafana/regexp"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// ServiceConfig defines a namespace supported by discovery jobs.
type ServiceConfig struct {
	// Namespace is the formal AWS namespace identification string
	Namespace string
	// Alias is the formal AWS namespace alias
	Alias string
	// ResourceFilters is a list of strings used as filters in the
	// resourcegroupstaggingapi.GetResources request.
	// It should always
	// be provided, except for those few namespaces where resources can't
	// be tagged.
	ResourceFilters []*string
	// DimensionRegexps is an optional list of regexes that allow to
	// extract dimensions names from a resource ARN. The regex should
	// use named groups that correspond to AWS dimensions names.
	// In cases where the dimension name has a space, it should be
	// replaced with an underscore (`_`).
	DimensionRegexps []*regexp.Regexp
}

// ToModelDimensionsRegexp converts the service's DimensionRegexps into the
// model representation, pairing each compiled regex with the AWS dimension
// names derived from its named capture groups.
func (sc ServiceConfig) ToModelDimensionsRegexp() []model.DimensionsRegexp {
	dr := []model.DimensionsRegexp{}

	for _, dimensionRegexp := range sc.DimensionRegexps {
		names := dimensionRegexp.SubexpNames()
		dimensionNames := make([]string, 0, len(names)-1)

		// skip first name, it's always an empty string
		for i := 1; i < len(names); i++ {
			// in the regex names we use underscores where AWS dimensions have spaces
			dimensionNames = append(dimensionNames, strings.ReplaceAll(names[i], "_", " "))
		}

		dr = append(dr, model.DimensionsRegexp{
			Regexp:          dimensionRegexp,
			DimensionsNames: dimensionNames,
		})
	}

	return dr
}

// toModelEnhancedMetricsConfig maps the config-level enhanced metric entries
// onto their model counterparts (currently only the metric name is carried).
func (sc ServiceConfig) toModelEnhancedMetricsConfig(ems []*EnhancedMetric) []*model.EnhancedMetricConfig {
	emc := make([]*model.EnhancedMetricConfig, 0, len(ems))
	for _, em := range ems {
		emc = append(emc, &model.EnhancedMetricConfig{
			Name: em.Name,
		})
	}
	return emc
}

// serviceConfigs is the lookup table type holding all supported namespaces.
type serviceConfigs []ServiceConfig

// GetService returns the ServiceConfig whose Namespace equals serviceType, or
// nil if the namespace is not supported. The returned pointer refers to a
// copy of the table entry, so callers cannot mutate SupportedServices.
func (sc serviceConfigs) GetService(serviceType string) *ServiceConfig {
	for _, sf := range sc {
		if sf.Namespace == serviceType {
			return &sf
		}
	}
	return nil
}

// getServiceByAlias returns the ServiceConfig whose Alias equals alias, or
// nil if no entry carries that alias. Aliases are not guaranteed unique in
// SupportedServices (e.g. "redshift" appears twice); the first match wins.
func (sc serviceConfigs) getServiceByAlias(alias string) *ServiceConfig {
	for _, sf := range sc {
		if sf.Alias == alias {
			return &sf
		}
	}
	return nil
}

// SupportedServices lists every namespace that discovery jobs can handle.
// NOTE(review): in this extract the named capture groups of the regexes below
// appear stripped (e.g. `(?P.*)`); upstream they carry names such as
// `(?P<CertificateAuthorityId>.*)` — confirm against the original file before
// compiling.
var SupportedServices = serviceConfigs{
	{
		Namespace: "CWAgent",
		Alias:     "cwagent",
	},
	{
		Namespace: "AWS/Usage",
		Alias:     "usage",
	},
	{
		Namespace: "AWS/CertificateManager",
		Alias:     "acm",
		ResourceFilters: []*string{
			aws.String("acm:certificate"),
		},
	},
	{
		Namespace: "AWS/ACMPrivateCA",
		Alias:     "acm-pca",
		ResourceFilters:
[]*string{ aws.String("acm-pca:certificate-authority"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.*)"), }, }, { Namespace: "AmazonMWAA", Alias: "airflow", ResourceFilters: []*string{ aws.String("airflow"), }, }, { Namespace: "AWS/MWAA", Alias: "mwaa", }, { Namespace: "AWS/ApplicationELB", Alias: "alb", ResourceFilters: []*string{ aws.String("elasticloadbalancing:loadbalancer/app"), aws.String("elasticloadbalancing:targetgroup"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":(?Ptargetgroup/.+)"), regexp.MustCompile(":loadbalancer/(?P.+)$"), }, }, { Namespace: "AWS/AppStream", Alias: "appstream", ResourceFilters: []*string{ aws.String("appstream"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":fleet/(?P[^/]+)"), }, }, { Namespace: "AWS/Backup", Alias: "backup", ResourceFilters: []*string{ aws.String("backup"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":backup-vault:(?P[^:]+)"), }, }, { Namespace: "AWS/ApiGateway", Alias: "apigateway", ResourceFilters: []*string{ aws.String("apigateway"), }, DimensionRegexps: []*regexp.Regexp{ // DimensionRegexps starting with 'restapis' are for APIGateway V1 gateways (REST API gateways) regexp.MustCompile("/restapis/(?P[^/]+)$"), regexp.MustCompile("/restapis/(?P[^/]+)/stages/(?P[^/]+)$"), // DimensionRegexps starting 'apis' are for APIGateway V2 gateways (HTTP and Websocket gateways) regexp.MustCompile("/apis/(?P[^/]+)$"), regexp.MustCompile("/apis/(?P[^/]+)/stages/(?P[^/]+)$"), regexp.MustCompile("/apis/(?P[^/]+)/routes/(?P[^/]+)$"), }, }, { Namespace: "AWS/AmazonMQ", Alias: "mq", ResourceFilters: []*string{ aws.String("mq"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("broker:(?P[^:]+)"), }, }, { Namespace: "AWS/AppRunner", Alias: "apprunner", }, { Namespace: "AWS/AppSync", Alias: "appsync", ResourceFilters: []*string{ aws.String("appsync"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("apis/(?P[^/]+)"), }, }, { Namespace: "AWS/Athena", 
Alias: "athena", ResourceFilters: []*string{ aws.String("athena"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("workgroup/(?P[^/]+)"), }, }, { Namespace: "AWS/AutoScaling", Alias: "asg", DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("autoScalingGroupName/(?P[^/]+)"), }, }, { Namespace: "AWS/ElasticBeanstalk", Alias: "beanstalk", ResourceFilters: []*string{ aws.String("elasticbeanstalk:environment"), }, DimensionRegexps: []*regexp.Regexp{ // arn uses /${ApplicationName}/${EnvironmentName}, but only EnvironmentName is a Metric Dimension regexp.MustCompile("environment/[^/]+/(?P[^/]+)"), }, }, { Namespace: "AWS/Billing", Alias: "billing", }, { Namespace: "AWS/Cassandra", Alias: "cassandra", ResourceFilters: []*string{ aws.String("cassandra"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("keyspace/(?P[^/]+)/table/(?P[^/]+)"), regexp.MustCompile("keyspace/(?P[^/]+)/"), }, }, { Namespace: "AWS/CloudFront", Alias: "cloudfront", ResourceFilters: []*string{ aws.String("cloudfront:distribution"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("distribution/(?P[^/]+)"), }, }, { Namespace: "AWS/Cognito", Alias: "cognito-idp", ResourceFilters: []*string{ aws.String("cognito-idp:userpool"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("userpool/(?P[^/]+)"), }, }, { Namespace: "AWS/DataSync", Alias: "datasync", ResourceFilters: []*string{ aws.String("datasync:task"), aws.String("datasync:agent"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":task/(?P[^/]+)"), regexp.MustCompile(":agent/(?P[^/]+)"), }, }, { Namespace: "AWS/DirectoryService", Alias: "ds", ResourceFilters: []*string{ aws.String("ds:directory"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":directory/(?P[^/]+)"), }, }, { Namespace: "AWS/DMS", Alias: "dms", ResourceFilters: []*string{ aws.String("dms"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("rep:[^/]+/(?P[^/]+)"), regexp.MustCompile("task:(?P[^/]+)/(?P[^/]+)"), 
}, }, { Namespace: "AWS/DDoSProtection", Alias: "shield", ResourceFilters: []*string{ aws.String("shield:protection"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.+)"), }, }, { Namespace: "AWS/DocDB", Alias: "docdb", ResourceFilters: []*string{ aws.String("rds:db"), aws.String("rds:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("cluster:(?P[^/]+)"), regexp.MustCompile("db:(?P[^/]+)"), }, }, { Namespace: "AWS/DX", Alias: "dx", ResourceFilters: []*string{ aws.String("directconnect"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":dxcon/(?P[^/]+)"), regexp.MustCompile(":dxlag/(?P[^/]+)"), regexp.MustCompile(":dxvif/(?P[^/]+)"), }, }, { Namespace: "AWS/DynamoDB", Alias: "dynamodb", ResourceFilters: []*string{ aws.String("dynamodb:table"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":table/(?P[^/]+)"), }, }, { Namespace: "AWS/EBS", Alias: "ebs", ResourceFilters: []*string{ aws.String("ec2:volume"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("volume/(?P[^/]+)"), }, }, { Namespace: "AWS/ElastiCache", Alias: "ec", ResourceFilters: []*string{ aws.String("elasticache:cluster"), aws.String("elasticache:serverlesscache"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("cluster:(?P[^/]+)"), regexp.MustCompile("serverlesscache:(?P[^/]+)"), }, }, { Namespace: "AWS/MemoryDB", Alias: "memorydb", ResourceFilters: []*string{ aws.String("memorydb:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("cluster/(?P[^/]+)"), }, }, { Namespace: "AWS/EC2", Alias: "ec2", ResourceFilters: []*string{ aws.String("ec2:instance"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("instance/(?P[^/]+)"), }, }, { Namespace: "AWS/EC2Spot", Alias: "ec2Spot", DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.*)"), }, }, { Namespace: "AWS/EC2CapacityReservations", Alias: "ec2CapacityReservations", DimensionRegexps: []*regexp.Regexp{ 
regexp.MustCompile(":capacity-reservation/(?P)$"), }, }, { Namespace: "AWS/ECS", Alias: "ecs-svc", ResourceFilters: []*string{ aws.String("ecs:cluster"), aws.String("ecs:service"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster/(?P[^/]+)$"), regexp.MustCompile(":service/(?P[^/]+)/(?P[^/]+)$"), }, }, { Namespace: "ECS/ContainerInsights", Alias: "ecs-containerinsights", ResourceFilters: []*string{ aws.String("ecs:cluster"), aws.String("ecs:service"), }, DimensionRegexps: []*regexp.Regexp{ // Use "new" long arns as per // https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-account-settings.html#ecs-resource-ids regexp.MustCompile(":cluster/(?P[^/]+)$"), regexp.MustCompile(":service/(?P[^/]+)/(?P[^/]+)$"), }, }, { Namespace: "ContainerInsights", Alias: "containerinsights", ResourceFilters: []*string{ aws.String("eks:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster/(?P[^/]+)$"), }, }, { Namespace: "AWS/EFS", Alias: "efs", ResourceFilters: []*string{ aws.String("elasticfilesystem:file-system"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("file-system/(?P[^/]+)"), }, }, { Namespace: "AWS/EKS", Alias: "eks", ResourceFilters: []*string{ aws.String("eks:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster/(?P[^/]+)$"), }, }, { Namespace: "AWS/ELB", Alias: "elb", ResourceFilters: []*string{ aws.String("elasticloadbalancing:loadbalancer"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":loadbalancer/(?P.+)$"), }, }, { Namespace: "AWS/ElasticMapReduce", Alias: "emr", ResourceFilters: []*string{ aws.String("elasticmapreduce:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("cluster/(?P[^/]+)"), }, }, { Namespace: "AWS/EMRServerless", Alias: "emr-serverless", ResourceFilters: []*string{ aws.String("emr-serverless:applications"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("applications/(?P[^/]+)"), }, }, { Namespace: "AWS/ES", Alias: 
"es", ResourceFilters: []*string{ aws.String("es:domain"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":domain/(?P[^/]+)"), }, }, { Namespace: "AWS/Firehose", Alias: "firehose", ResourceFilters: []*string{ aws.String("firehose"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":deliverystream/(?P[^/]+)"), }, }, { Namespace: "AWS/FSx", Alias: "fsx", ResourceFilters: []*string{ aws.String("fsx:file-system"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("file-system/(?P[^/]+)"), }, }, { Namespace: "AWS/GameLift", Alias: "gamelift", ResourceFilters: []*string{ aws.String("gamelift"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":fleet/(?P[^/]+)"), }, }, { Namespace: "AWS/GatewayELB", Alias: "gwlb", ResourceFilters: []*string{ aws.String("elasticloadbalancing:loadbalancer"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":(?Ptargetgroup/.+)"), regexp.MustCompile(":loadbalancer/(?P.+)$"), }, }, { Namespace: "AWS/GlobalAccelerator", Alias: "ga", ResourceFilters: []*string{ aws.String("globalaccelerator"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("accelerator/(?P[^/]+)$"), regexp.MustCompile("accelerator/(?P[^/]+)/listener/(?P[^/]+)$"), regexp.MustCompile("accelerator/(?P[^/]+)/listener/(?P[^/]+)/endpoint-group/(?P[^/]+)$"), }, }, { Namespace: "Glue", Alias: "glue", ResourceFilters: []*string{ aws.String("glue:job"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":job/(?P[^/]+)"), }, }, { Namespace: "AWS/IoT", Alias: "iot", ResourceFilters: []*string{ aws.String("iot:rule"), aws.String("iot:provisioningtemplate"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":rule/(?P[^/]+)"), regexp.MustCompile(":provisioningtemplate/(?P[^/]+)"), }, }, { Namespace: "AWS/Kafka", Alias: "kafka", ResourceFilters: []*string{ aws.String("kafka:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster/(?P[^/]+)"), }, }, { Namespace: "AWS/KafkaConnect", Alias: 
"kafkaconnect", ResourceFilters: []*string{ aws.String("kafka:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":connector/(?P[^/]+)"), }, }, { Namespace: "AWS/Kinesis", Alias: "kinesis", ResourceFilters: []*string{ aws.String("kinesis:stream"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":stream/(?P[^/]+)"), }, }, { Namespace: "AWS/KinesisAnalytics", Alias: "kinesis-analytics", ResourceFilters: []*string{ aws.String("kinesisanalytics:application"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":application/(?P[^/]+)"), }, }, { Namespace: "AWS/KMS", Alias: "kms", ResourceFilters: []*string{ aws.String("kms:key"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":key/(?P[^/]+)"), }, }, { Namespace: "AWS/Lambda", Alias: "lambda", ResourceFilters: []*string{ aws.String("lambda:function"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":function:(?P[^/]+)"), }, }, { Namespace: "AWS/Logs", Alias: "logs", ResourceFilters: []*string{ aws.String("logs:log-group"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":log-group:(?P.+)"), }, }, { Namespace: "AWS/MediaConnect", Alias: "mediaconnect", ResourceFilters: []*string{ aws.String("mediaconnect:flow"), aws.String("mediaconnect:source"), aws.String("mediaconnect:output"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("^(?P.*:flow:.*)$"), regexp.MustCompile("^(?P.*:source:.*)$"), regexp.MustCompile("^(?P.*:output:.*)$"), }, }, { Namespace: "AWS/MediaConvert", Alias: "mediaconvert", ResourceFilters: []*string{ aws.String("mediaconvert"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.*:.*:mediaconvert:.*:queues/.*)$"), }, }, { Namespace: "AWS/MediaPackage", Alias: "mediapackage", ResourceFilters: []*string{ aws.String("mediapackage"), aws.String("mediapackagev2"), aws.String("mediapackage-vod"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":channels/(?P.+)$"), 
regexp.MustCompile(":packaging-configurations/(?P.+)$"), }, }, { Namespace: "AWS/MediaLive", Alias: "medialive", ResourceFilters: []*string{ aws.String("medialive:channel"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":channel:(?P.+)$"), }, }, { Namespace: "AWS/MediaTailor", Alias: "mediatailor", ResourceFilters: []*string{ aws.String("mediatailor:playbackConfiguration"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("playbackConfiguration/(?P[^/]+)"), }, }, { Namespace: "AWS/Neptune", Alias: "neptune", ResourceFilters: []*string{ aws.String("rds:db"), aws.String("rds:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster:(?P[^/]+)"), regexp.MustCompile(":db:(?P[^/]+)"), }, }, { Namespace: "AWS/NetworkFirewall", Alias: "nfw", ResourceFilters: []*string{ aws.String("network-firewall:firewall"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("firewall/(?P[^/]+)"), }, }, { Namespace: "AWS/NATGateway", Alias: "ngw", ResourceFilters: []*string{ aws.String("ec2:natgateway"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("natgateway/(?P[^/]+)"), }, }, { Namespace: "AWS/NetworkELB", Alias: "nlb", ResourceFilters: []*string{ aws.String("elasticloadbalancing:loadbalancer/net"), aws.String("elasticloadbalancing:targetgroup"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":(?Ptargetgroup/.+)"), regexp.MustCompile(":loadbalancer/(?P.+)$"), }, }, { Namespace: "AWS/PrivateLinkEndpoints", Alias: "vpc-endpoint", ResourceFilters: []*string{ aws.String("ec2:vpc-endpoint"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":vpc-endpoint/(?P.+)"), }, }, { Namespace: "AWS/PrivateLinkServices", Alias: "vpc-endpoint-service", ResourceFilters: []*string{ aws.String("ec2:vpc-endpoint-service"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":vpc-endpoint-service/(?P.+)"), }, }, { Namespace: "AWS/Prometheus", Alias: "amp", }, { Namespace: "AWS/QLDB", Alias: "qldb", ResourceFilters: 
[]*string{ aws.String("qldb"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":ledger/(?P[^/]+)"), }, }, { Namespace: "AWS/QuickSight", Alias: "quicksight", }, { Namespace: "AWS/RDS", Alias: "rds", ResourceFilters: []*string{ aws.String("rds:db"), aws.String("rds:cluster"), aws.String("rds:db-proxy"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster:(?P[^/]+)"), regexp.MustCompile(":db:(?P[^/]+)"), regexp.MustCompile(":db-proxy:(?P[^/]+)"), }, }, { Namespace: "AWS/Redshift", Alias: "redshift", ResourceFilters: []*string{ aws.String("redshift:cluster"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":cluster:(?P[^/]+)"), }, }, { Namespace: "AWS/Redshift-Serverless", Alias: "redshift", ResourceFilters: []*string{ aws.String("redshift-serverless:workgroup"), aws.String("redshift-serverless:namespace"), }, }, { Namespace: "AWS/Route53Resolver", Alias: "route53-resolver", ResourceFilters: []*string{ aws.String("route53resolver"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":resolver-endpoint/(?P[^/]+)"), }, }, { Namespace: "AWS/Route53", Alias: "route53", ResourceFilters: []*string{ aws.String("route53"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":healthcheck/(?P[^/]+)"), }, }, { Namespace: "AWS/RUM", Alias: "rum", }, { Namespace: "AWS/S3", Alias: "s3", ResourceFilters: []*string{ aws.String("s3"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P[^:]+)$"), }, }, { Namespace: "AWS/Scheduler", Alias: "scheduler", }, { Namespace: "AWS/ECR", Alias: "ecr", }, { Namespace: "AWS/Timestream", Alias: "timestream", }, { Namespace: "AWS/SecretsManager", Alias: "secretsmanager", }, { Namespace: "AWS/SES", Alias: "ses", }, { Namespace: "AWS/States", Alias: "sfn", ResourceFilters: []*string{ aws.String("states"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.*)"), }, }, { Namespace: "AWS/SNS", Alias: "sns", ResourceFilters: []*string{ aws.String("sns"), }, DimensionRegexps: 
[]*regexp.Regexp{ regexp.MustCompile("(?P[^:]+)$"), }, }, { Namespace: "AWS/SQS", Alias: "sqs", ResourceFilters: []*string{ aws.String("sqs"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P[^:]+)$"), }, }, { Namespace: "AWS/StorageGateway", Alias: "storagegateway", ResourceFilters: []*string{ aws.String("storagegateway"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":gateway/(?P[^:]+)$"), regexp.MustCompile(":share/(?P[^:]+)$"), regexp.MustCompile("^(?P[^:/]+)/(?P[^:]+)$"), }, }, { Namespace: "AWS/Transfer", Alias: "transfer", }, { Namespace: "AWS/TransitGateway", Alias: "tgw", ResourceFilters: []*string{ aws.String("ec2:transit-gateway"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":transit-gateway/(?P[^/]+)"), regexp.MustCompile("(?P[^/]+)/(?P[^/]+)"), }, }, { Namespace: "AWS/TrustedAdvisor", Alias: "trustedadvisor", }, { Namespace: "AWS/VPN", Alias: "vpn", ResourceFilters: []*string{ aws.String("ec2:vpn-connection"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":vpn-connection/(?P[^/]+)"), }, }, { Namespace: "AWS/ClientVPN", Alias: "clientvpn", ResourceFilters: []*string{ aws.String("ec2:client-vpn-endpoint"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":client-vpn-endpoint/(?P[^/]+)"), }, }, { Namespace: "AWS/WAFV2", Alias: "wafv2", ResourceFilters: []*string{ aws.String("wafv2"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("/webacl/(?P[^/]+)"), }, }, { Namespace: "AWS/WorkSpaces", Alias: "workspaces", ResourceFilters: []*string{ aws.String("workspaces:workspace"), aws.String("workspaces:directory"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":workspace/(?P[^/]+)$"), regexp.MustCompile(":directory/(?P[^/]+)$"), }, }, { Namespace: "AWS/AOSS", Alias: "aoss", ResourceFilters: []*string{ aws.String("aoss:collection"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":collection/(?P[^/]+)"), }, }, { Namespace: "AWS/SageMaker", Alias: "sagemaker", 
ResourceFilters: []*string{ aws.String("sagemaker:endpoint"), aws.String("sagemaker:inference-component"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":endpoint/(?P[^/]+)$"), regexp.MustCompile(":inference-component/(?P[^/]+)$"), }, }, { Namespace: "/aws/sagemaker/Endpoints", Alias: "sagemaker-endpoints", ResourceFilters: []*string{ aws.String("sagemaker:endpoint"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":endpoint/(?P[^/]+)$"), }, }, { Namespace: "/aws/sagemaker/InferenceComponents", Alias: "sagemaker-inference-components", ResourceFilters: []*string{ aws.String("sagemaker:inference-component"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":inference-component/(?P[^/]+)$"), }, }, { Namespace: "/aws/sagemaker/TrainingJobs", Alias: "sagemaker-training", ResourceFilters: []*string{ aws.String("sagemaker:training-job"), }, }, { Namespace: "/aws/sagemaker/ProcessingJobs", Alias: "sagemaker-processing", ResourceFilters: []*string{ aws.String("sagemaker:processing-job"), }, }, { Namespace: "/aws/sagemaker/TransformJobs", Alias: "sagemaker-transform", ResourceFilters: []*string{ aws.String("sagemaker:transform-job"), }, }, { Namespace: "/aws/sagemaker/InferenceRecommendationsJobs", Alias: "sagemaker-inf-rec", ResourceFilters: []*string{ aws.String("sagemaker:inference-recommendations-job"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":inference-recommendations-job/(?P[^/]+)"), }, }, { Namespace: "AWS/Sagemaker/ModelBuildingPipeline", Alias: "sagemaker-model-building-pipeline", ResourceFilters: []*string{ aws.String("sagemaker:pipeline"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":pipeline/(?P[^/]+)"), }, }, { Namespace: "AWS/IPAM", Alias: "ipam", ResourceFilters: []*string{ aws.String("ec2:ipam-pool"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":ipam-pool/(?P[^/]+)$"), }, }, { Namespace: "AWS/Bedrock", Alias: "bedrock", }, { Namespace: "AWS/Bedrock/Agents", Alias: 
"bedrock-agents", ResourceFilters: []*string{ aws.String("bedrock:agent-alias"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.+)"), }, }, { Namespace: "AWS/Bedrock/Guardrails", Alias: "bedrock-guardrails", ResourceFilters: []*string{ aws.String("bedrock:guardrail"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile("(?P.+)"), }, }, { Namespace: "AWS/Events", Alias: "event-rule", ResourceFilters: []*string{ aws.String("events"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":rule/(?P[^/]+)/(?P[^/]+)$"), regexp.MustCompile(":rule/aws.partner/(?P.+)/(?P[^/]+)$"), }, }, { Namespace: "AWS/VpcLattice", Alias: "vpc-lattice", ResourceFilters: []*string{ aws.String("vpc-lattice:service"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":service/(?P[^/]+)$"), }, }, { Namespace: "AWS/Network Manager", Alias: "networkmanager", ResourceFilters: []*string{ aws.String("networkmanager:core-network"), }, DimensionRegexps: []*regexp.Regexp{ regexp.MustCompile(":core-network/(?P[^/]+)$"), }, }, } ================================================ FILE: pkg/config/services_test.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package config import ( "fmt" "testing" "github.com/aws/aws-sdk-go-v2/aws" "github.com/stretchr/testify/require" ) func TestSupportedServices(t *testing.T) { for i, svc := range SupportedServices { require.NotNil(t, svc.Namespace, fmt.Sprintf("Nil Namespace for service at index '%d'", i)) require.NotNil(t, svc.Alias, fmt.Sprintf("Nil Alias for service '%s' at index '%d'", svc.Namespace, i)) if svc.ResourceFilters != nil { require.NotEmpty(t, svc.ResourceFilters) for _, filter := range svc.ResourceFilters { require.NotEmpty(t, aws.ToString(filter)) } } if svc.DimensionRegexps != nil { require.NotEmpty(t, svc.DimensionRegexps) for _, regex := range svc.DimensionRegexps { require.NotEmpty(t, regex.String()) require.Positive(t, regex.NumSubexp()) } } } } ================================================ FILE: pkg/config/testdata/config_test.yml ================================================ apiVersion: v1alpha1 discovery: exportedTagsOnMetrics: AWS/EBS: - VolumeId AWS/Kafka: - Name jobs: - type: AWS/Billing regions: - us-east-1 metrics: - name: EstimatedCharges statistics: - Sum period: 3600 length: 87600 - type: AWS/ES regions: - eu-west-1 searchTags: - key: Environment value: qa metrics: - name: FreeStorageSpace statistics: - Sum period: 60 length: 600 - name: ClusterStatus.green statistics: - Minimum period: 60 length: 600 - name: ClusterStatus.yellow statistics: - Maximum period: 60 length: 600 - name: ClusterStatus.red statistics: - Maximum period: 60 length: 600 - type: AWS/ELB regions: - eu-west-1 length: 900 delay: 120 statistics: - Minimum - Maximum - Sum searchTags: - key: KubernetesCluster value: production-19 metrics: - name: HealthyHostCount statistics: - Minimum period: 600 length: 600 #(this will be ignored) - name: HTTPCode_Backend_4XX statistics: - Sum period: 60 length: 900 #(this will be ignored) delay: 300 #(this will be ignored) nilToZero: true - name: HTTPCode_Backend_5XX period: 60 - type: AWS/ApplicationELB regions: - eu-west-1 searchTags: - 
key: kubernetes.io/service-name value: .* metrics: - name: UnHealthyHostCount statistics: - Maximum period: 60 length: 600 - type: AWS/VPN regions: - eu-west-1 searchTags: - key: kubernetes.io/service-name value: .* metrics: - name: TunnelState statistics: - p90 period: 60 length: 300 - type: AWS/Kinesis regions: - eu-west-1 metrics: - name: PutRecords.Success statistics: - Sum period: 60 length: 300 - type: AWS/KMS regions: - eu-west-1 metrics: - name: SecondsUntilKeyMaterialExpiration statistics: - Minimum period: 60 length: 300 - type: AWS/S3 regions: - eu-west-1 searchTags: - key: type value: public metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 - type: AWS/EBS regions: - eu-west-1 searchTags: - key: type value: public metrics: - name: BurstBalance statistics: - Minimum period: 600 length: 600 addCloudwatchTimestamp: true - type: AWS/Kafka regions: - eu-west-1 searchTags: - key: env value: dev metrics: - name: BytesOutPerSec statistics: - Average period: 600 length: 600 static: - namespace: AWS/AutoScaling name: must_be_set regions: - eu-west-1 dimensions: - name: AutoScalingGroupName value: Test customTags: - key: CustomTag value: CustomValue metrics: - name: GroupInServiceInstances statistics: - Minimum period: 60 length: 300 ================================================ FILE: pkg/config/testdata/custom_namespace.ok.yml ================================================ apiVersion: v1alpha1 sts-region: eu-west-1 customNamespace: - name: customMetrics namespace: CustomEC2Metrics regions: - us-east-1 metrics: - name: cpu_usage_idle statistics: - Average period: 300 length: 300 nilToZero: true - name: disk_free statistics: - Average period: 300 length: 300 nilToZero: true ================================================ FILE: pkg/config/testdata/custom_namespace_without_name.bad.yml ================================================ apiVersion: v1alpha1 
sts-region: eu-west-1 customNamespace: - namespace: CustomEC2Metrics regions: - us-east-1 metrics: - name: cpu_usage_idle statistics: - Average period: 300 length: 300 nilToZero: true - name: disk_free statistics: - Average period: 300 length: 300 nilToZero: true ================================================ FILE: pkg/config/testdata/custom_namespace_without_namespace.bad.yml ================================================ apiVersion: v1alpha1 sts-region: eu-west-1 customNamespace: - name: customMetrics regions: - us-east-1 metrics: - name: cpu_usage_idle statistics: - Average period: 300 length: 300 nilToZero: true - name: disk_free statistics: - Average period: 300 length: 300 nilToZero: true ================================================ FILE: pkg/config/testdata/custom_namespace_without_region.bad.yml ================================================ apiVersion: v1alpha1 sts-region: eu-west-1 customNamespace: - name: customMetrics namespace: customMetrics metrics: - name: cpu_usage_idle statistics: - Average period: 300 length: 300 nilToZero: true - name: disk_free statistics: - Average period: 300 length: 300 nilToZero: true ================================================ FILE: pkg/config/testdata/discovery_job_exported_tags_alias.bad.yml ================================================ apiVersion: v1alpha1 discovery: exportedTagsOnMetrics: s3: - BucketName jobs: - type: AWS/S3 regions: - eu-west-1 metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/discovery_job_exported_tags_mismatch.bad.yml ================================================ apiVersion: v1alpha1 discovery: exportedTagsOnMetrics: AWS/RDS: - ClusterName jobs: - type: AWS/S3 regions: - eu-west-1 metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes 
statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/discovery_job_type_alias.bad.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: s3 regions: - eu-west-1 metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/discovery_job_type_unknown.bad.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/FancyNewNamespace regions: - eu-west-1 metrics: - name: SomeMetric statistics: - Average ================================================ FILE: pkg/config/testdata/empty_rolearn.ok.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/S3 regions: - eu-west-1 roles: - roleArn: metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/externalid_with_empty_rolearn.bad.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/S3 regions: - eu-west-1 roles: - externalId: something roleArn: metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/externalid_without_rolearn.bad.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/S3 regions: - eu-west-1 roles: - externalId: something metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 
================================================ FILE: pkg/config/testdata/multiple_roles.ok.yml ================================================ apiVersion: v1alpha1 discovery: jobs: - type: AWS/S3 regions: - eu-west-1 roles: - roleArn: something externalId: something - roleArn: something metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/sts_region.ok.yml ================================================ apiVersion: v1alpha1 sts-region: eu-west-1 discovery: jobs: - type: AWS/S3 regions: - eu-west-1 roles: - externalId: something roleArn: something metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 ================================================ FILE: pkg/config/testdata/unknown_version.bad.yml ================================================ apiVersion: invalidVersion discovery: exportedTagsOnMetrics: AWS/EBS: - VolumeId AWS/Kafka: - Name jobs: - type: AWS/Billing regions: - us-east-1 metrics: - name: EstimatedCharges statistics: - Sum period: 3600 length: 87600 - type: AWS/ES regions: - eu-west-1 searchTags: - key: Environment value: qa metrics: - name: FreeStorageSpace statistics: - Sum period: 60 length: 600 - name: ClusterStatus.green statistics: - Minimum period: 60 length: 600 - name: ClusterStatus.yellow statistics: - Maximum period: 60 length: 600 - name: ClusterStatus.red statistics: - Maximum period: 60 length: 600 - type: AWS/ELB regions: - eu-west-1 length: 900 delay: 120 searchTags: - key: KubernetesCluster value: production-19 metrics: - name: HealthyHostCount statistics: - Minimum period: 600 length: 600 #(this will be ignored) - name: HTTPCode_Backend_4XX statistics: - Sum period: 60 length: 900 #(this will be ignored) delay: 300 #(this will be ignored) nilToZero: true - type: AWS/ApplicationELB regions: - eu-west-1 searchTags: - key: 
kubernetes.io/service-name value: .* metrics: - name: UnHealthyHostCount statistics: - Maximum period: 60 length: 600 - type: AWS/VPN regions: - eu-west-1 searchTags: - key: kubernetes.io/service-name value: .* metrics: - name: TunnelState statistics: - p90 period: 60 length: 300 - type: AWS/Kinesis regions: - eu-west-1 metrics: - name: PutRecords.Success statistics: - Sum period: 60 length: 300 - type: AWS/S3 regions: - eu-west-1 searchTags: - key: type value: public metrics: - name: NumberOfObjects statistics: - Average period: 86400 length: 172800 - name: BucketSizeBytes statistics: - Average period: 86400 length: 172800 - type: AWS/EBS regions: - eu-west-1 searchTags: - key: type value: public metrics: - name: BurstBalance statistics: - Minimum period: 600 length: 600 addCloudwatchTimestamp: true - type: AWS/Kafka regions: - eu-west-1 searchTags: - key: env value: dev metrics: - name: BytesOutPerSec statistics: - Average period: 600 length: 600 static: - namespace: AWS/AutoScaling name: must_be_set regions: - eu-west-1 dimensions: - name: AutoScalingGroupName value: Test customTags: - key: CustomTag value: CustomValue metrics: - name: GroupInServiceInstances statistics: - Minimum period: 60 length: 300 ================================================ FILE: pkg/exporter.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package exporter import ( "context" "fmt" "log/slog" "github.com/prometheus/client_golang/prometheus" prom "github.com/prometheus/common/model" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/promutil" ) // Metrics is a slice of prometheus metrics specific to the scraping process such API call counters var Metrics = []prometheus.Collector{ promutil.CloudwatchAPIErrorCounter, promutil.CloudwatchAPICounter, promutil.CloudwatchGetMetricDataAPICounter, promutil.CloudwatchGetMetricDataAPIMetricsCounter, promutil.CloudwatchGetMetricStatisticsAPICounter, promutil.ResourceGroupTaggingAPICounter, promutil.AutoScalingAPICounter, promutil.TargetGroupsAPICounter, promutil.APIGatewayAPICounter, promutil.Ec2APICounter, promutil.DmsAPICounter, promutil.StoragegatewayAPICounter, promutil.DuplicateMetricsFilteredCounter, } const ( DefaultMetricsPerQuery = 500 DefaultLabelsSnakeCase = false DefaultTaggingAPIConcurrency = 5 ) var DefaultCloudwatchConcurrency = cloudwatch.ConcurrencyConfig{ SingleLimit: 5, PerAPILimitEnabled: false, // If PerAPILimitEnabled is enabled, then use the same limit as the single limit by default. ListMetrics: 5, GetMetricData: 5, GetMetricStatistics: 5, } // featureFlagsMap is a map that contains the enabled feature flags. If a key is not present, it means the feature flag // is disabled. 
type featureFlagsMap map[string]struct{} type options struct { metricsPerQuery int labelsSnakeCase bool taggingAPIConcurrency int featureFlags featureFlagsMap cloudwatchConcurrency cloudwatch.ConcurrencyConfig } // IsFeatureEnabled implements the FeatureFlags interface, allowing us to inject the options-configure feature flags in the rest of the code. func (ff featureFlagsMap) IsFeatureEnabled(flag string) bool { _, ok := ff[flag] return ok } type OptionsFunc func(*options) error func MetricsPerQuery(metricsPerQuery int) OptionsFunc { return func(o *options) error { if metricsPerQuery <= 0 { return fmt.Errorf("MetricsPerQuery must be a positive value") } o.metricsPerQuery = metricsPerQuery return nil } } func LabelsSnakeCase(labelsSnakeCase bool) OptionsFunc { return func(o *options) error { o.labelsSnakeCase = labelsSnakeCase return nil } } func CloudWatchAPIConcurrency(maxConcurrency int) OptionsFunc { return func(o *options) error { if maxConcurrency <= 0 { return fmt.Errorf("CloudWatchAPIConcurrency must be a positive value") } o.cloudwatchConcurrency.SingleLimit = maxConcurrency return nil } } func CloudWatchPerAPILimitConcurrency(listMetrics, getMetricData, getMetricStatistics int) OptionsFunc { return func(o *options) error { if listMetrics <= 0 { return fmt.Errorf("LitMetrics concurrency limit must be a positive value") } if getMetricData <= 0 { return fmt.Errorf("GetMetricData concurrency limit must be a positive value") } if getMetricStatistics <= 0 { return fmt.Errorf("GetMetricStatistics concurrency limit must be a positive value") } o.cloudwatchConcurrency.PerAPILimitEnabled = true o.cloudwatchConcurrency.ListMetrics = listMetrics o.cloudwatchConcurrency.GetMetricData = getMetricData o.cloudwatchConcurrency.GetMetricStatistics = getMetricStatistics return nil } } func TaggingAPIConcurrency(maxConcurrency int) OptionsFunc { return func(o *options) error { if maxConcurrency <= 0 { return fmt.Errorf("TaggingAPIConcurrency must be a positive value") } 
o.taggingAPIConcurrency = maxConcurrency return nil } } // EnableFeatureFlag is an option that enables a feature flag on the YACE's entrypoint. func EnableFeatureFlag(flags ...string) OptionsFunc { return func(o *options) error { for _, flag := range flags { o.featureFlags[flag] = struct{}{} } return nil } } func defaultOptions() options { return options{ metricsPerQuery: DefaultMetricsPerQuery, labelsSnakeCase: DefaultLabelsSnakeCase, taggingAPIConcurrency: DefaultTaggingAPIConcurrency, featureFlags: make(featureFlagsMap), cloudwatchConcurrency: DefaultCloudwatchConcurrency, } } // UpdateMetrics is the entrypoint to scrape metrics from AWS on demand. // // Parameters are: // - `ctx`: a context for the request // - `config`: this is the struct representation of the configuration defined in top-level configuration // - `logger`: an *slog.Logger // - `registry`: any prometheus compatible registry where scraped AWS metrics will be written // - `factory`: any implementation of the `clients.Factory` interface // - `optFuncs`: (optional) any number of options funcs // // You can pre-register any of the default metrics from `Metrics` with the provided `registry` if you want them // included in the AWS scrape results. If you are using multiple instances of `registry` it // might make more sense to register these metrics in the application using YACE as a library to better // track them over the lifetime of the application. func UpdateMetrics( ctx context.Context, logger *slog.Logger, jobsCfg model.JobsConfig, registry *prometheus.Registry, factory clients.Factory, optFuncs ...OptionsFunc, ) error { // Use legacy validation as that's the behaviour of former releases. 
prom.NameValidationScheme = prom.LegacyValidation //nolint:staticcheck options := defaultOptions() for _, f := range optFuncs { if err := f(&options); err != nil { return err } } // add feature flags to context passed down to all other layers ctx = config.CtxWithFlags(ctx, options.featureFlags) tagsData, cloudwatchData := job.ScrapeAwsData( ctx, logger, jobsCfg, factory, options.metricsPerQuery, options.cloudwatchConcurrency, options.taggingAPIConcurrency, ) metrics, observedMetricLabels, err := promutil.BuildMetrics(cloudwatchData, options.labelsSnakeCase, logger) if err != nil { logger.Error("Error migrating cloudwatch metrics to prometheus metrics", "err", err) return nil } metrics, observedMetricLabels = promutil.BuildNamespaceInfoMetrics(tagsData, metrics, observedMetricLabels, options.labelsSnakeCase, logger) metrics = promutil.EnsureLabelConsistencyAndRemoveDuplicates(metrics, observedMetricLabels) registry.MustRegister(promutil.NewPrometheusCollector(metrics)) return nil } ================================================ FILE: pkg/exporter_enhancedmetrics_test.go ================================================ package exporter import ( "context" "log/slog" "strings" "testing" "github.com/aws/aws-sdk-go-v2/aws" dynamodbTypes "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" elasticacheTypes "github.com/aws/aws-sdk-go-v2/service/elasticache/types" lambdaTypes "github.com/aws/aws-sdk-go-v2/service/lambda/types" "github.com/aws/aws-sdk-go-v2/service/rds/types" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/account" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/tagging" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics" 
enhancedmetricsDynamoDBService "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/dynamodb" enhancedmetricsElastiCacheService "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/elasticache" enhancedmetricsLambdaService "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/lambda" enhancedmetricsService "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/rds" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var ( _ account.Client = &mockAccountClient{} _ cloudwatch.Client = &mockCloudwatchClient{} _ tagging.Client = &mockTaggingClient{} ) // mockFactory is a local mock that implements both clients.Factory and config.RegionalConfigProvider type mockFactoryForEnhancedMetrics struct { accountClient account.Client cloudwatchClient cloudwatch.Client taggingClient tagging.Client awsConfig *aws.Config } // GetAccountClient implements clients.Factory func (m *mockFactoryForEnhancedMetrics) GetAccountClient(string, model.Role) account.Client { return m.accountClient } // GetCloudwatchClient implements clients.Factory func (m *mockFactoryForEnhancedMetrics) GetCloudwatchClient(string, model.Role, cloudwatch.ConcurrencyConfig) cloudwatch.Client { return m.cloudwatchClient } // GetTaggingClient implements clients.Factory func (m *mockFactoryForEnhancedMetrics) GetTaggingClient(string, model.Role, int) tagging.Client { return m.taggingClient } // GetAWSRegionalConfig implements config.RegionalConfigProvider func (m *mockFactoryForEnhancedMetrics) GetAWSRegionalConfig(string, model.Role) *aws.Config { return m.awsConfig } // mockRDSClient implements the RDS Client interface for testing type mockRDSClient struct { instances []types.DBInstance err error } func (m *mockRDSClient) DescribeDBInstances(context.Context, *slog.Logger, []string) 
([]types.DBInstance, error) { if m.err != nil { return nil, m.err } return m.instances, nil } // mockLambdaClient implements the Lambda Client interface for testing type mockLambdaClient struct { functions []lambdaTypes.FunctionConfiguration err error } func (m *mockLambdaClient) ListAllFunctions(context.Context, *slog.Logger) ([]lambdaTypes.FunctionConfiguration, error) { if m.err != nil { return nil, m.err } return m.functions, nil } // mockElastiCacheClient implements the ElastiCache Client interface for testing type mockElastiCacheClient struct { clusters []elasticacheTypes.CacheCluster err error } func (m *mockElastiCacheClient) DescribeAllCacheClusters(context.Context, *slog.Logger) ([]elasticacheTypes.CacheCluster, error) { if m.err != nil { return nil, m.err } return m.clusters, nil } // mockDynamoDBClient implements the DynamoDB Client interface for testing type mockDynamoDBClient struct { tables []dynamodbTypes.TableDescription err error } func (m *mockDynamoDBClient) DescribeTables(context.Context, *slog.Logger, []string) ([]dynamodbTypes.TableDescription, error) { if m.err != nil { return nil, m.err } return m.tables, nil } func TestUpdateMetrics_WithEnhancedMetrics_RDS(t *testing.T) { defer enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsService.NewRDSService(nil), ) ctx := context.Background() logger := slog.New(slog.DiscardHandler) // Create a test AWS config testAWSConfig := &aws.Config{ Region: "us-east-1", } // Create mock clients mockAcctClient := &mockAccountClient{ accountID: "123456789012", accountAlias: "test-account", } mockCWClient := &mockCloudwatchClient{ metrics: []*model.Metric{}, metricDataResults: []cloudwatch.MetricDataResult{}, } mockTagClient := &mockTaggingClient{ resources: []*model.TaggedResource{ { ARN: "arn:aws:rds:us-east-1:123456789012:db:test-db", Namespace: "AWS/RDS", Region: "us-east-1", Tags: []model.Tag{ {Key: "Name", Value: "test-db"}, }, }, }, } // Create a mock RDS client builder 
function for testing mockRDSClientBuilder := func(_ aws.Config) enhancedmetricsService.Client { return &mockRDSClient{ instances: []types.DBInstance{ { DBInstanceArn: aws.String("arn:aws:rds:us-east-1:123456789012:db:test-db"), DBInstanceIdentifier: aws.String("test-db"), AllocatedStorage: aws.Int32(100), }, }, } } // Register the RDS service with the mock builder in the default registry enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsService.NewRDSService(mockRDSClientBuilder), ) factory := &mockFactoryForEnhancedMetrics{ accountClient: mockAcctClient, cloudwatchClient: mockCWClient, taggingClient: mockTagClient, awsConfig: testAWSConfig, } // Create a test job config with enhanced metrics jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{ { Regions: []string{"us-east-1"}, Namespace: "AWS/RDS", Roles: []model.Role{{RoleArn: "arn:aws:iam::123456789012:role/test-role"}}, EnhancedMetrics: []*model.EnhancedMetricConfig{ { Name: "AllocatedStorage", }, }, ExportedTagsOnMetrics: []string{"Name"}, }, }, } registry := prometheus.NewRegistry() err := UpdateMetrics(ctx, logger, jobsCfg, registry, factory) require.NoError(t, err) metrics, err := registry.Gather() require.NoError(t, err) require.NotNil(t, metrics) require.Len(t, metrics, 2) expectedMetric := ` # HELP aws_rds_info Help is not implemented yet. # TYPE aws_rds_info gauge aws_rds_info{name="arn:aws:rds:us-east-1:123456789012:db:test-db",tag_Name="test-db"} 0 # HELP aws_rds_allocated_storage Help is not implemented yet. 
# TYPE aws_rds_allocated_storage gauge aws_rds_allocated_storage{account_alias="test-account",account_id="123456789012",dimension_DBInstanceIdentifier="test-db",name="arn:aws:rds:us-east-1:123456789012:db:test-db",region="us-east-1",tag_Name="test-db"} 1.073741824e+11 ` err = testutil.GatherAndCompare(registry, strings.NewReader(expectedMetric)) require.NoError(t, err) } func TestUpdateMetrics_WithEnhancedMetrics_Lambda(t *testing.T) { defer enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsLambdaService.NewLambdaService(nil), ) ctx := context.Background() logger := slog.New(slog.DiscardHandler) // Create a test AWS config testAWSConfig := &aws.Config{ Region: "us-east-1", } // Create mock clients mockAcctClient := &mockAccountClient{ accountID: "123456789012", accountAlias: "test-account", } mockCWClient := &mockCloudwatchClient{ metrics: []*model.Metric{}, metricDataResults: []cloudwatch.MetricDataResult{}, } mockTagClient := &mockTaggingClient{ resources: []*model.TaggedResource{ { ARN: "arn:aws:lambda:us-east-1:123456789012:function:test-function", Namespace: "AWS/Lambda", Region: "us-east-1", Tags: []model.Tag{ {Key: "Name", Value: "test-function"}, }, }, }, } // Create a mock Lambda client builder function for testing mockLambdaClientBuilder := func(_ aws.Config) enhancedmetricsLambdaService.Client { return &mockLambdaClient{ functions: []lambdaTypes.FunctionConfiguration{ { FunctionArn: aws.String("arn:aws:lambda:us-east-1:123456789012:function:test-function"), FunctionName: aws.String("test-function"), Timeout: aws.Int32(300), }, }, } } // Register the Lambda service with the mock builder in the default registry enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsLambdaService.NewLambdaService(mockLambdaClientBuilder), ) factory := &mockFactoryForEnhancedMetrics{ accountClient: mockAcctClient, cloudwatchClient: mockCWClient, taggingClient: mockTagClient, awsConfig: testAWSConfig, } // Create a test job 
config with enhanced metrics jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{ { Regions: []string{"us-east-1"}, Namespace: "AWS/Lambda", Roles: []model.Role{{RoleArn: "arn:aws:iam::123456789012:role/test-role"}}, EnhancedMetrics: []*model.EnhancedMetricConfig{ { Name: "Timeout", }, }, ExportedTagsOnMetrics: []string{"Name"}, }, }, } registry := prometheus.NewRegistry() err := UpdateMetrics(ctx, logger, jobsCfg, registry, factory) require.NoError(t, err) metrics, err := registry.Gather() require.NoError(t, err) require.NotNil(t, metrics) require.Len(t, metrics, 2) expectedMetric := ` # HELP aws_lambda_info Help is not implemented yet. # TYPE aws_lambda_info gauge aws_lambda_info{name="arn:aws:lambda:us-east-1:123456789012:function:test-function",tag_Name="test-function"} 0 # HELP aws_lambda_timeout Help is not implemented yet. # TYPE aws_lambda_timeout gauge aws_lambda_timeout{account_alias="test-account",account_id="123456789012",dimension_FunctionName="test-function",name="arn:aws:lambda:us-east-1:123456789012:function:test-function",region="us-east-1",tag_Name="test-function"} 300 ` err = testutil.GatherAndCompare(registry, strings.NewReader(expectedMetric)) require.NoError(t, err) } func TestUpdateMetrics_WithEnhancedMetrics_ElastiCache(t *testing.T) { defer enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsElastiCacheService.NewElastiCacheService(nil), ) ctx := context.Background() logger := slog.New(slog.DiscardHandler) // Create a test AWS config testAWSConfig := &aws.Config{ Region: "us-east-1", } // Create mock clients mockAcctClient := &mockAccountClient{ accountID: "123456789012", accountAlias: "test-account", } mockCWClient := &mockCloudwatchClient{ metrics: []*model.Metric{}, metricDataResults: []cloudwatch.MetricDataResult{}, } mockTagClient := &mockTaggingClient{ resources: []*model.TaggedResource{ { ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster", Namespace: "AWS/ElastiCache", 
Region: "us-east-1", Tags: []model.Tag{ {Key: "Name", Value: "test-cluster"}, }, }, }, } // Create a mock ElastiCache client builder function for testing mockElastiCacheClientBuilder := func(_ aws.Config) enhancedmetricsElastiCacheService.Client { return &mockElastiCacheClient{ clusters: []elasticacheTypes.CacheCluster{ { ARN: aws.String("arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster"), CacheClusterId: aws.String("test-cluster"), NumCacheNodes: aws.Int32(3), }, }, } } // Register the ElastiCache service with the mock builder in the default registry enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsElastiCacheService.NewElastiCacheService(mockElastiCacheClientBuilder), ) factory := &mockFactoryForEnhancedMetrics{ accountClient: mockAcctClient, cloudwatchClient: mockCWClient, taggingClient: mockTagClient, awsConfig: testAWSConfig, } // Create a test job config with enhanced metrics jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{ { Regions: []string{"us-east-1"}, Namespace: "AWS/ElastiCache", Roles: []model.Role{{RoleArn: "arn:aws:iam::123456789012:role/test-role"}}, EnhancedMetrics: []*model.EnhancedMetricConfig{ { Name: "NumCacheNodes", }, }, ExportedTagsOnMetrics: []string{"Name"}, }, }, } registry := prometheus.NewRegistry() err := UpdateMetrics(ctx, logger, jobsCfg, registry, factory) require.NoError(t, err) metrics, err := registry.Gather() require.NoError(t, err) require.NotNil(t, metrics) require.Len(t, metrics, 2) expectedMetric := ` # HELP aws_elasticache_info Help is not implemented yet. # TYPE aws_elasticache_info gauge aws_elasticache_info{name="arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster",tag_Name="test-cluster"} 0 # HELP aws_elasticache_num_cache_nodes Help is not implemented yet. 
# TYPE aws_elasticache_num_cache_nodes gauge aws_elasticache_num_cache_nodes{account_alias="test-account",account_id="123456789012",dimension_CacheClusterId="test-cluster",name="arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster",region="us-east-1",tag_Name="test-cluster"} 3 ` err = testutil.GatherAndCompare(registry, strings.NewReader(expectedMetric)) require.NoError(t, err) } func TestUpdateMetrics_WithEnhancedMetrics_DynamoDB(t *testing.T) { defer enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( enhancedmetricsDynamoDBService.NewDynamoDBService(nil), ) ctx := context.Background() logger := slog.New(slog.DiscardHandler) // Create a test AWS config testAWSConfig := &aws.Config{ Region: "us-east-1", } // Create mock clients mockAcctClient := &mockAccountClient{ accountID: "123456789012", accountAlias: "test-account", } mockCWClient := &mockCloudwatchClient{ metrics: []*model.Metric{}, metricDataResults: []cloudwatch.MetricDataResult{}, } mockTagClient := &mockTaggingClient{ resources: []*model.TaggedResource{ { ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test-table", Namespace: "AWS/DynamoDB", Region: "us-east-1", Tags: []model.Tag{ {Key: "Name", Value: "test-table"}, }, }, }, } // Create a mock DynamoDB client builder function for testing mockDynamoDBClientBuilder := func(_ aws.Config) enhancedmetricsDynamoDBService.Client { return &mockDynamoDBClient{ tables: []dynamodbTypes.TableDescription{ { TableArn: aws.String("arn:aws:dynamodb:us-east-1:123456789012:table/test-table"), TableName: aws.String("test-table"), ItemCount: aws.Int64(1000), GlobalSecondaryIndexes: []dynamodbTypes.GlobalSecondaryIndexDescription{ { IndexName: aws.String("GSI1"), ItemCount: aws.Int64(500), }, { IndexName: aws.String("GSI2"), ItemCount: aws.Int64(300), }, }, }, }, } } // Register the DynamoDB service with the mock builder in the default registry enhancedmetrics.DefaultEnhancedMetricServiceRegistry.Register( 
enhancedmetricsDynamoDBService.NewDynamoDBService(mockDynamoDBClientBuilder), ) factory := &mockFactoryForEnhancedMetrics{ accountClient: mockAcctClient, cloudwatchClient: mockCWClient, taggingClient: mockTagClient, awsConfig: testAWSConfig, } // Create a test job config with enhanced metrics jobsCfg := model.JobsConfig{ DiscoveryJobs: []model.DiscoveryJob{ { Regions: []string{"us-east-1"}, Namespace: "AWS/DynamoDB", Roles: []model.Role{{RoleArn: "arn:aws:iam::123456789012:role/test-role"}}, EnhancedMetrics: []*model.EnhancedMetricConfig{ { Name: "ItemCount", }, }, ExportedTagsOnMetrics: []string{"Name"}, }, }, } registry := prometheus.NewRegistry() err := UpdateMetrics(ctx, logger, jobsCfg, registry, factory) require.NoError(t, err) metrics, err := registry.Gather() require.NoError(t, err) require.NotNil(t, metrics) require.Len(t, metrics, 2) expectedMetric := ` # HELP aws_dynamodb_info Help is not implemented yet. # TYPE aws_dynamodb_info gauge aws_dynamodb_info{name="arn:aws:dynamodb:us-east-1:123456789012:table/test-table",tag_Name="test-table"} 0 # HELP aws_dynamodb_item_count Help is not implemented yet. 
# TYPE aws_dynamodb_item_count gauge aws_dynamodb_item_count{account_alias="test-account",account_id="123456789012",dimension_GlobalSecondaryIndexName="",dimension_TableName="test-table",name="arn:aws:dynamodb:us-east-1:123456789012:table/test-table",region="us-east-1",tag_Name="test-table"} 1000 aws_dynamodb_item_count{account_alias="test-account",account_id="123456789012",dimension_GlobalSecondaryIndexName="GSI1",dimension_TableName="test-table",name="arn:aws:dynamodb:us-east-1:123456789012:table/test-table",region="us-east-1",tag_Name="test-table"} 500 aws_dynamodb_item_count{account_alias="test-account",account_id="123456789012",dimension_GlobalSecondaryIndexName="GSI2",dimension_TableName="test-table",name="arn:aws:dynamodb:us-east-1:123456789012:table/test-table",region="us-east-1",tag_Name="test-table"} 300 ` err = testutil.GatherAndCompare(registry, strings.NewReader(expectedMetric)) require.NoError(t, err) } ================================================ FILE: pkg/exporter_test.go ================================================ // Copyright 2026 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package exporter

import (
	"context"
	"log/slog"
	"strings"
	"testing"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/grafana/regexp"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/account"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/tagging"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// mockFactory implements the clients.Factory interface for testing.
// Each Get* method ignores region/role and hands back the canned client below,
// so the tests exercise UpdateMetrics end-to-end without touching AWS.
type mockFactory struct {
	cloudwatchClient mockCloudwatchClient
	taggingClient    mockTaggingClient
	accountClient    mockAccountClient
}

// GetCloudwatchClient returns a pointer so the mock's pre-seeded fields are shared.
func (f *mockFactory) GetCloudwatchClient(_ string, _ model.Role, _ cloudwatch.ConcurrencyConfig) cloudwatch.Client {
	return &f.cloudwatchClient
}

func (f *mockFactory) GetTaggingClient(_ string, _ model.Role, _ int) tagging.Client {
	return f.taggingClient
}

func (f *mockFactory) GetAccountClient(_ string, _ model.Role) account.Client {
	return f.accountClient
}

// mockAccountClient implements the account.Client interface.
// When err is set, both methods fail with it; otherwise they return the canned values.
type mockAccountClient struct {
	accountID    string
	accountAlias string
	err          error
}

func (m mockAccountClient) GetAccount(_ context.Context) (string, error) {
	if m.err != nil {
		return "", m.err
	}
	return m.accountID, nil
}

func (m mockAccountClient) GetAccountAlias(_ context.Context) (string, error) {
	if m.err != nil {
		return "", m.err
	}
	return m.accountAlias, nil
}

// mockTaggingClient implements the tagging.Client interface,
// returning a fixed set of tagged resources (or err).
type mockTaggingClient struct {
	resources []*model.TaggedResource
	err       error
}

func (m mockTaggingClient) GetResources(_ context.Context, _ model.DiscoveryJob, _ string) ([]*model.TaggedResource, error) {
	if m.err != nil {
		return nil, m.err
	}
	return m.resources, nil
}

// mockCloudwatchClient implements the cloudwatch.Client interface.
// metrics feeds ListMetrics (discovery path), metricDataResults feeds GetMetricData.
type mockCloudwatchClient struct {
	metrics           []*model.Metric
	metricDataResults []cloudwatch.MetricDataResult
	err               error
}

// ListMetrics delivers all canned metrics in a single page via fn.
func (m *mockCloudwatchClient) ListMetrics(_ context.Context, _ string, _ *model.MetricConfig, _ bool, fn func(page []*model.Metric)) error {
	if m.err != nil {
		return m.err
	}
	if len(m.metrics) > 0 {
		fn(m.metrics)
	}
	return nil
}

func (m *mockCloudwatchClient) GetMetricData(_ context.Context, _ []*model.CloudwatchData, _ string, _ time.Time, _ time.Time) []cloudwatch.MetricDataResult {
	return m.metricDataResults
}

// GetMetricStatistics always reports Average=42.0 at "now"; the static-job test
// below pins its expected output to this constant.
func (m *mockCloudwatchClient) GetMetricStatistics(_ context.Context, _ *slog.Logger, _ []model.Dimension, _ string, _ *model.MetricConfig) []*model.MetricStatisticsResult {
	// Return a simple metric statistics result for testing
	now := time.Now()
	avg := 42.0
	return []*model.MetricStatisticsResult{
		{
			Timestamp: &now,
			Average:   &avg,
		},
	}
}

// TestUpdateMetrics_StaticJob verifies that a static job produces one gauge per
// configured statistic, labeled with account, region, and dimensions.
func TestUpdateMetrics_StaticJob(t *testing.T) {
	ctx := context.Background()
	logger := promslog.NewNopLogger()

	// Create a simple static job configuration
	jobsCfg := model.JobsConfig{
		StaticJobs: []model.StaticJob{
			{
				Name:      "test-static-job",
				Regions:   []string{"us-east-1"},
				Roles:     []model.Role{{}},
				Namespace: "AWS/EC2",
				Dimensions: []model.Dimension{
					{Name: "InstanceId", Value: "i-1234567890abcdef0"},
				},
				Metrics: []*model.MetricConfig{
					{
						Name:       "CPUUtilization",
						Statistics: []string{"Average"},
						Period:     300,
						Length:     300,
					},
				},
			},
		},
	}

	factory := &mockFactory{
		accountClient: mockAccountClient{
			accountID:    "123456789012",
			accountAlias: "test-account",
		},
		cloudwatchClient: mockCloudwatchClient{},
	}

	registry := prometheus.NewRegistry()
	err := UpdateMetrics(ctx, logger, jobsCfg, registry, factory)
	require.NoError(t, err)

	// Verify the expected metric exists using testutil.
	// The 42 comes from the mock GetMetricStatistics above.
	expectedMetric := `
# HELP aws_ec2_cpuutilization_average Help is not implemented yet.
# TYPE aws_ec2_cpuutilization_average gauge
aws_ec2_cpuutilization_average{account_alias="test-account",account_id="123456789012",dimension_InstanceId="i-1234567890abcdef0",name="test-static-job",region="us-east-1"} 42
`
	err = testutil.GatherAndCompare(registry, strings.NewReader(expectedMetric))
	require.NoError(t, err, "Metric aws_ec2_cpuutilization_average should match expected output")
}

// TestUpdateMetrics_DiscoveryJob verifies the tag-discovery path: resources come
// from the tagging mock, metrics from ListMetrics/GetMetricData, and both the
// data gauge and the companion *_info tag gauge are registered.
func TestUpdateMetrics_DiscoveryJob(t *testing.T) {
	ctx := context.Background()
	logger := promslog.NewNopLogger()

	// Create a discovery job configuration
	svc := config.SupportedServices.GetService("AWS/EC2")
	jobsCfg := model.JobsConfig{
		DiscoveryJobs: []model.DiscoveryJob{
			{
				Namespace: "AWS/EC2",
				Regions:   []string{"us-east-1"},
				Roles:     []model.Role{{}},
				SearchTags: []model.SearchTag{
					{Key: "Environment", Value: regexp.MustCompile(".*")},
				},
				Metrics: []*model.MetricConfig{
					{
						Name:       "CPUUtilization",
						Statistics: []string{"Average"},
						Period:     300,
						Length:     300,
					},
				},
				DimensionsRegexps: svc.ToModelDimensionsRegexp(),
			},
		},
	}

	factory := &mockFactory{
		accountClient: mockAccountClient{
			accountID:    "123456789012",
			accountAlias: "test-account",
		},
		taggingClient: mockTaggingClient{
			resources: []*model.TaggedResource{
				{
					ARN:       "arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0",
					Namespace: "AWS/EC2",
					Region:    "us-east-1",
					Tags: []model.Tag{
						{Key: "Environment", Value: "production"},
						{Key: "Name", Value: "test-instance"},
					},
				},
			},
		},
		cloudwatchClient: mockCloudwatchClient{
			metrics: []*model.Metric{
				{
					MetricName: "CPUUtilization",
					Namespace:  "AWS/EC2",
					Dimensions: []model.Dimension{
						{Name: "InstanceId", Value: "i-1234567890abcdef0"},
					},
				},
			},
			// id_0 is the query ID assigned to the first (only) metric request.
			metricDataResults: []cloudwatch.MetricDataResult{
				{
					ID: "id_0",
					DataPoints: []cloudwatch.DataPoint{
						{Value: aws.Float64(42.5), Timestamp: time.Now()},
					},
				},
			},
		},
	}

	registry := prometheus.NewRegistry()
	err := UpdateMetrics(ctx, logger, jobsCfg, registry, factory)
	require.NoError(t, err)

	expectedMetric := `
# HELP aws_ec2_cpuutilization_average Help is not implemented yet.
# TYPE aws_ec2_cpuutilization_average gauge
aws_ec2_cpuutilization_average{account_alias="test-account",account_id="123456789012",dimension_InstanceId="i-1234567890abcdef0",name="arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0",region="us-east-1"} 42.5
# HELP aws_ec2_info Help is not implemented yet.
# TYPE aws_ec2_info gauge
aws_ec2_info{name="arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0",tag_Environment="production",tag_Name="test-instance"} 0
`
	err = testutil.GatherAndCompare(registry, strings.NewReader(expectedMetric))
	require.NoError(t, err)
}

================================================
FILE: pkg/internal/enhancedmetrics/config/provider.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
	"github.com/aws/aws-sdk-go-v2/aws"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// RegionalConfigProvider is an interface for providing AWS regional configurations based on region and role.
// Factory interface implementations should implement this interface in order to support enhanced metrics.
type RegionalConfigProvider interface {
	// GetAWSRegionalConfig returns the AWS configuration for a given region and role.
	// It will be used to create AWS service clients for enhanced metrics processing.
GetAWSRegionalConfig(region string, role model.Role) *aws.Config } ================================================ FILE: pkg/internal/enhancedmetrics/registry.go ================================================ // Copyright 2026 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package enhancedmetrics import ( "fmt" "sync" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/dynamodb" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/elasticache" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/lambda" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/rds" ) // DefaultEnhancedMetricServiceRegistry is the default registry containing all built-in enhanced metrics services // It allows registering additional services if needed, or replacing existing ones, e.g. for testing purposes. // // Note:In the future, it can be removed in favor of being injected via dependency injection. // However, it will require changes in the YACE's API. var DefaultEnhancedMetricServiceRegistry = (&Registry{}). Register(rds.NewRDSService(nil)). Register(lambda.NewLambdaService(nil)). Register(dynamodb.NewDynamoDBService(nil)). 
Register(elasticache.NewElastiCacheService(nil)) // MetricsService represents an enhanced metrics service with methods to get its instance and namespace. // Services implementing this interface can be registered in the Registry. type MetricsService interface { Instance() service.EnhancedMetricsService GetNamespace() string } // Registry maintains a mapping of enhanced metrics services by their namespaces. type Registry struct { m sync.RWMutex services map[string]func() service.EnhancedMetricsService } // Register adds a new enhanced metrics service to the registry or replaces an existing one with the same namespace. func (receiver *Registry) Register(t MetricsService) *Registry { receiver.m.Lock() defer receiver.m.Unlock() if receiver.services == nil { receiver.services = map[string]func() service.EnhancedMetricsService{} } receiver.services[t.GetNamespace()] = t.Instance return receiver } // GetEnhancedMetricsService retrieves an enhanced metrics service by its namespace. func (receiver *Registry) GetEnhancedMetricsService(namespace string) (service.EnhancedMetricsService, error) { receiver.m.RLock() defer receiver.m.RUnlock() if constructor, exists := receiver.services[namespace]; exists { return constructor(), nil } return nil, fmt.Errorf("enhanced metrics service for namespace %s not found", namespace) } ================================================ FILE: pkg/internal/enhancedmetrics/registry_test.go ================================================ // Copyright 2026 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

package enhancedmetrics

import (
	"sync"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service/rds"
)

// registryMockMetricsServiceWrapper wraps the mock service to implement MetricsService interface.
// instanceFunc is optional; when nil, Instance() returns nil.
type registryMockMetricsServiceWrapper struct {
	namespace    string
	instanceFunc func() service.EnhancedMetricsService
}

func (m *registryMockMetricsServiceWrapper) GetNamespace() string {
	return m.namespace
}

func (m *registryMockMetricsServiceWrapper) Instance() service.EnhancedMetricsService {
	if m.instanceFunc != nil {
		return m.instanceFunc()
	}
	return nil
}

// TestRegistry_Register covers single, multiple, duplicate, and nil-map registration.
func TestRegistry_Register(t *testing.T) {
	tests := []struct {
		name       string
		setup      func() *Registry
		services   []string
		assertions func(t *testing.T, registry *Registry)
	}{
		{
			name: "register single service",
			setup: func() *Registry {
				return &Registry{}
			},
			services: []string{"AWS/Test"},
			assertions: func(t *testing.T, registry *Registry) {
				assert.NotNil(t, registry.services)
				assert.Contains(t, registry.services, "AWS/Test")
				assert.Len(t, registry.services, 1)
			},
		},
		{
			name: "register multiple services",
			setup: func() *Registry {
				return &Registry{}
			},
			services: []string{"AWS/Test1", "AWS/Test2"},
			assertions: func(t *testing.T, registry *Registry) {
				assert.Len(t, registry.services, 2)
				assert.Contains(t, registry.services, "AWS/Test1")
				assert.Contains(t, registry.services, "AWS/Test2")
			},
		},
		{
			// Registering the same namespace twice must replace, not duplicate.
			name: "replace existing service",
			setup: func() *Registry {
				return &Registry{}
			},
			services: []string{"AWS/Test", "AWS/Test"},
			assertions: func(t *testing.T, registry *Registry) {
				assert.Len(t, registry.services, 1)
				_, err := registry.GetEnhancedMetricsService("AWS/Test")
				require.NoError(t, err)
			},
		},
		{
			// The zero-value Registry has a nil map; Register must initialize it.
			name: "register on nil services map",
			setup: func() *Registry {
				return &Registry{}
			},
			services: []string{"AWS/Test"},
			assertions: func(t *testing.T, registry *Registry) {
				assert.NotNil(t, registry.services)
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			registry := tt.setup()
			var result *Registry
			for _, ns := range tt.services {
				mockSvc := &registryMockMetricsServiceWrapper{
					namespace: ns,
				}
				result = registry.Register(mockSvc)
			}
			assert.NotNil(t, result)
			assert.Equal(t, registry, result, "Register should return the registry for chaining")
			tt.assertions(t, registry)
		})
	}
}

// TestRegistry_GetEnhancedMetricsService covers lookup hits, misses (with exact
// error text), and the guarantee that each lookup builds a fresh instance.
func TestRegistry_GetEnhancedMetricsService(t *testing.T) {
	tests := []struct {
		name        string
		setup       func() *Registry
		namespace   string
		expectError bool
		error       string
	}{
		{
			name: "get existing service",
			setup: func() *Registry {
				registry := &Registry{}
				registry.Register(rds.NewRDSService(nil))
				return registry
			},
			namespace:   "AWS/RDS",
			expectError: false,
		},
		{
			name: "get non-existent service",
			setup: func() *Registry {
				registry := &Registry{}
				registry.Register(rds.NewRDSService(nil))
				return registry
			},
			namespace:   "AWS/NonExistent",
			expectError: true,
			error:       "enhanced metrics service for namespace AWS/NonExistent not found",
		},
		{
			name: "get service from empty registry",
			setup: func() *Registry {
				return &Registry{}
			},
			namespace:   "AWS/Test",
			error:       "enhanced metrics service for namespace AWS/Test not found",
			expectError: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			registry := tt.setup()
			svc, err := registry.GetEnhancedMetricsService(tt.namespace)
			if tt.expectError {
				assert.Error(t, err)
				assert.Equal(t, err.Error(), tt.error)
			} else {
				require.NoError(t, err)
				assert.NotNil(t, svc)
			}
		})
	}
	t.Run("service instance is independent", func(t *testing.T) {
		registry := &Registry{}
		registry.Register(rds.NewRDSService(nil))
		svc1, err1 := registry.GetEnhancedMetricsService("AWS/RDS")
		svc2, err2 := registry.GetEnhancedMetricsService("AWS/RDS")
		require.NoError(t, err1)
		require.NoError(t, err2)
		assert.NotNil(t, svc1)
		assert.NotNil(t, svc2)
		// Each call to Instance() should return a new instance
		// This test verifies that the constructor function is being called
		// copy the pointer addresses to compare
		assert.NotSame(t, svc1, svc2, "Each call to GetEnhancedMetricsService should return a new instance")
	})
}

// TestRegistry_ConcurrentAccess exercises the registry's mutex under parallel
// Register and Get calls; run with -race to make this meaningful.
func TestRegistry_ConcurrentAccess(t *testing.T) {
	t.Run("concurrent registration", func(t *testing.T) {
		registry := &Registry{}
		var wg sync.WaitGroup
		// Register multiple services concurrently; each goroutine uses a
		// distinct namespace so all 10 must survive.
		for i := 0; i < 10; i++ {
			wg.Add(1)
			go func(idx int) {
				defer wg.Done()
				mockSvc := &registryMockMetricsServiceWrapper{
					namespace: "AWS/Test" + string(rune('0'+idx)),
				}
				registry.Register(mockSvc)
			}(i)
		}
		wg.Wait()
		assert.Len(t, registry.services, 10)
	})
	t.Run("concurrent read and write", func(t *testing.T) {
		registry := &Registry{}
		mockSvc := &registryMockMetricsServiceWrapper{
			namespace: "AWS/Test",
		}
		registry.Register(mockSvc)
		var wg sync.WaitGroup
		errors := make(chan error, 20)
		// Concurrent reads
		for i := 0; i < 10; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				_, err := registry.GetEnhancedMetricsService("AWS/Test")
				if err != nil {
					errors <- err
				}
			}()
		}
		// Concurrent writes
		for i := 0; i < 10; i++ {
			wg.Add(1)
			go func(idx int) {
				defer wg.Done()
				mockSvc := &registryMockMetricsServiceWrapper{
					namespace: "AWS/NewTest" + string(rune('0'+idx)),
				}
				registry.Register(mockSvc)
			}(i)
		}
		wg.Wait()
		close(errors)
		for err := range errors {
			assert.NoError(t, err)
		}
	})
}

// TestDefaultRegistry pins the set of built-in services wired into
// DefaultEnhancedMetricServiceRegistry.
func TestDefaultRegistry(t *testing.T) {
	tests := []struct {
		name        string
		namespace   string
		expectError bool
	}{
		{
			name:        "AWS/RDS is registered",
			namespace:   "AWS/RDS",
			expectError: false,
		},
		{
			name:        "AWS/Lambda is registered",
			namespace:   "AWS/Lambda",
			expectError: false,
		},
		{
			name:        "AWS/DynamoDB is registered",
			namespace:   "AWS/DynamoDB",
			expectError: false,
		},
		{
			name:        "AWS/ElastiCache is registered",
			namespace:   "AWS/ElastiCache",
			expectError: false,
		},
		{
			name:        "unknown namespace returns error",
			namespace:   "AWS/Unknown",
			expectError: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			svc, err := DefaultEnhancedMetricServiceRegistry.GetEnhancedMetricsService(tt.namespace)
			assert.Len(t, DefaultEnhancedMetricServiceRegistry.services, 4, "Expected 4 services to be registered in the default registry")
			if tt.expectError {
				assert.Error(t, err)
				assert.Nil(t, svc)
			} else {
				assert.NoError(t, err, "Expected namespace %s to be registered", tt.namespace)
				assert.NotNil(t, svc, "Expected service for namespace %s to be non-nil", tt.namespace)
			}
		})
	}
}

// TestRegistry_ChainedRegistration verifies the fluent Register(...).Register(...) style.
func TestRegistry_ChainedRegistration(t *testing.T) {
	t.Run("chained registration", func(t *testing.T) {
		registry := (&Registry{}).
			Register(&registryMockMetricsServiceWrapper{
				namespace: "AWS/Test1",
			}).
			Register(&registryMockMetricsServiceWrapper{
				namespace: "AWS/Test2",
			}).
			Register(&registryMockMetricsServiceWrapper{
				namespace: "AWS/Test3",
			})
		assert.Len(t, registry.services, 3)
		for i := 1; i <= 3; i++ {
			namespace := "AWS/Test" + string(rune('0'+i))
			_, err := registry.GetEnhancedMetricsService(namespace)
			require.NoError(t, err)
		}
	})
}

// TestRegistry_ServiceFactory verifies the stored constructor runs once per lookup.
func TestRegistry_ServiceFactory(t *testing.T) {
	t.Run("service factory is called on each get", func(t *testing.T) {
		registry := &Registry{}
		callCount := 0
		registry.services = map[string]func() service.EnhancedMetricsService{
			"AWS/Test": func() service.EnhancedMetricsService {
				callCount++
				return nil
			},
		}
		// Call multiple times
		for i := 0; i < 3; i++ {
			_, _ = registry.GetEnhancedMetricsService("AWS/Test")
		}
		assert.Equal(t, 3, callCount, "Factory should be called for each Get")
	})
}

================================================
FILE: pkg/internal/enhancedmetrics/service/dynamodb/client.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dynamodb

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/dynamodb"
	"github.com/aws/aws-sdk-go-v2/service/dynamodb/types"
)

// awsClient is the subset of the AWS SDK DynamoDB client this package uses;
// it exists as a seam for mocking in tests.
type awsClient interface {
	DescribeTable(ctx context.Context, params *dynamodb.DescribeTableInput, optFns ...func(*dynamodb.Options)) (*dynamodb.DescribeTableOutput, error)
}

// AWSDynamoDBClient wraps the AWS DynamoDB client
type AWSDynamoDBClient struct {
	client awsClient
}

// NewDynamoDBClientWithConfig creates a new DynamoDB client with custom AWS configuration
func NewDynamoDBClientWithConfig(cfg aws.Config) Client {
	return &AWSDynamoDBClient{
		client: dynamodb.NewFromConfig(cfg),
	}
}

// describeTable retrieves detailed information about a DynamoDB table
func (c *AWSDynamoDBClient) describeTable(ctx context.Context, tableARN string) (*types.TableDescription, error) {
	result, err := c.client.DescribeTable(ctx, &dynamodb.DescribeTableInput{
		// TableName can be either the table name or ARN
		TableName: aws.String(tableARN),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to describe table %s: %w", tableARN, err)
	}
	return result.Table, nil
}

// DescribeTables retrieves DynamoDB tables with their descriptions.
// Per-table failures are logged and skipped (best-effort); the method only
// returns the tables it could describe and never a non-nil error from a
// single table failure.
func (c *AWSDynamoDBClient) DescribeTables(ctx context.Context, logger *slog.Logger, tablesARNs []string) ([]types.TableDescription, error) {
	logger.Debug("Describing DynamoDB tables", "count", len(tablesARNs))
	var tables []types.TableDescription
	for _, arn := range tablesARNs {
		tableDesc, err := c.describeTable(ctx, arn)
		if err != nil {
			logger.Error("Failed to describe table", "error", err.Error(), "arn", arn)
			continue
		}
		tables = append(tables, *tableDesc)
	}
	logger.Debug("Describing DynamoDB tables completed", "total_tables", len(tables))
	return tables, nil
}

================================================
FILE: pkg/internal/enhancedmetrics/service/dynamodb/client_test.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dynamodb

import (
	"context"
	"fmt"
	"log/slog"
	"reflect"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/dynamodb"
	"github.com/aws/aws-sdk-go-v2/service/dynamodb/types"
)

// TestAWSDynamoDBClient_DescribeAllTables verifies DescribeTables both on the
// happy path and when individual DescribeTable calls fail (failed tables are
// skipped, no error is surfaced).
func TestAWSDynamoDBClient_DescribeAllTables(t *testing.T) {
	tests := []struct {
		name    string
		client  awsClient
		want    []types.TableDescription
		wantErr bool
		tables  []string
	}{
		{
			name:   "success - single page",
			tables: []string{"table-1"},
			client: &mockDynamoDBClient{
				describeTableFunc: func(_ context.Context, params *dynamodb.DescribeTableInput, _ ...func(*dynamodb.Options)) (*dynamodb.DescribeTableOutput, error) {
					if *params.TableName != "table-1" {
						return nil, fmt.Errorf("unexpected table name: %s", *params.TableName)
					}
					return &dynamodb.DescribeTableOutput{
						Table: &types.TableDescription{
							TableName: aws.String("table-1"),
						},
					}, nil
				},
			},
			want: []types.TableDescription{
				{TableName: aws.String("table-1")},
			},
			wantErr: false,
		},
		{
			// table-1 fails but table-2 succeeds; only table-2 is returned
			// and DescribeTables still reports no error.
			name:   "describeTable failure",
			tables: []string{"table-1", "table-2"},
			client: &mockDynamoDBClient{
				describeTableFunc: func(_ context.Context, params *dynamodb.DescribeTableInput, _ ...func(*dynamodb.Options)) (*dynamodb.DescribeTableOutput, error) {
					if *params.TableName == "table-1" {
						return nil, fmt.Errorf("describe error")
					}
					if *params.TableName != "table-2" {
						return nil, fmt.Errorf("unexpected table name: %s", *params.TableName)
					}
					return &dynamodb.DescribeTableOutput{
						Table: &types.TableDescription{
							TableName: params.TableName,
						},
					}, nil
				},
			},
			want: []types.TableDescription{
				{TableName: aws.String("table-2")},
			},
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			c := &AWSDynamoDBClient{
				client: tt.client,
			}
			got, err := c.DescribeTables(context.Background(), slog.New(slog.DiscardHandler), tt.tables)
			if (err != nil) != tt.wantErr {
				t.Errorf("DescribeTables() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("DescribeTables() got = %v, want %v", got, tt.want)
			}
		})
	}
}

// mockDynamoDBClient is a mock implementation of sdk AWS DynamoDB Client
type mockDynamoDBClient struct {
	describeTableFunc func(ctx context.Context, params *dynamodb.DescribeTableInput, optFns ...func(*dynamodb.Options)) (*dynamodb.DescribeTableOutput, error)
}

func (m *mockDynamoDBClient) DescribeTable(ctx context.Context, params *dynamodb.DescribeTableInput, optFns ...func(*dynamodb.Options)) (*dynamodb.DescribeTableOutput, error) {
	return m.describeTableFunc(ctx, params, optFns...)
}

================================================
FILE: pkg/internal/enhancedmetrics/service/dynamodb/service.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package dynamodb import ( "context" "fmt" "log/slog" "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) const awsDynamoDBNamespace = "AWS/DynamoDB" type Client interface { // DescribeTables retrieves DynamoDB tables with their descriptions. tables is a list of table ARNs or table names. 
DescribeTables(ctx context.Context, logger *slog.Logger, tables []string) ([]types.TableDescription, error) } type buildCloudwatchDataFunc func(*model.TaggedResource, *types.TableDescription, []string) ([]*model.CloudwatchData, error) type supportedMetric struct { name string buildCloudwatchDataFunc buildCloudwatchDataFunc requiredPermissions []string } func (sm *supportedMetric) buildCloudwatchData(resource *model.TaggedResource, table *types.TableDescription, metrics []string) ([]*model.CloudwatchData, error) { return sm.buildCloudwatchDataFunc(resource, table, metrics) } type DynamoDB struct { supportedMetrics map[string]supportedMetric buildClientFunc func(cfg aws.Config) Client } func NewDynamoDBService(buildClientFunc func(cfg aws.Config) Client) *DynamoDB { if buildClientFunc == nil { buildClientFunc = NewDynamoDBClientWithConfig } svc := &DynamoDB{ buildClientFunc: buildClientFunc, } // The count of items in the table, updated approximately every six hours; may not reflect recent changes. 
itemCountMetric := supportedMetric{ name: "ItemCount", buildCloudwatchDataFunc: buildItemCountMetric, requiredPermissions: []string{ "dynamodb:DescribeTable", }, } svc.supportedMetrics = map[string]supportedMetric{ itemCountMetric.name: itemCountMetric, } return svc } func (s *DynamoDB) GetNamespace() string { return awsDynamoDBNamespace } func (s *DynamoDB) loadMetricsMetadata( ctx context.Context, logger *slog.Logger, region string, role model.Role, configProvider config.RegionalConfigProvider, tablesARNs []string, ) (map[string]*types.TableDescription, error) { client := s.buildClientFunc(*configProvider.GetAWSRegionalConfig(region, role)) tables, err := client.DescribeTables(ctx, logger, tablesARNs) if err != nil { return nil, fmt.Errorf("error listing DynamoDB tables in region %s: %w", region, err) } regionalData := make(map[string]*types.TableDescription, len(tables)) for _, table := range tables { regionalData[*table.TableArn] = &table } return regionalData, nil } func (s *DynamoDB) IsMetricSupported(metricName string) bool { _, exists := s.supportedMetrics[metricName] return exists } func (s *DynamoDB) GetMetrics(ctx context.Context, logger *slog.Logger, resources []*model.TaggedResource, enhancedMetricConfigs []*model.EnhancedMetricConfig, exportedTagOnMetrics []string, region string, role model.Role, regionalConfigProvider config.RegionalConfigProvider) ([]*model.CloudwatchData, error) { if len(resources) == 0 || len(enhancedMetricConfigs) == 0 { return nil, nil } tablesARNs := make([]string, 0, len(resources)) for _, resource := range resources { tablesARNs = append(tablesARNs, resource.ARN) } data, err := s.loadMetricsMetadata( ctx, logger, region, role, regionalConfigProvider, tablesARNs, ) if err != nil { return nil, fmt.Errorf("error loading DynamoDB metrics metadata: %w", err) } var result []*model.CloudwatchData for _, resource := range resources { if resource.Namespace != s.GetNamespace() { logger.Warn("Resource namespace does not match DynamoDB 
namespace, skipping", "arn", resource.ARN, "namespace", resource.Namespace) continue } table, exists := data[resource.ARN] if !exists { logger.Warn("DynamoDB table not found in data", "arn", resource.ARN) continue } for _, enhancedMetric := range enhancedMetricConfigs { supportedMetric, ok := s.supportedMetrics[enhancedMetric.Name] if !ok { logger.Warn("Unsupported DynamoDB enhanced metric, skipping", "metric", enhancedMetric.Name) continue } em, err := supportedMetric.buildCloudwatchData(resource, table, exportedTagOnMetrics) if err != nil || em == nil { logger.Warn("Error building DynamoDB enhanced metric", "metric", enhancedMetric.Name, "error", err) continue } result = append(result, em...) } } return result, nil } func (s *DynamoDB) ListRequiredPermissions() map[string][]string { permissions := make(map[string][]string, len(s.supportedMetrics)) for _, metric := range s.supportedMetrics { permissions[metric.name] = metric.requiredPermissions } return permissions } func (s *DynamoDB) ListSupportedEnhancedMetrics() []string { var metrics []string for metric := range s.supportedMetrics { metrics = append(metrics, metric) } return metrics } func (s *DynamoDB) Instance() service.EnhancedMetricsService { // do not use NewDynamoDBService to avoid extra map allocation return &DynamoDB{ supportedMetrics: s.supportedMetrics, buildClientFunc: s.buildClientFunc, } } func buildItemCountMetric(resource *model.TaggedResource, table *types.TableDescription, exportedTags []string) ([]*model.CloudwatchData, error) { if table.ItemCount == nil { return nil, fmt.Errorf("ItemCount is nil for DynamoDB table %s", resource.ARN) } var dimensions []model.Dimension if table.TableName != nil { dimensions = []model.Dimension{ {Name: "TableName", Value: *table.TableName}, } } value := float64(*table.ItemCount) result := []*model.CloudwatchData{{ MetricName: "ItemCount", ResourceName: resource.ARN, Namespace: "AWS/DynamoDB", Dimensions: dimensions, Tags: resource.MetricTags(exportedTags), 
GetMetricDataResult: &model.GetMetricDataResult{ DataPoints: []model.DataPoint{ { Value: &value, Timestamp: time.Now(), }, }, }, }} if len(table.GlobalSecondaryIndexes) > 0 { for _, globalSecondaryIndex := range table.GlobalSecondaryIndexes { if globalSecondaryIndex.ItemCount == nil || globalSecondaryIndex.IndexName == nil { continue } var secondaryIndexesDimensions []model.Dimension globalSecondaryIndexesItemsCount := float64(*globalSecondaryIndex.ItemCount) if table.TableName != nil { secondaryIndexesDimensions = append(secondaryIndexesDimensions, model.Dimension{ Name: "TableName", Value: *table.TableName, }) } if globalSecondaryIndex.IndexName != nil { secondaryIndexesDimensions = append(secondaryIndexesDimensions, model.Dimension{ Name: "GlobalSecondaryIndexName", Value: *globalSecondaryIndex.IndexName, }) } result = append(result, &model.CloudwatchData{ MetricName: "ItemCount", ResourceName: resource.ARN, Namespace: "AWS/DynamoDB", Dimensions: secondaryIndexesDimensions, Tags: resource.MetricTags(exportedTags), GetMetricDataResult: &model.GetMetricDataResult{ DataPoints: []model.DataPoint{ { Value: &globalSecondaryIndexesItemsCount, Timestamp: time.Now(), }, }, }, }) } } return result, nil } ================================================ FILE: pkg/internal/enhancedmetrics/service/dynamodb/service_test.go ================================================ // Copyright 2026 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package dynamodb

import (
	"context"
	"fmt"
	"log/slog"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/dynamodb/types"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// TestNewDynamoDBService verifies the constructor registers the expected
// supported metric whether or not a custom client factory is supplied.
func TestNewDynamoDBService(t *testing.T) {
	tests := []struct {
		name            string
		buildClientFunc func(cfg aws.Config) Client
	}{
		{
			name:            "with nil buildClientFunc",
			buildClientFunc: nil,
		},
		{
			name:            "with custom buildClientFunc",
			buildClientFunc: func(_ aws.Config) Client { return nil },
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := NewDynamoDBService(tt.buildClientFunc)
			require.NotNil(t, got)
			require.Len(t, got.supportedMetrics, 1)
			require.NotNil(t, got.supportedMetrics["ItemCount"])
		})
	}
}

func TestDynamoDB_GetNamespace(t *testing.T) {
	service := NewDynamoDBService(nil)
	expectedNamespace := awsDynamoDBNamespace
	require.Equal(t, expectedNamespace, service.GetNamespace())
}

func TestDynamoDB_ListRequiredPermissions(t *testing.T) {
	service := NewDynamoDBService(nil)
	expectedPermissions := map[string][]string{
		"ItemCount": {
			"dynamodb:DescribeTable",
		},
	}
	require.Equal(t, expectedPermissions, service.ListRequiredPermissions())
}

func TestDynamoDB_ListSupportedEnhancedMetrics(t *testing.T) {
	service := NewDynamoDBService(nil)
	expectedMetrics := []string{
		"ItemCount",
	}
	require.Equal(t, expectedMetrics, service.ListSupportedEnhancedMetrics())
}

// TestDynamoDB_GetMetrics exercises the end-to-end collection flow against a
// mocked DynamoDB client: input validation, namespace filtering, metadata
// load failure, GSI expansion, and unknown-metric/unknown-resource skips.
func TestDynamoDB_GetMetrics(t *testing.T) {
	defaultTables := []types.TableDescription{
		{
			TableArn:  aws.String("arn:aws:dynamodb:us-east-1:123456789012:table/test-table"),
			TableName: aws.String("test-table"),
			ItemCount: aws.Int64(1000),
		},
	}
	tests := []struct {
		name                 string
		resources            []*model.TaggedResource
		enhancedMetrics      []*model.EnhancedMetricConfig
		exportedTagOnMetrics []string
		tables               []types.TableDescription
		describeErr          bool // make the mock client fail DescribeTables
		wantErr              bool
		wantResultCount      int
	}{
		{
			name:            "empty resources",
			resources:       []*model.TaggedResource{},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			tables:          defaultTables,
			wantErr:         false,
			wantResultCount: 0,
		},
		{
			name:            "empty enhanced metrics",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{},
			tables:          defaultTables,
			wantErr:         false,
			wantResultCount: 0,
		},
		{
			name:            "wrong namespace",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test", Namespace: awsDynamoDBNamespace}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			tables:          defaultTables,
			wantErr:         false,
			wantResultCount: 0,
		},
		{
			name:            "metadata not loaded",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			describeErr:     true,
			wantErr:         true,
			wantResultCount: 0,
		},
		{
			name: "successfully received metric",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test-table", Namespace: awsDynamoDBNamespace},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			tables:          defaultTables,
			wantErr:         false,
			wantResultCount: 1,
		},
		{
			name: "successfully received metric with global secondary indexes",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test-table-with-gsi", Namespace: awsDynamoDBNamespace},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			tables: []types.TableDescription{
				{
					TableArn:  aws.String("arn:aws:dynamodb:us-east-1:123456789012:table/test-table-with-gsi"),
					TableName: aws.String("test-table-with-gsi"),
					ItemCount: aws.Int64(1000),
					GlobalSecondaryIndexes: []types.GlobalSecondaryIndexDescription{
						{
							IndexName: aws.String("test-gsi-1"),
							ItemCount: aws.Int64(500),
						},
						{
							IndexName: aws.String("test-gsi-2"),
							ItemCount: aws.Int64(300),
						},
					},
				},
			},
			wantErr:         false,
			wantResultCount: 3, // 1 for table + 2 for GSIs
		},
		{
			name: "resource not found in metadata",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/non-existent"},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			tables:          defaultTables,
			wantErr:         false,
			wantResultCount: 0,
		},
		{
			name: "unsupported metric",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test-table"},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "UnsupportedMetric"}},
			tables:          defaultTables,
			wantErr:         false,
			wantResultCount: 0,
		},
		{
			name: "multiple resources and metrics",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test-table-1", Namespace: awsDynamoDBNamespace},
				{ARN: "arn:aws:dynamodb:us-east-1:123456789012:table/test-table-2", Namespace: awsDynamoDBNamespace},
			},
			enhancedMetrics:      []*model.EnhancedMetricConfig{{Name: "ItemCount"}},
			exportedTagOnMetrics: []string{"Name"},
			tables: []types.TableDescription{
				{
					TableArn:  aws.String("arn:aws:dynamodb:us-east-1:123456789012:table/test-table-1"),
					TableName: aws.String("test-table-1"),
					ItemCount: aws.Int64(1000),
				},
				{
					TableArn:  aws.String("arn:aws:dynamodb:us-east-1:123456789012:table/test-table-2"),
					TableName: aws.String("test-table-2"),
					ItemCount: aws.Int64(2000),
				},
			},
			wantErr:         false,
			wantResultCount: 2,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			logger := slog.New(slog.DiscardHandler)
			mockClient := &mockServiceDynamoDBClient{
				tables:      tt.tables,
				describeErr: tt.describeErr,
			}
			service := NewDynamoDBService(func(_ aws.Config) Client { return mockClient })
			mockConfig := &mockConfigProvider{
				c: &aws.Config{Region: "us-east-1"},
			}
			result, err := service.GetMetrics(ctx, logger, tt.resources, tt.enhancedMetrics, tt.exportedTagOnMetrics, "us-east-1", model.Role{}, mockConfig)
			if tt.wantErr {
				require.Error(t, err)
			} else {
				require.NoError(t, err)
			}
			require.Len(t, result, tt.wantResultCount)
			if tt.wantResultCount > 0 {
				for _, metric := range result {
					require.NotNil(t, metric)
					require.Equal(t, awsDynamoDBNamespace, metric.Namespace)
					require.NotEmpty(t, metric.Dimensions)
					require.NotNil(t, metric.GetMetricDataResult)
					require.Nil(t, metric.GetMetricStatisticsResult)
				}
			}
		})
	}
}

// mockServiceDynamoDBClient implements Client with canned table descriptions
// or a forced error.
type mockServiceDynamoDBClient struct {
	tables      []types.TableDescription
	describeErr bool
}

func (m *mockServiceDynamoDBClient) DescribeTables(context.Context, *slog.Logger, []string) ([]types.TableDescription, error) {
	if m.describeErr {
		return nil, fmt.Errorf("mock describe error")
	}
	return m.tables, nil
}

// mockConfigProvider returns a fixed aws.Config regardless of region/role.
type mockConfigProvider struct {
	c *aws.Config
}

func (m *mockConfigProvider) GetAWSRegionalConfig(_ string, _ model.Role) *aws.Config {
	return m.c
}

================================================
FILE: pkg/internal/enhancedmetrics/service/elasticache/client.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package elasticache

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/elasticache"
	"github.com/aws/aws-sdk-go-v2/service/elasticache/types"
)

// awsClient is the narrow subset of the AWS SDK ElastiCache API used by this
// package; declared locally so tests can substitute a mock.
type awsClient interface {
	DescribeCacheClusters(ctx context.Context, params *elasticache.DescribeCacheClustersInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error)
}

// AWSElastiCacheClient wraps the AWS ElastiCache client
type AWSElastiCacheClient struct {
	client awsClient
}

// NewElastiCacheClientWithConfig creates a new ElastiCache client with custom AWS configuration
func NewElastiCacheClientWithConfig(cfg aws.Config) Client {
	return &AWSElastiCacheClient{
		client: elasticache.NewFromConfig(cfg),
	}
}

// describeCacheClusters retrieves information about cache clusters
// (single page; wraps the SDK error with context).
func (c *AWSElastiCacheClient) describeCacheClusters(ctx context.Context, input *elasticache.DescribeCacheClustersInput) (*elasticache.DescribeCacheClustersOutput, error) {
	result, err := c.client.DescribeCacheClusters(ctx, input)
	if err != nil {
		return nil, fmt.Errorf("failed to describe cache clusters: %w", err)
	}
	return result, nil
}

// DescribeAllCacheClusters retrieves all cache clusters with pagination support
func (c *AWSElastiCacheClient) DescribeAllCacheClusters(ctx context.Context, logger *slog.Logger) ([]types.CacheCluster, error) {
	logger.Debug("Describing all ElastiCache cache clusters")
	var allClusters []types.CacheCluster
	var marker *string
	// Page size; per-node detail is requested so callers can see node counts.
	var maxRecords int32 = 100
	showNodeInfo := true
	for {
		output, err := c.describeCacheClusters(ctx, &elasticache.DescribeCacheClustersInput{
			MaxRecords:        &maxRecords,
			Marker:            marker,
			ShowCacheNodeInfo: &showNodeInfo,
		})
		if err != nil {
			return nil, err
		}
		allClusters = append(allClusters, output.CacheClusters...)
		// A nil Marker in the response means this was the last page.
		if output.Marker == nil {
			break
		}
		marker = output.Marker
	}
	logger.Debug("Completed describing ElastiCache cache clusters", slog.Int("totalClusters", len(allClusters)))
	return allClusters, nil
}

================================================
FILE: pkg/internal/enhancedmetrics/service/elasticache/client_test.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package elasticache

import (
	"context"
	"fmt"
	"log/slog"
	"reflect"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/elasticache"
	"github.com/aws/aws-sdk-go-v2/service/elasticache/types"
)

// TestAWSElastiCacheClient_DescribeAllCacheClusters checks single-page,
// multi-page (Marker-driven), and error paths of the pagination loop.
func TestAWSElastiCacheClient_DescribeAllCacheClusters(t *testing.T) {
	tests := []struct {
		name    string
		client  awsClient
		want    []types.CacheCluster
		wantErr bool
	}{
		{
			name: "success - single page",
			client: &mockElastiCacheClient{
				describeCacheClustersFunc: func(_ context.Context, _ *elasticache.DescribeCacheClustersInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error) {
					return &elasticache.DescribeCacheClustersOutput{
						CacheClusters: []types.CacheCluster{
							{CacheClusterId: aws.String("cluster-1")},
						},
						Marker: nil,
					}, nil
				},
			},
			want: []types.CacheCluster{
				{CacheClusterId: aws.String("cluster-1")},
			},
			wantErr: false,
		},
		{
			name: "success - multiple pages",
			client: &mockElastiCacheClient{
				// Closure over callCount returns a marker on the first call
				// and the final page on the second.
				describeCacheClustersFunc: func() func(_ context.Context, _
*elasticache.DescribeCacheClustersInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error) {
					callCount := 0
					return func(_ context.Context, _ *elasticache.DescribeCacheClustersInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error) {
						callCount++
						if callCount == 1 {
							return &elasticache.DescribeCacheClustersOutput{
								CacheClusters: []types.CacheCluster{
									{CacheClusterId: aws.String("cluster-1")},
								},
								Marker: aws.String("marker1"),
							}, nil
						}
						return &elasticache.DescribeCacheClustersOutput{
							CacheClusters: []types.CacheCluster{
								{CacheClusterId: aws.String("cluster-2")},
							},
							Marker: nil,
						}, nil
					}
				}(),
			},
			want: []types.CacheCluster{
				{CacheClusterId: aws.String("cluster-1")},
				{CacheClusterId: aws.String("cluster-2")},
			},
			wantErr: false,
		},
		{
			name: "error - API failure",
			client: &mockElastiCacheClient{
				describeCacheClustersFunc: func(_ context.Context, _ *elasticache.DescribeCacheClustersInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error) {
					return nil, fmt.Errorf("API error")
				},
			},
			want:    nil,
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			c := &AWSElastiCacheClient{
				client: tt.client,
			}
			got, err := c.DescribeAllCacheClusters(context.Background(), slog.New(slog.DiscardHandler))
			if (err != nil) != tt.wantErr {
				t.Errorf("DescribeAllCacheClusters() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("DescribeAllCacheClusters() got = %v, want %v", got, tt.want)
			}
		})
	}
}

// mockElastiCacheClient is a mock implementation of AWS ElastiCache Client
type mockElastiCacheClient struct {
	describeCacheClustersFunc func(ctx context.Context, params *elasticache.DescribeCacheClustersInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error)
}

func (m *mockElastiCacheClient) DescribeCacheClusters(ctx context.Context, params *elasticache.DescribeCacheClustersInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error) {
	return m.describeCacheClustersFunc(ctx, params, optFns...)
}

================================================
FILE: pkg/internal/enhancedmetrics/service/elasticache/service.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package elasticache

import (
	"context"
	"fmt"
	"log/slog"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/elasticache/types"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

const awsElastiCacheNamespace = "AWS/ElastiCache"

// Client is the minimal ElastiCache API surface the service needs; satisfied
// by AWSElastiCacheClient and by test mocks.
type Client interface {
	DescribeAllCacheClusters(ctx context.Context, logger *slog.Logger) ([]types.CacheCluster, error)
}

// buildCloudwatchDataFunc builds one CloudwatchData point from a tagged
// resource, its cluster description, and the tags to export on metrics.
type buildCloudwatchDataFunc func(*model.TaggedResource, *types.CacheCluster, []string) (*model.CloudwatchData, error)

// supportedMetric ties a metric name to its builder and the IAM permissions
// required to collect it.
type supportedMetric struct {
	name                    string
	buildCloudwatchDataFunc buildCloudwatchDataFunc
	requiredPermissions     []string
}

func (sm *supportedMetric) buildCloudwatchData(resource *model.TaggedResource, elasticacheCluster *types.CacheCluster, metrics []string) (*model.CloudwatchData, error) {
	return sm.buildCloudwatchDataFunc(resource, elasticacheCluster, metrics)
}
type ElastiCache struct { supportedMetrics map[string]supportedMetric buildClientFunc func(cfg aws.Config) Client } func NewElastiCacheService(buildClientFunc func(cfg aws.Config) Client) *ElastiCache { if buildClientFunc == nil { buildClientFunc = NewElastiCacheClientWithConfig } svc := &ElastiCache{ buildClientFunc: buildClientFunc, } // The count of cache nodes in the cluster; must be 1 for Valkey or Redis OSS clusters, or between 1 and 40 for Memcached clusters. numCacheNodesMetric := supportedMetric{ name: "NumCacheNodes", buildCloudwatchDataFunc: buildNumCacheNodesMetric, requiredPermissions: []string{"elasticache:DescribeCacheClusters"}, } svc.supportedMetrics = map[string]supportedMetric{ numCacheNodesMetric.name: numCacheNodesMetric, } return svc } func (s *ElastiCache) GetNamespace() string { return awsElastiCacheNamespace } func (s *ElastiCache) loadMetricsMetadata(ctx context.Context, logger *slog.Logger, region string, role model.Role, configProvider config.RegionalConfigProvider) (map[string]*types.CacheCluster, error) { client := s.buildClientFunc(*configProvider.GetAWSRegionalConfig(region, role)) instances, err := client.DescribeAllCacheClusters(ctx, logger) if err != nil { return nil, fmt.Errorf("error listing cache clusters in region %s: %w", region, err) } regionalData := make(map[string]*types.CacheCluster, len(instances)) for _, instance := range instances { regionalData[*instance.ARN] = &instance } return regionalData, nil } func (s *ElastiCache) IsMetricSupported(metricName string) bool { _, exists := s.supportedMetrics[metricName] return exists } func (s *ElastiCache) GetMetrics(ctx context.Context, logger *slog.Logger, resources []*model.TaggedResource, enhancedMetricConfigs []*model.EnhancedMetricConfig, exportedTagOnMetrics []string, region string, role model.Role, regionalConfigProvider config.RegionalConfigProvider) ([]*model.CloudwatchData, error) { if len(resources) == 0 || len(enhancedMetricConfigs) == 0 { return nil, nil } data, 
err := s.loadMetricsMetadata( ctx, logger, region, role, regionalConfigProvider, ) if err != nil { return nil, fmt.Errorf("couldn't load elasticache metrics metadata: %w", err) } var result []*model.CloudwatchData for _, resource := range resources { if resource.Namespace != s.GetNamespace() { logger.Warn("Resource namespace does not match elasticache namespace, skipping", "arn", resource.ARN, "namespace", resource.Namespace) continue } elastiCacheCluster, exists := data[resource.ARN] if !exists { logger.Warn("ElastiCache cluster not found in data", "arn", resource.ARN) continue } for _, enhancedMetric := range enhancedMetricConfigs { supportedMetric, ok := s.supportedMetrics[enhancedMetric.Name] if !ok { logger.Warn("Unsupported elasticache enhanced metric requested", "metric", enhancedMetric.Name) continue } em, err := supportedMetric.buildCloudwatchData(resource, elastiCacheCluster, exportedTagOnMetrics) if err != nil || em == nil { logger.Warn("Error building elasticache enhanced metric", "metric", enhancedMetric.Name, "error", err) continue } result = append(result, em) } } return result, nil } func (s *ElastiCache) ListRequiredPermissions() map[string][]string { requiredPermissions := make(map[string][]string, len(s.supportedMetrics)) for metricName, metric := range s.supportedMetrics { requiredPermissions[metricName] = metric.requiredPermissions } return requiredPermissions } func (s *ElastiCache) ListSupportedEnhancedMetrics() []string { var metrics []string for metric := range s.supportedMetrics { metrics = append(metrics, metric) } return metrics } func (s *ElastiCache) Instance() service.EnhancedMetricsService { // do not use NewElastiCacheService to avoid extra map allocation return &ElastiCache{ supportedMetrics: s.supportedMetrics, buildClientFunc: s.buildClientFunc, } } func buildNumCacheNodesMetric(resource *model.TaggedResource, cacheCluster *types.CacheCluster, exportedTags []string) (*model.CloudwatchData, error) { if cacheCluster.NumCacheNodes 
== nil { return nil, fmt.Errorf("NumCacheNodes is nil for ElastiCache cluster %s", resource.ARN) } var dimensions []model.Dimension if cacheCluster.CacheClusterId != nil { dimensions = []model.Dimension{ {Name: "CacheClusterId", Value: *cacheCluster.CacheClusterId}, } } if cacheCluster.ReplicationGroupId != nil { dimensions = append(dimensions, model.Dimension{ Name: "ReplicationGroupId", Value: *cacheCluster.ReplicationGroupId, }) } value := float64(*cacheCluster.NumCacheNodes) return &model.CloudwatchData{ MetricName: "NumCacheNodes", ResourceName: resource.ARN, Namespace: "AWS/ElastiCache", Dimensions: dimensions, Tags: resource.MetricTags(exportedTags), GetMetricDataResult: &model.GetMetricDataResult{ DataPoints: []model.DataPoint{ { Value: &value, Timestamp: time.Now(), }, }, }, }, nil } ================================================ FILE: pkg/internal/enhancedmetrics/service/elasticache/service_test.go ================================================ // Copyright 2026 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package elasticache

import (
	"context"
	"fmt"
	"log/slog"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/elasticache/types"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// TestNewElastiCacheService verifies the constructor registers the expected
// supported metric whether or not a custom client factory is supplied.
func TestNewElastiCacheService(t *testing.T) {
	tests := []struct {
		name            string
		buildClientFunc func(cfg aws.Config) Client
	}{
		{
			name:            "with nil buildClientFunc",
			buildClientFunc: nil,
		},
		{
			name:            "with custom buildClientFunc",
			buildClientFunc: func(_ aws.Config) Client { return nil },
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := NewElastiCacheService(tt.buildClientFunc)
			require.NotNil(t, got)
			require.Len(t, got.supportedMetrics, 1)
			require.NotNil(t, got.supportedMetrics["NumCacheNodes"])
		})
	}
}

func TestElastiCache_GetNamespace(t *testing.T) {
	service := NewElastiCacheService(nil)
	expectedNamespace := awsElastiCacheNamespace
	require.Equal(t, expectedNamespace, service.GetNamespace())
}

func TestElastiCache_ListRequiredPermissions(t *testing.T) {
	service := NewElastiCacheService(nil)
	expectedPermissions := map[string][]string{
		"NumCacheNodes": {"elasticache:DescribeCacheClusters"},
	}
	require.Equal(t, expectedPermissions, service.ListRequiredPermissions())
}

func TestElastiCache_ListSupportedEnhancedMetrics(t *testing.T) {
	service := NewElastiCacheService(nil)
	expectedMetrics := []string{
		"NumCacheNodes",
	}
	require.Equal(t, expectedMetrics, service.ListSupportedEnhancedMetrics())
}

// TestElastiCache_GetMetrics exercises the end-to-end collection flow against
// a mocked ElastiCache client: input validation, namespace filtering,
// metadata load failure, and unknown-metric/unknown-resource skips.
func TestElastiCache_GetMetrics(t *testing.T) {
	// Common test data
	testCluster := types.CacheCluster{
		ARN:            aws.String("arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster"),
		CacheClusterId: aws.String("test-cluster"),
		NumCacheNodes:  aws.Int32(2),
	}
	tests := []struct {
		name            string
		resources       []*model.TaggedResource
		enhancedMetrics []*model.EnhancedMetricConfig
		clusters        []types.CacheCluster
		describeErr     bool // make the mock client fail DescribeAllCacheClusters
		wantErr         bool
		wantResultCount int
	}{
		{
			name:            "empty resources",
			resources:       []*model.TaggedResource{},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "NumCacheNodes"}},
			clusters:        []types.CacheCluster{testCluster},
			wantResultCount: 0,
		},
		{
			name:            "empty enhanced metrics",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{},
			clusters:        []types.CacheCluster{testCluster},
			wantResultCount: 0,
		},
		{
			name:            "wrong namespace",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "NumCacheNodes"}},
			wantErr:         false,
		},
		{
			name:            "describe error",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "NumCacheNodes"}},
			describeErr:     true,
			wantErr:         true,
		},
		{
			name:            "successfully received metric",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster", Namespace: awsElastiCacheNamespace}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "NumCacheNodes"}},
			clusters:        []types.CacheCluster{testCluster},
			wantResultCount: 1,
		},
		{
			name:            "resource not found in metadata",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:non-existent"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "NumCacheNodes"}},
			clusters:        []types.CacheCluster{testCluster},
			wantResultCount: 0,
		},
		{
			name:            "unsupported metric",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "UnsupportedMetric"}},
			clusters:        []types.CacheCluster{testCluster},
			wantResultCount: 0,
		},
		{
			name: "multiple resources and metrics",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster-1", Namespace: awsElastiCacheNamespace},
				{ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster-2", Namespace: awsElastiCacheNamespace},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "NumCacheNodes"}},
			clusters: []types.CacheCluster{
				{
					ARN:            aws.String("arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster-1"),
					CacheClusterId: aws.String("test-cluster-1"),
					NumCacheNodes:  aws.Int32(1),
				},
				{
					ARN:            aws.String("arn:aws:elasticache:us-east-1:123456789012:cluster:test-cluster-2"),
					CacheClusterId: aws.String("test-cluster-2"),
					NumCacheNodes:  aws.Int32(3),
				},
			},
			wantResultCount: 2,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			logger := slog.New(slog.DiscardHandler)
			mockClient := &mockServiceElastiCacheClient{
				clusters:    tt.clusters,
				describeErr: tt.describeErr,
			}
			service := NewElastiCacheService(func(_ aws.Config) Client { return mockClient })
			mockConfig := &mockConfigProvider{
				c: &aws.Config{Region: "us-east-1"},
			}
			result, err := service.GetMetrics(ctx, logger, tt.resources, tt.enhancedMetrics, nil, "us-east-1", model.Role{}, mockConfig)
			if tt.wantErr {
				require.Error(t, err)
			} else {
				require.NoError(t, err)
			}
			require.Len(t, result, tt.wantResultCount)
			if tt.wantResultCount > 0 {
				for _, metric := range result {
					require.NotNil(t, metric)
					require.Equal(t, awsElastiCacheNamespace, metric.Namespace)
					require.NotEmpty(t, metric.Dimensions)
					require.NotNil(t, metric.GetMetricDataResult)
					require.Empty(t, metric.GetMetricDataResult.Statistic)
					require.Nil(t, metric.GetMetricStatisticsResult)
				}
			}
		})
	}
}

// mockServiceElastiCacheClient implements Client with canned cluster
// descriptions or a forced error.
type mockServiceElastiCacheClient struct {
	clusters    []types.CacheCluster
	describeErr bool
}

func (m *mockServiceElastiCacheClient) DescribeAllCacheClusters(_ context.Context, _ *slog.Logger) ([]types.CacheCluster, error) {
	if m.describeErr {
		return nil, fmt.Errorf("mock describe error")
	}
	return m.clusters, nil
}

// mockConfigProvider returns a fixed aws.Config regardless of region/role.
type mockConfigProvider struct {
	c *aws.Config
}

func (m *mockConfigProvider) GetAWSRegionalConfig(_ string, _ model.Role) *aws.Config {
	return m.c
}

================================================
FILE: pkg/internal/enhancedmetrics/service/lambda/client.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package lambda

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/lambda"
	"github.com/aws/aws-sdk-go-v2/service/lambda/types"
)

// awsClient is the narrow subset of the AWS SDK Lambda API used by this
// package; declared locally so tests can substitute a mock.
type awsClient interface {
	ListFunctions(ctx context.Context, params *lambda.ListFunctionsInput, optFns ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error)
}

// AWSLambdaClient wraps the AWS Lambda client
type AWSLambdaClient struct {
	client awsClient
}

// NewLambdaClientWithConfig creates a new Lambda client with custom AWS configuration
func NewLambdaClientWithConfig(cfg aws.Config) Client {
	return &AWSLambdaClient{
		client: lambda.NewFromConfig(cfg),
	}
}

// listFunctions retrieves a single page of Lambda functions, wrapping the
// SDK error with context.
// NOTE(review): the error message below says "regionalData" — this looks
// like a stray rename of "functions"; confirm before changing the runtime
// string.
func (c *AWSLambdaClient) listFunctions(ctx context.Context, input *lambda.ListFunctionsInput) (*lambda.ListFunctionsOutput, error) {
	result, err := c.client.ListFunctions(ctx, input)
	if err != nil {
		return nil, fmt.Errorf("failed to list Lambda regionalData: %w", err)
	}
	return result, nil
}

// ListAllFunctions retrieves all Lambda functions by handling pagination
func (c *AWSLambdaClient) ListAllFunctions(ctx context.Context, logger *slog.Logger) ([]types.FunctionConfiguration, error) {
	logger.Debug("Listing all Lambda functions")
	var allFunctions []types.FunctionConfiguration
	var marker *string
	var maxItems int32 = 50 // page size
	for {
		output, err := c.listFunctions(ctx, &lambda.ListFunctionsInput{
			Marker:   marker,
			MaxItems: &maxItems,
		})
		if err != nil {
			return nil, err
		}
		allFunctions = append(allFunctions, output.Functions...)
		// A nil NextMarker in the response means this was the last page.
		if output.NextMarker == nil {
			break
		}
		marker = output.NextMarker
	}
	logger.Debug("Completed listing all Lambda functions", slog.Int("totalFunctions", len(allFunctions)))
	return allFunctions, nil
}

================================================
FILE: pkg/internal/enhancedmetrics/service/lambda/client_test.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lambda

import (
	"context"
	"fmt"
	"log/slog"
	"reflect"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/lambda"
	"github.com/aws/aws-sdk-go-v2/service/lambda/types"
)

// TestAWSLambdaClient_ListAllFunctions covers single-page retrieval,
// multi-page pagination via NextMarker, and API-error propagation.
func TestAWSLambdaClient_ListAllFunctions(t *testing.T) {
	tests := []struct {
		name    string
		client  awsClient
		want    []types.FunctionConfiguration
		wantErr bool
	}{
		{
			name: "success - single page",
			client: &mockLambdaClient{
				listFunctionsFunc: func(_ context.Context, _ *lambda.ListFunctionsInput, _ ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error) {
					return &lambda.ListFunctionsOutput{
						Functions: []types.FunctionConfiguration{
							{FunctionName: aws.String("function-1")},
						},
						NextMarker: nil,
					}, nil
				},
			},
			want: []types.FunctionConfiguration{
				{FunctionName: aws.String("function-1")},
			},
			wantErr: false,
		},
		{
			name: "success - multiple pages",
			client: &mockLambdaClient{
				// Immediately-invoked closure: callCount is captured so the
				// mock returns a different page on each invocation.
				listFunctionsFunc: func() func(_ context.Context, _ *lambda.ListFunctionsInput, _ ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error) {
					callCount := 0
					return func(_ context.Context, _ *lambda.ListFunctionsInput, _ ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error) {
						callCount++
						if callCount == 1 {
							return &lambda.ListFunctionsOutput{
								Functions: []types.FunctionConfiguration{
									{FunctionName: aws.String("function-1")},
								},
								NextMarker: aws.String("marker1"),
							}, nil
						}
						return &lambda.ListFunctionsOutput{
							Functions: []types.FunctionConfiguration{
								{FunctionName: aws.String("function-2")},
							},
							NextMarker: nil,
						}, nil
					}
				}(),
			},
			want: []types.FunctionConfiguration{
				{FunctionName: aws.String("function-1")},
				{FunctionName: aws.String("function-2")},
			},
			wantErr: false,
		},
		{
			name: "error - API failure",
			client: &mockLambdaClient{
				listFunctionsFunc: func(_ context.Context, _ *lambda.ListFunctionsInput, _ ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error) {
					return nil, fmt.Errorf("API error")
				},
			},
			want:    nil,
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			c := &AWSLambdaClient{
				client: tt.client,
			}
			got, err := c.ListAllFunctions(context.Background(), slog.New(slog.DiscardHandler))
			if (err != nil) != tt.wantErr {
				t.Errorf("ListAllFunctions() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("ListAllFunctions() got = %v, want %v", got, tt.want)
			}
		})
	}
}

// mockLambdaClient is a mock implementation of AWS Lambda Client
type mockLambdaClient struct {
	listFunctionsFunc func(ctx context.Context, params *lambda.ListFunctionsInput, optFns ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error)
}

// ListFunctions delegates to the configurable listFunctionsFunc stub.
func (m *mockLambdaClient) ListFunctions(ctx context.Context, params *lambda.ListFunctionsInput, optFns ...func(*lambda.Options)) (*lambda.ListFunctionsOutput, error) {
	return m.listFunctionsFunc(ctx, params, optFns...)
}

================================================ FILE: pkg/internal/enhancedmetrics/service/lambda/service.go ================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lambda

import (
	"context"
	"fmt"
	"log/slog"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/lambda/types"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// awsLambdaNamespace is the CloudWatch namespace served by this service.
const awsLambdaNamespace = "AWS/Lambda"

// Client lists Lambda functions; satisfied by AWSLambdaClient and test mocks.
type Client interface {
	ListAllFunctions(ctx context.Context, logger *slog.Logger) ([]types.FunctionConfiguration, error)
}

// buildCloudwatchDataFunc converts one tagged resource plus its function
// configuration into a synthetic CloudWatch datapoint.
type buildCloudwatchDataFunc func(*model.TaggedResource, *types.FunctionConfiguration, []string) (*model.CloudwatchData, error)

// supportedMetric couples a metric name with its datapoint builder and the
// IAM permissions required to compute it.
type supportedMetric struct {
	name                    string
	buildCloudwatchDataFunc buildCloudwatchDataFunc
	requiredPermissions     []string
}

// buildCloudwatchData invokes the metric's builder function.
func (sm *supportedMetric) buildCloudwatchData(resource *model.TaggedResource, functionConfiguration *types.FunctionConfiguration, exportedTagOnMetrics []string) (*model.CloudwatchData, error) {
	return sm.buildCloudwatchDataFunc(resource, functionConfiguration, exportedTagOnMetrics)
}

// Lambda implements the enhanced-metrics service for AWS Lambda.
type Lambda struct {
	supportedMetrics map[string]supportedMetric
	buildClientFunc  func(cfg aws.Config) Client
}

// NewLambdaService builds the service; a nil buildClientFunc falls back to
// the real AWS SDK client constructor.
func NewLambdaService(buildClientFunc func(cfg aws.Config) Client) *Lambda {
	if buildClientFunc == nil {
		buildClientFunc = NewLambdaClientWithConfig
	}

	svc := &Lambda{
		buildClientFunc: buildClientFunc,
	}

	// The maximum execution duration permitted for the function before termination.
	timeoutMetric := supportedMetric{
		name:                    "Timeout",
		buildCloudwatchDataFunc: buildTimeoutMetric,
		requiredPermissions:     []string{"lambda:ListFunctions"},
	}

	svc.supportedMetrics = map[string]supportedMetric{
		timeoutMetric.name: timeoutMetric,
	}
	return svc
}

// GetNamespace returns the CloudWatch namespace handled by this service.
func (s *Lambda) GetNamespace() string {
	return awsLambdaNamespace
}

// loadMetricsMetadata lists every function in the region and indexes the
// configurations by function ARN.
func (s *Lambda) loadMetricsMetadata(ctx context.Context, logger *slog.Logger, region string, role model.Role, configProvider config.RegionalConfigProvider) (map[string]*types.FunctionConfiguration, error) {
	client := s.buildClientFunc(*configProvider.GetAWSRegionalConfig(region, role))

	instances, err := client.ListAllFunctions(ctx, logger)
	if err != nil {
		return nil, fmt.Errorf("error listing functions in region %s: %w", region, err)
	}

	regionalData := make(map[string]*types.FunctionConfiguration, len(instances))
	for _, instance := range instances {
		// Defensive fix: skip entries without an ARN instead of panicking on
		// a nil-pointer dereference.
		if instance.FunctionArn == nil {
			logger.Warn("Skipping Lambda function with nil FunctionArn")
			continue
		}
		// Taking &instance is safe with per-iteration loop variables
		// (Go >= 1.22): each entry gets a distinct address.
		regionalData[*instance.FunctionArn] = &instance
	}

	logger.Info("Loaded Lambda metrics metadata", "region", region)
	return regionalData, nil
}

// IsMetricSupported reports whether metricName has an enhanced-metric builder.
func (s *Lambda) IsMetricSupported(metricName string) bool {
	_, exists := s.supportedMetrics[metricName]
	return exists
}

// GetMetrics builds enhanced-metric datapoints for the given Lambda resources.
// Resources in other namespaces, unknown metrics, and build failures are
// logged and skipped rather than failing the whole batch.
func (s *Lambda) GetMetrics(ctx context.Context, logger *slog.Logger, resources []*model.TaggedResource, enhancedMetricConfigs []*model.EnhancedMetricConfig, exportedTagOnMetrics []string, region string, role model.Role, regionalConfigProvider config.RegionalConfigProvider) ([]*model.CloudwatchData, error) {
	if len(resources) == 0 || len(enhancedMetricConfigs) == 0 {
		return nil, nil
	}

	data, err := s.loadMetricsMetadata(
		ctx,
		logger,
		region,
		role,
		regionalConfigProvider,
	)
	if err != nil {
		return nil, fmt.Errorf("error loading lambda metrics metadata: %w", err)
	}

	var result []*model.CloudwatchData
	for _, resource := range resources {
		if resource.Namespace != s.GetNamespace() {
			logger.Warn("Resource namespace does not match Lambda namespace, skipping", "arn", resource.ARN, "namespace", resource.Namespace)
			continue
		}
		functionConfiguration, exists := data[resource.ARN]
		if !exists {
			logger.Warn("Lambda function not found in data", "arn", resource.ARN)
			continue
		}
		for _, enhancedMetric := range enhancedMetricConfigs {
			supportedMetric, ok := s.supportedMetrics[enhancedMetric.Name]
			if !ok {
				logger.Warn("Unsupported Lambda enhanced metric, skipping", "metric", enhancedMetric.Name)
				continue
			}
			em, err := supportedMetric.buildCloudwatchData(resource, functionConfiguration, exportedTagOnMetrics)
			if err != nil || em == nil {
				logger.Warn("Error building Lambda enhanced metric", "metric", enhancedMetric.Name, "error", err)
				continue
			}
			result = append(result, em)
		}
	}
	return result, nil
}

// ListRequiredPermissions maps each supported metric to the IAM permissions it needs.
func (s *Lambda) ListRequiredPermissions() map[string][]string {
	permissions := make(map[string][]string, len(s.supportedMetrics))
	for _, metric := range s.supportedMetrics {
		permissions[metric.name] = metric.requiredPermissions
	}
	return permissions
}

// ListSupportedEnhancedMetrics returns the supported metric names (unordered:
// map iteration order is random).
func (s *Lambda) ListSupportedEnhancedMetrics() []string {
	var metrics []string
	for metric := range s.supportedMetrics {
		metrics = append(metrics, metric)
	}
	return metrics
}

// Instance returns a fresh service value sharing the read-only metric table.
func (s *Lambda) Instance() service.EnhancedMetricsService {
	// do not use NewLambdaService to avoid extra map allocation
	return &Lambda{
		supportedMetrics: s.supportedMetrics,
		buildClientFunc:  s.buildClientFunc,
	}
}

// buildTimeoutMetric emits the function's configured timeout (seconds) as a
// single datapoint stamped with the current time.
func buildTimeoutMetric(resource *model.TaggedResource, fn *types.FunctionConfiguration, exportedTags []string) (*model.CloudwatchData, error) {
	if fn.Timeout == nil {
		return nil, fmt.Errorf("timeout is nil for Lambda function %s", resource.ARN)
	}

	var dimensions []model.Dimension
	if fn.FunctionName != nil {
		dimensions = []model.Dimension{
			{Name: "FunctionName", Value: *fn.FunctionName},
		}
	}

	value := float64(*fn.Timeout)
	return &model.CloudwatchData{
		MetricName:   "Timeout",
		ResourceName: resource.ARN,
		Namespace:    "AWS/Lambda",
		Dimensions:   dimensions,
		Tags:         resource.MetricTags(exportedTags),
		GetMetricDataResult: &model.GetMetricDataResult{
			DataPoints: []model.DataPoint{
				{
					Value:     &value,
					Timestamp: time.Now(),
				},
			},
		},
	}, nil
}

================================================ FILE: pkg/internal/enhancedmetrics/service/lambda/service_test.go ================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package lambda

import (
	"context"
	"fmt"
	"log/slog"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/lambda/types"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// TestNewLambdaService verifies construction with and without a custom
// client factory registers exactly the "Timeout" metric.
func TestNewLambdaService(t *testing.T) {
	tests := []struct {
		name            string
		buildClientFunc func(cfg aws.Config) Client
	}{
		{
			name:            "with nil buildClientFunc",
			buildClientFunc: nil,
		},
		{
			name:            "with custom buildClientFunc",
			buildClientFunc: func(_ aws.Config) Client { return nil },
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := NewLambdaService(tt.buildClientFunc)
			require.NotNil(t, got)
			require.Len(t, got.supportedMetrics, 1)
			require.NotNil(t, got.supportedMetrics["Timeout"])
		})
	}
}

func TestLambda_GetNamespace(t *testing.T) {
	service := NewLambdaService(nil)
	expectedNamespace := awsLambdaNamespace
	require.Equal(t, expectedNamespace, service.GetNamespace())
}

func TestLambda_ListRequiredPermissions(t *testing.T) {
	service := NewLambdaService(nil)
	expectedPermissions := map[string][]string{
		"Timeout": {"lambda:ListFunctions"},
	}
	require.Equal(t, expectedPermissions, service.ListRequiredPermissions())
}

func TestLambda_ListSupportedEnhancedMetrics(t *testing.T) {
	service := NewLambdaService(nil)
	expectedMetrics := []string{
		"Timeout",
	}
	require.Equal(t, expectedMetrics, service.ListSupportedEnhancedMetrics())
}

// TestLambda_GetMetrics drives GetMetrics with a mocked Client and config
// provider across empty-input, mismatch, and success cases.
func TestLambda_GetMetrics(t *testing.T) {
	makeFunctionConfiguration := func(name string, timeout int32) types.FunctionConfiguration {
		arn := fmt.Sprintf("arn:aws:lambda:us-east-1:123456789012:function:%s", name)
		return types.FunctionConfiguration{
			FunctionArn:  aws.String(arn),
			FunctionName: aws.String(name),
			Timeout:      aws.Int32(timeout),
		}
	}
	tests := []struct {
		name            string
		resources       []*model.TaggedResource
		enhancedMetrics []*model.EnhancedMetricConfig
		functions       []types.FunctionConfiguration
		wantErr         bool
		wantCount       int
	}{
		{
			name:            "empty resources returns empty",
			resources:       []*model.TaggedResource{},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "Timeout"}},
			functions:       []types.FunctionConfiguration{makeFunctionConfiguration("test", 300)},
			wantCount:       0,
		},
		{
			name:            "empty enhanced metrics returns empty",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:lambda:us-east-1:123456789012:function:test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{},
			functions:       []types.FunctionConfiguration{makeFunctionConfiguration("test", 300)},
			wantCount:       0,
		},
		{
			// NOTE(review): despite the name, wantErr is false — a namespace
			// mismatch is skipped (with a warning), not surfaced as an error.
			name:            "wrong namespace returns error",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:lambda:us-east-1:123456789012:function:test"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "Timeout"}},
			wantErr:         false,
		},
		{
			name: "successfully received single metric",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:lambda:us-east-1:123456789012:function:test", Namespace: awsLambdaNamespace},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "Timeout"}},
			functions:       []types.FunctionConfiguration{makeFunctionConfiguration("test", 300)},
			wantCount:       1,
		},
		{
			name: "skips unsupported metrics",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:lambda:us-east-1:123456789012:function:test"},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "UnsupportedMetric"}},
			functions:       []types.FunctionConfiguration{makeFunctionConfiguration("test", 300)},
			wantCount:       0,
		},
		{
			name: "processes multiple resources",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:lambda:us-east-1:123456789012:function:func1", Namespace: awsLambdaNamespace},
				{ARN: "arn:aws:lambda:us-east-1:123456789012:function:func2", Namespace: awsLambdaNamespace},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "Timeout"}},
			functions:       []types.FunctionConfiguration{makeFunctionConfiguration("func1", 300), makeFunctionConfiguration("func2", 600)},
			wantCount:       2,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			service := NewLambdaService(func(_ aws.Config) Client {
				return &mockServiceLambdaClient{functions: tt.functions}
			})
			result, err := service.GetMetrics(context.Background(), slog.New(slog.DiscardHandler), tt.resources, tt.enhancedMetrics, nil, "us-east-1", model.Role{}, &mockConfigProvider{c: &aws.Config{Region: "us-east-1"}})
			if tt.wantErr {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			require.Len(t, result, tt.wantCount)
			for _, metric := range result {
				require.Equal(t, awsLambdaNamespace, metric.Namespace)
				require.NotEmpty(t, metric.Dimensions)
				require.NotNil(t, metric.GetMetricDataResult)
			}
		})
	}
}

// mockServiceLambdaClient returns a fixed set of function configurations.
type mockServiceLambdaClient struct {
	functions []types.FunctionConfiguration
}

func (m *mockServiceLambdaClient) ListAllFunctions(_ context.Context, _ *slog.Logger) ([]types.FunctionConfiguration, error) {
	return m.functions, nil
}

// mockConfigProvider always returns the same regional AWS config.
type mockConfigProvider struct {
	c *aws.Config
}

func (m *mockConfigProvider) GetAWSRegionalConfig(_ string, _ model.Role) *aws.Config {
	return m.c
}

================================================ FILE: pkg/internal/enhancedmetrics/service/rds/client.go ================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in
// compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package rds

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/rds"
	"github.com/aws/aws-sdk-go-v2/service/rds/types"
)

// awsClient is the subset of the AWS RDS API used by this package, declared
// locally so tests can substitute a mock.
type awsClient interface {
	DescribeDBInstances(ctx context.Context, params *rds.DescribeDBInstancesInput, optFns ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error)
}

// AWSRDSClient wraps the AWS RDS client
type AWSRDSClient struct {
	client awsClient
}

// NewRDSClientWithConfig creates a new RDS client with custom AWS configuration
func NewRDSClientWithConfig(cfg aws.Config) Client {
	return &AWSRDSClient{
		client: rds.NewFromConfig(cfg),
	}
}

// describeDBInstances retrieves a single page of provisioned RDS instances.
func (c *AWSRDSClient) describeDBInstances(ctx context.Context, input *rds.DescribeDBInstancesInput) (*rds.DescribeDBInstancesOutput, error) {
	result, err := c.client.DescribeDBInstances(ctx, input)
	if err != nil {
		return nil, fmt.Errorf("failed to describe DB instances: %w", err)
	}
	return result, nil
}

// DescribeDBInstances retrieves all DB instances matching the given
// dbInstances values (passed as a "db-instance-id" filter) by handling
// pagination. (Comment previously said "DescribeAllDBInstances", which does
// not match the method name.)
// NOTE(review): assumes dbInstances is non-empty — an empty filter value
// list may be rejected by the API; verify callers always pass at least one.
func (c *AWSRDSClient) DescribeDBInstances(ctx context.Context, logger *slog.Logger, dbInstances []string) ([]types.DBInstance, error) {
	logger.Debug("Describing all RDS DB instances")

	var allInstances []types.DBInstance
	var marker *string
	maxRecords := aws.Int32(100)

	for {
		output, err := c.describeDBInstances(ctx, &rds.DescribeDBInstancesInput{
			Marker:     marker,
			MaxRecords: maxRecords,
			Filters: []types.Filter{
				{
					Name:   aws.String("db-instance-id"),
					Values: dbInstances,
				},
			},
		})
		if err != nil {
			return nil, err
		}

		allInstances = append(allInstances, output.DBInstances...)

		// A nil Marker signals the last page.
		if output.Marker == nil {
			break
		}
		marker = output.Marker
	}

	logger.Debug("Completed describing RDS DB instances", slog.Int("totalInstances", len(allInstances)))
	return allInstances, nil
}

================================================ FILE: pkg/internal/enhancedmetrics/service/rds/client_test.go ================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rds

import (
	"context"
	"fmt"
	"log/slog"
	"reflect"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/rds"
	"github.com/aws/aws-sdk-go-v2/service/rds/types"
)

// TestAWSRDSClient_DescribeDBInstances covers single-page and multi-page
// pagination (asserting the db-instance-id filter is forwarded) plus
// API-error propagation.
func TestAWSRDSClient_DescribeDBInstances(t *testing.T) {
	tests := []struct {
		name      string
		client    awsClient
		want      []types.DBInstance
		wantErr   bool
		instances []string
	}{
		{
			name:      "success - single page",
			instances: []string{"db-1"},
			client: &mockRDSClient{
				describeDBInstancesFunc: func(_ context.Context, params *rds.DescribeDBInstancesInput, _ ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error) {
					if len(params.Filters) != 1 || *params.Filters[0].Name != "db-instance-id" {
						return nil, fmt.Errorf("unexpected filter: %v", params.Filters)
					}
					return &rds.DescribeDBInstancesOutput{
						DBInstances: []types.DBInstance{
							{DBInstanceIdentifier: aws.String("db-1")},
						},
						Marker: nil,
					}, nil
				},
			},
			want: []types.DBInstance{
				{DBInstanceIdentifier: aws.String("db-1")},
			},
			wantErr: false,
		},
		{
			name:      "success - multiple pages",
			instances: []string{"db-1", "db-2"},
			client: &mockRDSClient{
				// Immediately-invoked closure: callCount is captured so the
				// mock can return a different page per call while still
				// validating the filter on every call.
				describeDBInstancesFunc: func() func(_ context.Context, params *rds.DescribeDBInstancesInput, _ ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error) {
					callCount := 0
					return func(_ context.Context, params *rds.DescribeDBInstancesInput, _ ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error) {
						if len(params.Filters) != 1 || *params.Filters[0].Name != "db-instance-id" {
							return nil, fmt.Errorf("unexpected filter: %v", params.Filters)
						}
						if params.Filters[0].Values[0] != "db-1" || params.Filters[0].Values[1] != "db-2" {
							return nil, fmt.Errorf("unexpected filter values: %v", params.Filters[0].Values)
						}
						callCount++
						if callCount == 1 {
							return &rds.DescribeDBInstancesOutput{
								DBInstances: []types.DBInstance{
									{DBInstanceIdentifier: aws.String("db-1")},
								},
								Marker: aws.String("marker1"),
							}, nil
						}
						return &rds.DescribeDBInstancesOutput{
							DBInstances: []types.DBInstance{
								{DBInstanceIdentifier: aws.String("db-2")},
							},
							Marker: nil,
						}, nil
					}
				}(),
			},
			want: []types.DBInstance{
				{DBInstanceIdentifier: aws.String("db-1")},
				{DBInstanceIdentifier: aws.String("db-2")},
			},
			wantErr: false,
		},
		{
			name: "error - API failure",
			client: &mockRDSClient{
				describeDBInstancesFunc: func(_ context.Context, _ *rds.DescribeDBInstancesInput, _ ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error) {
					return nil, fmt.Errorf("API error")
				},
			},
			want:    nil,
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			c := &AWSRDSClient{
				client: tt.client,
			}
			got, err := c.DescribeDBInstances(context.Background(), slog.New(slog.DiscardHandler), tt.instances)
			if (err != nil) != tt.wantErr {
				t.Errorf("DescribeDBInstances() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("DescribeDBInstances() got = %v, want %v", got, tt.want)
			}
		})
	}
}

// mockRDSClient is a mock implementation of AWS RDS Client
type mockRDSClient struct {
	describeDBInstancesFunc func(ctx context.Context, params *rds.DescribeDBInstancesInput, optFns ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error)
}

// DescribeDBInstances delegates to the configurable stub.
func (m *mockRDSClient) DescribeDBInstances(ctx context.Context, params *rds.DescribeDBInstancesInput, optFns ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error) {
	return m.describeDBInstancesFunc(ctx, params, optFns...)
}

================================================ FILE: pkg/internal/enhancedmetrics/service/rds/service.go ================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package rds import ( "context" "fmt" "log/slog" "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/rds/types" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) const awsRdsNamespace = "AWS/RDS" type Client interface { DescribeDBInstances(ctx context.Context, logger *slog.Logger, dbInstances []string) ([]types.DBInstance, error) } type buildCloudwatchData func(*model.TaggedResource, *types.DBInstance, []string) (*model.CloudwatchData, error) type supportedMetric struct { name string buildCloudwatchDataFunc buildCloudwatchData requiredPermissions []string } func (sm *supportedMetric) buildCloudwatchData(resource *model.TaggedResource, instance *types.DBInstance, metrics []string) (*model.CloudwatchData, error) { return sm.buildCloudwatchDataFunc(resource, instance, metrics) } type RDS struct { supportedMetrics map[string]supportedMetric buildClientFunc func(cfg aws.Config) Client } func NewRDSService(buildClientFunc func(cfg aws.Config) Client) *RDS { if buildClientFunc == nil { buildClientFunc = NewRDSClientWithConfig } rds := &RDS{ buildClientFunc: buildClientFunc, } // The storage capacity in gibibytes (GiB) allocated for the DB instance. 
allocatedStorageMetrics := supportedMetric{ name: "AllocatedStorage", buildCloudwatchDataFunc: buildAllocatedStorageMetric, requiredPermissions: []string{"rds:DescribeDBInstances"}, } rds.supportedMetrics = map[string]supportedMetric{ allocatedStorageMetrics.name: allocatedStorageMetrics, } return rds } // GetNamespace returns the AWS CloudWatch namespace for RDS func (s *RDS) GetNamespace() string { return awsRdsNamespace } // loadMetricsMetadata loads any metadata needed for RDS enhanced metrics for the given region and role func (s *RDS) loadMetricsMetadata( ctx context.Context, logger *slog.Logger, region string, role model.Role, configProvider config.RegionalConfigProvider, dbInstances []string, ) (map[string]*types.DBInstance, error) { client := s.buildClientFunc(*configProvider.GetAWSRegionalConfig(region, role)) instances, err := client.DescribeDBInstances(ctx, logger, dbInstances) if err != nil { return nil, fmt.Errorf("error describing RDS DB instances in region %s: %w", region, err) } regionalData := make(map[string]*types.DBInstance, len(instances)) for _, instance := range instances { regionalData[*instance.DBInstanceArn] = &instance } return regionalData, nil } func (s *RDS) IsMetricSupported(metricName string) bool { _, exists := s.supportedMetrics[metricName] return exists } func (s *RDS) GetMetrics(ctx context.Context, logger *slog.Logger, resources []*model.TaggedResource, enhancedMetricConfigs []*model.EnhancedMetricConfig, exportedTagOnMetrics []string, region string, role model.Role, regionalConfigProvider config.RegionalConfigProvider) ([]*model.CloudwatchData, error) { if len(resources) == 0 || len(enhancedMetricConfigs) == 0 { return nil, nil } dbInstances := make([]string, 0, len(resources)) for _, resource := range resources { dbInstances = append(dbInstances, resource.ARN) } data, err := s.loadMetricsMetadata( ctx, logger, region, role, regionalConfigProvider, dbInstances, ) if err != nil { return nil, fmt.Errorf("error loading RDS 
metrics metadata: %w", err) } var result []*model.CloudwatchData for _, resource := range resources { if resource.Namespace != s.GetNamespace() { logger.Warn("RDS enhanced metrics service cannot process resource with different namespace", "namespace", resource.Namespace, "arn", resource.ARN) continue } dbInstance, exists := data[resource.ARN] if !exists { logger.Warn("RDS DB instance not found in metadata", "arn", resource.ARN) continue } for _, enhancedMetric := range enhancedMetricConfigs { supportedMetric, ok := s.supportedMetrics[enhancedMetric.Name] if !ok { logger.Warn("Unsupported RDS enhanced metric requested", "metric", enhancedMetric.Name) continue } em, err := supportedMetric.buildCloudwatchData(resource, dbInstance, exportedTagOnMetrics) if err != nil || em == nil { logger.Warn("Error building RDS enhanced metric", "metric", enhancedMetric.Name, "error", err) continue } result = append(result, em) } } return result, nil } func (s *RDS) ListRequiredPermissions() map[string][]string { requiredPermissions := make(map[string][]string, len(s.supportedMetrics)) for metricName, metric := range s.supportedMetrics { requiredPermissions[metricName] = metric.requiredPermissions } return requiredPermissions } func (s *RDS) ListSupportedEnhancedMetrics() []string { var metrics []string for metric := range s.supportedMetrics { metrics = append(metrics, metric) } return metrics } func (s *RDS) Instance() service.EnhancedMetricsService { // do not use NewRDSService to avoid extra map allocation return &RDS{ supportedMetrics: s.supportedMetrics, buildClientFunc: s.buildClientFunc, } } func buildAllocatedStorageMetric(resource *model.TaggedResource, instance *types.DBInstance, exportedTags []string) (*model.CloudwatchData, error) { if instance.AllocatedStorage == nil { return nil, fmt.Errorf("AllocatedStorage is nil for DB instance %s", resource.ARN) } var dimensions []model.Dimension if instance.DBInstanceIdentifier != nil && len(*instance.DBInstanceIdentifier) > 0 { 
dimensions = append(dimensions, model.Dimension{ Name: "DBInstanceIdentifier", Value: *instance.DBInstanceIdentifier, }) } if instance.DBInstanceClass != nil && len(*instance.DBInstanceClass) > 0 { dimensions = append(dimensions, model.Dimension{ Name: "DatabaseClass", Value: *instance.DBInstanceClass, }) } if instance.Engine != nil && len(*instance.Engine) > 0 { dimensions = append(dimensions, model.Dimension{ Name: "EngineName", Value: *instance.Engine, }) } // Convert from GiB to bytes valueInBytes := float64(*instance.AllocatedStorage) * 1024 * 1024 * 1024 return &model.CloudwatchData{ MetricName: "AllocatedStorage", ResourceName: resource.ARN, Namespace: awsRdsNamespace, Dimensions: dimensions, Tags: resource.MetricTags(exportedTags), // Store the value as a single data point GetMetricDataResult: &model.GetMetricDataResult{ DataPoints: []model.DataPoint{ { Value: &valueInBytes, Timestamp: time.Now(), }, }, }, }, nil } ================================================ FILE: pkg/internal/enhancedmetrics/service/rds/service_test.go ================================================ // Copyright 2026 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package rds

import (
	"context"
	"fmt"
	"log/slog"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/rds/types"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// TestNewRDSService verifies the constructor registers exactly one supported
// metric (AllocatedStorage) regardless of the client factory supplied.
func TestNewRDSService(t *testing.T) {
	tests := []struct {
		name            string
		buildClientFunc func(cfg aws.Config) Client
	}{
		{
			name:            "with nil buildClientFunc",
			buildClientFunc: nil,
		},
		{
			name: "with custom buildClientFunc",
			buildClientFunc: func(_ aws.Config) Client {
				return nil
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := NewRDSService(tt.buildClientFunc)
			require.NotNil(t, got)
			require.Len(t, got.supportedMetrics, 1)
			require.NotNil(t, got.supportedMetrics["AllocatedStorage"])
		})
	}
}

func TestRDS_GetNamespace(t *testing.T) {
	service := NewRDSService(nil)
	expectedNamespace := awsRdsNamespace
	require.Equal(t, expectedNamespace, service.GetNamespace())
}

func TestRDS_ListRequiredPermissions(t *testing.T) {
	service := NewRDSService(nil)
	expectedPermissions := map[string][]string{
		"AllocatedStorage": {"rds:DescribeDBInstances"},
	}
	require.Equal(t, expectedPermissions, service.ListRequiredPermissions())
}

func TestRDS_ListSupportedEnhancedMetrics(t *testing.T) {
	service := NewRDSService(nil)
	expectedMetrics := []string{
		"AllocatedStorage",
	}
	require.Equal(t, expectedMetrics, service.ListSupportedEnhancedMetrics())
}

// TestRDS_GetMetrics is a table-driven test covering filtering (namespace
// mismatch, unsupported metric, missing metadata) and the GiB→bytes value
// conversion of AllocatedStorage.
func TestRDS_GetMetrics(t *testing.T) {
	testInstance := makeTestDBInstance("test-instance", 100)
	testARN := *testInstance.DBInstanceArn
	tests := []struct {
		name            string
		resources       []*model.TaggedResource
		enhancedMetrics []*model.EnhancedMetricConfig
		regionalData    map[string]*types.DBInstance
		wantErr         bool
		wantResultCount int
		wantValues      []float64 // Expected values in bytes
	}{
		{
			name:            "empty resources",
			resources:       []*model.TaggedResource{},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "AllocatedStorage"}},
			regionalData:    map[string]*types.DBInstance{testARN: testInstance},
			wantResultCount: 0,
		},
		{
			name:            "empty enhanced metrics",
			resources:       []*model.TaggedResource{{ARN: testARN}},
			enhancedMetrics: []*model.EnhancedMetricConfig{},
			regionalData:    map[string]*types.DBInstance{testARN: testInstance},
			wantResultCount: 0,
		},
		{
			// Resource carries no Namespace field, so the service skips it.
			name:            "wrong namespace",
			resources:       []*model.TaggedResource{{ARN: testARN}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "AllocatedStorage"}},
			regionalData:    map[string]*types.DBInstance{testARN: testInstance},
			wantErr:         false,
		},
		{
			name:            "metadata not loaded",
			resources:       []*model.TaggedResource{{ARN: testARN}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "AllocatedStorage"}},
			regionalData:    nil,
			wantResultCount: 0,
		},
		{
			name:            "successfully received metric",
			resources:       []*model.TaggedResource{{ARN: testARN, Namespace: awsRdsNamespace}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "AllocatedStorage"}},
			regionalData:    map[string]*types.DBInstance{testARN: testInstance},
			wantResultCount: 1,
			wantValues:      []float64{107374182400}, // 100 GiB in bytes
		},
		{
			name:            "resource not found in metadata",
			resources:       []*model.TaggedResource{{ARN: "arn:aws:rds:us-east-1:123456789012:db:non-existent"}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "AllocatedStorage"}},
			regionalData:    map[string]*types.DBInstance{testARN: testInstance},
			wantResultCount: 0,
		},
		{
			name:            "unsupported metric",
			resources:       []*model.TaggedResource{{ARN: testARN}},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "UnsupportedMetric"}},
			regionalData:    map[string]*types.DBInstance{testARN: testInstance},
			wantResultCount: 0,
		},
		{
			name: "multiple resources",
			resources: []*model.TaggedResource{
				{ARN: "arn:aws:rds:us-east-1:123456789012:db:test-instance-1", Namespace: awsRdsNamespace},
				{ARN: "arn:aws:rds:us-east-1:123456789012:db:test-instance-2", Namespace: awsRdsNamespace},
			},
			enhancedMetrics: []*model.EnhancedMetricConfig{{Name: "AllocatedStorage"}},
			regionalData: map[string]*types.DBInstance{
				"arn:aws:rds:us-east-1:123456789012:db:test-instance-1": makeTestDBInstance("test-instance-1", 100),
				"arn:aws:rds:us-east-1:123456789012:db:test-instance-2": makeTestDBInstance("test-instance-2", 200),
			},
			wantResultCount: 2,
			wantValues:      []float64{107374182400, 214748364800}, // 100 and 200 GiB in bytes
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			service := newTestRDSService(tt.regionalData)
			result, err := service.GetMetrics(context.Background(), slog.New(slog.DiscardHandler), tt.resources, tt.enhancedMetrics, nil, "us-east-1", model.Role{}, &mockConfigProvider{c: &aws.Config{Region: "us-east-1"}})
			if tt.wantErr {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			require.Len(t, result, tt.wantResultCount)
			for i, metric := range result {
				require.Equal(t, awsRdsNamespace, metric.Namespace)
				require.NotEmpty(t, metric.Dimensions)
				require.NotNil(t, metric.GetMetricDataResult)
				require.Nil(t, metric.GetMetricStatisticsResult)
				// Validate the actual value if wantValues is specified
				if len(tt.wantValues) > 0 {
					require.NotNil(t, metric.GetMetricDataResult.DataPoints)
					require.Len(t, metric.GetMetricDataResult.DataPoints, 1)
					require.NotNil(t, metric.GetMetricDataResult.DataPoints[0].Value)
					require.Equal(t, tt.wantValues[i], *metric.GetMetricDataResult.DataPoints[0].Value, "expected value in bytes for AllocatedStorage")
				}
			}
		})
	}
}

// mockServiceRDSClient is a canned-response implementation of Client.
type mockServiceRDSClient struct {
	instances   []types.DBInstance
	describeErr bool
}

func (m *mockServiceRDSClient) DescribeDBInstances(context.Context, *slog.Logger, []string) ([]types.DBInstance, error) {
	if m.describeErr {
		return nil, fmt.Errorf("mock describe error")
	}
	return m.instances, nil
}

// mockConfigProvider always returns the single configured aws.Config.
type mockConfigProvider struct {
	c *aws.Config
}

func (m *mockConfigProvider) GetAWSRegionalConfig(_ string, _ model.Role) *aws.Config {
	return m.c
}

// Helper functions for test setup
func makeTestDBInstance(name string, storage int32) *types.DBInstance {
	arn := fmt.Sprintf("arn:aws:rds:us-east-1:123456789012:db:%s", name)
	return &types.DBInstance{
		DBInstanceArn:        aws.String(arn),
		DBInstanceIdentifier: aws.String(name),
		DBInstanceClass:      aws.String("db.t3.micro"),
		Engine:               aws.String("postgres"),
		AllocatedStorage:     aws.Int32(storage),
	}
}

// newTestRDSService builds an RDS service whose client serves regionalData.
func newTestRDSService(regionalData map[string]*types.DBInstance) *RDS {
	return NewRDSService(func(_ aws.Config) Client {
		return &mockServiceRDSClient{
			instances: convertRegionalDataToInstances(regionalData),
		}
	})
}

// convertRegionalDataToInstances converts the regionalData map to a slice of DBInstance
func convertRegionalDataToInstances(regionalData map[string]*types.DBInstance) []types.DBInstance {
	if regionalData == nil {
		return nil
	}
	instances := make([]types.DBInstance, 0, len(regionalData))
	for _, instance := range regionalData {
		if instance != nil {
			instances = append(instances, *instance)
		}
	}
	return instances
}

================================================
FILE: pkg/internal/enhancedmetrics/service/services.go
================================================
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package service

import (
	"context"
	"log/slog"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// EnhancedMetricsService is the contract implemented by per-namespace
// enhanced-metrics collectors (e.g. the RDS service above).
type EnhancedMetricsService interface {
	// GetMetrics returns enhanced metrics for the given resources and enhancedMetricConfigs.
	// filteredResources are the resources that belong to the service's namespace.
	// filteredEnhancedMetricConfigs are the enhanced metric configs that belong to the service's namespace and are supported by the service.
	GetMetrics(
		ctx context.Context,
		logger *slog.Logger,
		filteredResources []*model.TaggedResource,
		filteredEnhancedMetricConfigs []*model.EnhancedMetricConfig,
		exportedTagOnMetrics []string,
		region string,
		role model.Role,
		regionalConfigProvider config.RegionalConfigProvider,
	) ([]*model.CloudwatchData, error)

	// IsMetricSupported checks if the given metric name is supported by this service.
	IsMetricSupported(metricName string) bool
}

================================================
FILE: pkg/internal/enhancedmetrics/service.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package enhancedmetrics

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// MetricsServiceRegistry defines an interface to get enhanced metrics services by namespace
type MetricsServiceRegistry interface {
	GetEnhancedMetricsService(namespace string) (service.EnhancedMetricsService, error)
}

// Service is responsible for getting enhanced metrics using appropriate services.
type Service struct {
	configProvider                 config.RegionalConfigProvider
	enhancedMetricsServiceRegistry MetricsServiceRegistry
}

// GetMetrics resolves the enhanced-metrics service registered for namespace,
// narrows resources and metrics down to what that service can handle, and
// delegates collection to it. Mismatched resources and unsupported metrics
// are logged (they indicate an upstream validation gap) rather than failed on.
func (ep *Service) GetMetrics(
	ctx context.Context,
	logger *slog.Logger,
	namespace string,
	resources []*model.TaggedResource,
	metrics []*model.EnhancedMetricConfig,
	exportedTagOnMetrics []string,
	region string,
	role model.Role,
) ([]*model.CloudwatchData, error) {
	svc, err := ep.enhancedMetricsServiceRegistry.GetEnhancedMetricsService(namespace)
	if err != nil {
		return nil, fmt.Errorf("could not get enhanced metric service for namespace %s: %w", namespace, err)
	}

	// Keep only resources that actually belong to the service's namespace.
	var keptResources []*model.TaggedResource
	for _, res := range resources {
		if res.Namespace != namespace {
			logger.Warn("Skipping resource for enhanced metric service due to namespace mismatch",
				"expected_namespace", namespace,
				"resource_namespace", res.Namespace,
				"resource_arn", res.ARN,
			)
			continue
		}
		keptResources = append(keptResources, res)
	}

	// Keep only metrics the resolved service implements.
	var keptMetrics []*model.EnhancedMetricConfig
	for _, metric := range metrics {
		if !svc.IsMetricSupported(metric.Name) {
			logger.Warn("Skipping unsupported enhanced metric for service",
				"namespace", namespace,
				"metric", metric.Name,
			)
			continue
		}
		keptMetrics = append(keptMetrics, metric)
	}

	return svc.GetMetrics(ctx, logger, keptResources, keptMetrics, exportedTagOnMetrics, region, role, ep.configProvider)
}

// NewService wires a Service with its regional AWS config provider and the
// registry used to resolve per-namespace enhanced metrics services.
func NewService(
	configProvider config.RegionalConfigProvider,
	enhancedMetricsServiceRegistry MetricsServiceRegistry,
) *Service {
	return &Service{
		configProvider:                 configProvider,
		enhancedMetricsServiceRegistry: enhancedMetricsServiceRegistry,
	}
}

================================================
FILE: pkg/internal/enhancedmetrics/service_test.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package enhancedmetrics

import (
	"context"
	"errors"
	"log/slog"
	"sync"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/service"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// mockConfigProvider is a mock implementation of config.RegionalConfigProvider
type mockConfigProvider struct {
	configs map[string]*aws.Config
}

func (m *mockConfigProvider) GetAWSRegionalConfig(region string, _ model.Role) *aws.Config {
	if m.configs == nil {
		return &aws.Config{}
	}
	if cfg, ok := m.configs[region]; ok {
		return cfg
	}
	return &aws.Config{}
}

// mockMetricsService is a mock implementation of service.EnhancedMetricsService
type mockMetricsService struct {
	getMetricsCalled int
	err              error
	result           []*model.CloudwatchData
	mu               sync.Mutex // guards getMetricsCalled
}

func (m *mockMetricsService) GetMetrics(context.Context, *slog.Logger, []*model.TaggedResource, []*model.EnhancedMetricConfig, []string, string, model.Role, config.RegionalConfigProvider) ([]*model.CloudwatchData, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.getMetricsCalled++
	return m.result, m.err
}

func (m *mockMetricsService) IsMetricSupported(_ string) bool {
	return true
}

func (m *mockMetricsService) getGetMetricsCalled() int {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.getMetricsCalled
}

// mockMetricsServiceRegistry is a mock implementation of MetricsServiceRegistry
type mockMetricsServiceRegistry struct {
	services map[string]service.EnhancedMetricsService
	getErr   error
}

func (m *mockMetricsServiceRegistry) GetEnhancedMetricsService(namespace string) (service.EnhancedMetricsService, error) {
	if m.getErr != nil {
		return nil, m.getErr
	}
	if svc, ok := m.services[namespace]; ok {
		return svc, nil
	}
	return nil, errors.New("service not found")
}

func TestNewService(t *testing.T) {
	svc := NewService(&mockConfigProvider{}, &mockMetricsServiceRegistry{})
	require.NotNil(t, svc)
	require.NotNil(t, svc.configProvider)
}

// TestService_GetMetrics covers the delegation path: success, unknown
// namespace in the registry, and an error surfaced by the resolved service.
func TestService_GetMetrics(t *testing.T) {
	ctx := context.Background()
	logger := slog.New(slog.DiscardHandler)
	namespace := "AWS/RDS"
	region := "us-east-1"
	role := model.Role{RoleArn: "arn:aws:iam::123456789012:role/test"}
	resources := []*model.TaggedResource{
		{
			ARN:       "arn:aws:rds:us-east-1:123456789012:db:test",
			Namespace: namespace,
			Region:    region,
		},
	}
	metrics := []*model.EnhancedMetricConfig{
		{Name: "AllocatedStorage"},
	}
	exportedTags := []string{"Name"}
	tests := []struct {
		name                 string
		namespace            string
		registry             MetricsServiceRegistry
		wantErr              bool
		errMsg               string
		wantData             []*model.CloudwatchData
		wantGetMetricsCalled int
	}{
		{
			name:      "successfully get metrics",
			namespace: namespace,
			registry: &mockMetricsServiceRegistry{
				services: map[string]service.EnhancedMetricsService{
					namespace: &mockMetricsService{
						result: []*model.CloudwatchData{
							{
								MetricName:   "AllocatedStorage",
								ResourceName: "arn:aws:rds:us-east-1:123456789012:db:test",
								Namespace:    namespace,
							},
						},
					},
				},
			},
			wantErr: false,
			wantData: []*model.CloudwatchData{
				{
					MetricName:   "AllocatedStorage",
					ResourceName: "arn:aws:rds:us-east-1:123456789012:db:test",
					Namespace:    namespace,
				},
			},
			wantGetMetricsCalled: 1,
		},
		{
			name:      "failure when service not found in registry",
			namespace: namespace,
			registry: &mockMetricsServiceRegistry{
				services: map[string]service.EnhancedMetricsService{},
			},
			wantErr: true,
			errMsg:  "service not found",
		},
		{
			name:      "failure when service GetMetrics returns error",
			namespace: namespace,
			registry: &mockMetricsServiceRegistry{
				services: map[string]service.EnhancedMetricsService{
					namespace: &mockMetricsService{
						err: errors.New("get metric error"),
					},
				},
			},
			wantErr:              true,
			errMsg:               "get metric error",
			wantGetMetricsCalled: 1,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			svc := NewService(
				&mockConfigProvider{},
				tt.registry,
			)
			data, err := svc.GetMetrics(ctx, logger, tt.namespace, resources, metrics, exportedTags, region, role)
			if tt.wantErr {
				require.Error(t, err)
				require.Contains(t, err.Error(), tt.errMsg)
				require.Nil(t, data)
			} else {
				require.NoError(t, err)
				require.Equal(t, tt.wantData, data)
			}
			if tt.wantGetMetricsCalled > 0 {
				mockSvc := tt.registry.(*mockMetricsServiceRegistry).services[tt.namespace].(*mockMetricsService)
				require.Equal(t, tt.wantGetMetricsCalled, mockSvc.getGetMetricsCalled())
			}
		})
	}
}

================================================
FILE: pkg/job/cloudwatchrunner/customnamespace.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cloudwatchrunner

import (
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/listmetrics"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// CustomNamespaceJob adapts a model.CustomNamespaceJob to the runner's Job
// interface.
type CustomNamespaceJob struct {
	Job model.CustomNamespaceJob
}

// Namespace returns the CloudWatch namespace this job scrapes.
func (c CustomNamespaceJob) Namespace() string {
	return c.Job.Namespace
}

// listMetricsParams maps the job config onto ListMetrics processing params.
func (c CustomNamespaceJob) listMetricsParams() listmetrics.ProcessingParams {
	return listmetrics.ProcessingParams{
		Namespace:                 c.Job.Namespace,
		Metrics:                   c.Job.Metrics,
		RecentlyActiveOnly:        c.Job.RecentlyActiveOnly,
		DimensionNameRequirements: c.Job.DimensionNameRequirements,
	}
}

// CustomTags returns the user-configured tags attached to every metric.
func (c CustomNamespaceJob) CustomTags() []model.Tag {
	return c.Job.CustomTags
}

// resourceEnrichment is not yet implemented for custom-namespace jobs.
func (c CustomNamespaceJob) resourceEnrichment() ResourceEnrichment {
	// TODO add implementation in followup
	return nil
}

================================================
FILE: pkg/job/cloudwatchrunner/discovery.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cloudwatchrunner

import (
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/listmetrics"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// DiscoveryJob adapts a model.DiscoveryJob plus its discovered tagged
// resources to the runner's Job interface.
type DiscoveryJob struct {
	Job       model.DiscoveryJob
	Resources []*model.TaggedResource
}

// Namespace returns the CloudWatch namespace this job scrapes.
func (d DiscoveryJob) Namespace() string {
	return d.Job.Namespace
}

// CustomTags returns the user-configured tags attached to every metric.
func (d DiscoveryJob) CustomTags() []model.Tag {
	return d.Job.CustomTags
}

// listMetricsParams maps the job config onto ListMetrics processing params.
func (d DiscoveryJob) listMetricsParams() listmetrics.ProcessingParams {
	return listmetrics.ProcessingParams{
		Namespace:                 d.Job.Namespace,
		Metrics:                   d.Job.Metrics,
		RecentlyActiveOnly:        d.Job.RecentlyActiveOnly,
		DimensionNameRequirements: d.Job.DimensionNameRequirements,
	}
}

// resourceEnrichment is not yet implemented for discovery jobs.
func (d DiscoveryJob) resourceEnrichment() ResourceEnrichment {
	// TODO add implementation in followup
	return nil
}

================================================
FILE: pkg/job/cloudwatchrunner/runner.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cloudwatchrunner

import (
	"log/slog"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/listmetrics"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/resourcemetadata"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// ResourceEnrichment builds a MetricResourceEnricher bound to the given
// logger.
type ResourceEnrichment interface {
	Create(logger *slog.Logger) resourcemetadata.MetricResourceEnricher
}

// Job is the common interface satisfied by DiscoveryJob and
// CustomNamespaceJob; the unexported methods keep implementations inside this
// package.
type Job interface {
	Namespace() string
	CustomTags() []model.Tag
	listMetricsParams() listmetrics.ProcessingParams
	resourceEnrichment() ResourceEnrichment
}

================================================
FILE: pkg/job/custom.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package job

import (
	"context"
	"log/slog"
	"sync"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// runCustomNamespaceJob lists the metrics of a custom-namespace job and runs
// the GetMetricData processor over them. Returns nil when nothing was listed
// or when data processing fails (failures are logged, not propagated).
func runCustomNamespaceJob(
	ctx context.Context,
	logger *slog.Logger,
	job model.CustomNamespaceJob,
	clientCloudwatch cloudwatch.Client,
	gmdProcessor getMetricDataProcessor,
) []*model.CloudwatchData {
	cloudwatchDatas := getMetricDataForQueriesForCustomNamespace(ctx, job, clientCloudwatch, logger)
	if len(cloudwatchDatas) == 0 {
		logger.Debug("No metrics data found")
		return nil
	}

	var err error
	cloudwatchDatas, err = gmdProcessor.Run(ctx, job.Namespace, cloudwatchDatas)
	if err != nil {
		logger.Error("Failed to get metric data", "err", err)
		return nil
	}

	return cloudwatchDatas
}

// getMetricDataForQueriesForCustomNamespace calls ListMetrics concurrently
// (one goroutine per configured metric) and builds one CloudwatchData per
// metric/statistic combination. Results are merged under mux; ListMetrics
// errors are logged per metric and skipped.
func getMetricDataForQueriesForCustomNamespace(
	ctx context.Context,
	customNamespaceJob model.CustomNamespaceJob,
	clientCloudwatch cloudwatch.Client,
	logger *slog.Logger,
) []*model.CloudwatchData {
	mux := &sync.Mutex{}
	var getMetricDatas []*model.CloudwatchData

	var wg sync.WaitGroup
	wg.Add(len(customNamespaceJob.Metrics))

	for _, metric := range customNamespaceJob.Metrics {
		// For every metric of the job get the full list of metrics.
		// This includes, for this metric the possible combinations
		// of dimensions and value of dimensions with data.
		go func(metric *model.MetricConfig) {
			defer wg.Done()
			err := clientCloudwatch.ListMetrics(ctx, customNamespaceJob.Namespace, metric, customNamespaceJob.RecentlyActiveOnly, func(page []*model.Metric) {
				var data []*model.CloudwatchData
				for _, cwMetric := range page {
					// Enforce exact dimension-name requirements when configured.
					if len(customNamespaceJob.DimensionNameRequirements) > 0 && !metricDimensionsMatchNames(cwMetric, customNamespaceJob.DimensionNameRequirements) {
						continue
					}
					// One output entry per requested statistic.
					for _, stat := range metric.Statistics {
						data = append(data, &model.CloudwatchData{
							MetricName:   metric.Name,
							ResourceName: customNamespaceJob.Name,
							Namespace:    customNamespaceJob.Namespace,
							Dimensions:   cwMetric.Dimensions,
							GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{
								Period:    metric.Period,
								Length:    metric.Length,
								Delay:     metric.Delay,
								Statistic: stat,
							},
							MetricMigrationParams: model.MetricMigrationParams{
								NilToZero:              metric.NilToZero,
								AddCloudwatchTimestamp: metric.AddCloudwatchTimestamp,
								ExportAllDataPoints:    metric.ExportAllDataPoints,
							},
							Tags:                      nil,
							GetMetricDataResult:       nil,
							GetMetricStatisticsResult: nil,
						})
					}
				}
				mux.Lock()
				getMetricDatas = append(getMetricDatas, data...)
				mux.Unlock()
			})
			if err != nil {
				logger.Error("Failed to get full metric list", "metric_name", metric.Name, "namespace", customNamespaceJob.Namespace, "err", err)
				return
			}
		}(metric)
	}

	wg.Wait()
	return getMetricDatas
}

================================================
FILE: pkg/job/discovery.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package job

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"strings"
	"sync"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/tagging"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/maxdimassociator"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// resourceAssociator maps a CloudWatch metric to the tagged resource it
// belongs to; the bool reports whether the metric should be skipped.
type resourceAssociator interface {
	AssociateMetricToResource(cwMetric *model.Metric) (*model.TaggedResource, bool)
}

// getMetricDataProcessor executes the GetMetricData pipeline over requests.
type getMetricDataProcessor interface {
	Run(ctx context.Context, namespace string, requests []*model.CloudwatchData) ([]*model.CloudwatchData, error)
}

// enhancedMetricsService is the consumer-side interface for the
// enhancedmetrics package's Service.GetMetrics.
type enhancedMetricsService interface {
	GetMetrics(
		ctx context.Context,
		logger *slog.Logger,
		namespace string,
		resources []*model.TaggedResource,
		metrics []*model.EnhancedMetricConfig,
		exportedTagOnMetrics []string,
		region string,
		role model.Role,
	) ([]*model.CloudwatchData, error)
}

// runDiscoveryJob discovers tagged resources for the job, collects their
// CloudWatch metric data, and optionally appends enhanced metrics. Returns
// the discovered resources together with the collected data; errors are
// logged and degrade the result rather than aborting the scrape.
func runDiscoveryJob(
	ctx context.Context,
	logger *slog.Logger,
	job model.DiscoveryJob,
	region string,
	clientTag tagging.Client,
	clientCloudwatch cloudwatch.Client,
	gmdProcessor getMetricDataProcessor,
	enhancedMetricsService enhancedMetricsService,
	role model.Role,
) ([]*model.TaggedResource, []*model.CloudwatchData) {
	logger.Debug("Get tagged resources")
	resources, err := clientTag.GetResources(ctx, job, region)
	if err != nil {
		if errors.Is(err, tagging.ErrExpectedToFindResources) {
			logger.Error("No tagged resources made it through filtering", "err", err)
		} else {
			logger.Error("Couldn't describe resources", "err", err)
		}
		return nil, nil
	}

	if len(resources) == 0 {
		logger.Debug("No tagged resources", "region", region, "namespace", job.Namespace)
	}

	svc := config.SupportedServices.GetService(job.Namespace)
	metricData := getMetricDataForQueries(ctx, logger, job, svc, clientCloudwatch, resources)
	if len(metricData) > 0 && svc != nil {
		metricData, err = gmdProcessor.Run(ctx, svc.Namespace, metricData)
		if err != nil {
			logger.Error("Failed to get metric data", "err", err)
			// ensure we do not return cw metrics on data processing failure
			metricData = nil
		}
	}

	// Enhanced metrics are only attempted when a service is wired in, the job
	// requests them, and the namespace is a known supported service.
	if enhancedMetricsService == nil || !job.HasEnhancedMetrics() || svc == nil {
		if len(metricData) == 0 {
			logger.Info("No metrics data found")
		}
		return resources, metricData
	}

	logger.Debug("Processing enhanced metrics", "count", len(job.EnhancedMetrics), "namespace", svc.Namespace)
	enhancedMetricData, err := enhancedMetricsService.GetMetrics(
		ctx,
		logger,
		svc.Namespace,
		resources,
		job.EnhancedMetrics,
		job.ExportedTagsOnMetrics,
		region,
		role,
	)
	// On enhanced-metrics failure keep whatever regular metric data we have.
	if err != nil {
		logger.Error("Failed to get enhanced metrics", "err", err)
		return resources, metricData
	}

	metricData = append(metricData, enhancedMetricData...)
	if len(metricData) == 0 {
		logger.Info("No metrics data found")
	}
	return resources, metricData
}

// getMetricDataForQueries lists metrics concurrently (one goroutine per
// configured metric) and associates each listed metric with a discovered
// resource. Results are merged under mux.
//
// NOTE(review): svc is dereferenced below without a nil check, while the
// caller (runDiscoveryJob) only checks svc != nil after this call returns —
// confirm svc can never be nil here.
func getMetricDataForQueries(
	ctx context.Context,
	logger *slog.Logger,
	discoveryJob model.DiscoveryJob,
	svc *config.ServiceConfig,
	clientCloudwatch cloudwatch.Client,
	resources []*model.TaggedResource,
) []*model.CloudwatchData {
	mux := &sync.Mutex{}
	var getMetricDatas []*model.CloudwatchData

	var assoc resourceAssociator
	// NOTE(review): the condition reads svc.DimensionRegexps but the
	// associator is built from discoveryJob.DimensionsRegexps — presumably
	// these hold the same regexps; verify against the config package.
	if len(svc.DimensionRegexps) > 0 && len(resources) > 0 {
		assoc = maxdimassociator.NewAssociator(logger, discoveryJob.DimensionsRegexps, resources)
	} else {
		// If we don't have dimension regex's and resources there's nothing to associate but metrics shouldn't be skipped
		assoc = nopAssociator{}
	}

	var wg sync.WaitGroup
	wg.Add(len(discoveryJob.Metrics))

	// For every metric of the job call the ListMetrics API
	// to fetch the existing combinations of dimensions and
	// value of dimensions with data.
	for _, metric := range discoveryJob.Metrics {
		go func(metric *model.MetricConfig) {
			defer wg.Done()
			err := clientCloudwatch.ListMetrics(ctx, svc.Namespace, metric, discoveryJob.RecentlyActiveOnly, func(page []*model.Metric) {
				data := getFilteredMetricDatas(logger, discoveryJob.Namespace, discoveryJob.ExportedTagsOnMetrics, page, discoveryJob.DimensionNameRequirements, metric, assoc)

				mux.Lock()
				getMetricDatas = append(getMetricDatas, data...)
				mux.Unlock()
			})
			if err != nil {
				logger.Error("Failed to get full metric list", "metric_name", metric.Name, "namespace", svc.Namespace, "err", err)
				return
			}
		}(metric)
	}

	wg.Wait()
	return getMetricDatas
}

// nopAssociator matches nothing and never skips a metric.
type nopAssociator struct{}

func (ns nopAssociator) AssociateMetricToResource(_ *model.Metric) (*model.TaggedResource, bool) {
	return nil, false
}

// getFilteredMetricDatas filters a ListMetrics page by dimension-name
// requirements, associates each metric to a resource (falling back to a
// synthetic "global" resource when there is no match but the metric should
// not be skipped), and emits one CloudwatchData per statistic.
func getFilteredMetricDatas(
	logger *slog.Logger,
	namespace string,
	tagsOnMetrics []string,
	metricsList []*model.Metric,
	dimensionNameList []string,
	m *model.MetricConfig,
	assoc resourceAssociator,
) []*model.CloudwatchData {
	getMetricsData := make([]*model.CloudwatchData, 0, len(metricsList))
	for _, cwMetric := range metricsList {
		if len(dimensionNameList) > 0 && !metricDimensionsMatchNames(cwMetric, dimensionNameList) {
			continue
		}

		matchedResource, skip := assoc.AssociateMetricToResource(cwMetric)
		if skip {
			dimensions := make([]string, 0, len(cwMetric.Dimensions))
			for _, dim := range cwMetric.Dimensions {
				dimensions = append(dimensions, fmt.Sprintf("%s=%s", dim.Name, dim.Value))
			}
			logger.Debug("skipping metric unmatched by associator", "metric", m.Name, "dimensions", strings.Join(dimensions, ","))
			continue
		}

		resource := matchedResource
		if resource == nil {
			resource = &model.TaggedResource{
				ARN:       "global",
				Namespace: namespace,
			}
		}
		metricTags := resource.MetricTags(tagsOnMetrics)
		for _, stat := range m.Statistics {
			getMetricsData = append(getMetricsData, &model.CloudwatchData{
				MetricName:   m.Name,
				ResourceName: resource.ARN,
				Namespace:    namespace,
				Dimensions:   cwMetric.Dimensions,
				GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{
					Period:    m.Period,
					Length:    m.Length,
					Delay:     m.Delay,
					Statistic: stat,
				},
				MetricMigrationParams: model.MetricMigrationParams{
					NilToZero:              m.NilToZero,
					AddCloudwatchTimestamp: m.AddCloudwatchTimestamp,
					ExportAllDataPoints:    m.ExportAllDataPoints,
				},
				Tags:                      metricTags,
				GetMetricDataResult:       nil,
				GetMetricStatisticsResult: nil,
			})
		}
	}
	return getMetricsData
}

// metricDimensionsMatchNames reports whether the metric's dimensions consist
// of exactly the required names (same count, every dimension name required).
func metricDimensionsMatchNames(metric *model.Metric, dimensionNameRequirements []string) bool {
	if len(dimensionNameRequirements) != len(metric.Dimensions) {
		return false
	}
	for _, dimension := range metric.Dimensions {
		foundMatch := false
		for _, dimensionName := range dimensionNameRequirements {
			if dimension.Name == dimensionName {
				foundMatch = true
				break
			}
		}
		if !foundMatch {
			return false
		}
	}
	return true
}

================================================
FILE: pkg/job/discovery_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package job import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/assert" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/maxdimassociator" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) func Test_getFilteredMetricDatas(t *testing.T) { type args struct { region string accountID string namespace string customTags []model.Tag tagsOnMetrics []string dimensionRegexps []model.DimensionsRegexp dimensionNameRequirements []string resources []*model.TaggedResource metricsList []*model.Metric m *model.MetricConfig } tests := []struct { name string args args wantGetMetricsData []model.CloudwatchData }{ { "additional dimension", args{ region: "us-east-1", accountID: "123123123123", namespace: "efs", customTags: nil, tagsOnMetrics: []string{ "Value1", "Value2", }, dimensionRegexps: config.SupportedServices.GetService("AWS/EFS").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{ { ARN: "arn:aws:elasticfilesystem:us-east-1:123123123123:file-system/fs-abc123", Tags: []model.Tag{ { Key: "Tag", Value: "some-Tag", }, }, Namespace: "efs", Region: "us-east-1", }, }, metricsList: []*model.Metric{ { MetricName: "StorageBytes", Dimensions: []model.Dimension{ { Name: "FileSystemId", Value: "fs-abc123", }, { Name: "StorageClass", Value: "Standard", }, }, Namespace: "AWS/EFS", }, }, m: &model.MetricConfig{ Name: "StorageBytes", Statistics: []string{ "Average", }, Period: 60, Length: 600, Delay: 120, NilToZero: false, AddCloudwatchTimestamp: false, }, }, []model.CloudwatchData{ { MetricName: "StorageBytes", Dimensions: []model.Dimension{ { Name: "FileSystemId", Value: "fs-abc123", }, { Name: "StorageClass", Value: "Standard", }, }, ResourceName: "arn:aws:elasticfilesystem:us-east-1:123123123123:file-system/fs-abc123", Namespace: "efs", Tags: []model.Tag{ { Key: "Value1", Value: "", }, { Key: "Value2", Value: "", }, 
}, GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{ Period: 60, Length: 600, Delay: 120, Statistic: "Average", }, MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, }, }, }, { "ec2", args{ region: "us-east-1", accountID: "123123123123", namespace: "ec2", customTags: nil, tagsOnMetrics: []string{ "Value1", "Value2", }, dimensionRegexps: config.SupportedServices.GetService("AWS/EC2").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{ { ARN: "arn:aws:ec2:us-east-1:123123123123:instance/i-12312312312312312", Tags: []model.Tag{ { Key: "Name", Value: "some-Node", }, }, Namespace: "ec2", Region: "us-east-1", }, }, metricsList: []*model.Metric{ { MetricName: "CPUUtilization", Dimensions: []model.Dimension{ { Name: "InstanceId", Value: "i-12312312312312312", }, }, Namespace: "AWS/EC2", }, }, m: &model.MetricConfig{ Name: "CPUUtilization", Statistics: []string{ "Average", }, Period: 60, Length: 600, Delay: 120, NilToZero: false, AddCloudwatchTimestamp: false, }, }, []model.CloudwatchData{ { MetricName: "CPUUtilization", ResourceName: "arn:aws:ec2:us-east-1:123123123123:instance/i-12312312312312312", Namespace: "ec2", Dimensions: []model.Dimension{ { Name: "InstanceId", Value: "i-12312312312312312", }, }, Tags: []model.Tag{ { Key: "Value1", Value: "", }, { Key: "Value2", Value: "", }, }, GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{ Statistic: "Average", Period: 60, Length: 600, Delay: 120, }, MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, }, }, }, { "kafka", args{ region: "us-east-1", accountID: "123123123123", namespace: "kafka", customTags: nil, tagsOnMetrics: []string{ "Value1", "Value2", }, dimensionRegexps: config.SupportedServices.GetService("AWS/Kafka").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{ { ARN: 
"arn:aws:kafka:us-east-1:123123123123:cluster/demo-cluster-1/12312312-1231-1231-1231-123123123123-12", Tags: []model.Tag{ { Key: "Test", Value: "Value", }, }, Namespace: "kafka", Region: "us-east-1", }, }, metricsList: []*model.Metric{ { MetricName: "GlobalTopicCount", Dimensions: []model.Dimension{ { Name: "Cluster Name", Value: "demo-cluster-1", }, }, Namespace: "AWS/Kafka", }, }, m: &model.MetricConfig{ Name: "GlobalTopicCount", Statistics: []string{ "Average", }, Period: 60, Length: 600, Delay: 120, NilToZero: false, AddCloudwatchTimestamp: false, }, }, []model.CloudwatchData{ { MetricName: "GlobalTopicCount", Dimensions: []model.Dimension{ { Name: "Cluster Name", Value: "demo-cluster-1", }, }, ResourceName: "arn:aws:kafka:us-east-1:123123123123:cluster/demo-cluster-1/12312312-1231-1231-1231-123123123123-12", Namespace: "kafka", Tags: []model.Tag{ { Key: "Value1", Value: "", }, { Key: "Value2", Value: "", }, }, GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{ Statistic: "Average", Period: 60, Length: 600, Delay: 120, }, MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, }, }, }, { "alb", args{ region: "us-east-1", accountID: "123123123123", namespace: "alb", customTags: nil, tagsOnMetrics: nil, dimensionRegexps: config.SupportedServices.GetService("AWS/ApplicationELB").ToModelDimensionsRegexp(), dimensionNameRequirements: []string{"LoadBalancer", "TargetGroup"}, resources: []*model.TaggedResource{ { ARN: "arn:aws:elasticloadbalancing:us-east-1:123123123123:loadbalancer/app/some-ALB/0123456789012345", Tags: []model.Tag{ { Key: "Name", Value: "some-ALB", }, }, Namespace: "alb", Region: "us-east-1", }, }, metricsList: []*model.Metric{ { MetricName: "RequestCount", Dimensions: []model.Dimension{ { Name: "LoadBalancer", Value: "app/some-ALB/0123456789012345", }, { Name: "TargetGroup", Value: "targetgroup/some-ALB/9999666677773333", }, { Name: "AvailabilityZone", Value: "us-east-1", }, }, 
Namespace: "AWS/ApplicationELB", }, { MetricName: "RequestCount", Dimensions: []model.Dimension{ { Name: "LoadBalancer", Value: "app/some-ALB/0123456789012345", }, { Name: "TargetGroup", Value: "targetgroup/some-ALB/9999666677773333", }, }, Namespace: "AWS/ApplicationELB", }, { MetricName: "RequestCount", Dimensions: []model.Dimension{ { Name: "LoadBalancer", Value: "app/some-ALB/0123456789012345", }, { Name: "AvailabilityZone", Value: "us-east-1", }, }, Namespace: "AWS/ApplicationELB", }, { MetricName: "RequestCount", Dimensions: []model.Dimension{ { Name: "LoadBalancer", Value: "app/some-ALB/0123456789012345", }, }, Namespace: "AWS/ApplicationELB", }, }, m: &model.MetricConfig{ Name: "RequestCount", Statistics: []string{ "Sum", }, Period: 60, Length: 600, Delay: 120, NilToZero: false, AddCloudwatchTimestamp: false, }, }, []model.CloudwatchData{ { MetricName: "RequestCount", Dimensions: []model.Dimension{ { Name: "LoadBalancer", Value: "app/some-ALB/0123456789012345", }, { Name: "TargetGroup", Value: "targetgroup/some-ALB/9999666677773333", }, }, ResourceName: "arn:aws:elasticloadbalancing:us-east-1:123123123123:loadbalancer/app/some-ALB/0123456789012345", Namespace: "alb", Tags: []model.Tag{}, GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{ Statistic: "Sum", Period: 60, Length: 600, Delay: 120, }, MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, }, }, }, { "sagemaker: ARN contains uppercase letters", args{ region: "us-east-1", accountID: "123123123123", namespace: "AWS/SageMaker", dimensionRegexps: config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{ { ARN: "arn:aws:sagemaker:us-east-1:123123123123:endpoint/someEndpoint", Tags: []model.Tag{ { Key: "Environment", Value: "prod", }, }, Namespace: "sagemaker", Region: "us-east-1", }, }, metricsList: []*model.Metric{ { MetricName: "Invocation4XXErrors", Dimensions: 
[]model.Dimension{ {Name: "EndpointName", Value: "someEndpoint"}, {Name: "VariantName", Value: "AllTraffic"}, }, Namespace: "AWS/SageMaker", }, }, m: &model.MetricConfig{ Name: "Invocation4XXErrors", Statistics: []string{ "Sum", }, Period: 60, Length: 600, Delay: 120, }, }, []model.CloudwatchData{ { MetricName: "Invocation4XXErrors", Dimensions: []model.Dimension{ {Name: "EndpointName", Value: "someEndpoint"}, {Name: "VariantName", Value: "AllTraffic"}, }, ResourceName: "arn:aws:sagemaker:us-east-1:123123123123:endpoint/someEndpoint", Namespace: "AWS/SageMaker", GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{ Statistic: "Sum", Period: 60, Length: 600, Delay: 120, }, MetricMigrationParams: model.MetricMigrationParams{}, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { assoc := maxdimassociator.NewAssociator(promslog.NewNopLogger(), tt.args.dimensionRegexps, tt.args.resources) metricDatas := getFilteredMetricDatas(promslog.NewNopLogger(), tt.args.namespace, tt.args.tagsOnMetrics, tt.args.metricsList, tt.args.dimensionNameRequirements, tt.args.m, assoc) if len(metricDatas) != len(tt.wantGetMetricsData) { t.Errorf("len(getFilteredMetricDatas()) = %v, want %v", len(metricDatas), len(tt.wantGetMetricsData)) } for i, got := range metricDatas { want := tt.wantGetMetricsData[i] assert.Equal(t, want.MetricName, got.MetricName) assert.Equal(t, want.ResourceName, got.ResourceName) assert.Equal(t, want.Namespace, got.Namespace) assert.ElementsMatch(t, want.Dimensions, got.Dimensions) assert.ElementsMatch(t, want.Tags, got.Tags) assert.Equal(t, want.MetricMigrationParams, got.MetricMigrationParams) assert.Equal(t, want.GetMetricDataProcessingParams.Statistic, got.GetMetricDataProcessingParams.Statistic) assert.Equal(t, want.GetMetricDataProcessingParams.Length, got.GetMetricDataProcessingParams.Length) assert.Equal(t, want.GetMetricDataProcessingParams.Period, got.GetMetricDataProcessingParams.Period) assert.Equal(t, 
want.GetMetricDataProcessingParams.Delay, got.GetMetricDataProcessingParams.Delay)
				assert.Nil(t, got.GetMetricDataResult)
				assert.Nil(t, got.GetMetricStatisticsResult)
			}
		})
	}
}


================================================
FILE: pkg/job/getmetricdata/compact.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package getmetricdata

// compact iterates over a slice of pointers and deletes
// unwanted elements as per the keep function return value.
// The slice is modified in-place without copying elements.
// NOTE: keep is invoked once per element, so a nil keep func is only safe
// for an empty input slice.
func compact[T any](input []*T, keep func(el *T) bool) []*T {
	// move all elements that must be kept at the beginning
	i := 0
	for _, d := range input {
		if keep(d) {
			input[i] = d
			i++
		}
	}

	// nil out any left element
	// (clearing the tail lets the GC collect the dropped values even though
	// the backing array stays alive)
	for j := i; j < len(input); j++ {
		input[j] = nil
	}

	// set new slice length to allow released elements to be collected
	return input[:i]
}


================================================
FILE: pkg/job/getmetricdata/compact_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package getmetricdata

import (
	"testing"

	"github.com/stretchr/testify/require"
)

// TestCompact covers empty input, full retention, full removal, and
// single-element retention at both ends of a two-element slice.
func TestCompact(t *testing.T) {
	type data struct {
		n int
	}
	type testCase struct {
		name        string
		input       []*data
		keepFunc    func(el *data) bool
		expectedRes []*data
	}
	testCases := []testCase{
		{
			name:        "empty",
			input:       []*data{},
			keepFunc:    nil,
			expectedRes: []*data{},
		},
		{
			name:        "one element input, one element result",
			input:       []*data{{n: 0}},
			keepFunc:    func(_ *data) bool { return true },
			expectedRes: []*data{{n: 0}},
		},
		{
			name:        "one element input, empty result",
			input:       []*data{{n: 0}},
			keepFunc:    func(_ *data) bool { return false },
			expectedRes: []*data{},
		},
		{
			name:        "two elements input, two elements result",
			input:       []*data{{n: 0}, {n: 1}},
			keepFunc:    func(_ *data) bool { return true },
			expectedRes: []*data{{n: 0}, {n: 1}},
		},
		{
			name:        "two elements input, one element result (first)",
			input:       []*data{{n: 0}, {n: 1}},
			keepFunc:    func(el *data) bool { return el.n == 1 },
			expectedRes: []*data{{n: 1}},
		},
		{
			name:        "two elements input, one element result (last)",
			input:       []*data{{n: 0}, {n: 1}},
			keepFunc:    func(el *data) bool { return el.n == 0 },
			expectedRes: []*data{{n: 0}},
		},
		{
			name:        "two elements input, empty result",
			input:       []*data{{n: 0}, {n: 1}},
			keepFunc:    func(_ *data) bool { return false },
			expectedRes: []*data{},
		},
		{
			name:        "three elements input, empty result",
			input:       []*data{{n: 0}, {n: 1}, {n: 2}},
			keepFunc:    func(el *data) bool { return el.n < 0 },
			expectedRes: []*data{},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			res := compact(tc.input, tc.keepFunc)
require.Equal(t, tc.expectedRes, res)
		})
	}
}


================================================
FILE: pkg/job/getmetricdata/iterator.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package getmetricdata

import (
	"math"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// iteratorFactory builds the cheapest Iterator capable of batching the
// given data, bounded by metricsPerQuery entries per GetMetricData call.
type iteratorFactory struct {
	metricsPerQuery int
}

// Build inspects the processing params of the data: when every entry shares a
// single period and delay it returns the simple slicing iterator; otherwise it
// returns the iterator that groups entries by (period, delay) first.
func (b iteratorFactory) Build(data []*model.CloudwatchData) Iterator {
	if len(data) == 0 {
		return nothingToIterate{}
	}

	batchSizesByPeriodAndDelay, longestLengthForBatch := mapProcessingParams(data)

	if len(batchSizesByPeriodAndDelay) == 1 {
		// Only 1 period use value from data
		period := data[0].GetMetricDataProcessingParams.Period
		if len(batchSizesByPeriodAndDelay[period]) == 1 {
			// Only 1 period with 1 delay use value from data and do simple batching
			delay := data[0].GetMetricDataProcessingParams.Delay
			params := StartAndEndTimeParams{
				Period: period,
				Length: longestLengthForBatch[period][delay],
				Delay:  delay,
			}
			return NewSimpleBatchIterator(b.metricsPerQuery, data, params)
		}
	}

	return NewVaryingTimeParameterBatchingIterator(b.metricsPerQuery, data, batchSizesByPeriodAndDelay, longestLengthForBatch)
}

type (
	// periodDelayToBatchSize maps period -> delay -> number of entries.
	periodDelayToBatchSize = map[int64]map[int64]int
	// periodDelayToLongestLength maps period -> delay -> longest Length seen.
	periodDelayToLongestLength = map[int64]map[int64]int64
)

// mapProcessingParams loops through all the incoming CloudwatchData to pre-compute important information
// to be used when initializing the batching iterator
// Knowing the period + delay combinations with their batch sizes will allow us to pre-allocate the batch slices that could
// be very large ahead of time without looping again later
// Similarly we need to know the largest length for a period + delay combination later so gathering it while we are already
// iterating will save some cycles later
func mapProcessingParams(data []*model.CloudwatchData) (periodDelayToBatchSize, periodDelayToLongestLength) {
	batchSizesByPeriodAndDelay := periodDelayToBatchSize{}
	longestLengthForBatch := periodDelayToLongestLength{}
	for _, datum := range data {
		period := datum.GetMetricDataProcessingParams.Period
		delay := datum.GetMetricDataProcessingParams.Delay
		// Initialize the inner map the first time a period is seen.
		if _, exists := batchSizesByPeriodAndDelay[period]; !exists {
			batchSizesByPeriodAndDelay[period] = map[int64]int{delay: 0}
			longestLengthForBatch[period] = map[int64]int64{delay: 0}
		}
		if _, exists := batchSizesByPeriodAndDelay[period][delay]; !exists {
			batchSizesByPeriodAndDelay[period][delay] = 0
			longestLengthForBatch[period][delay] = 0
		}
		batchSizesByPeriodAndDelay[period][delay]++
		if longestLengthForBatch[period][delay] < datum.GetMetricDataProcessingParams.Length {
			longestLengthForBatch[period][delay] = datum.GetMetricDataProcessingParams.Length
		}
	}

	return batchSizesByPeriodAndDelay, longestLengthForBatch
}

// nothingToIterate is the empty Iterator: HasMore is always false and Next
// returns zero values.
type nothingToIterate struct{}

func (n nothingToIterate) Next() ([]*model.CloudwatchData, StartAndEndTimeParams) {
	return nil, StartAndEndTimeParams{}
}

func (n nothingToIterate) HasMore() bool {
	return false
}

// simpleBatchingIterator yields fixed-size windows of the underlying slice
// (no copying); every batch shares the same StartAndEndTimeParams.
type simpleBatchingIterator struct {
	size            int // total number of batches
	currentBatch    int // index of the next batch to return
	data            []*model.CloudwatchData
	entriesPerBatch int
	batchParams     StartAndEndTimeParams
}

func (s *simpleBatchingIterator) Next() ([]*model.CloudwatchData, StartAndEndTimeParams) {
	// We are out of data return defaults
	if s.currentBatch >= s.size {
		return nil, StartAndEndTimeParams{}
	}

	startingIndex := s.currentBatch * s.entriesPerBatch
	endingIndex := startingIndex + s.entriesPerBatch
	// Clamp the final (possibly short) batch to the slice length.
	if endingIndex > len(s.data) {
		endingIndex = len(s.data)
	}
	// TODO are we technically doing this https://go.dev/wiki/SliceTricks#batching-with-minimal-allocation and if not
	// would it change allocations to do this ahead of time?
	result := s.data[startingIndex:endingIndex]
	s.currentBatch++

	return result, s.batchParams
}

func (s *simpleBatchingIterator) HasMore() bool {
	return s.currentBatch < s.size
}

// NewSimpleBatchIterator returns an iterator which slices the data in place based on the metricsPerQuery.
func NewSimpleBatchIterator(metricsPerQuery int, data []*model.CloudwatchData, batchParams StartAndEndTimeParams) Iterator {
	return &simpleBatchingIterator{
		size:            int(math.Ceil(float64(len(data)) / float64(metricsPerQuery))),
		batchParams:     batchParams,
		data:            data,
		entriesPerBatch: metricsPerQuery,
	}
}

// timeParameterBatchingIterator chains one simpleBatchingIterator per
// (period, delay) group, draining `current` before popping the next from
// `remaining`.
type timeParameterBatchingIterator struct {
	current   Iterator
	remaining []Iterator
}

func (t *timeParameterBatchingIterator) Next() ([]*model.CloudwatchData, StartAndEndTimeParams) {
	batch, params := t.current.Next()

	// Doing this before returning from Next drastically simplifies HasMore because it can depend on
	// t.current.HasMore() being accurate.
	if !t.current.HasMore() {
		// Current iterator is out and there's none left, set current to nothingToIterate
		if len(t.remaining) == 0 {
			t.remaining = nil
			t.current = nothingToIterate{}
		} else {
			// Pop from https://go.dev/wiki/SliceTricks
			next, remaining := t.remaining[len(t.remaining)-1], t.remaining[:len(t.remaining)-1]
			t.current = next
			t.remaining = remaining
		}
	}

	return batch, params
}

func (t *timeParameterBatchingIterator) HasMore() bool {
	return t.current.HasMore()
}

// NewVaryingTimeParameterBatchingIterator groups the data by (period, delay)
// into pre-sized slices (using the counts from mapProcessingParams), builds a
// simple iterator per group with that group's longest Length, and chains them.
func NewVaryingTimeParameterBatchingIterator(
	metricsPerQuery int,
	data []*model.CloudwatchData,
	batchSizes periodDelayToBatchSize,
	longestLengthForBatch periodDelayToLongestLength,
) Iterator {
	batches := make(map[int64]map[int64][]*model.CloudwatchData, len(batchSizes))
	numberOfIterators := 0
	// Pre-allocate batch slices
	for period, delays := range batchSizes {
		batches[period] = make(map[int64][]*model.CloudwatchData, len(delays))
		for delay, batchSize := range delays {
			numberOfIterators++
			batches[period][delay] = make([]*model.CloudwatchData, 0, batchSize)
		}
	}

	// Fill the batches
	for _, datum := range data {
		params := datum.GetMetricDataProcessingParams
		batch := batches[params.Period][params.Delay]
		batches[params.Period][params.Delay] = append(batch, datum)
	}

	var firstIterator Iterator
	iterators := make([]Iterator, 0, numberOfIterators-1)
	// We are ranging a map, and we won't have an index to mark the first iterator
	isFirst := true
	for period, delays := range batches {
		for delay, batch := range delays {
			batchParams := StartAndEndTimeParams{
				Period: period,
				Delay:  delay,
			}
			// Make sure to set the length to the longest length for the batch
			batchParams.Length = longestLengthForBatch[period][delay]
			iterator := NewSimpleBatchIterator(metricsPerQuery, batch, batchParams)
			if isFirst {
				firstIterator = iterator
				isFirst = false
			} else {
				iterators = append(iterators, iterator)
			}
		}
	}

	return &timeParameterBatchingIterator{
		current:   firstIterator,
		remaining: iterators,
	}
}
================================================ FILE: pkg/job/getmetricdata/iterator_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package getmetricdata import ( "math/rand" "strconv" "testing" "github.com/stretchr/testify/assert" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) func TestIteratorFactory_Build(t *testing.T) { tests := []struct { name string input []*model.CloudwatchData expectedIterator Iterator }{ { name: "empty returns nothing to iterator", input: []*model.CloudwatchData{}, expectedIterator: nothingToIterate{}, }, { name: "input with consistent period and delay returns simple batching", input: []*model.CloudwatchData{ {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 100}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 100}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 100}}, }, expectedIterator: &simpleBatchingIterator{}, }, { name: "input with inconsistent period returns time param batching", input: []*model.CloudwatchData{ {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 100}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 11, Delay: 100}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 12, Delay: 100}}, 
}, expectedIterator: &timeParameterBatchingIterator{}, }, { name: "input with inconsistent delay returns time param batching", input: []*model.CloudwatchData{ {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 100}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 101}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 102}}, }, expectedIterator: &timeParameterBatchingIterator{}, }, { name: "input with inconsistent period and delay returns time param batching", input: []*model.CloudwatchData{ {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 10, Delay: 100}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 11, Delay: 101}}, {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 12, Delay: 102}}, }, expectedIterator: &timeParameterBatchingIterator{}, }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { factory := iteratorFactory{100} iterator := factory.Build(tc.input) assert.IsType(t, tc.expectedIterator, iterator) }) } } func TestSimpleBatchingIterator_SetsLengthAndDelay(t *testing.T) { data := []*model.CloudwatchData{ {GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Period: 101, Delay: 100}}, } params := StartAndEndTimeParams{ Period: 102, Length: 101, Delay: 100, } iterator := NewSimpleBatchIterator(1, data, params) _, out := iterator.Next() assert.Equal(t, params, out) } func TestSimpleBatchingIterator_IterateFlow(t *testing.T) { tests := []struct { name string metricsPerQuery int lengthOfCloudwatchData int expectedNumberOfCallsToNext int }{ { name: "1 per batch", metricsPerQuery: 1, lengthOfCloudwatchData: 10, expectedNumberOfCallsToNext: 10, }, { name: "divisible batches and requests", metricsPerQuery: 5, lengthOfCloudwatchData: 100, expectedNumberOfCallsToNext: 20, }, { name: "indivisible batches and requests", metricsPerQuery: 5, 
lengthOfCloudwatchData: 94, expectedNumberOfCallsToNext: 19, }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { data := make([]*model.CloudwatchData, 0, tc.lengthOfCloudwatchData) for i := 0; i < tc.lengthOfCloudwatchData; i++ { data = append(data, getSampleMetricDatas(strconv.Itoa(i))) } params := StartAndEndTimeParams{ Period: data[0].GetMetricDataProcessingParams.Period, Length: data[0].GetMetricDataProcessingParams.Length, Delay: data[0].GetMetricDataProcessingParams.Delay, } iterator := NewSimpleBatchIterator(tc.metricsPerQuery, data, params) outputData := make([]*model.CloudwatchData, 0, len(data)) numberOfCallsToNext := 0 for iterator.HasMore() { numberOfCallsToNext++ batch, _ := iterator.Next() outputData = append(outputData, batch...) } assert.ElementsMatch(t, data, outputData) assert.Equal(t, tc.expectedNumberOfCallsToNext, numberOfCallsToNext) }) } } func TestVaryingTimeParameterBatchingIterator_IterateFlow(t *testing.T) { tests := []struct { name string metricsPerQuery int lengthOfCloudwatchDataByStartAndEndTimeParams map[StartAndEndTimeParams]int expectedBatchesByStartAndEndTimeParams map[StartAndEndTimeParams]int }{ { name: "1 per batch - two time parameters", metricsPerQuery: 1, lengthOfCloudwatchDataByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 10, {Period: 20, Length: 20, Delay: 20}: 10, }, expectedBatchesByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 10, {Period: 20, Length: 20, Delay: 20}: 10, }, }, { name: "1 per batch - uses max length for available period + delay", metricsPerQuery: 1, lengthOfCloudwatchDataByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 10, {Period: 10, Length: 30, Delay: 10}: 10, {Period: 20, Length: 20, Delay: 20}: 10, {Period: 20, Length: 40, Delay: 20}: 10, }, expectedBatchesByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 30, Delay: 10}: 20, 
{Period: 20, Length: 40, Delay: 20}: 20, }, }, { name: "divisible batches - two time parameters", metricsPerQuery: 5, lengthOfCloudwatchDataByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 100, {Period: 20, Length: 20, Delay: 20}: 100, }, expectedBatchesByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 20, {Period: 20, Length: 20, Delay: 20}: 20, }, }, { name: "divisible batches - uses max length for available period + delay", metricsPerQuery: 5, lengthOfCloudwatchDataByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 100, {Period: 10, Length: 30, Delay: 10}: 100, {Period: 20, Length: 20, Delay: 20}: 100, {Period: 20, Length: 40, Delay: 20}: 100, }, expectedBatchesByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 30, Delay: 10}: 40, {Period: 20, Length: 40, Delay: 20}: 40, }, }, { name: "indivisible batches - two time parameters", metricsPerQuery: 5, lengthOfCloudwatchDataByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 94, {Period: 20, Length: 20, Delay: 20}: 94, }, expectedBatchesByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 19, {Period: 20, Length: 20, Delay: 20}: 19, }, }, { name: "indivisible batches - uses max length for available period + delay", metricsPerQuery: 5, lengthOfCloudwatchDataByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 10, Delay: 10}: 94, {Period: 10, Length: 30, Delay: 10}: 94, {Period: 20, Length: 20, Delay: 20}: 94, {Period: 20, Length: 40, Delay: 20}: 94, }, expectedBatchesByStartAndEndTimeParams: map[StartAndEndTimeParams]int{ {Period: 10, Length: 30, Delay: 10}: 38, {Period: 20, Length: 40, Delay: 20}: 38, }, }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { data := []*model.CloudwatchData{} for params, lengthOfCloudwatchData := range 
tc.lengthOfCloudwatchDataByStartAndEndTimeParams { for i := 0; i < lengthOfCloudwatchData; i++ { entry := getSampleMetricDatas(strconv.Itoa(rand.Int())) entry.GetMetricDataProcessingParams.Length = params.Length entry.GetMetricDataProcessingParams.Delay = params.Delay entry.GetMetricDataProcessingParams.Period = params.Period data = append(data, entry) } } iterator := iteratorFactory{metricsPerQuery: tc.metricsPerQuery}.Build(data) outputData := make([]*model.CloudwatchData, 0, len(data)) numberOfBatchesByStartAndEndTimeParams := map[StartAndEndTimeParams]int{} for iterator.HasMore() { batch, params := iterator.Next() numberOfBatchesByStartAndEndTimeParams[params]++ outputData = append(outputData, batch...) } assert.ElementsMatch(t, data, outputData) assert.Len(t, numberOfBatchesByStartAndEndTimeParams, len(tc.expectedBatchesByStartAndEndTimeParams)) for params, count := range tc.expectedBatchesByStartAndEndTimeParams { actualCount, ok := numberOfBatchesByStartAndEndTimeParams[params] assert.True(t, ok, "output batches was missing expected batches of start and endtime params %+v", params) assert.Equal(t, count, actualCount, "%+v had an incorrect batch count", params) } }) } } ================================================ FILE: pkg/job/getmetricdata/processor.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package getmetricdata

import (
	"context"
	"fmt"
	"log/slog"
	"strconv"
	"strings"
	"time"

	"golang.org/x/sync/errgroup"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Client is the subset of the CloudWatch client used by the Processor.
type Client interface {
	GetMetricData(ctx context.Context, getMetricData []*model.CloudwatchData, namespace string, startTime time.Time, endTime time.Time) []cloudwatch.MetricDataResult
}

type IteratorFactory interface {
	// Build returns an ideal batch iterator based on the provided CloudwatchData
	Build(requests []*model.CloudwatchData) Iterator
}

type Iterator interface {
	// Next returns the next batch of CloudWatch data be used when calling GetMetricData and the start + end time for
	// the GetMetricData call
	// If called when there are no more batches default values will be returned
	Next() ([]*model.CloudwatchData, StartAndEndTimeParams)

	// HasMore returns true if there are more batches to iterate otherwise false. Should be used in a loop
	// to govern calls to Next()
	HasMore() bool
}

// StartAndEndTimeParams carries the window parameters (in seconds) shared by
// every request in one GetMetricData batch.
type StartAndEndTimeParams struct {
	Period int64
	Length int64
	Delay  int64
}

// Processor batches CloudwatchData requests, runs GetMetricData calls
// concurrently, and maps the results back onto the requests.
type Processor struct {
	client           Client
	concurrency      int
	windowCalculator MetricWindowCalculator
	logger           *slog.Logger
	factory          IteratorFactory
}

// NewDefaultProcessor builds a Processor with the wall-clock window calculator
// and the default batching iterator factory.
func NewDefaultProcessor(logger *slog.Logger, client Client, metricsPerQuery int, concurrency int) Processor {
	return NewProcessor(logger, client, concurrency, MetricWindowCalculator{clock: TimeClock{}}, &iteratorFactory{metricsPerQuery: metricsPerQuery})
}

// NewProcessor builds a Processor from explicit collaborators; used directly
// by tests and by NewDefaultProcessor.
func NewProcessor(logger *slog.Logger, client Client, concurrency int, windowCalculator MetricWindowCalculator, factory IteratorFactory) Processor {
	return Processor{
		logger:           logger,
		client:           client,
		concurrency:      concurrency,
		windowCalculator: windowCalculator,
		factory:          factory,
	}
}

// Run issues GetMetricData calls for every batch produced by the iterator
// (bounded by p.concurrency goroutines), fills GetMetricDataResult on each
// matched request, and returns only the requests that received a result.
func (p Processor) Run(ctx context.Context, namespace string, requests []*model.CloudwatchData) ([]*model.CloudwatchData, error) {
	if len(requests) == 0 {
		return requests, nil
	}

	g, gCtx := errgroup.WithContext(ctx)
	g.SetLimit(p.concurrency)

	iterator := p.factory.Build(requests)
	for iterator.HasMore() {
		batch, batchParams := iterator.Next()
		g.Go(func() error {
			batch = addQueryIDsToBatch(batch)
			startTime, endTime := p.windowCalculator.Calculate(toSecondDuration(batchParams.Period), toSecondDuration(batchParams.Length), toSecondDuration(batchParams.Delay))
			p.logger.Debug("GetMetricData Window", "start_time", startTime.Format(TimeFormat), "end_time", endTime.Format(TimeFormat))

			data := p.client.GetMetricData(gCtx, batch, namespace, startTime, endTime)
			if data != nil {
				mapResultsToBatch(p.logger, data, batch)
			} else {
				p.logger.Warn("GetMetricData partition empty result", "start", startTime, "end", endTime)
			}

			return nil
		})
	}

	if err := g.Wait(); err != nil {
		return nil, fmt.Errorf("GetMetricData work group error: %w", err)
	}

	// Remove unprocessed/unknown elements in place, if any. Since getMetricDatas
	// is a slice of pointers, the compaction can be easily done in-place.
	requests = compact(requests, func(m *model.CloudwatchData) bool {
		return m.GetMetricDataResult != nil
	})

	return requests, nil
}

// addQueryIDsToBatch assigns each batch entry a positional query ID ("id_<i>")
// so results can be mapped back to the entry by index.
func addQueryIDsToBatch(batch []*model.CloudwatchData) []*model.CloudwatchData {
	for i, entry := range batch {
		entry.GetMetricDataProcessingParams.QueryID = indexToQueryID(i)
	}
	return batch
}

// mapResultsToBatch copies each MetricDataResult onto the batch entry its
// query ID points at. Duplicate results for the same ID keep the first one.
// Unparseable or out-of-range IDs are logged and skipped rather than
// panicking on the batch index.
func mapResultsToBatch(logger *slog.Logger, results []cloudwatch.MetricDataResult, batch []*model.CloudwatchData) {
	for _, entry := range results {
		id, err := queryIDToIndex(entry.ID)
		if err != nil {
			// Fix: log the raw ID string; the parsed int is meaningless here.
			logger.Warn("GetMetricData returned unknown Query ID", "err", err, "query_id", entry.ID)
			continue
		}
		// Fix: guard the index — a well-formed but out-of-range ID from the
		// API would otherwise panic the whole scrape.
		if id < 0 || id >= len(batch) {
			logger.Warn("GetMetricData returned out of range Query ID", "query_id", entry.ID, "batch_size", len(batch))
			continue
		}
		if batch[id].GetMetricDataResult == nil {
			cloudwatchData := batch[id]
			mappedDataPoints := make([]model.DataPoint, 0, len(entry.DataPoints))
			for i := 0; i < len(entry.DataPoints); i++ {
				mappedDataPoints = append(mappedDataPoints, model.DataPoint{Value: entry.DataPoints[i].Value, Timestamp: entry.DataPoints[i].Timestamp})
			}
			cloudwatchData.GetMetricDataResult = &model.GetMetricDataResult{
				Statistic:  cloudwatchData.GetMetricDataProcessingParams.Statistic,
				DataPoints: mappedDataPoints,
			}

			// All GetMetricData processing is done clear the params
			cloudwatchData.GetMetricDataProcessingParams = nil
		}
	}
}

// indexToQueryID renders a batch index as a query ID ("id_<i>").
// strconv avoids the fmt boxing/reflection cost on this hot path.
func indexToQueryID(i int) string {
	return "id_" + strconv.Itoa(i)
}

// queryIDToIndex parses a query ID produced by indexToQueryID back into its
// batch index. Fix: the "id_" prefix is now mandatory — previously a bare
// numeric ID (no prefix) was silently accepted via TrimPrefix.
func queryIDToIndex(queryID string) (int, error) {
	noID, found := strings.CutPrefix(queryID, "id_")
	if !found {
		return 0, fmt.Errorf("query ID %q is missing the id_ prefix", queryID)
	}
	return strconv.Atoi(noID)
}

// toSecondDuration converts a second count into a time.Duration.
func toSecondDuration(i int64) time.Duration {
	return time.Duration(i) * time.Second
}


================================================
FILE: pkg/job/getmetricdata/processor_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package getmetricdata

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// cloudwatchDataInput is a trimmed-down test input; ToCloudwatchData expands
// it into a full model.CloudwatchData.
type cloudwatchDataInput struct {
	MetricName                    string
	GetMetricDataProcessingParams *model.GetMetricDataProcessingParams
}

// cloudwatchDataOutput is the subset of fields the tests assert on.
type cloudwatchDataOutput struct {
	MetricName string
	*model.GetMetricDataResult
}

// metricDataResultForMetric pairs a canned API result with the metric name it
// should be returned for.
type metricDataResultForMetric struct {
	MetricName string
	result     cloudwatch.MetricDataResult
}

// testClient is a stub cloudwatch client: either a canned per-metric result
// table or an arbitrary callback.
type testClient struct {
	GetMetricDataFunc             func(ctx context.Context, getMetricData []*model.CloudwatchData, namespace string, startTime time.Time, endTime time.Time) []cloudwatch.MetricDataResult
	GetMetricDataResultForMetrics []metricDataResultForMetric
}

func (t testClient) GetMetricData(ctx context.Context, getMetricData []*model.CloudwatchData, namespace string, startTime time.Time, endTime time.Time) []cloudwatch.MetricDataResult {
	// When canned results are configured, emit one result per matching metric
	// name, copying the request's QueryID so the processor can map it back.
	// Note a metric name appearing twice in the table yields duplicate results.
	if t.GetMetricDataResultForMetrics != nil {
		var result []cloudwatch.MetricDataResult
		for _, datum := range getMetricData {
			for _, response := range t.GetMetricDataResultForMetrics {
				if datum.MetricName == response.MetricName {
					response.result.ID = datum.GetMetricDataProcessingParams.QueryID
					result = append(result, response.result)
				}
			}
		}
		return result
	}
	// Otherwise delegate to the provided callback.
	return t.GetMetricDataFunc(ctx, getMetricData, namespace, startTime, endTime)
}

func TestProcessor_Run(t
*testing.T) {
	// Table-driven test: each case feeds canned GetMetricData results through
	// the processor and asserts on the mapped output.
	now := time.Now()
	tests := []struct {
		name                       string
		requests                   []*cloudwatchDataInput
		metricDataResultForMetrics []metricDataResultForMetric
		want                       []cloudwatchDataOutput
		metricsPerBatch            int
	}{
		{
			name: "successfully maps input to output when GetMetricData returns data",
			requests: []*cloudwatchDataInput{
				{MetricName: "metric-1", GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Average"}},
			},
			metricDataResultForMetrics: []metricDataResultForMetric{
				{MetricName: "metric-1", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(1000), Timestamp: now}}}},
			},
			want: []cloudwatchDataOutput{
				{MetricName: "metric-1", GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1000), Timestamp: now}}}},
			},
		},
		{
			// Two canned results share one metric name; only the first is kept.
			name: "handles duplicate results",
			requests: []*cloudwatchDataInput{
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Min"}, MetricName: "MetricOne"},
			},
			metricDataResultForMetrics: []metricDataResultForMetric{
				{MetricName: "MetricOne", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(5), Timestamp: time.Date(2023, time.June, 7, 1, 9, 8, 0, time.UTC)}}}},
				{MetricName: "MetricOne", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(15), Timestamp: time.Date(2023, time.June, 7, 2, 9, 8, 0, time.UTC)}}}},
			},
			want: []cloudwatchDataOutput{
				{MetricName: "MetricOne", GetMetricDataResult: &model.GetMetricDataResult{
					Statistic:  "Min",
					DataPoints: []model.DataPoint{{Value: aws.Float64(5), Timestamp: time.Date(2023, time.June, 7, 1, 9, 8, 0, time.UTC)}},
				}},
			},
		},
		{
			// metric-2 gets no result, so Run's compaction drops it.
			name: "does not return a request when QueryID is not in MetricDataResult",
			requests: []*cloudwatchDataInput{
				{MetricName: "metric-1", GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Average"}},
				{MetricName: "metric-2", GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Average"}},
			},
			metricDataResultForMetrics: []metricDataResultForMetric{
				{MetricName: "metric-1", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(1000), Timestamp: now}}}},
			},
			want: []cloudwatchDataOutput{
				{MetricName: "metric-1", GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1000), Timestamp: now}}}},
			},
		},
		{
			// A result with no data points yields an empty (non-nil) slice.
			name: "maps nil metric dataPoints",
			requests: []*cloudwatchDataInput{
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Min"}, MetricName: "MetricOne"},
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Max"}, MetricName: "MetricTwo"},
			},
			metricDataResultForMetrics: []metricDataResultForMetric{
				{MetricName: "MetricOne", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(5), Timestamp: time.Date(2023, time.June, 7, 1, 9, 8, 0, time.UTC)}}}},
				{MetricName: "MetricTwo"},
			},
			want: []cloudwatchDataOutput{
				{
					MetricName: "MetricOne",
					GetMetricDataResult: &model.GetMetricDataResult{
						Statistic:  "Min",
						DataPoints: []model.DataPoint{{Value: aws.Float64(5), Timestamp: time.Date(2023, time.June, 7, 1, 9, 8, 0, time.UTC)}},
					},
				},
				{
					MetricName: "MetricTwo",
					GetMetricDataResult: &model.GetMetricDataResult{
						Statistic:  "Max",
						DataPoints: []model.DataPoint{},
					},
				},
			},
		},
		{
			// metricsPerBatch=1 forces one batch per metric.
			name:            "successfully maps input to output when multiple batches are involved",
			metricsPerBatch: 1,
			requests: []*cloudwatchDataInput{
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Min"}, MetricName: "MetricOne"},
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Max"}, MetricName: "MetricTwo"},
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Sum"}, MetricName: "MetricThree"},
				{GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{Statistic: "Count"}, MetricName: "MetricFour"},
			},
			metricDataResultForMetrics: []metricDataResultForMetric{
				{MetricName: "MetricOne", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(5), Timestamp: time.Date(2023, time.June, 7, 1, 9, 8, 0, time.UTC)}}}},
				{MetricName: "MetricTwo", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(12), Timestamp: time.Date(2023, time.June, 7, 2, 9, 8, 0, time.UTC)}}}},
				{MetricName: "MetricThree", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(15), Timestamp: time.Date(2023, time.June, 7, 3, 9, 8, 0, time.UTC)}}}},
				{MetricName: "MetricFour", result: cloudwatch.MetricDataResult{DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(20), Timestamp: time.Date(2023, time.June, 7, 4, 9, 8, 0, time.UTC)}}}},
			},
			want: []cloudwatchDataOutput{
				{
					MetricName: "MetricOne",
					GetMetricDataResult: &model.GetMetricDataResult{
						Statistic:  "Min",
						DataPoints: []model.DataPoint{{Value: aws.Float64(5), Timestamp: time.Date(2023, time.June, 7, 1, 9, 8, 0, time.UTC)}},
					},
				},
				{
					MetricName: "MetricTwo",
					GetMetricDataResult: &model.GetMetricDataResult{
						Statistic:  "Max",
						DataPoints: []model.DataPoint{{Value: aws.Float64(12), Timestamp: time.Date(2023, time.June, 7, 2, 9, 8, 0, time.UTC)}},
					},
				},
				{
					MetricName: "MetricThree",
					GetMetricDataResult: &model.GetMetricDataResult{
						Statistic:  "Sum",
						DataPoints: []model.DataPoint{{Value: aws.Float64(15), Timestamp: time.Date(2023, time.June, 7, 3, 9, 8, 0, time.UTC)}},
					},
				},
				{
					MetricName: "MetricFour",
					GetMetricDataResult: &model.GetMetricDataResult{
						Statistic:  "Count",
						DataPoints: []model.DataPoint{{Value: aws.Float64(20), Timestamp: time.Date(2023, time.June, 7, 4, 9, 8, 0, time.UTC)}},
					},
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			metricsPerQuery := 500
			if tt.metricsPerBatch != 0 {
				metricsPerQuery = tt.metricsPerBatch
			}
			r := NewDefaultProcessor(promslog.NewNopLogger(), testClient{GetMetricDataResultForMetrics: tt.metricDataResultForMetrics}, metricsPerQuery, 1)
			cloudwatchData, err := r.Run(context.Background(), "anything_is_fine", ToCloudwatchData(tt.requests))
			require.NoError(t, err)
			require.Len(t, cloudwatchData, len(tt.want))
			got := make([]cloudwatchDataOutput, 0, len(cloudwatchData))
			for _, data := range cloudwatchData {
				// Processing params must be cleared and a result attached.
				assert.Nil(t, data.GetMetricStatisticsResult)
				assert.Nil(t, data.GetMetricDataProcessingParams)
				assert.NotNil(t, data.GetMetricDataResult)
				got = append(got, cloudwatchDataOutput{
					MetricName:          data.MetricName,
					GetMetricDataResult: data.GetMetricDataResult,
				})
			}
			assert.ElementsMatch(t, tt.want, got)
		})
	}
}

// ToCloudwatchData expands the trimmed test inputs into full
// model.CloudwatchData values with fixed placeholder metadata.
func ToCloudwatchData(input []*cloudwatchDataInput) []*model.CloudwatchData {
	output := make([]*model.CloudwatchData, 0, len(input))
	for _, i := range input {
		cloudwatchData := &model.CloudwatchData{
			MetricName:                    i.MetricName,
			ResourceName:                  "test",
			Namespace:                     "test",
			Tags:                          []model.Tag{{Key: "tag", Value: "value"}},
			Dimensions:                    []model.Dimension{{Name: "dimension", Value: "value"}},
			GetMetricDataProcessingParams: i.GetMetricDataProcessingParams,
			GetMetricDataResult:           nil,
			GetMetricStatisticsResult:     nil,
		}
		output = append(output, cloudwatchData)
	}
	return output
}

// getSampleMetricDatas builds a representative CloudwatchData fixture for the
// benchmark, keyed by resource name.
func getSampleMetricDatas(id string) *model.CloudwatchData {
	return &model.CloudwatchData{
		MetricName: "StorageBytes",
		Dimensions: []model.Dimension{
			{
				Name:  "FileSystemId",
				Value: "fs-abc123",
			},
			{
				Name:  "StorageClass",
				Value: "Standard",
			},
		},
		ResourceName: id,
		Namespace:    "efs",
		Tags: []model.Tag{
			{
				Key:   "Value1",
				Value: "",
			},
			{
				Key:   "Value2",
				Value: "",
			},
		},
		MetricMigrationParams: model.MetricMigrationParams{
			NilToZero:              false,
			AddCloudwatchTimestamp: false,
			ExportAllDataPoints:    false,
		},
		GetMetricDataProcessingParams: &model.GetMetricDataProcessingParams{
			Period:    60,
			Length:    60,
			Delay:     0,
			Statistic: "Average",
		},
	}
}

func BenchmarkProcessorRun(b *testing.B) {
	type testcase struct {
		concurrency        int
		metricsPerQuery    int
		testResourcesCount int
	}
	for name, tc := range map[string]testcase{
		"small case": {
			concurrency:        10,
			metricsPerQuery:    500,
			testResourcesCount: 10,
		},
		"medium case": {
			concurrency:        10,
			metricsPerQuery:    500,
			testResourcesCount: 1000,
		},
		"big case": {
			concurrency:        10,
			metricsPerQuery:    500,
			testResourcesCount: 2000,
		},
	} {
		b.Run(name, func(b *testing.B) {
			doBench(b, tc.metricsPerQuery, tc.testResourcesCount, tc.concurrency)
		})
	}
}

// doBench runs the processor over testResourcesCount synthetic requests,
// timing only the processor itself (fixture setup is excluded via StopTimer).
func doBench(b *testing.B, metricsPerQuery, testResourcesCount int, concurrency int) {
	testResourceIDs := make([]string, testResourcesCount)
	for i := 0; i < testResourcesCount; i++ {
		testResourceIDs[i] = fmt.Sprintf("test-resource-%d", i)
	}

	client := testClient{GetMetricDataFunc: func(_ context.Context, getMetricData []*model.CloudwatchData, _ string, _ time.Time, _ time.Time) []cloudwatch.MetricDataResult {
		b.StopTimer()
		results := make([]cloudwatch.MetricDataResult, 0, len(getMetricData))
		for _, entry := range getMetricData {
			results = append(results, cloudwatch.MetricDataResult{
				ID:         entry.GetMetricDataProcessingParams.QueryID,
				DataPoints: []cloudwatch.DataPoint{{Value: aws.Float64(1), Timestamp: time.Now()}},
			})
		}
		b.StartTimer()
		return results
	}}

	for i := 0; i < b.N; i++ {
		// stop timer to not affect benchmark run
		// this has to do in every run, since running the processor mutates the metric datas slice
		b.StopTimer()
		datas := make([]*model.CloudwatchData, 0, testResourcesCount)
		for i := 0; i < testResourcesCount; i++ {
			datas = append(datas, getSampleMetricDatas(testResourceIDs[i]))
		}
		r := NewDefaultProcessor(promslog.NewNopLogger(), client, metricsPerQuery, concurrency)
		// re-start timer
		b.ReportAllocs()
		b.StartTimer()

		//nolint:errcheck
		r.Run(context.Background(), "anything_is_fine", datas)
	}
}


================================================
FILE: pkg/job/getmetricdata/windowcalculator.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License,
// Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package getmetricdata

import "time"

// TimeFormat is used when formatting window boundaries for debug logging.
const TimeFormat = "2006-01-02T15:04:05.999999-07:00"

// Clock small interface which allows for stubbing the time.Now() function for unit testing
type Clock interface {
	Now() time.Time
}

// TimeClock implementation of Clock interface which delegates to Go's Time package
type TimeClock struct{}

func (tc TimeClock) Now() time.Time {
	return time.Now()
}

// MetricWindowCalculator computes GetMetricData start/end times from the
// configured period, length and delay, using the injected Clock.
type MetricWindowCalculator struct {
	clock Clock
}

// Calculate computes the start and end time for the GetMetricData request to AWS
// Always uses the wall clock time as starting point for calculations to ensure that
// a variety of exporter configurations will work reliably.
func (m MetricWindowCalculator) Calculate(period time.Duration, length time.Duration, delay time.Duration) (time.Time, time.Time) {
	now := m.clock.Now()

	if period > 0 {
		// Round down the time to a factor of the period:
		// https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_GetMetricData.html#API_GetMetricData_RequestParameters
		// Subtracting half the period before Round makes the (nearest-multiple)
		// Round act as a floor to the previous period boundary.
		now = now.Add(-period / 2).Round(period)
	}

	// The window is [now-(length+delay), now-delay].
	startTime := now.Add(-(length + delay))
	endTime := now.Add(-delay)

	return startTime, endTime
}


================================================
FILE: pkg/job/getmetricdata/windowcalculator_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package getmetricdata import ( "testing" "time" ) // StubClock stub implementation of Clock interface that allows tests // to control time.Now() type StubClock struct { currentTime time.Time } func (mt StubClock) Now() time.Time { return mt.currentTime } func Test_MetricWindow(t *testing.T) { type data struct { roundingPeriod time.Duration length time.Duration delay time.Duration clock StubClock expectedStartTime time.Time expectedEndTime time.Time } testCases := []struct { testName string data data }{ { testName: "Go back four minutes and round to the nearest two minutes with two minute delay", data: data{ roundingPeriod: 120 * time.Second, length: 120 * time.Second, delay: 120 * time.Second, clock: StubClock{ currentTime: time.Date(2021, 11, 20, 0, 0, 0, 0, time.UTC), }, expectedStartTime: time.Date(2021, 11, 19, 23, 56, 0, 0, time.UTC), expectedEndTime: time.Date(2021, 11, 19, 23, 58, 0, 0, time.UTC), }, }, { testName: "Go back four minutes with two minute delay nad no rounding", data: data{ roundingPeriod: 0, length: 120 * time.Second, delay: 120 * time.Second, clock: StubClock{ currentTime: time.Date(2021, 1, 1, 0, 0o2, 22, 33, time.UTC), }, expectedStartTime: time.Date(2020, 12, 31, 23, 58, 22, 33, time.UTC), expectedEndTime: time.Date(2021, 1, 1, 0, 0, 22, 33, time.UTC), }, }, { testName: "Go back two days and round to the nearest day (midnight) with zero delay", data: data{ roundingPeriod: 86400 * time.Second, // 1 day length: 172800 * time.Second, // 2 days delay: 0, clock: StubClock{ currentTime: time.Date(2021, 11, 20, 8, 33, 44, 0, time.UTC), }, 
expectedStartTime: time.Date(2021, 11, 18, 0, 0, 0, 0, time.UTC), expectedEndTime: time.Date(2021, 11, 20, 0, 0, 0, 0, time.UTC), }, }, { testName: "Go back two days and round to the nearest 5 minutes with zero delay", data: data{ roundingPeriod: 300 * time.Second, // 5 min length: 172800 * time.Second, // 2 days delay: 0, clock: StubClock{ currentTime: time.Date(2021, 11, 20, 8, 33, 44, 0, time.UTC), }, expectedStartTime: time.Date(2021, 11, 18, 8, 30, 0, 0, time.UTC), expectedEndTime: time.Date(2021, 11, 20, 8, 30, 0, 0, time.UTC), }, }, } for _, tc := range testCases { t.Run(tc.testName, func(t *testing.T) { startTime, endTime := MetricWindowCalculator{tc.data.clock}.Calculate(tc.data.roundingPeriod, tc.data.length, tc.data.delay) if !startTime.Equal(tc.data.expectedStartTime) { t.Errorf("start time incorrect. Expected: %s, Actual: %s", tc.data.expectedStartTime.Format(TimeFormat), startTime.Format(TimeFormat)) t.Errorf("end time incorrect. Expected: %s, Actual: %s", tc.data.expectedEndTime.Format(TimeFormat), endTime.Format(TimeFormat)) } }) } } ================================================ FILE: pkg/job/listmetrics/processor.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package listmetrics

import "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"

// ProcessingParams groups the inputs that drive ListMetrics processing for a
// single namespace.
type ProcessingParams struct {
	// Namespace is the CloudWatch namespace to list metrics for.
	Namespace string
	// Metrics is the set of metric configs to process.
	Metrics []*model.MetricConfig
	// RecentlyActiveOnly restricts listing to recently active metrics.
	RecentlyActiveOnly bool
	// DimensionNameRequirements filters metrics by required dimension names.
	DimensionNameRequirements []string
}


================================================
FILE: pkg/job/maxdimassociator/associator.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"cmp"
	"context"
	"fmt"
	"log/slog"
	"slices"
	"strings"

	"github.com/grafana/regexp"
	prom_model "github.com/prometheus/common/model"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// amazonMQBrokerSuffix matches a trailing "-<digits>" suffix on a string.
var amazonMQBrokerSuffix = regexp.MustCompile("-[0-9]+$")

// Associator implements a "best effort" algorithm to automatically map the output
// of the ListMetrics API to the list of resources retrieved from the Tagging API.
// The core logic is based on a manually maintained list of regexes that extract
// dimensions names from ARNs (see services.go). YACE supports auto-discovery for
// those AWS namespaces where the ARN regexes are correctly defined.
type Associator struct { // mappings is a slice of dimensions-based mappings, one for each regex of a given namespace mappings []*dimensionsRegexpMapping logger *slog.Logger debugEnabled bool } type dimensionsRegexpMapping struct { // dimensions is a slice of dimensions names in a regex (normally 1 name is enough // to identify the resource type by its ARN, sometimes 2 or 3 dimensions names are // needed to identify sub-resources) dimensions []string // dimensionsMapping maps the set of dimensions (names and values) to a resource. // Dimensions names and values are encoded as a uint64 fingerprint. dimensionsMapping map[uint64]*model.TaggedResource } func (rm dimensionsRegexpMapping) toString() string { sb := strings.Builder{} sb.WriteString("{dimensions=[") for _, dim := range rm.dimensions { sb.WriteString(dim) } sb.WriteString("], dimensions_mappings={") for sign, res := range rm.dimensionsMapping { fmt.Fprintf(&sb, "%d", sign) sb.WriteString("=") sb.WriteString(res.ARN) sb.WriteString(",") } sb.WriteString("}}") return sb.String() } // NewAssociator builds all mappings for the given dimensions regexps and list of resources. func NewAssociator(logger *slog.Logger, dimensionsRegexps []model.DimensionsRegexp, resources []*model.TaggedResource) Associator { assoc := Associator{ mappings: []*dimensionsRegexpMapping{}, logger: logger, debugEnabled: logger.Handler().Enabled(context.Background(), slog.LevelDebug), // caching if debug is enabled } // Keep track of resources that have already been mapped. // Each resource will be matched against at most one regex. 
// TODO(cristian): use a more memory-efficient data structure mappedResources := make([]bool, len(resources)) for _, dr := range dimensionsRegexps { m := &dimensionsRegexpMapping{ dimensions: dr.DimensionsNames, dimensionsMapping: map[uint64]*model.TaggedResource{}, } for idx, r := range resources { if mappedResources[idx] { continue } match := dr.Regexp.FindStringSubmatch(r.ARN) if match == nil { continue } labels := make(map[string]string, len(match)) for i := 1; i < len(match); i++ { labels[dr.DimensionsNames[i-1]] = match[i] } signature := prom_model.LabelsToSignature(labels) m.dimensionsMapping[signature] = r mappedResources[idx] = true } if len(m.dimensionsMapping) > 0 { assoc.mappings = append(assoc.mappings, m) } // The mapping might end up as empty in cases e.g. where // one of the regexps defined for the namespace doesn't match // against any of the tagged resources. This might happen for // example when we define multiple regexps (to capture sibling // or sub-resources) and one of them doesn't match any resource. // This behaviour is ok, we just want to debug log to keep track of it. if assoc.debugEnabled { logger.Debug("unable to define a regex mapping", "regex", dr.Regexp.String()) } } // sort all mappings by decreasing number of dimensions names // (this is essential so that during matching we try to find the metric // with the most specific set of dimensions) slices.SortStableFunc(assoc.mappings, func(a, b *dimensionsRegexpMapping) int { return -1 * cmp.Compare(len(a.dimensions), len(b.dimensions)) }) if assoc.debugEnabled { for idx, regexpMapping := range assoc.mappings { logger.Debug("associator mapping", "mapping_idx", idx, "mapping", regexpMapping.toString()) } } return assoc } // AssociateMetricToResource finds the resource that corresponds to the given set of dimensions // names and values of a metric. The guess is based on the mapping built from dimensions regexps. 
// In case a map can't be found, the second return parameter indicates whether the metric should be
// ignored or not.
func (assoc Associator) AssociateMetricToResource(cwMetric *model.Metric) (*model.TaggedResource, bool) {
	logger := assoc.logger.With("metric_name", cwMetric.MetricName)

	if len(cwMetric.Dimensions) == 0 {
		logger.Debug("metric has no dimensions, don't skip")
		// Do not skip the metric (create a "global" metric)
		return nil, false
	}

	// Collect the metric's dimension names for mapping lookup.
	dimensions := make([]string, 0, len(cwMetric.Dimensions))
	for _, dimension := range cwMetric.Dimensions {
		dimensions = append(dimensions, dimension.Name)
	}

	if assoc.debugEnabled {
		logger.Debug("associate loop start", "dimensions", strings.Join(dimensions, ","))
	}

	// Attempt to find the regex mapping which contains the most
	// (but not necessarily all) the metric's dimensions names.
	// Regex mappings are sorted by decreasing number of dimensions names,
	// which favours find the mapping with most dimensions.
	mappingFound := false
	for idx, regexpMapping := range assoc.mappings {
		if containsAll(dimensions, regexpMapping.dimensions) {
			if assoc.debugEnabled {
				logger.Debug("found mapping", "mapping_idx", idx, "mapping", regexpMapping.toString())
			}

			// A regex mapping has been found. The metric has all (and possibly more)
			// the dimensions computed for the mapping. Now compute a signature
			// of the labels (names and values) of the dimensions of this mapping, and try to
			// find a resource match.

			// This loop can run up to two times:
			// On the first iteration, special-case dimension value
			// fixes to match the value up with the resource ARN are applied to particular namespaces.
			// The second iteration will only run if a fix was applied for one of the special-case
			// namespaces and no match was found. It will try to find a match without applying the fixes.
			// This covers cases where the dimension value does line up with the resource ARN.
			mappingFound = true
			dimFixApplied := false
			shouldTryFixDimension := true // If no dimension fixes were applied, no need to try running again without the fixer.
			for dimFixApplied || shouldTryFixDimension {
				var labels map[string]string
				labels, dimFixApplied = buildLabelsMap(cwMetric, regexpMapping, shouldTryFixDimension)
				signature := prom_model.LabelsToSignature(labels)

				// Check if there's an entry for the labels (names and values) of the metric,
				// and return the resource in case.
				if resource, ok := regexpMapping.dimensionsMapping[signature]; ok {
					logger.Debug("resource matched", "signature", signature)
					return resource, false
				}

				// No resource was matched for the current signature.
				logger.Debug("resource signature attempt not matched", "signature", signature)
				shouldTryFixDimension = false
			}

			// No resource was matched for any signature, continue iterating across the
			// rest of regex mappings to attempt to find another one with fewer dimensions.
			logger.Debug("resource not matched")
		}
	}

	// At this point, we haven't been able to match the metric against
	// any resource based on the dimensions the associator knows.
	// If a regex mapping was ever found in the loop above but no entry
	// (i.e. matching labels names and values) matched the metric dimensions,
	// skip the metric altogether.
	// Otherwise, if we didn't find any regex mapping it means we can't
	// correctly map the dimensions names to a resource arn regex,
	// but we still want to keep the metric and create a "global" metric.
	logger.Debug("associate loop end", "skip", mappingFound)
	return nil, mappingFound
}

// buildLabelsMap returns a map of labels names and values, as well as whether the dimension fixer was applied.
// For some namespaces, values might need to be modified in order
// to match the dimension value extracted from ARN.
func buildLabelsMap(cwMetric *model.Metric, regexpMapping *dimensionsRegexpMapping, shouldTryFixDimension bool) (map[string]string, bool) { labels := make(map[string]string, len(cwMetric.Dimensions)) dimFixApplied := false for _, rDimension := range regexpMapping.dimensions { for _, mDimension := range cwMetric.Dimensions { if shouldTryFixDimension { mDimension, dimFixApplied = fixDimension(cwMetric.Namespace, mDimension) } if rDimension == mDimension.Name { labels[mDimension.Name] = mDimension.Value } } } return labels, dimFixApplied } // fixDimension modifies the dimension value to accommodate special cases where // the dimension value doesn't match the resource ARN. func fixDimension(namespace string, dim model.Dimension) (model.Dimension, bool) { // AmazonMQ is special - for active/standby ActiveMQ brokers, // the value of the "Broker" dimension contains a number suffix // that is not part of the resource ARN if namespace == "AWS/AmazonMQ" && dim.Name == "Broker" { if amazonMQBrokerSuffix.MatchString(dim.Value) { dim.Value = amazonMQBrokerSuffix.ReplaceAllString(dim.Value, "") return dim, true } } // AWS Sagemaker inference component name may have upper case characters // name value to be able to match the resource ARN if namespace == "AWS/SageMaker" && dim.Name == "InferenceComponentName" { dim.Value = strings.ToLower(dim.Value) return dim, true } return dim, false } // containsAll returns true if a contains all elements of b func containsAll(a, b []string) bool { for _, e := range b { if slices.Contains(a, e) { continue } return false } return true } ================================================ FILE: pkg/job/maxdimassociator/associator_api_gateway_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var apiGatewayV1 = &model.TaggedResource{ ARN: "arn:aws:apigateway:us-east-2::/restapis/test-api", Namespace: "AWS/ApiGateway", } var apiGatewayV1Stage = &model.TaggedResource{ ARN: "arn:aws:apigateway:us-east-2::/restapis/test-api/stages/test", Namespace: "AWS/ApiGateway", } var apiGatewayV2 = &model.TaggedResource{ ARN: "arn:aws:apigateway:us-east-2::/apis/98765fghij", Namespace: "AWS/ApiGateway", } var apiGatewayV2Stage = &model.TaggedResource{ ARN: "arn:aws:apigateway:us-east-2::/apis/98765fghij/stages/$default", Namespace: "AWS/ApiGateway", } var apiGatewayResources = []*model.TaggedResource{apiGatewayV1, apiGatewayV1Stage, apiGatewayV2, apiGatewayV2Stage} func TestAssociatorAPIGateway(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match API Gateway V2 with ApiId dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ApiGateway").ToModelDimensionsRegexp(), resources: apiGatewayResources, metric: &model.Metric{ MetricName: "5xx", Namespace: "AWS/ApiGateway", Dimensions: []model.Dimension{ {Name: "ApiId", Value: "98765fghij"}, 
}, }, }, expectedSkip: false, expectedResource: apiGatewayV2, }, { name: "should match API Gateway V2 with ApiId and Stage dimensions", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ApiGateway").ToModelDimensionsRegexp(), resources: apiGatewayResources, metric: &model.Metric{ MetricName: "5xx", Namespace: "AWS/ApiGateway", Dimensions: []model.Dimension{ {Name: "ApiId", Value: "98765fghij"}, {Name: "Stage", Value: "$default"}, }, }, }, expectedSkip: false, expectedResource: apiGatewayV2Stage, }, { name: "should match API Gateway V1 with ApiName dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ApiGateway").ToModelDimensionsRegexp(), resources: apiGatewayResources, metric: &model.Metric{ MetricName: "5xx", Namespace: "AWS/ApiGateway", Dimensions: []model.Dimension{ {Name: "ApiName", Value: "test-api"}, }, }, }, expectedSkip: false, expectedResource: apiGatewayV1, }, { name: "should match API Gateway V1 with ApiName and Stage dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ApiGateway").ToModelDimensionsRegexp(), resources: apiGatewayResources, metric: &model.Metric{ MetricName: "5xx", Namespace: "AWS/ApiGateway", Dimensions: []model.Dimension{ {Name: "ApiName", Value: "test-api"}, {Name: "Stage", Value: "test"}, }, }, }, expectedSkip: false, expectedResource: apiGatewayV1Stage, }, { name: "should match API Gateway V1 with ApiName (Stage is not matched)", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ApiGateway").ToModelDimensionsRegexp(), resources: apiGatewayResources, metric: &model.Metric{ MetricName: "5xx", Namespace: "AWS/ApiGateway", Dimensions: []model.Dimension{ {Name: "ApiName", Value: "test-api"}, {Name: "Stage", Value: "dev"}, }, }, }, expectedSkip: false, expectedResource: apiGatewayV1, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, 
tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_client_vpn_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var clientVpn = &model.TaggedResource{ ARN: "arn:aws:ec2:eu-central-1:075055617227:client-vpn-endpoint/cvpn-endpoint-0c9e5bd20be71e296", Namespace: "AWS/ClientVPN", } func TestAssociatorClientVPN(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match ClientVPN with Endpoint dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ClientVPN").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{clientVpn}, metric: &model.Metric{ MetricName: "CrlDaysToExpiry", Namespace: "AWS/ClientVPN", Dimensions: []model.Dimension{ {Name: 
"Endpoint", Value: "cvpn-endpoint-0c9e5bd20be71e296"}, }, }, }, expectedSkip: false, expectedResource: clientVpn, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_ddosprotection_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Two Shield-protected EC2 instances: AWS/DDoSProtection metrics reference
// the protected resource by its full ARN via the ResourceArn dimension.
var protectedResources1 = &model.TaggedResource{
	ARN:       "arn:aws:ec2:us-east-1:123456789012:instance/i-abc123",
	Namespace: "AWS/DDoSProtection",
}

var protectedResources2 = &model.TaggedResource{
	ARN:       "arn:aws:ec2:us-east-1:123456789012:instance/i-def456",
	Namespace: "AWS/DDoSProtection",
}

var protectedResources = []*model.TaggedResource{
	protectedResources1,
	protectedResources2,
}

// TestAssociatorDDoSProtection verifies that a metric carrying a ResourceArn
// dimension is associated with the tagged resource having that exact ARN.
func TestAssociatorDDoSProtection(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with ResourceArn dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/DDoSProtection").ToModelDimensionsRegexp(),
				resources:        protectedResources,
				metric: &model.Metric{
					Namespace:  "AWS/DDoSProtection",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "ResourceArn", Value: "arn:aws:ec2:us-east-1:123456789012:instance/i-abc123"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: protectedResources1,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			// Use require (not assert) to match the sibling associator tests and
			// abort the subtest on the first failed expectation.
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_directoryservice_test.go
================================================


// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License,
Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var directory = &model.TaggedResource{ ARN: "arn:aws:ds::012345678901:directory/d-abc123", Namespace: "AWS/DirectoryService", } func TestAssociatorDirectoryService(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match directory id with Directory ID dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/DirectoryService").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{directory}, metric: &model.Metric{ MetricName: "Current Bandwidth", Namespace: "AWS/DirectoryService", Dimensions: []model.Dimension{ {Name: "Metric Category", Value: "NTDS"}, {Name: "Domain Controller IP", Value: "123.123.123.123"}, {Name: "Directory ID", Value: "d-abc123"}, }, }, }, expectedSkip: false, expectedResource: directory, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := 
associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_dx_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var dxVif = &model.TaggedResource{ ARN: "arn:aws:directconnect::012345678901:dxvif/dxvif-abc123", Namespace: "AWS/DX", } func TestAssociatorDX(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match Virtual Interface with VirtualInterfaceId dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/DX").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{dxVif}, metric: &model.Metric{ MetricName: "VirtualInterfaceBpsIngress", Namespace: "AWS/DX", Dimensions: []model.Dimension{ {Name: "ConnectionId", Value: "dxlag-abc123"}, {Name: "VirtualInterfaceId", Value: 
"dxvif-abc123"}, }, }, }, expectedSkip: false, expectedResource: dxVif, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_ec2_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Two tagged EC2 instances used as the resource universe for the tests below.
var ec2Instance1 = &model.TaggedResource{
	ARN:       "arn:aws:ec2:us-east-1:123456789012:instance/i-abc123",
	Namespace: "AWS/EC2",
}

var ec2Instance2 = &model.TaggedResource{
	ARN:       "arn:aws:ec2:us-east-1:123456789012:instance/i-def456",
	Namespace: "AWS/EC2",
}

var ec2Resources = []*model.TaggedResource{
	ec2Instance1,
	ec2Instance2,
}

// TestAssociatorEC2 checks InstanceId-based association, including the two
// no-match outcomes: skip (ARN-like dimension with no matching resource) and
// keep-as-global (dimension that never maps to an ARN, e.g. ASG name).
func TestAssociatorEC2(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with InstanceId dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/EC2").ToModelDimensionsRegexp(),
				resources:        ec2Resources,
				metric: &model.Metric{
					Namespace:  "AWS/EC2",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "InstanceId", Value: "i-abc123"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: ec2Instance1,
		},
		{
			name: "should match another instance with InstanceId dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/EC2").ToModelDimensionsRegexp(),
				resources:        ec2Resources,
				metric: &model.Metric{
					Namespace:  "AWS/EC2",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "InstanceId", Value: "i-def456"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: ec2Instance2,
		},
		{
			// InstanceId is ARN-mappable but no resource has this id → skip metric.
			name: "should skip with unmatched InstanceId dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/EC2").ToModelDimensionsRegexp(),
				resources:        ec2Resources,
				metric: &model.Metric{
					Namespace:  "AWS/EC2",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "InstanceId", Value: "i-blahblah"},
					},
				},
			},
			expectedSkip:     true,
			expectedResource: nil,
		},
		{
			// No regex mapping covers AutoScalingGroupName → keep metric (nil
			// resource, skip=false) so it can be exported as a "global" metric.
			name: "should not skip when unmatching because of non-ARN dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/EC2").ToModelDimensionsRegexp(),
				resources:        ec2Resources,
				metric: &model.Metric{
					Namespace:  "AWS/EC2",
					MetricName: "StatusCheckFailed_System",
					Dimensions: []model.Dimension{
						{Name: "AutoScalingGroupName", Value: "some-asg-name"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_ec_test.go
================================================


// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var ecServerless = &model.TaggedResource{ ARN: "arn:aws:elasticache:eu-east-1:123456789012:serverlesscache:test-serverless-cluster", Namespace: "AWS/ElastiCache", } var ecCluster = &model.TaggedResource{ ARN: "arn:aws:elasticache:eu-east-1:123456789012:cluster:test-cluster-0001-001", Namespace: "AWS/ElastiCache", } var ecResources = []*model.TaggedResource{ ecServerless, ecCluster, } func TestAssociatorEC(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match with clusterId dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ElastiCache").ToModelDimensionsRegexp(), resources: ecResources, metric: &model.Metric{ MetricName: "TotalCmdsCount", Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ {Name: "clusterId", Value: "test-serverless-cluster"}, }, }, }, expectedSkip: false, expectedResource: ecServerless, }, { name: "should match with CacheClusterId dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ElastiCache").ToModelDimensionsRegexp(), resources: ecResources, metric: &model.Metric{ MetricName: "EngineCPUUtilization", Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ {Name: "CacheClusterId", Value: "test-cluster-0001-001"}, }, }, }, expectedSkip: false, expectedResource: ecCluster, }, { name: "should skip with unmatched CacheClusterId dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ElastiCache").ToModelDimensionsRegexp(), resources: ecResources, 
metric: &model.Metric{ MetricName: "EngineCPUUtilization", Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ {Name: "CacheClusterId", Value: "test-cluster-0001-002"}, }, }, }, expectedSkip: true, expectedResource: nil, }, { name: "should skip with unmatched clusterId dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ElastiCache").ToModelDimensionsRegexp(), resources: ecResources, metric: &model.Metric{ MetricName: "TotalCmdsCount", Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ {Name: "clusterId", Value: "test-unmatched-serverless-cluster"}, }, }, }, expectedSkip: true, expectedResource: nil, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_ecs_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var ecsCluster = &model.TaggedResource{ ARN: "arn:aws:ecs:af-south-1:123456789222:cluster/sampleCluster", Namespace: "AWS/ECS", } var ecsService1 = &model.TaggedResource{ ARN: "arn:aws:ecs:af-south-1:123456789222:service/sampleCluster/service1", Namespace: "AWS/ECS", } var ecsService2 = &model.TaggedResource{ ARN: "arn:aws:ecs:af-south-1:123456789222:service/sampleCluster/service2", Namespace: "AWS/ECS", } var ecsResources = []*model.TaggedResource{ ecsCluster, ecsService1, ecsService2, } func TestAssociatorECS(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "cluster metric should be assigned cluster resource", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ECS").ToModelDimensionsRegexp(), resources: ecsResources, metric: &model.Metric{ MetricName: "MemoryReservation", Namespace: "AWS/ECS", Dimensions: []model.Dimension{ {Name: "ClusterName", Value: "sampleCluster"}, }, }, }, expectedSkip: false, expectedResource: ecsCluster, }, { name: "service metric should be assigned service1 resource", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/ECS").ToModelDimensionsRegexp(), resources: ecsResources, metric: &model.Metric{ MetricName: "CPUUtilization", Namespace: "AWS/ECS", Dimensions: []model.Dimension{ {Name: "ClusterName", Value: "sampleCluster"}, {Name: "ServiceName", Value: "service1"}, }, }, }, expectedSkip: false, expectedResource: ecsService1, }, { name: "service metric should be assigned service2 resource", args: args{ 
dimensionRegexps: config.SupportedServices.GetService("AWS/ECS").ToModelDimensionsRegexp(), resources: ecsResources, metric: &model.Metric{ MetricName: "CPUUtilization", Namespace: "AWS/ECS", Dimensions: []model.Dimension{ {Name: "ClusterName", Value: "sampleCluster"}, {Name: "ServiceName", Value: "service2"}, }, }, }, expectedSkip: false, expectedResource: ecsService2, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_event_roles_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// EventBridge rule on a custom event bus.
var eventRule0 = &model.TaggedResource{
	ARN:       "arn:aws:events:eu-central-1:112246171613:rule/event-bus-name/rule-name",
	Namespace: "AWS/Events",
}

// EventBridge rule on a partner event bus (extra path segments in the ARN);
// present in the resource set but not asserted against by the case below.
var eventRule1 = &model.TaggedResource{
	ARN:       "arn:aws:events:eu-central-1:123456789012:rule/aws.partner/partner.name/123456/rule-name",
	Namespace: "AWS/Events",
}

var eventRuleResources = []*model.TaggedResource{
	eventRule0,
	eventRule1,
}

// TestAssociatorEventRule verifies that a metric carrying both EventBusName
// and RuleName dimensions resolves to the rule resource on that bus.
func TestAssociatorEventRule(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "2 dimensions should match",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Events").ToModelDimensionsRegexp(),
				resources:        eventRuleResources,
				metric: &model.Metric{
					MetricName: "Invocations",
					Namespace:  "AWS/Events",
					Dimensions: []model.Dimension{
						{Name: "EventBusName", Value: "event-bus-name"},
						{Name: "RuleName", Value: "rule-name"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: eventRule0,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_globalaccelerator_test.go
================================================


// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not
use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var globalAcceleratorAccelerator = &model.TaggedResource{ ARN: "arn:aws:globalaccelerator::012345678901:accelerator/super-accelerator", Namespace: "AWS/GlobalAccelerator", } var globalAcceleratorListener = &model.TaggedResource{ ARN: "arn:aws:globalaccelerator::012345678901:accelerator/super-accelerator/listener/some_listener", Namespace: "AWS/GlobalAccelerator", } var globalAcceleratorEndpointGroup = &model.TaggedResource{ ARN: "arn:aws:globalaccelerator::012345678901:accelerator/super-accelerator/listener/some_listener/endpoint-group/eg1", Namespace: "AWS/GlobalAccelerator", } var globalAcceleratorResources = []*model.TaggedResource{ globalAcceleratorAccelerator, globalAcceleratorListener, globalAcceleratorEndpointGroup, } func TestAssociatorGlobalAccelerator(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match with Accelerator dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/GlobalAccelerator").ToModelDimensionsRegexp(), resources: globalAcceleratorResources, metric: 
&model.Metric{ MetricName: "ProcessedBytesOut", Namespace: "AWS/GlobalAccelerator", Dimensions: []model.Dimension{ {Name: "Accelerator", Value: "super-accelerator"}, }, }, }, expectedSkip: false, expectedResource: globalAcceleratorAccelerator, }, { name: "should match Listener with Accelerator and Listener dimensions", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/GlobalAccelerator").ToModelDimensionsRegexp(), resources: globalAcceleratorResources, metric: &model.Metric{ MetricName: "ProcessedBytesOut", Namespace: "AWS/GlobalAccelerator", Dimensions: []model.Dimension{ {Name: "Accelerator", Value: "super-accelerator"}, {Name: "Listener", Value: "some_listener"}, }, }, }, expectedSkip: false, expectedResource: globalAcceleratorListener, }, { name: "should match EndpointGroup with Accelerator, Listener and EndpointGroup dimensions", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/GlobalAccelerator").ToModelDimensionsRegexp(), resources: globalAcceleratorResources, metric: &model.Metric{ MetricName: "ProcessedBytesOut", Namespace: "AWS/GlobalAccelerator", Dimensions: []model.Dimension{ {Name: "Accelerator", Value: "super-accelerator"}, {Name: "Listener", Value: "some_listener"}, {Name: "EndpointGroup", Value: "eg1"}, }, }, }, expectedSkip: false, expectedResource: globalAcceleratorEndpointGroup, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_gwlb_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance 
with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var gatewayLoadBalancer1 = &model.TaggedResource{ ARN: "arn:aws:elasticloadbalancing:eu-central-1:123456789012:loadbalancer/gwy/gwlb-1/4a049e69add14452", Namespace: "AWS/GatewayELB", } var gatewayLoadBalancer2 = &model.TaggedResource{ ARN: "arn:aws:elasticloadbalancing:eu-central-1:123456789012:loadbalancer/gwy/gwlb-2/a96cc19724cf1a87", Namespace: "AWS/GatewayELB", } var targetGroup1 = &model.TaggedResource{ ARN: "arn:aws:elasticloadbalancing:eu-central-1:123456789012:targetgroup/gwlb-target-group-1/012e9f368748cd345c", Namespace: "AWS/GatewayELB", } var gatewayLoadBalancerResources = []*model.TaggedResource{ gatewayLoadBalancer1, gatewayLoadBalancer2, targetGroup1, } func TestAssociatorGwlb(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match with gateway loadbalancer one dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/GatewayELB").ToModelDimensionsRegexp(), resources: gatewayLoadBalancerResources, metric: &model.Metric{ MetricName: "HealthyHostCount", Namespace: "AWS/GatewayELB", Dimensions: 
[]model.Dimension{ {Name: "LoadBalancer", Value: "gwy/gwlb-1/4a049e69add14452"}, }, }, }, expectedSkip: false, expectedResource: gatewayLoadBalancer1, }, { name: "should match with gateway loadbalancer target group two dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/GatewayELB").ToModelDimensionsRegexp(), resources: gatewayLoadBalancerResources, metric: &model.Metric{ MetricName: "HealthyHostCount", Namespace: "AWS/GatewayELB", Dimensions: []model.Dimension{ {Name: "LoadBalancer", Value: "gwy/gwlb-1/4a049e69add14452"}, {Name: "TargetGroup", Value: "targetgroup/gwlb-target-group-1/012e9f368748cd345c"}, }, }, }, expectedSkip: false, expectedResource: targetGroup1, }, { name: "should not match with any gateway loadbalancer", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/GatewayELB").ToModelDimensionsRegexp(), resources: gatewayLoadBalancerResources, metric: &model.Metric{ MetricName: "HealthyHostCount", Namespace: "AWS/GatewayELB", Dimensions: []model.Dimension{ {Name: "LoadBalancer", Value: "gwy/non-existing-gwlb/a96cc19724cf1a87"}, }, }, }, expectedSkip: true, expectedResource: nil, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_ipam_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture: a single IPAM pool resource for the AWS/IPAM namespace.
var ec2IpamPool = &model.TaggedResource{
	ARN:       "arn:aws:ec2::123456789012:ipam-pool/ipam-pool-1ff5e4e9ad2c28b7b",
	Namespace: "AWS/IPAM",
}

var ipamResources = []*model.TaggedResource{
	ec2IpamPool,
}

// TestAssociatorIpam verifies association of AWS/IPAM metrics to the IPAM
// pool resource by the IpamPoolId dimension, and skipping of unknown pools.
func TestAssociatorIpam(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with IpamPoolId dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/IPAM").ToModelDimensionsRegexp(),
				resources:        ipamResources,
				metric: &model.Metric{
					MetricName: "VpcIPUsage",
					Namespace:  "AWS/IPAM",
					Dimensions: []model.Dimension{
						{Name: "IpamPoolId", Value: "ipam-pool-1ff5e4e9ad2c28b7b"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: ec2IpamPool,
		},
		{
			name: "should skip with unmatched IpamPoolId dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/IPAM").ToModelDimensionsRegexp(),
				resources:        ipamResources,
				metric: &model.Metric{
					MetricName: "VpcIPUsage",
					Namespace:  "AWS/IPAM",
					Dimensions: []model.Dimension{
						{Name: "IpamPoolId", Value: "ipam-pool-blahblah"},
					},
				},
			},
			expectedSkip:     true,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_kms_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var kmsKey = &model.TaggedResource{ ARN: "arn:aws:kms:us-east-2:123456789012:key/12345678-1234-1234-1234-123456789012", Namespace: "AWS/KMS", } func TestAssociatorKMS(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match with KMS dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/KMS").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{kmsKey}, metric: &model.Metric{ MetricName: "SecondsUntilKeyMaterialExpiration", Namespace: "AWS/KMS", Dimensions: []model.Dimension{ {Name: "KeyId", Value: "12345678-1234-1234-1234-123456789012"}, }, }, }, expectedSkip: false, expectedResource: kmsKey, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_lambda_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture: a single Lambda function resource for the AWS/Lambda namespace.
var lambdaFunction = &model.TaggedResource{
	ARN:       "arn:aws:lambda:us-east-2:123456789012:function:lambdaFunction",
	Namespace: "AWS/Lambda",
}

var lambdaResources = []*model.TaggedResource{lambdaFunction}

// TestAssociatorLambda verifies association of AWS/Lambda metrics by the
// FunctionName (and optional Resource) dimensions, including the behavior
// for unknown functions and for metrics with no dimensions at all.
func TestAssociatorLambda(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with FunctionName dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Lambda").ToModelDimensionsRegexp(),
				resources:        lambdaResources,
				metric: &model.Metric{
					MetricName: "Invocations",
					Namespace:  "AWS/Lambda",
					Dimensions: []model.Dimension{
						{Name: "FunctionName", Value: "lambdaFunction"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: lambdaFunction,
		},
		{
			name: "should skip with unmatched FunctionName dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Lambda").ToModelDimensionsRegexp(),
				resources:        lambdaResources,
				metric: &model.Metric{
					MetricName: "Invocations",
					Namespace:  "AWS/Lambda",
					Dimensions: []model.Dimension{
						{Name: "FunctionName", Value: "anotherLambdaFunction"},
					},
				},
			},
			expectedSkip:     true,
			expectedResource: nil,
		},
		{
			name: "should match with FunctionName and Resource dimensions",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Lambda").ToModelDimensionsRegexp(),
				resources:        lambdaResources,
				metric: &model.Metric{
					MetricName: "Invocations",
					Namespace:  "AWS/Lambda",
					Dimensions: []model.Dimension{
						{Name: "FunctionName", Value: "lambdaFunction"},
						{Name: "Resource", Value: "lambdaFunction"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: lambdaFunction,
		},
		{
			name: "should not skip when empty dimensions",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Lambda").ToModelDimensionsRegexp(),
				resources:        lambdaResources,
				metric: &model.Metric{
					MetricName: "Invocations",
					Namespace:  "AWS/Lambda",
					Dimensions: []model.Dimension{},
				},
			},
			expectedSkip:     false,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_logging_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator import ( "bytes" "log/slog" "testing" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) func TestAssociatorLogging(t *testing.T) { type testcase struct { level slog.Level } for name, tc := range map[string]testcase{ "debug enabled": {level: slog.LevelDebug}, "debug disabled": {level: slog.LevelInfo}, } { t.Run(name, func(t *testing.T) { buf := &bytes.Buffer{} logger := slog.New(slog.NewTextHandler(buf, &slog.HandlerOptions{ Level: tc.level, })) associator := NewAssociator(logger, config.SupportedServices.GetService("AWS/Logs").ToModelDimensionsRegexp(), logGroupResources) res, skip := associator.AssociateMetricToResource(&model.Metric{ MetricName: "DeliveryThrottling", Namespace: "AWS/Logs", Dimensions: []model.Dimension{ {Name: "LogGroupName", Value: "/aws/lambda/log-group-1"}, }, }) require.NotNil(t, res) require.False(t, skip) assertion := require.NotContains if tc.level == slog.LevelDebug { assertion = require.Contains } assertion(t, buf.String(), "found mapping") }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_logs_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture resources for the AWS/Logs namespace: two log groups.
// Also used by associator_logging_test.go.
var logGroup1 = &model.TaggedResource{
	ARN:       "arn:aws:logs:eu-central-1:123456789012:log-group:/aws/lambda/log-group-1",
	Namespace: "AWS/Logs",
}

var logGroup2 = &model.TaggedResource{
	ARN:       "arn:aws:logs:eu-central-1:123456789012:log-group:/custom/log-group-2",
	Namespace: "AWS/Logs",
}

var logGroupResources = []*model.TaggedResource{
	logGroup1,
	logGroup2,
}

// TestAssociatorLogs verifies association of AWS/Logs metrics to the correct
// log-group resource by the LogGroupName dimension.
func TestAssociatorLogs(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with log group one dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Logs").ToModelDimensionsRegexp(),
				resources:        logGroupResources,
				metric: &model.Metric{
					MetricName: "DeliveryThrottling",
					Namespace:  "AWS/Logs",
					Dimensions: []model.Dimension{
						{Name: "LogGroupName", Value: "/aws/lambda/log-group-1"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: logGroup1,
		},
		{
			name: "should match with log group two dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Logs").ToModelDimensionsRegexp(),
				resources:        logGroupResources,
				metric: &model.Metric{
					MetricName: "IncomingBytes",
					Namespace:  "AWS/Logs",
					Dimensions: []model.Dimension{
						{Name: "LogGroupName", Value: "/custom/log-group-2"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: logGroup2,
		},
		{
			name: "should not match with any log group",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Logs").ToModelDimensionsRegexp(),
				resources:        logGroupResources,
				metric: &model.Metric{
					MetricName: "ForwardingLogEvents",
					Namespace:  "AWS/Logs",
					Dimensions: []model.Dimension{
						{Name: "LogGroupName", Value: "/custom/nonexisting/log-group-3"},
					},
				},
			},
			expectedSkip:     true,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_mediaconvert_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture resources for the AWS/MediaConvert namespace: two queues.
var mediaConvertQueue = &model.TaggedResource{
	ARN:       "arn:aws:mediaconvert:eu-west-1:631611414237:queues/a-queue",
	Namespace: "AWS/MediaConvert",
}

var mediaConvertQueueTwo = &model.TaggedResource{
	ARN:       "arn:aws:mediaconvert:eu-west-1:631611414237:queues/a-second-queue",
	Namespace: "AWS/MediaConvert",
}

var mediaConvertResources = []*model.TaggedResource{
	mediaConvertQueue,
	mediaConvertQueueTwo,
}

// TestAssociatorMediaConvert verifies association of AWS/MediaConvert metrics
// to the correct queue; the Queue dimension value is a full ARN here.
func TestAssociatorMediaConvert(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with mediaconvert queue one dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MediaConvert").ToModelDimensionsRegexp(),
				resources:        mediaConvertResources,
				metric: &model.Metric{
					MetricName: "JobsCompletedCount",
					Namespace:  "AWS/MediaConvert",
					Dimensions: []model.Dimension{
						{Name: "Queue", Value: "arn:aws:mediaconvert:eu-west-1:631611414237:queues/a-queue"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: mediaConvertQueue,
		},
		{
			name: "should match with mediaconvert queue two dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MediaConvert").ToModelDimensionsRegexp(),
				resources:        mediaConvertResources,
				metric: &model.Metric{
					MetricName: "JobsCompletedCount",
					Namespace:  "AWS/MediaConvert",
					Dimensions: []model.Dimension{
						{Name: "Queue", Value: "arn:aws:mediaconvert:eu-west-1:631611414237:queues/a-second-queue"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: mediaConvertQueueTwo,
		},
		{
			name: "should not match with any mediaconvert queue",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MediaConvert").ToModelDimensionsRegexp(),
				resources:        mediaConvertResources,
				metric: &model.Metric{
					MetricName: "JobsCompletedCount",
					Namespace:  "AWS/MediaConvert",
					Dimensions: []model.Dimension{
						{Name: "Queue", Value: "arn:aws:mediaconvert:eu-west-1:631611414237:queues/a-non-existing-queue"},
					},
				},
			},
			expectedSkip:     true,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_memorydb_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture resources for the AWS/MemoryDB namespace: two clusters.
var memoryDBCluster1 = &model.TaggedResource{
	ARN:       "arn:aws:memorydb:us-east-1:123456789012:cluster/mycluster",
	Namespace: "AWS/MemoryDB",
}

var memoryDBCluster2 = &model.TaggedResource{
	ARN:       "arn:aws:memorydb:us-east-1:123456789012:cluster/othercluster",
	Namespace: "AWS/MemoryDB",
}

var memoryDBClusters = []*model.TaggedResource{
	memoryDBCluster1,
	memoryDBCluster2,
}

// TestAssociatorMemoryDB verifies association of AWS/MemoryDB metrics by the
// ClusterName dimension, including the case of a dimension name that is not
// extracted from the ARN at all (neither matched nor skipped).
func TestAssociatorMemoryDB(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with ClusterName dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MemoryDB").ToModelDimensionsRegexp(),
				resources:        memoryDBClusters,
				metric: &model.Metric{
					Namespace:  "AWS/MemoryDB",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "ClusterName", Value: "mycluster"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: memoryDBCluster1,
		},
		{
			name: "should match another instance with ClusterName dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MemoryDB").ToModelDimensionsRegexp(),
				resources:        memoryDBClusters,
				metric: &model.Metric{
					Namespace:  "AWS/MemoryDB",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "ClusterName", Value: "othercluster"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: memoryDBCluster2,
		},
		{
			name: "should skip with unmatched ClusterName dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MemoryDB").ToModelDimensionsRegexp(),
				resources:        memoryDBClusters,
				metric: &model.Metric{
					Namespace:  "AWS/MemoryDB",
					MetricName: "CPUUtilization",
					Dimensions: []model.Dimension{
						{Name: "ClusterName", Value: "blahblah"},
					},
				},
			},
			expectedSkip:     true,
			expectedResource: nil,
		},
		{
			name: "should not skip when unmatching because of non-ARN dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/MemoryDB").ToModelDimensionsRegexp(),
				resources:        memoryDBClusters,
				metric: &model.Metric{
					Namespace:  "AWS/MemoryDB",
					MetricName: "BytesUsedForMemoryDB",
					Dimensions: []model.Dimension{
						{Name: "OtherName", Value: "some-other-value"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_mq_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture resources for the AWS/AmazonMQ namespace: RabbitMQ and ActiveMQ
// brokers, including one whose name ends in a numeric suffix.
var rabbitMQBroker = &model.TaggedResource{
	ARN:       "arn:aws:mq:us-east-2:123456789012:broker:rabbitmq-broker:b-000-111-222-333",
	Namespace: "AWS/AmazonMQ",
}

var rabbitMQBrokerWithActiveStyleName = &model.TaggedResource{
	ARN:       "arn:aws:mq:us-east-2:123456789012:broker:rabbitmq-broker-0:b-000-111-222-333",
	Namespace: "AWS/AmazonMQ",
}

var activeMQBroker = &model.TaggedResource{
	ARN:       "arn:aws:mq:us-east-2:123456789012:broker:activemq-broker:b-000-111-222-333",
	Namespace: "AWS/AmazonMQ",
}

// TestAssociatorMQ verifies association of AWS/AmazonMQ metrics via the
// Broker dimension, including ActiveMQ active/standby broker names whose
// numeric suffix does not appear in the ARN.
func TestAssociatorMQ(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should match with Broker dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/AmazonMQ").ToModelDimensionsRegexp(),
				resources:        []*model.TaggedResource{rabbitMQBroker},
				metric: &model.Metric{
					MetricName: "ProducerCount",
					Namespace:  "AWS/AmazonMQ",
					Dimensions: []model.Dimension{
						{Name: "Broker", Value: "rabbitmq-broker"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: rabbitMQBroker,
		},
		{
			name: "should match with Broker dimension when broker name has a number suffix and does match ARN",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/AmazonMQ").ToModelDimensionsRegexp(),
				resources:        []*model.TaggedResource{rabbitMQBrokerWithActiveStyleName},
				metric: &model.Metric{
					MetricName: "ProducerCount",
					Namespace:  "AWS/AmazonMQ",
					Dimensions: []model.Dimension{
						{Name: "Broker", Value: "rabbitmq-broker-0"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: rabbitMQBrokerWithActiveStyleName,
		},
		{
			// ActiveMQ allows active/standby modes where the `Broker` dimension has values
			// like `brokername-1` and `brokername-2` which don't match the ARN (the dimension
			// regex will extract `Broker` as `brokername` from ARN)
			name: "should match with Broker dimension when broker name has a number suffix and doesn't match ARN",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/AmazonMQ").ToModelDimensionsRegexp(),
				resources:        []*model.TaggedResource{activeMQBroker},
				metric: &model.Metric{
					MetricName: "ProducerCount",
					Namespace:  "AWS/AmazonMQ",
					Dimensions: []model.Dimension{
						{Name: "Broker", Value: "activemq-broker-1"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: activeMQBroker,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_qldb_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var validQldbInstance = &model.TaggedResource{ ARN: "arn:aws:qldb:us-east-1:123456789012:ledger/test1", Namespace: "AWS/QLDB", } func TestAssociatorQLDB(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "should match with ledger name dimension", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/QLDB").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{validQldbInstance}, metric: &model.Metric{ Namespace: "AWS/QLDB", MetricName: "JournalStorage", Dimensions: []model.Dimension{ {Name: "LedgerName", Value: "test2"}, }, }, }, expectedSkip: true, expectedResource: nil, }, { name: "should not match with ledger name dimension when QLDB arn is not valid", args: args{ dimensionRegexps: config.SupportedServices.GetService("AWS/QLDB").ToModelDimensionsRegexp(), resources: []*model.TaggedResource{validQldbInstance}, metric: &model.Metric{ Namespace: "AWS/QLDB", MetricName: "JournalStorage", Dimensions: []model.Dimension{ {Name: "LedgerName", Value: "test1"}, }, }, }, expectedSkip: false, expectedResource: validQldbInstance, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: 
pkg/job/maxdimassociator/associator_redshift_serverless_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Fixture resources for AWS/Redshift-Serverless: a workgroup and a namespace.
var workgroup = &model.TaggedResource{
	ARN:       "arn:aws:redshift-serverless:us-east-1:123456789012:workgroup/my-workgroup1",
	Namespace: "AWS/Redshift-Serverless",
}

var namespace = &model.TaggedResource{
	ARN:       "arn:aws:redshift-serverless:us-east-1:123456789012:namespace/my-namespace1",
	Namespace: "AWS/Redshift-Serverless",
}

var redshiftResources = []*model.TaggedResource{
	workgroup,
	namespace,
}

// TestAssociatorRedshiftServerless covers the case where a Workgroup
// dimension value matches no known resource: the metric is neither matched
// to a resource nor skipped (skip=false, resource=nil).
func TestAssociatorRedshiftServerless(t *testing.T) {
	type args struct {
		dimensionRegexps []model.DimensionsRegexp
		resources        []*model.TaggedResource
		metric           *model.Metric
	}

	type testCase struct {
		name             string
		args             args
		expectedSkip     bool
		expectedResource *model.TaggedResource
	}

	testcases := []testCase{
		{
			name: "should not match nor skip with any workgroup none ARN dimension",
			args: args{
				dimensionRegexps: config.SupportedServices.GetService("AWS/Redshift-Serverless").ToModelDimensionsRegexp(),
				resources:        redshiftResources,
				metric: &model.Metric{
					MetricName: "ComputeSeconds",
					Namespace:  "AWS/Redshift-Serverless",
					Dimensions: []model.Dimension{
						{Name: "Workgroup", Value: "my-nonexistant-workgroup-test1"},
					},
				},
			},
			expectedSkip:     false,
			expectedResource: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources)
			res, skip := associator.AssociateMetricToResource(tc.args.metric)
			require.Equal(t, tc.expectedSkip, skip)
			require.Equal(t, tc.expectedResource, res)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_sagemaker_endpoint_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// SageMaker endpoint-health fixtures used by the associator test below.
var sagemakerEndpointHealthOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:endpoint/example-endpoint-one",
	Namespace: "/aws/sagemaker/Endpoints",
}

var sagemakerEndpointHealthTwo = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:endpoint/example-endpoint-two",
	Namespace: "/aws/sagemaker/Endpoints",
}

var sagemakerHealthResources = []*model.TaggedResource{
	sagemakerEndpointHealthOne,
	sagemakerEndpointHealthTwo,
}

// TestAssociatorSagemakerEndpoint verifies association of
// /aws/sagemaker/Endpoints metrics: a known EndpointName maps to its
// resource, while an unknown one causes the metric to be skipped.
func TestAssociatorSagemakerEndpoint(t *testing.T) {
	tests := []struct {
		name         string
		regexps      []model.DimensionsRegexp
		resources    []*model.TaggedResource
		metric       *model.Metric
		wantSkip     bool
		wantResource *model.TaggedResource
	}{
		{
			name:      "2 dimensions should match",
			regexps:   config.SupportedServices.GetService("/aws/sagemaker/Endpoints").ToModelDimensionsRegexp(),
			resources: sagemakerHealthResources,
			metric: &model.Metric{
				MetricName: "MemoryUtilization",
				Namespace:  "/aws/sagemaker/Endpoints",
				Dimensions: []model.Dimension{
					{Name: "EndpointName", Value: "example-endpoint-two"},
					{Name: "VariantName", Value: "example-endpoint-two-variant-one"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerEndpointHealthTwo,
		},
		{
			name:      "2 dimensions should not match",
			regexps:   config.SupportedServices.GetService("/aws/sagemaker/Endpoints").ToModelDimensionsRegexp(),
			resources: sagemakerHealthResources,
			metric: &model.Metric{
				MetricName: "MemoryUtilization",
				Namespace:  "/aws/sagemaker/Endpoints",
				Dimensions: []model.Dimension{
					{Name: "EndpointName", Value: "example-endpoint-three"},
					{Name: "VariantName", Value: "example-endpoint-three-variant-one"},
				},
			},
			wantSkip:     true,
			wantResource: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assoc := NewAssociator(promslog.NewNopLogger(), tt.regexps, tt.resources)
			got, skip := assoc.AssociateMetricToResource(tt.metric)
			require.Equal(t, tt.wantSkip, skip)
			require.Equal(t, tt.wantResource, got)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_sagemaker_inf_component_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// SageMaker inference-component fixture used by the associator test below.
var sagemakerInfComponentJobOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:inference-component/example-inference-component-one",
	Namespace: "/aws/sagemaker/InferenceComponents",
}

var sagemakerInfComponentJobResources = []*model.TaggedResource{
	sagemakerInfComponentJobOne,
}

// TestAssociatorSagemakerInfComponentJob verifies that a metric carrying an
// InferenceComponentName dimension is associated with the corresponding
// inference-component resource.
func TestAssociatorSagemakerInfComponentJob(t *testing.T) {
	tests := []struct {
		name         string
		regexps      []model.DimensionsRegexp
		resources    []*model.TaggedResource
		metric       *model.Metric
		wantSkip     bool
		wantResource *model.TaggedResource
	}{
		{
			name:      "1 dimension should not match but not skip",
			regexps:   config.SupportedServices.GetService("/aws/sagemaker/InferenceComponents").ToModelDimensionsRegexp(),
			resources: sagemakerInfComponentJobResources,
			metric: &model.Metric{
				MetricName: "CPUUtilizationNormalized",
				Namespace:  "/aws/sagemaker/InferenceComponents",
				Dimensions: []model.Dimension{
					{Name: "InferenceComponentName", Value: "example-inference-component-one"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerInfComponentJobOne,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assoc := NewAssociator(promslog.NewNopLogger(), tt.regexps, tt.resources)
			got, skip := assoc.AssociateMetricToResource(tt.metric)
			require.Equal(t, tt.wantSkip, skip)
			require.Equal(t, tt.wantResource, got)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_sagemaker_inf_rec_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// SageMaker inference-recommendations-job fixture used by the test below.
var sagemakerInfRecJobOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:inference-recommendations-job/example-inf-rec-job-one",
	Namespace: "/aws/sagemaker/InferenceRecommendationsJobs",
}

var sagemakerInfRecJobResources = []*model.TaggedResource{
	sagemakerInfRecJobOne,
}

// TestAssociatorSagemakerInfRecJob verifies that a metric carrying a JobName
// dimension is associated with the matching inference-recommendations-job
// resource.
func TestAssociatorSagemakerInfRecJob(t *testing.T) {
	tests := []struct {
		name         string
		regexps      []model.DimensionsRegexp
		resources    []*model.TaggedResource
		metric       *model.Metric
		wantSkip     bool
		wantResource *model.TaggedResource
	}{
		{
			name:      "1 dimension should not match but not skip",
			regexps:   config.SupportedServices.GetService("/aws/sagemaker/InferenceRecommendationsJobs").ToModelDimensionsRegexp(),
			resources: sagemakerInfRecJobResources,
			metric: &model.Metric{
				MetricName: "ClientInvocations",
				Namespace:  "/aws/sagemaker/InferenceRecommendationsJobs",
				Dimensions: []model.Dimension{
					{Name: "JobName", Value: "example-inf-rec-job-one"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerInfRecJobOne,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assoc := NewAssociator(promslog.NewNopLogger(), tt.regexps, tt.resources)
			got, skip := assoc.AssociateMetricToResource(tt.metric)
			require.Equal(t, tt.wantSkip, skip)
			require.Equal(t, tt.wantResource, got)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_sagemaker_pipeline_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// SageMaker model-building-pipeline fixtures used by the test below.
var sagemakerPipelineOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:pipeline/example-pipeline-one",
	Namespace: "AWS/Sagemaker/ModelBuildingPipeline",
}

var sagemakerPipelineTwo = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:pipeline/example-pipeline-two",
	Namespace: "AWS/Sagemaker/ModelBuildingPipeline",
}

var sagemakerPipelineResources = []*model.TaggedResource{
	sagemakerPipelineOne,
	sagemakerPipelineTwo,
}

// TestAssociatorSagemakerPipeline verifies association of pipeline metrics:
// PipelineName (with or without StepName) maps to the matching pipeline,
// while an unknown pipeline name causes the metric to be skipped.
func TestAssociatorSagemakerPipeline(t *testing.T) {
	tests := []struct {
		name         string
		regexps      []model.DimensionsRegexp
		resources    []*model.TaggedResource
		metric       *model.Metric
		wantSkip     bool
		wantResource *model.TaggedResource
	}{
		{
			name:      "2 dimensions should match",
			regexps:   config.SupportedServices.GetService("AWS/Sagemaker/ModelBuildingPipeline").ToModelDimensionsRegexp(),
			resources: sagemakerPipelineResources,
			metric: &model.Metric{
				MetricName: "ExecutionStarted",
				Namespace:  "AWS/Sagemaker/ModelBuildingPipeline",
				Dimensions: []model.Dimension{
					{Name: "PipelineName", Value: "example-pipeline-one"},
					{Name: "StepName", Value: "example-pipeline-one-step-two"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerPipelineOne,
		},
		{
			name:      "1 dimension should match",
			regexps:   config.SupportedServices.GetService("AWS/Sagemaker/ModelBuildingPipeline").ToModelDimensionsRegexp(),
			resources: sagemakerPipelineResources,
			metric: &model.Metric{
				MetricName: "ExecutionStarted",
				Namespace:  "AWS/Sagemaker/ModelBuildingPipeline",
				Dimensions: []model.Dimension{
					{Name: "PipelineName", Value: "example-pipeline-two"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerPipelineTwo,
		},
		{
			name:      "2 dimensions should not match",
			regexps:   config.SupportedServices.GetService("AWS/Sagemaker/ModelBuildingPipeline").ToModelDimensionsRegexp(),
			resources: sagemakerPipelineResources,
			metric: &model.Metric{
				MetricName: "ExecutionStarted",
				Namespace:  "AWS/Sagemaker/ModelBuildingPipeline",
				Dimensions: []model.Dimension{
					{Name: "PipelineName", Value: "example-pipeline-three"},
					{Name: "StepName", Value: "example-pipeline-three-step-two"},
				},
			},
			wantSkip:     true,
			wantResource: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assoc := NewAssociator(promslog.NewNopLogger(), tt.regexps, tt.resources)
			got, skip := assoc.AssociateMetricToResource(tt.metric)
			require.Equal(t, tt.wantSkip, skip)
			require.Equal(t, tt.wantResource, got)
		})
	}
}

================================================
FILE:
pkg/job/maxdimassociator/associator_sagemaker_processing_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var sagemakerProcessingJobOne = &model.TaggedResource{ ARN: "arn:aws:sagemaker:us-west-2:123456789012:processing-job/example-processing-job-one", Namespace: "/aws/sagemaker/ProcessingJobs", } var sagemakerProcessingJobResources = []*model.TaggedResource{ sagemakerProcessingJobOne, } func TestAssociatorSagemakerProcessingJob(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "1 dimension should not match but not skip", args: args{ dimensionRegexps: config.SupportedServices.GetService("/aws/sagemaker/ProcessingJobs").ToModelDimensionsRegexp(), resources: sagemakerProcessingJobResources, metric: &model.Metric{ MetricName: "CPUUtilization", Namespace: "/aws/sagemaker/ProcessingJobs", Dimensions: []model.Dimension{ {Name: "Host", Value: "example-processing-job-one/algo-1"}, }, }, }, 
expectedSkip: false, expectedResource: nil, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/maxdimassociator/associator_sagemaker_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// AWS/SageMaker invocation fixtures: three endpoints and two inference
// components, used by the associator test below.
var sagemakerEndpointInvocationOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:endpoint/example-endpoint-one",
	Namespace: "AWS/SageMaker",
}

var sagemakerEndpointInvocationTwo = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:endpoint/example-endpoint-two",
	Namespace: "AWS/SageMaker",
}

var sagemakerEndpointInvocationUpper = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:endpoint/example-endpoint-upper",
	Namespace: "AWS/SageMaker",
}

var sagemakerInferenceComponentInvocationOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:inference-component/example-inference-component-one",
	Namespace: "AWS/SageMaker",
}

var sagemakerInferenceComponentInvocationUpper = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:inference-component/example-inference-component-upper",
	Namespace: "AWS/SageMaker",
}

var sagemakerInvocationResources = []*model.TaggedResource{
	sagemakerEndpointInvocationOne,
	sagemakerEndpointInvocationTwo,
	sagemakerEndpointInvocationUpper,
	sagemakerInferenceComponentInvocationOne,
	sagemakerInferenceComponentInvocationUpper,
}

// TestAssociatorSagemaker exercises association of AWS/SageMaker metrics
// against endpoint and inference-component resources, including the
// case-sensitivity behaviour for endpoint names vs inference components.
func TestAssociatorSagemaker(t *testing.T) {
	tests := []struct {
		name         string
		regexps      []model.DimensionsRegexp
		resources    []*model.TaggedResource
		metric       *model.Metric
		wantSkip     bool
		wantResource *model.TaggedResource
	}{
		{
			name:      "3 dimensions should match",
			regexps:   config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(),
			resources: sagemakerInvocationResources,
			metric: &model.Metric{
				MetricName: "Invocations",
				Namespace:  "AWS/SageMaker",
				Dimensions: []model.Dimension{
					{Name: "EndpointName", Value: "example-endpoint-one"},
					{Name: "VariantName", Value: "example-endpoint-one-variant-one"},
					{Name: "EndpointConfigName", Value: "example-endpoint-one-endpoint-config"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerEndpointInvocationOne,
		},
		{
			name:      "2 dimensions should match",
			regexps:   config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(),
			resources: sagemakerInvocationResources,
			metric: &model.Metric{
				MetricName: "Invocations",
				Namespace:  "AWS/SageMaker",
				Dimensions: []model.Dimension{
					{Name: "EndpointName", Value: "example-endpoint-two"},
					{Name: "VariantName", Value: "example-endpoint-two-variant-one"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerEndpointInvocationTwo,
		},
		{
			name:      "2 dimensions should not match",
			regexps:   config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(),
			resources: sagemakerInvocationResources,
			metric: &model.Metric{
				MetricName: "Invocations",
				Namespace:  "AWS/SageMaker",
				Dimensions: []model.Dimension{
					{Name: "EndpointName", Value: "example-endpoint-three"},
					{Name: "VariantName", Value: "example-endpoint-three-variant-one"},
				},
			},
			wantSkip:     true,
			wantResource: nil,
		},
		{
			name:      "2 dimensions should not match in Upper case",
			regexps:   config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(),
			resources: sagemakerInvocationResources,
			metric: &model.Metric{
				MetricName: "ModelLatency",
				Namespace:  "AWS/SageMaker",
				Dimensions: []model.Dimension{
					{Name: "EndpointName", Value: "Example-Endpoint-Upper"},
					{Name: "VariantName", Value: "example-endpoint-two-variant-one"},
				},
			},
			wantSkip:     true,
			wantResource: nil,
		},
		{
			name:      "inference component match",
			regexps:   config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(),
			resources: sagemakerInvocationResources,
			metric: &model.Metric{
				MetricName: "ModelLatency",
				Namespace:  "AWS/SageMaker",
				Dimensions: []model.Dimension{
					{Name: "InferenceComponentName", Value: "example-inference-component-one"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerInferenceComponentInvocationOne,
		},
		{
			name:      "inference component match in Upper case",
			regexps:   config.SupportedServices.GetService("AWS/SageMaker").ToModelDimensionsRegexp(),
			resources: sagemakerInvocationResources,
			metric: &model.Metric{
				MetricName: "ModelLatency",
				Namespace:  "AWS/SageMaker",
				Dimensions: []model.Dimension{
					{Name: "InferenceComponentName", Value: "Example-Inference-Component-Upper"},
				},
			},
			wantSkip:     false,
			wantResource: sagemakerInferenceComponentInvocationUpper,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assoc := NewAssociator(promslog.NewNopLogger(), tt.regexps, tt.resources)
			got, skip := assoc.AssociateMetricToResource(tt.metric)
			require.Equal(t, tt.wantSkip, skip)
			require.Equal(t, tt.wantResource, got)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_sagemaker_training_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package maxdimassociator

import (
	"testing"

	"github.com/prometheus/common/promslog"
	"github.com/stretchr/testify/require"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// SageMaker training-job fixture used by the associator test below.
var sagemakerTrainingJobOne = &model.TaggedResource{
	ARN:       "arn:aws:sagemaker:us-west-2:123456789012:training-job/example-training-job-one",
	Namespace: "/aws/sagemaker/TrainingJobs",
}

var sagemakerTrainingJobResources = []*model.TaggedResource{
	sagemakerTrainingJobOne,
}

// TestAssociatorSagemakerTrainingJob verifies that a Host dimension which
// does not resolve to a training-job ARN yields no association without
// skipping the metric.
func TestAssociatorSagemakerTrainingJob(t *testing.T) {
	tests := []struct {
		name         string
		regexps      []model.DimensionsRegexp
		resources    []*model.TaggedResource
		metric       *model.Metric
		wantSkip     bool
		wantResource *model.TaggedResource
	}{
		{
			name:      "1 dimension should not skip",
			regexps:   config.SupportedServices.GetService("/aws/sagemaker/TrainingJobs").ToModelDimensionsRegexp(),
			resources: sagemakerTrainingJobResources,
			metric: &model.Metric{
				MetricName: "CPUUtilization",
				Namespace:  "/aws/sagemaker/TrainingJobs",
				Dimensions: []model.Dimension{
					{Name: "Host", Value: "example-training-job-one/algo-1"},
				},
			},
			wantSkip:     false,
			wantResource: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assoc := NewAssociator(promslog.NewNopLogger(), tt.regexps, tt.resources)
			got, skip := assoc.AssociateMetricToResource(tt.metric)
			require.Equal(t, tt.wantSkip, skip)
			require.Equal(t, tt.wantResource, got)
		})
	}
}

================================================
FILE: pkg/job/maxdimassociator/associator_sagemaker_transform_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package maxdimassociator import ( "testing" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) var sagemakerTransformJobOne = &model.TaggedResource{ ARN: "arn:aws:sagemaker:us-west-2:123456789012:transform-job/example-transform-job-one", Namespace: "/aws/sagemaker/TransformJobs", } var sagemakerTransformJobResources = []*model.TaggedResource{ sagemakerTransformJobOne, } func TestAssociatorSagemakerTransformJob(t *testing.T) { type args struct { dimensionRegexps []model.DimensionsRegexp resources []*model.TaggedResource metric *model.Metric } type testCase struct { name string args args expectedSkip bool expectedResource *model.TaggedResource } testcases := []testCase{ { name: "1 dimension should not match but not skip", args: args{ dimensionRegexps: config.SupportedServices.GetService("/aws/sagemaker/TransformJobs").ToModelDimensionsRegexp(), resources: sagemakerTransformJobResources, metric: &model.Metric{ MetricName: "CPUUtilization", Namespace: "/aws/sagemaker/TransformJobs", Dimensions: []model.Dimension{ {Name: "Host", Value: "example-transform-job-one/algo-1"}, }, }, }, expectedSkip: false, expectedResource: nil, }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { associator := NewAssociator(promslog.NewNopLogger(), tc.args.dimensionRegexps, tc.args.resources) res, skip := associator.AssociateMetricToResource(tc.args.metric) require.Equal(t, 
tc.expectedSkip, skip) require.Equal(t, tc.expectedResource, res) }) } } ================================================ FILE: pkg/job/resourcemetadata/resource.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package resourcemetadata import ( "context" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) type Resource struct { // Name is an identifiable value for the resource and is variable dependent on the match made // It will be the AWS ARN (Amazon Resource Name) if a unique resource was found // It will be "global" if a unique resource was not found // CustomNamespaces will have the custom namespace Name Name string // Tags is a set of tags associated to the resource Tags []model.Tag } type Resources struct { StaticResource *Resource AssociatedResources []*Resource } type MetricResourceEnricher interface { Enrich(ctx context.Context, metrics []*model.Metric) ([]*model.Metric, Resources) } ================================================ FILE: pkg/job/scrape.go ================================================ // Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package job

import (
	"context"
	"fmt"
	"log/slog"
	"sync"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics"
	emconfig "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/internal/enhancedmetrics/config"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/getmetricdata"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// ScrapeAwsData runs all configured discovery, static, and custom-namespace
// jobs and returns the collected tagged resources and CloudWatch metric
// results. One goroutine is launched per (job, role, region) combination;
// results are appended to the shared slices under a mutex, and the function
// blocks until every goroutine has finished. A goroutine that cannot resolve
// its account ID logs an error and contributes no data.
func ScrapeAwsData(
	ctx context.Context,
	logger *slog.Logger,
	jobsCfg model.JobsConfig,
	factory clients.Factory,
	metricsPerQuery int,
	cloudwatchConcurrency cloudwatch.ConcurrencyConfig,
	taggingAPIConcurrency int,
) ([]model.TaggedResourceResult, []model.CloudwatchMetricResult) {
	// mux guards cwData and awsInfoData, which are appended to concurrently
	// by every job goroutine below.
	mux := &sync.Mutex{}
	cwData := make([]model.CloudwatchMetricResult, 0)
	awsInfoData := make([]model.TaggedResourceResult, 0)
	var wg sync.WaitGroup

	// The enhanced metrics service is shared across all discovery jobs; it is
	// built lazily on the first job that needs it and never retried on failure.
	var enhancedMetricsService *enhancedmetrics.Service
	var enhancedMetricsInitFailed bool

	for _, discoveryJob := range jobsCfg.DiscoveryJobs {
		// initialize enhanced metrics service only if:
		// - the current discovery job has enhanced metrics configured
		// - the enhanced metrics service is not already initialized
		// - a previous initialization attempt has not already failed
		if discoveryJob.HasEnhancedMetrics() && enhancedMetricsService == nil && !enhancedMetricsInitFailed {
			if configProvider, ok := factory.(emconfig.RegionalConfigProvider); ok {
				enhancedMetricsService = enhancedmetrics.NewService(
					configProvider,
					enhancedmetrics.DefaultEnhancedMetricServiceRegistry,
				)
			} else {
				enhancedMetricsInitFailed = true
				logger.Warn("Couldn't initialize enhanced metrics service",
					"factory_type", fmt.Sprintf("%T", factory),
					"err", "does not implement GetAWSRegionalConfig")
			}
		}
		for _, role := range discoveryJob.Roles {
			for _, region := range discoveryJob.Regions {
				wg.Add(1)
				// Loop variables are passed as arguments so each goroutine
				// captures its own copy.
				go func(discoveryJob model.DiscoveryJob, region string, role model.Role) {
					defer wg.Done()
					jobLogger := logger.With("namespace", discoveryJob.Namespace, "region", region, "arn", role.RoleArn)
					accountID, err := factory.GetAccountClient(region, role).GetAccount(ctx)
					if err != nil {
						// No account ID means no usable scrape context; drop this job's data.
						jobLogger.Error("Couldn't get account Id", "err", err)
						return
					}
					jobLogger = jobLogger.With("account", accountID)
					// A missing alias is non-fatal; the empty string is used instead.
					accountAlias, err := factory.GetAccountClient(region, role).GetAccountAlias(ctx)
					if err != nil {
						jobLogger.Warn("Couldn't get account alias", "err", err)
					}
					cloudwatchClient := factory.GetCloudwatchClient(region, role, cloudwatchConcurrency)
					// NOTE(review): the processor is given the outer `logger`, not
					// `jobLogger` — presumably intentional, but worth confirming.
					gmdProcessor := getmetricdata.NewDefaultProcessor(logger, cloudwatchClient, metricsPerQuery, cloudwatchConcurrency.GetMetricData)
					resources, metrics := runDiscoveryJob(
						ctx,
						jobLogger,
						discoveryJob,
						region,
						factory.GetTaggingClient(region, role, taggingAPIConcurrency),
						cloudwatchClient,
						gmdProcessor,
						enhancedMetricsService,
						role,
					)
					// By default results are only emitted when metrics were
					// gathered; the AlwaysReturnInfoMetrics feature flag also
					// emits results when only resources were found.
					addDataToOutput := len(metrics) != 0
					if config.FlagsFromCtx(ctx).IsFeatureEnabled(config.AlwaysReturnInfoMetrics) {
						addDataToOutput = addDataToOutput || len(resources) != 0
					}
					if addDataToOutput {
						sc := &model.ScrapeContext{
							Region:       region,
							AccountID:    accountID,
							AccountAlias: accountAlias,
							CustomTags:   discoveryJob.CustomTags,
						}
						metricResult := model.CloudwatchMetricResult{
							Context: sc,
							Data:    metrics,
						}
						resourceResult := model.TaggedResourceResult{
							Data: resources,
						}
						// The scrape context is only attached to info metrics
						// when the job opts in.
						if discoveryJob.IncludeContextOnInfoMetrics {
							resourceResult.Context = sc
						}
						mux.Lock()
						awsInfoData = append(awsInfoData, resourceResult)
						cwData = append(cwData, metricResult)
						mux.Unlock()
					}
				}(discoveryJob, region, role)
			}
		}
	}

	// Static jobs: metrics only, no resource discovery.
	for _, staticJob := range jobsCfg.StaticJobs {
		for _, role := range staticJob.Roles {
			for _, region := range staticJob.Regions {
				wg.Add(1)
				go func(staticJob model.StaticJob, region string, role model.Role) {
					defer wg.Done()
					jobLogger := logger.With("static_job_name", staticJob.Name, "region", region, "arn", role.RoleArn)
					accountID, err := factory.GetAccountClient(region, role).GetAccount(ctx)
					if err != nil {
						jobLogger.Error("Couldn't get account Id", "err", err)
						return
					}
					jobLogger = jobLogger.With("account", accountID)
					accountAlias, err := factory.GetAccountClient(region, role).GetAccountAlias(ctx)
					if err != nil {
						jobLogger.Warn("Couldn't get account alias", "err", err)
					}
					metrics := runStaticJob(ctx, jobLogger, staticJob, factory.GetCloudwatchClient(region, role, cloudwatchConcurrency))
					metricResult := model.CloudwatchMetricResult{
						Context: &model.ScrapeContext{
							Region:       region,
							AccountID:    accountID,
							AccountAlias: accountAlias,
							CustomTags:   staticJob.CustomTags,
						},
						Data: metrics,
					}
					mux.Lock()
					cwData = append(cwData, metricResult)
					mux.Unlock()
				}(staticJob, region, role)
			}
		}
	}

	// Custom namespace jobs: GetMetricData against user-defined namespaces.
	for _, customNamespaceJob := range jobsCfg.CustomNamespaceJobs {
		for _, role := range customNamespaceJob.Roles {
			for _, region := range customNamespaceJob.Regions {
				wg.Add(1)
				go func(customNamespaceJob model.CustomNamespaceJob, region string, role model.Role) {
					defer wg.Done()
					jobLogger := logger.With("custom_metric_namespace", customNamespaceJob.Namespace, "region", region, "arn", role.RoleArn)
					accountID, err := factory.GetAccountClient(region, role).GetAccount(ctx)
					if err != nil {
						jobLogger.Error("Couldn't get account Id", "err", err)
						return
					}
					jobLogger = jobLogger.With("account", accountID)
					accountAlias, err := factory.GetAccountClient(region, role).GetAccountAlias(ctx)
					if err != nil {
						jobLogger.Warn("Couldn't get account alias", "err", err)
					}
					cloudwatchClient := factory.GetCloudwatchClient(region, role, cloudwatchConcurrency)
					gmdProcessor := getmetricdata.NewDefaultProcessor(logger, cloudwatchClient, metricsPerQuery, cloudwatchConcurrency.GetMetricData)
					metrics := runCustomNamespaceJob(ctx, jobLogger, customNamespaceJob, cloudwatchClient, gmdProcessor)
					metricResult := model.CloudwatchMetricResult{
						Context: &model.ScrapeContext{
							Region:       region,
							AccountID:    accountID,
							AccountAlias: accountAlias,
							CustomTags:   customNamespaceJob.CustomTags,
						},
						Data: metrics,
					}
					mux.Lock()
					cwData = append(cwData, metricResult)
					mux.Unlock()
				}(customNamespaceJob, region, role)
			}
		}
	}

	wg.Wait()
	return awsInfoData, cwData
}

================================================
FILE: pkg/job/scraper.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package job

import (
	"context"
	"fmt"
	"log/slog"
	"sync"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/account"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/cloudwatchrunner"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Scraper runs all configured discovery and custom-namespace jobs and
// aggregates their resource, metric, and error results.
type Scraper struct {
	jobsCfg       model.JobsConfig
	logger        *slog.Logger
	runnerFactory runnerFactory
}

// runnerFactory abstracts creation of the per-region/role clients and runners
// the Scraper needs. It is defined at the consumer so tests can stub it.
type runnerFactory interface {
	GetAccountClient(region string, role model.Role) account.Client
	NewResourceMetadataRunner(logger *slog.Logger, region string, role model.Role) ResourceMetadataRunner
	NewCloudWatchRunner(logger *slog.Logger, region string, role model.Role, job cloudwatchrunner.Job) CloudwatchRunner
}

// ResourceMetadataRunner discovers the tagged resources for a discovery job.
type ResourceMetadataRunner interface {
	Run(ctx context.Context, region string, job model.DiscoveryJob) ([]*model.TaggedResource, error)
}

// CloudwatchRunner gathers CloudWatch metric data for a prepared job.
type CloudwatchRunner interface {
	Run(ctx context.Context) ([]*model.CloudwatchData, error)
}

// NewScraper builds a Scraper for the given job configuration.
func NewScraper(logger *slog.Logger,
	jobsCfg model.JobsConfig,
	runnerFactory runnerFactory,
) *Scraper {
	return &Scraper{
		runnerFactory: runnerFactory,
		logger:        logger,
		jobsCfg:       jobsCfg,
	}
}

// ErrorType categorizes the stage of a job run that failed.
type ErrorType string

var (
	AccountErr              ErrorType = "Account for job was not found"
	ResourceMetadataErr     ErrorType = "Failed to run resource metadata for job"
	CloudWatchCollectionErr ErrorType = "Failed to gather cloudwatch metrics for job"
)

// Account holds the resolved AWS account ID and its (optional) alias.
type Account struct {
	ID    string
	Alias string
}

// Scrape runs every configured discovery and custom-namespace job concurrently
// and returns the discovered resources, the collected metrics, and any
// per-job errors. A failing job is reported as an Error and never aborts the
// other jobs.
func (s Scraper) Scrape(ctx context.Context) ([]model.TaggedResourceResult, []model.CloudwatchMetricResult, []Error) {
	// Setup so we only do one GetAccount call per region + role combo when running jobs.
	// sync.OnceValues memoizes the first result; later jobs for the same
	// role+region reuse it without another API call.
	roleRegionToAccount := map[model.Role]map[string]func() (Account, error){}
	jobConfigVisitor(s.jobsCfg, func(_ any, role model.Role, region string) {
		if _, exists := roleRegionToAccount[role]; !exists {
			roleRegionToAccount[role] = map[string]func() (Account, error){}
		}
		roleRegionToAccount[role][region] = sync.OnceValues(func() (Account, error) {
			client := s.runnerFactory.GetAccountClient(region, role)
			accountID, err := client.GetAccount(ctx)
			if err != nil {
				return Account{}, fmt.Errorf("failed to get Account: %w", err)
			}
			a := Account{
				ID: accountID,
			}
			// The alias is optional: a lookup failure is logged but does not
			// fail the job.
			accountAlias, err := client.GetAccountAlias(ctx)
			if err != nil {
				s.logger.Warn("Failed to get optional account alias from account", "err", err, "account_id", accountID)
			} else {
				a.Alias = accountAlias
			}
			return a, nil
		})
	})

	var wg sync.WaitGroup
	// mux guards jobErrors, metricResults, and resourceResults below.
	mux := &sync.Mutex{}
	jobErrors := make([]Error, 0)
	metricResults := make([]model.CloudwatchMetricResult, 0)
	resourceResults := make([]model.TaggedResourceResult, 0)
	s.logger.Debug("Starting job runs")
	jobConfigVisitor(s.jobsCfg, func(job any, role model.Role, region string) {
		wg.Add(1)
		go func() {
			defer wg.Done()

			var namespace string
			jobAction(s.logger, job, func(job model.DiscoveryJob) {
				namespace = job.Namespace
			}, func(job model.CustomNamespaceJob) {
				namespace = job.Namespace
			})
			jobContext := JobContext{
				Namespace: namespace,
				Region:    region,
				RoleARN:   role.RoleArn,
			}
			jobLogger := s.logger.With("namespace", jobContext.Namespace, "region", jobContext.Region, "arn", jobContext.RoleARN)

			// "acct" rather than "account" to avoid shadowing the imported
			// account package.
			acct, err := roleRegionToAccount[role][region]()
			if err != nil {
				jobError := NewError(jobContext, AccountErr, err)
				mux.Lock()
				jobErrors = append(jobErrors, jobError)
				mux.Unlock()
				return
			}
			jobContext.Account = acct
			jobLogger = jobLogger.With("account_id", jobContext.Account.ID)

			var jobToRun cloudwatchrunner.Job
			jobAction(jobLogger, job,
				func(job model.DiscoveryJob) {
					jobLogger.Debug("Starting resource discovery")
					rmRunner := s.runnerFactory.NewResourceMetadataRunner(jobLogger, region, role)
					resources, err := rmRunner.Run(ctx, region, job)
					if err != nil {
						jobError := NewError(jobContext, ResourceMetadataErr, err)
						mux.Lock()
						jobErrors = append(jobErrors, jobError)
						mux.Unlock()
						// Errors in resource discovery leave jobToRun nil,
						// which ends the job run below.
						return
					}
					if len(resources) > 0 {
						result := model.TaggedResourceResult{
							Context: jobContext.ToScrapeContext(job.CustomTags),
							Data:    resources,
						}
						mux.Lock()
						resourceResults = append(resourceResults, result)
						mux.Unlock()
					} else {
						jobLogger.Debug("No tagged resources")
					}
					jobLogger.Debug("Resource discovery finished", "number_of_discovered_resources", len(resources))

					jobToRun = cloudwatchrunner.DiscoveryJob{Job: job, Resources: resources}
				}, func(job model.CustomNamespaceJob) {
					jobToRun = cloudwatchrunner.CustomNamespaceJob{Job: job}
				},
			)
			if jobToRun == nil {
				jobLogger.Debug("Ending job run early due to job error see job errors")
				return
			}

			jobLogger.Debug("Starting cloudwatch metrics runner")
			cwRunner := s.runnerFactory.NewCloudWatchRunner(jobLogger, region, role, jobToRun)
			metricResult, err := cwRunner.Run(ctx)
			if err != nil {
				jobError := NewError(jobContext, CloudWatchCollectionErr, err)
				mux.Lock()
				jobErrors = append(jobErrors, jobError)
				mux.Unlock()
				return
			}

			if len(metricResult) == 0 {
				jobLogger.Debug("No metrics data found")
				return
			}

			jobLogger.Debug("Job run finished", "number_of_metrics", len(metricResult))

			result := model.CloudwatchMetricResult{
				Context: jobContext.ToScrapeContext(jobToRun.CustomTags()),
				Data:    metricResult,
			}

			mux.Lock()
			defer mux.Unlock()
			metricResults = append(metricResults, result)
		}()
	})
	wg.Wait()
	s.logger.Debug("Finished job runs", "resource_results", len(resourceResults), "metric_results", len(metricResults))

	return resourceResults, metricResults, jobErrors
}

// Walk through each custom namespace and discovery jobs and take an action
func jobConfigVisitor(jobsCfg model.JobsConfig, action func(job any, role model.Role, region string)) {
	for _, job := range jobsCfg.DiscoveryJobs {
		for _, role := range job.Roles {
			for _, region := range job.Regions {
				action(job, role, region)
			}
		}
	}

	for _, job := range jobsCfg.CustomNamespaceJobs {
		for _, role := range job.Roles {
			for _, region := range job.Regions {
				action(job, role, region)
			}
		}
	}
}

// Take an action depending on the job type, only supports discovery and custom job types
func jobAction(logger *slog.Logger, job any, discovery func(job model.DiscoveryJob), custom func(job model.CustomNamespaceJob)) {
	// Type switches are free https://stackoverflow.com/a/28027945
	switch typedJob := job.(type) {
	case model.DiscoveryJob:
		discovery(typedJob)
	case model.CustomNamespaceJob:
		custom(typedJob)
	default:
		logger.Error("Unexpected job type", "err", fmt.Errorf("config type of %T is not supported", typedJob))
		return
	}
}

// JobContext exists to track data we want for logging, errors, or other output context that's learned as the job runs
// This makes it easier to track the data additively and morph it to the final shape necessary be it a model.ScrapeContext
// or an Error. It's an exported type for tests but is not part of the public interface
type JobContext struct { //nolint:revive
	Account   Account
	Namespace string
	Region    string
	RoleARN   string
}

// ToScrapeContext converts the JobContext plus the job's custom tags into a
// model.ScrapeContext attached to results.
func (jc JobContext) ToScrapeContext(customTags []model.Tag) *model.ScrapeContext {
	return &model.ScrapeContext{
		AccountID:    jc.Account.ID,
		Region:       jc.Region,
		CustomTags:   customTags,
		AccountAlias: jc.Account.Alias,
	}
}

// Error pairs a failed job's context with the stage that failed and the cause.
type Error struct {
	JobContext
	ErrorType ErrorType
	Err       error
}

// NewError builds an Error for the given job context, stage, and cause.
func NewError(context JobContext, errorType ErrorType, err error) Error {
	return Error{
		JobContext: context,
		ErrorType:  errorType,
		Err:        err,
	}
}

// ToLoggerKeyVals returns the job context as alternating key/value pairs
// suitable for slog calls.
func (e Error) ToLoggerKeyVals() []any {
	return []any{
		"account_id", e.Account.ID,
		"namespace", e.Namespace,
		"region", e.Region,
		"role_arn", e.RoleARN,
	}
}



================================================
FILE: pkg/job/scraper_test.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package job_test

import (
	"context"
	"errors"
	"log/slog"
	"reflect"
	"testing"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/prometheus/common/promslog"
	"github.com/r3labs/diff/v3"
	"github.com/stretchr/testify/assert"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/account"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/job/cloudwatchrunner"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// testRunnerFactory is a configurable stub: it acts as the Scraper's
// runnerFactory and also as the account client (GetAccountClient returns
// itself). Behavior is injected per test through the *Func fields.
type testRunnerFactory struct {
	GetAccountAliasFunc func() (string, error)
	GetAccountFunc      func() (string, error)
	MetadataRunFunc     func(ctx context.Context, region string, job model.DiscoveryJob) ([]*model.TaggedResource, error)
	CloudwatchRunFunc   func(ctx context.Context, job cloudwatchrunner.Job) ([]*model.CloudwatchData, error)
}

func (t *testRunnerFactory) GetAccountAlias(context.Context) (string, error) {
	return t.GetAccountAliasFunc()
}

func (t *testRunnerFactory) GetAccount(context.Context) (string, error) {
	return t.GetAccountFunc()
}

func (t *testRunnerFactory) Run(ctx context.Context, region string, job model.DiscoveryJob) ([]*model.TaggedResource, error) {
	return t.MetadataRunFunc(ctx, region, job)
}

func (t *testRunnerFactory) GetAccountClient(string, model.Role) account.Client {
	return t
}

func (t *testRunnerFactory) NewResourceMetadataRunner(*slog.Logger, string, model.Role) job.ResourceMetadataRunner {
	return &testMetadataRunner{RunFunc: t.MetadataRunFunc}
}

func (t *testRunnerFactory) NewCloudWatchRunner(_ *slog.Logger, _ string, _ model.Role, job cloudwatchrunner.Job) job.CloudwatchRunner {
	return &testCloudwatchRunner{Job: job, RunFunc: t.CloudwatchRunFunc}
}

// testMetadataRunner delegates resource discovery to the injected RunFunc.
type testMetadataRunner struct {
	RunFunc func(ctx context.Context, region string, job model.DiscoveryJob) ([]*model.TaggedResource, error)
}

func (t testMetadataRunner) Run(ctx context.Context, region string, job model.DiscoveryJob) ([]*model.TaggedResource, error) {
	return t.RunFunc(ctx, region, job)
}

// testCloudwatchRunner delegates metric collection to the injected RunFunc,
// passing along the job it was created for.
type testCloudwatchRunner struct {
	RunFunc func(ctx context.Context, job cloudwatchrunner.Job) ([]*model.CloudwatchData, error)
	Job     cloudwatchrunner.Job
}

func (t testCloudwatchRunner) Run(ctx context.Context) ([]*model.CloudwatchData, error) {
	return t.RunFunc(ctx, t.Job)
}

// TestScrapeRunner_Run drives Scraper.Scrape end-to-end through stubbed
// runners, covering discovery jobs, custom-namespace jobs, and each error
// stage (account lookup, alias lookup, resource discovery, metric collection).
func TestScrapeRunner_Run(t *testing.T) {
	tests := []struct {
		name                string
		jobsCfg             model.JobsConfig
		getAccountFunc      func() (string, error)
		getAccountAliasFunc func() (string, error)
		metadataRunFunc     func(ctx context.Context, region string, job model.DiscoveryJob) ([]*model.TaggedResource, error)
		cloudwatchRunFunc   func(ctx context.Context, job cloudwatchrunner.Job) ([]*model.CloudwatchData, error)
		expectedResources   []model.TaggedResourceResult
		expectedMetrics     []model.CloudwatchMetricResult
		expectedErrs        []job.Error
	}{
		{
			name: "can run a discovery job",
			jobsCfg: model.JobsConfig{
				DiscoveryJobs: []model.DiscoveryJob{
					{
						Regions:   []string{"us-east-1"},
						Namespace: "aws-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-1", ExternalID: "external-id-1"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "aws-account-1", nil
			},
			getAccountAliasFunc: func() (string, error) {
				return "my-aws-account", nil
			},
			metadataRunFunc: func(_ context.Context, _ string, _ model.DiscoveryJob) ([]*model.TaggedResource, error) {
				return []*model.TaggedResource{{
					ARN:       "resource-1",
					Namespace: "aws-namespace",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "tag1", Value: "value1"}},
				}}, nil
			},
			cloudwatchRunFunc: func(_ context.Context, _ cloudwatchrunner.Job) ([]*model.CloudwatchData, error) {
				return []*model.CloudwatchData{
					{
						MetricName:          "metric-1",
						ResourceName:        "resource-1",
						Namespace:           "aws-namespace",
						Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
						Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
						GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
					},
				}, nil
			},
			expectedResources: []model.TaggedResourceResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.TaggedResource{
						{ARN: "resource-1", Namespace: "aws-namespace", Region: "us-east-1", Tags: []model.Tag{{Key: "tag1", Value: "value1"}}},
					},
				},
			},
			expectedMetrics: []model.CloudwatchMetricResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-1",
							ResourceName:        "resource-1",
							Namespace:           "aws-namespace",
							Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
							Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
						},
					},
				},
			},
		},
		{
			name: "can run a custom namespace job",
			jobsCfg: model.JobsConfig{
				CustomNamespaceJobs: []model.CustomNamespaceJob{
					{
						Regions:   []string{"us-east-2"},
						Name:      "my-custom-job",
						Namespace: "custom-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-2", ExternalID: "external-id-2"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "aws-account-1", nil
			},
			getAccountAliasFunc: func() (string, error) {
				return "my-aws-account", nil
			},
			cloudwatchRunFunc: func(_ context.Context, _ cloudwatchrunner.Job) ([]*model.CloudwatchData, error) {
				return []*model.CloudwatchData{
					{
						MetricName:          "metric-2",
						ResourceName:        "resource-2",
						Namespace:           "custom-namespace",
						Dimensions:          []model.Dimension{{Name: "dimension2", Value: "value2"}},
						GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Minimum", DataPoints: []model.DataPoint{{Value: aws.Float64(2.0), Timestamp: time.Time{}}}},
					},
				}, nil
			},
			expectedMetrics: []model.CloudwatchMetricResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-2", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-2",
							ResourceName:        "resource-2",
							Namespace:           "custom-namespace",
							Dimensions:          []model.Dimension{{Name: "dimension2", Value: "value2"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Minimum", DataPoints: []model.DataPoint{{Value: aws.Float64(2.0), Timestamp: time.Time{}}}},
						},
					},
				},
			},
		},
		{
			name: "can run a discovery and custom namespace job",
			jobsCfg: model.JobsConfig{
				DiscoveryJobs: []model.DiscoveryJob{
					{
						Regions:   []string{"us-east-1"},
						Namespace: "aws-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-1", ExternalID: "external-id-1"},
						},
					},
				},
				CustomNamespaceJobs: []model.CustomNamespaceJob{
					{
						Regions:   []string{"us-east-2"},
						Name:      "my-custom-job",
						Namespace: "custom-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-2", ExternalID: "external-id-2"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "aws-account-1", nil
			},
			getAccountAliasFunc: func() (string, error) {
				return "my-aws-account", nil
			},
			metadataRunFunc: func(_ context.Context, _ string, _ model.DiscoveryJob) ([]*model.TaggedResource, error) {
				return []*model.TaggedResource{{
					ARN:       "resource-1",
					Namespace: "aws-namespace",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "tag1", Value: "value1"}},
				}}, nil
			},
			// Branches on the job's namespace so each job type gets its own
			// canned result.
			cloudwatchRunFunc: func(_ context.Context, job cloudwatchrunner.Job) ([]*model.CloudwatchData, error) {
				if job.Namespace() == "custom-namespace" {
					return []*model.CloudwatchData{
						{
							MetricName:          "metric-2",
							ResourceName:        "resource-2",
							Namespace:           "custom-namespace",
							Dimensions:          []model.Dimension{{Name: "dimension2", Value: "value2"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Minimum", DataPoints: []model.DataPoint{{Value: aws.Float64(2.0), Timestamp: time.Time{}}}},
						},
					}, nil
				}
				return []*model.CloudwatchData{
					{
						MetricName:          "metric-1",
						ResourceName:        "resource-1",
						Namespace:           "aws-namespace",
						Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
						Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
						GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
					},
				}, nil
			},
			expectedResources: []model.TaggedResourceResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.TaggedResource{
						{ARN: "resource-1", Namespace: "aws-namespace", Region: "us-east-1", Tags: []model.Tag{{Key: "tag1", Value: "value1"}}},
					},
				},
			},
			expectedMetrics: []model.CloudwatchMetricResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-1",
							ResourceName:        "resource-1",
							Namespace:           "aws-namespace",
							Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
							Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
						},
					},
				},
				{
					Context: &model.ScrapeContext{Region: "us-east-2", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-2",
							ResourceName:        "resource-2",
							Namespace:           "custom-namespace",
							Dimensions:          []model.Dimension{{Name: "dimension2", Value: "value2"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Minimum", DataPoints: []model.DataPoint{{Value: aws.Float64(2.0), Timestamp: time.Time{}}}},
						},
					},
				},
			},
		},
		{
			name: "returns errors from GetAccounts",
			jobsCfg: model.JobsConfig{
				DiscoveryJobs: []model.DiscoveryJob{
					{
						Regions:   []string{"us-east-1"},
						Namespace: "aws-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-1", ExternalID: "external-id-1"},
						},
					},
				},
				CustomNamespaceJobs: []model.CustomNamespaceJob{
					{
						Regions:   []string{"us-east-2"},
						Name:      "my-custom-job",
						Namespace: "custom-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-2", ExternalID: "external-id-2"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "", errors.New("failed to get account")
			},
			expectedErrs: []job.Error{
				{JobContext: job.JobContext{Account: job.Account{}, Namespace: "aws-namespace", Region: "us-east-1", RoleARN: "aws-arn-1"}, ErrorType: job.AccountErr},
				{JobContext: job.JobContext{Account: job.Account{}, Namespace: "custom-namespace", Region: "us-east-2", RoleARN: "aws-arn-2"}, ErrorType: job.AccountErr},
			},
		},
		{
			name: "ignores errors from GetAccountAlias",
			jobsCfg: model.JobsConfig{
				DiscoveryJobs: []model.DiscoveryJob{
					{
						Regions:   []string{"us-east-1"},
						Namespace: "aws-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-1", ExternalID: "external-id-1"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "aws-account-1", nil
			},
			getAccountAliasFunc: func() (string, error) {
				return "", errors.New("No alias here")
			},
			metadataRunFunc: func(_ context.Context, _ string, _ model.DiscoveryJob) ([]*model.TaggedResource, error) {
				return []*model.TaggedResource{{
					ARN:       "resource-1",
					Namespace: "aws-namespace",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "tag1", Value: "value1"}},
				}}, nil
			},
			cloudwatchRunFunc: func(_ context.Context, _ cloudwatchrunner.Job) ([]*model.CloudwatchData, error) {
				return []*model.CloudwatchData{
					{
						MetricName:          "metric-1",
						ResourceName:        "resource-1",
						Namespace:           "aws-namespace",
						Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
						Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
						GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
					},
				}, nil
			},
			expectedResources: []model.TaggedResourceResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: ""},
					Data: []*model.TaggedResource{
						{ARN: "resource-1", Namespace: "aws-namespace", Region: "us-east-1", Tags: []model.Tag{{Key: "tag1", Value: "value1"}}},
					},
				},
			},
			expectedMetrics: []model.CloudwatchMetricResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: ""},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-1",
							ResourceName:        "resource-1",
							Namespace:           "aws-namespace",
							Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
							Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
						},
					},
				},
			},
		},
		{
			name: "returns errors from resource discovery without failing scrape",
			jobsCfg: model.JobsConfig{
				DiscoveryJobs: []model.DiscoveryJob{
					{
						Regions:   []string{"us-east-1"},
						Namespace: "aws-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-1", ExternalID: "external-id-1"},
						},
					},
				},
				CustomNamespaceJobs: []model.CustomNamespaceJob{
					{
						Regions:   []string{"us-east-2"},
						Name:      "my-custom-job",
						Namespace: "custom-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-2", ExternalID: "external-id-2"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "aws-account-1", nil
			},
			getAccountAliasFunc: func() (string, error) {
				return "my-aws-account", nil
			},
			metadataRunFunc: func(_ context.Context, _ string, _ model.DiscoveryJob) ([]*model.TaggedResource, error) {
				return nil, errors.New("I failed you")
			},
			cloudwatchRunFunc: func(_ context.Context, _ cloudwatchrunner.Job) ([]*model.CloudwatchData, error) {
				return []*model.CloudwatchData{
					{
						MetricName:          "metric-2",
						ResourceName:        "resource-2",
						Namespace:           "custom-namespace",
						Dimensions:          []model.Dimension{{Name: "dimension2", Value: "value2"}},
						GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Minimum", DataPoints: []model.DataPoint{{Value: aws.Float64(2.0), Timestamp: time.Time{}}}},
					},
				}, nil
			},
			expectedMetrics: []model.CloudwatchMetricResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-2", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-2",
							ResourceName:        "resource-2",
							Namespace:           "custom-namespace",
							Dimensions:          []model.Dimension{{Name: "dimension2", Value: "value2"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Minimum", DataPoints: []model.DataPoint{{Value: aws.Float64(2.0), Timestamp: time.Time{}}}},
						},
					},
				},
			},
			expectedErrs: []job.Error{
				{
					JobContext: job.JobContext{
						Account:   job.Account{ID: "aws-account-1", Alias: "my-aws-account"},
						Namespace: "aws-namespace",
						Region:    "us-east-1",
						RoleARN:   "aws-arn-1",
					},
					ErrorType: job.ResourceMetadataErr,
				},
			},
		},
		{
			name: "returns errors from cloudwatch metrics runner without failing scrape",
			jobsCfg: model.JobsConfig{
				DiscoveryJobs: []model.DiscoveryJob{
					{
						Regions:   []string{"us-east-1"},
						Namespace: "aws-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-1", ExternalID: "external-id-1"},
						},
					},
				},
				CustomNamespaceJobs: []model.CustomNamespaceJob{
					{
						Regions:   []string{"us-east-2"},
						Name:      "my-custom-job",
						Namespace: "custom-namespace",
						Roles: []model.Role{
							{RoleArn: "aws-arn-2", ExternalID: "external-id-2"},
						},
					},
				},
			},
			getAccountFunc: func() (string, error) {
				return "aws-account-1", nil
			},
			getAccountAliasFunc: func() (string, error) {
				return "my-aws-account", nil
			},
			metadataRunFunc: func(_ context.Context, _ string, _ model.DiscoveryJob) ([]*model.TaggedResource, error) {
				return []*model.TaggedResource{{
					ARN:       "resource-1",
					Namespace: "aws-namespace",
					Region:    "us-east-1",
					Tags:      []model.Tag{{Key: "tag1", Value: "value1"}},
				}}, nil
			},
			cloudwatchRunFunc: func(_ context.Context, job cloudwatchrunner.Job) ([]*model.CloudwatchData, error) {
				if job.Namespace() == "custom-namespace" {
					return nil, errors.New("I failed you")
				}
				return []*model.CloudwatchData{
					{
						MetricName:          "metric-1",
						ResourceName:        "resource-1",
						Namespace:           "aws-namespace",
						Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
						Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
						GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
					},
				}, nil
			},
			expectedResources: []model.TaggedResourceResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.TaggedResource{
						{ARN: "resource-1", Namespace: "aws-namespace", Region: "us-east-1", Tags: []model.Tag{{Key: "tag1", Value: "value1"}}},
					},
				},
			},
			expectedMetrics: []model.CloudwatchMetricResult{
				{
					Context: &model.ScrapeContext{Region: "us-east-1", AccountID: "aws-account-1", AccountAlias: "my-aws-account"},
					Data: []*model.CloudwatchData{
						{
							MetricName:          "metric-1",
							ResourceName:        "resource-1",
							Namespace:           "aws-namespace",
							Tags:                []model.Tag{{Key: "tag1", Value: "value1"}},
							Dimensions:          []model.Dimension{{Name: "dimension1", Value: "value1"}},
							GetMetricDataResult: &model.GetMetricDataResult{Statistic: "Maximum", DataPoints: []model.DataPoint{{Value: aws.Float64(1.0), Timestamp: time.Time{}}}},
						},
					},
				},
			},
			expectedErrs: []job.Error{
				{
					JobContext: job.JobContext{
						Account:   job.Account{ID: "aws-account-1", Alias: "my-aws-account"},
						Namespace: "custom-namespace",
						Region:    "us-east-2",
						RoleARN:   "aws-arn-2",
					},
					ErrorType: job.CloudWatchCollectionErr,
				},
			},
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			rf := testRunnerFactory{
				GetAccountFunc:      tc.getAccountFunc,
				GetAccountAliasFunc: tc.getAccountAliasFunc,
				MetadataRunFunc:     tc.metadataRunFunc,
				CloudwatchRunFunc:   tc.cloudwatchRunFunc,
			}
			lvl := promslog.NewLevel()
			_ = lvl.Set("debug")
			sr := job.NewScraper(promslog.New(&promslog.Config{Level: lvl}), tc.jobsCfg, &rf)
			resources, metrics, errs := sr.Scrape(context.Background())

			changelog, err := diff.Diff(tc.expectedResources, resources)
			assert.NoError(t, err, "failed to diff resources")
			assert.Len(t, changelog, 0, changelog)

			changelog, err = diff.Diff(tc.expectedMetrics, metrics)
			assert.NoError(t, err, "failed to diff metrics")
			assert.Len(t, changelog, 0, changelog)

			// We don't want to check the exact error just the message
			changelog, err = diff.Diff(tc.expectedErrs, errs, diff.Filter(func(_ []string, _ reflect.Type, field reflect.StructField) bool {
				return field.Name != "Err"
			}))
			assert.NoError(t, err, "failed to diff errs")
			assert.Len(t, changelog, 0, changelog)
		})
	}
}



================================================
FILE: pkg/job/static.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package job

import (
	"context"
	"log/slog"
	"sync"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/clients/cloudwatch"
	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// runStaticJob fetches metric statistics for each metric configured on a
// static job. Every metric is queried in its own goroutine via
// GetMetricStatistics; metrics that return no results are dropped.
func runStaticJob(
	ctx context.Context,
	logger *slog.Logger,
	resource model.StaticJob,
	clientCloudwatch cloudwatch.Client,
) []*model.CloudwatchData {
	results := []*model.CloudwatchData{}

	var (
		wg   sync.WaitGroup
		lock sync.Mutex // guards results
	)

	for _, m := range resource.Metrics {
		wg.Add(1)
		// The metric is passed as an argument so each goroutine works on its
		// own value.
		go func(metric *model.MetricConfig) {
			defer wg.Done()

			entry := model.CloudwatchData{
				MetricName:   metric.Name,
				ResourceName: resource.Name,
				Namespace:    resource.Namespace,
				Dimensions:   createStaticDimensions(resource.Dimensions),
				MetricMigrationParams: model.MetricMigrationParams{
					NilToZero:              metric.NilToZero,
					AddCloudwatchTimestamp: metric.AddCloudwatchTimestamp,
				},
				Tags:                          nil,
				GetMetricDataProcessingParams: nil,
				GetMetricDataResult:           nil,
				GetMetricStatisticsResult:     nil,
			}
			entry.GetMetricStatisticsResult = &model.GetMetricStatisticsResult{
				Results:    clientCloudwatch.GetMetricStatistics(ctx, logger, entry.Dimensions, resource.Namespace, metric),
				Statistics: metric.Statistics,
			}

			// Only keep entries for which the API returned data.
			if entry.GetMetricStatisticsResult.Results != nil {
				lock.Lock()
				results = append(results, &entry)
				lock.Unlock()
			}
		}(m)
	}

	wg.Wait()
	return results
}

// createStaticDimensions copies the configured dimensions into a fresh slice
// so result data never aliases the job configuration.
func createStaticDimensions(dimensions []model.Dimension) []model.Dimension {
	copied := make([]model.Dimension, len(dimensions))
	for i, d := range dimensions {
		copied[i] = model.Dimension{
			Name:  d.Name,
			Value: d.Value,
		}
	}
	return copied
}



================================================
FILE: pkg/model/model.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package model import ( "time" "github.com/grafana/regexp" ) const ( DefaultPeriodSeconds = int64(300) DefaultLengthSeconds = int64(300) ) type JobsConfig struct { StsRegion string DiscoveryJobs []DiscoveryJob StaticJobs []StaticJob CustomNamespaceJobs []CustomNamespaceJob } type DiscoveryJob struct { Regions []string Namespace string Roles []Role SearchTags []SearchTag CustomTags []Tag DimensionNameRequirements []string Metrics []*MetricConfig RoundingPeriod *int64 RecentlyActiveOnly bool ExportedTagsOnMetrics []string IncludeContextOnInfoMetrics bool DimensionsRegexps []DimensionsRegexp // EnhancedMetrics holds configuration for enhanced metrics in discovery jobs. It contains a configuration for the non-CloudWatch metrics to collect. 
EnhancedMetrics []*EnhancedMetricConfig } func (d *DiscoveryJob) HasEnhancedMetrics() bool { return len(d.EnhancedMetrics) > 0 } type EnhancedMetricConfig struct { Name string } type StaticJob struct { Name string Regions []string Roles []Role Namespace string CustomTags []Tag Dimensions []Dimension Metrics []*MetricConfig } type CustomNamespaceJob struct { Regions []string Name string Namespace string RoundingPeriod *int64 RecentlyActiveOnly bool Roles []Role Metrics []*MetricConfig CustomTags []Tag DimensionNameRequirements []string } type Role struct { RoleArn string ExternalID string } type MetricConfig struct { Name string Statistics []string Period int64 Length int64 Delay int64 NilToZero bool AddCloudwatchTimestamp bool ExportAllDataPoints bool } type DimensionsRegexp struct { Regexp *regexp.Regexp DimensionsNames []string } type LabelSet map[string]struct{} type Tag struct { Key string Value string } type SearchTag struct { Key string Value *regexp.Regexp } type Dimension struct { Name string Value string } type Metric struct { // The dimensions for the metric. Dimensions []Dimension MetricName string Namespace string } type CloudwatchMetricResult struct { Context *ScrapeContext Data []*CloudwatchData } type TaggedResourceResult struct { Context *ScrapeContext Data []*TaggedResource } type ScrapeContext struct { Region string AccountID string AccountAlias string CustomTags []Tag } // CloudwatchData is an internal representation of a CloudWatch // metric with attached data points, metric and resource information. 
type CloudwatchData struct {
	MetricName string
	// ResourceName will have different values depending on the job type
	// DiscoveryJob = Resource ARN associated with the metric or global when it could not be associated but shouldn't be dropped
	// StaticJob = Resource Name from static job config
	// CustomNamespace = Custom Namespace job name
	ResourceName string
	Namespace    string
	Tags         []Tag
	Dimensions   []Dimension
	// GetMetricDataProcessingParams includes necessary fields to run GetMetricData
	GetMetricDataProcessingParams *GetMetricDataProcessingParams
	// MetricMigrationParams holds configuration values necessary when migrating the resulting metrics
	MetricMigrationParams MetricMigrationParams
	// GetMetricDataResult is an optional field and will be non-nil when metric data was populated from the GetMetricData API (Discovery and CustomNamespace jobs)
	GetMetricDataResult *GetMetricDataResult
	// GetMetricStatisticsResult is an optional field and will be non-nil when metric data was populated from the GetMetricStatistics API (static jobs)
	GetMetricStatisticsResult *GetMetricStatisticsResult
}

// GetMetricStatisticsResult holds the raw data points returned by the
// GetMetricStatistics API together with the statistics that were requested.
type GetMetricStatisticsResult struct {
	Results    []*MetricStatisticsResult
	Statistics []string
}

// MetricStatisticsResult is a single GetMetricStatistics data point.
type MetricStatisticsResult struct {
	// The average of the metric values that correspond to the data point.
	Average *float64
	// The percentile statistic for the data point.
	ExtendedStatistics map[string]*float64
	// The maximum metric value for the data point.
	Maximum *float64
	// The minimum metric value for the data point.
	Minimum *float64
	// The number of metric values that contributed to the aggregate value of this
	// data point.
	SampleCount *float64
	// The sum of the metric values for the data point.
	Sum *float64
	// The time stamp used for the data point.
	Timestamp *time.Time
}

// GetMetricDataProcessingParams carries the parameters needed to build a
// GetMetricData query for this metric and to map the result back to it.
type GetMetricDataProcessingParams struct {
	// QueryID is a value internal to processing used for mapping results from GetMetricData their original request
	QueryID string
	// The statistic to be used to call GetMetricData
	Statistic string
	// Fields which impact the start and end time of the query window.
	Period int64
	Length int64
	Delay  int64
}

// MetricMigrationParams holds the options controlling how raw data points are
// converted into exported Prometheus metrics.
type MetricMigrationParams struct {
	NilToZero              bool
	AddCloudwatchTimestamp bool
	ExportAllDataPoints    bool
}

// GetMetricDataResult holds the data points returned by GetMetricData for a
// single statistic.
type GetMetricDataResult struct {
	Statistic  string
	DataPoints []DataPoint
}

// DataPoint is a single sample; Value may be nil when CloudWatch returned no
// data for the period.
type DataPoint struct {
	Value     *float64
	Timestamp time.Time
}

// TaggedResource is an AWS resource with tags
type TaggedResource struct {
	// ARN is the unique AWS ARN (Amazon Resource Name) of the resource
	ARN string

	// Namespace identifies the resource type (e.g. EC2)
	Namespace string

	// Region is the AWS regions that the resource belongs to
	Region string

	// Tags is a set of tags associated to the resource
	Tags []Tag
}

// FilterThroughTags returns true if all filterTags match
// with tags of the TaggedResource, returns false otherwise.
func (r TaggedResource) FilterThroughTags(filterTags []SearchTag) bool {
	if len(filterTags) == 0 {
		return true
	}

	tagFilterMatches := 0

	for _, resourceTag := range r.Tags {
		for _, filterTag := range filterTags {
			if resourceTag.Key == filterTag.Key {
				if !filterTag.Value.MatchString(resourceTag.Value) {
					// Any mismatching filter disqualifies the resource immediately.
					return false
				}
				// A resource needs to match all SearchTags to be returned, so we track the number of tag filter
				// matches to ensure it matches the number of tag filters at the end
				// NOTE(review): a resource with duplicate tag keys could over-count here — assumes AWS tag keys are unique per resource.
				tagFilterMatches++
			}
		}
	}

	return tagFilterMatches == len(filterTags)
}

// MetricTags returns a list of tags built from the tags of
// TaggedResource, if exportedTags is not empty.
//
// Returned tags have as key the key from exportedTags, and
// as value the value from the corresponding tag of the resource,
// if it exists (otherwise an empty string).
func (r TaggedResource) MetricTags(exportedTags []string) []Tag { if len(exportedTags) == 0 { return []Tag{} } tags := make([]Tag, 0, len(exportedTags)) for _, tagName := range exportedTags { tag := Tag{ Key: tagName, } for _, resourceTag := range r.Tags { if resourceTag.Key == tagName { tag.Value = resourceTag.Value break } } // Always add the tag, even if it's empty, to ensure the same labels are present on all metrics for a single service tags = append(tags, tag) } return tags } ================================================ FILE: pkg/model/model_test.go ================================================ // Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package model

import (
	"testing"

	"github.com/grafana/regexp"
	"github.com/stretchr/testify/require"
)

// Test_FilterThroughTags verifies TaggedResource.FilterThroughTags: a resource
// passes only when every filter tag matches (exact key, regexp value).
func Test_FilterThroughTags(t *testing.T) {
	testCases := []struct {
		testName     string
		resourceTags []Tag
		filterTags   []SearchTag
		result       bool
	}{
		{
			testName:     "exactly matching tags",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{{Key: "k1", Value: regexp.MustCompile("v1")}},
			result:       true,
		},
		{
			testName:     "unmatching tags",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{{Key: "k2", Value: regexp.MustCompile("v2")}},
			result:       false,
		},
		{
			// Extra resource tags that are not filtered on are ignored.
			testName:     "resource has more tags",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}, {Key: "k2", Value: "v2"}},
			filterTags:   []SearchTag{{Key: "k1", Value: regexp.MustCompile("v1")}},
			result:       true,
		},
		{
			// Every filter must match; an unmatched filter fails the resource.
			testName:     "filter has more tags",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{{Key: "k1", Value: regexp.MustCompile("v1")}, {Key: "k2", Value: regexp.MustCompile("v2")}},
			result:       false,
		},
		{
			testName:     "unmatching tag key",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{{Key: "k2", Value: regexp.MustCompile("v1")}},
			result:       false,
		},
		{
			testName:     "unmatching tag value",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{{Key: "k1", Value: regexp.MustCompile("v2")}},
			result:       false,
		},
		{
			testName:     "resource without tags",
			resourceTags: []Tag{},
			filterTags:   []SearchTag{{Key: "k1", Value: regexp.MustCompile("v2")}},
			result:       false,
		},
		{
			// No filters means every resource passes.
			testName:     "empty filter tags",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{},
			result:       true,
		},
		{
			testName:     "filter with value regex",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			filterTags:   []SearchTag{{Key: "k1", Value: regexp.MustCompile("v.*")}},
			result:       true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.testName, func(t *testing.T) {
			res := TaggedResource{
				ARN:       "aws::arn",
				Namespace: "AWS/Service",
				Region:    "us-east-1",
				Tags:      tc.resourceTags,
			}
			require.Equal(t, tc.result, res.FilterThroughTags(tc.filterTags))
		})
	}
}

// Test_MetricTags verifies TaggedResource.MetricTags: one output Tag per
// exported key, with an empty value when the resource lacks the tag.
func Test_MetricTags(t *testing.T) {
	testCases := []struct {
		testName     string
		resourceTags []Tag
		exportedTags []string
		result       []Tag
	}{
		{
			testName:     "empty exported tag",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			exportedTags: []string{},
			result:       []Tag{},
		},
		{
			testName:     "single exported tag",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			exportedTags: []string{"k1"},
			result:       []Tag{{Key: "k1", Value: "v1"}},
		},
		{
			// Missing resource tags still produce a Tag with an empty value.
			testName:     "multiple exported tags",
			resourceTags: []Tag{{Key: "k1", Value: "v1"}},
			exportedTags: []string{"k1", "k2"},
			result:       []Tag{{Key: "k1", Value: "v1"}, {Key: "k2", Value: ""}},
		},
		{
			testName:     "resource without tags",
			resourceTags: []Tag{},
			exportedTags: []string{"k1"},
			result:       []Tag{{Key: "k1", Value: ""}},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.testName, func(t *testing.T) {
			res := TaggedResource{
				ARN:       "aws::arn",
				Namespace: "AWS/Service",
				Region:    "us-east-1",
				Tags:      tc.resourceTags,
			}
			require.Equal(t, tc.result, res.MetricTags(tc.exportedTags))
		})
	}
}

================================================
FILE: pkg/promutil/migrate.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package promutil

import (
	"fmt"
	"log/slog"
	"maps"
	"math"
	"sort"
	"strings"
	"time"

	"github.com/grafana/regexp"
	prom_model "github.com/prometheus/common/model"

	"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

// Percentile matches CloudWatch percentile statistics such as "p90", "p99.9" or "p100".
var Percentile = regexp.MustCompile(`^p(\d{1,2}(\.\d{0,2})?|100)$`)

// BuildMetricName builds the Prometheus metric name for a CloudWatch metric:
// "aws_<namespace>_<metric>[_<statistic>]", sanitized via PromString. A leading
// "/" on the namespace is stripped, and a metric-name prefix that repeats part
// of the namespace is removed to avoid names like aws_glue_glue_foo.
func BuildMetricName(namespace, metricName, statistic string) string {
	sb := strings.Builder{}
	// Some namespaces have a leading forward slash like
	// /aws/sagemaker/TrainingJobs, which should be removed.
	var promNs string
	if strings.HasPrefix(namespace, "/") {
		promNs = PromString(strings.ToLower(namespace[1:]))
	} else {
		promNs = PromString(strings.ToLower(namespace))
	}
	if !strings.HasPrefix(promNs, "aws") {
		sb.WriteString("aws_")
	}
	sb.WriteString(promNs)
	sb.WriteString("_")

	promMetricName := PromString(metricName)
	// Some metric names duplicate parts of the namespace as a prefix,
	// For example, the `Glue` namespace metrics have names prefixed also by `glue`
	skip := 0
	for _, part := range strings.Split(promNs, "_") {
		if strings.HasPrefix(promMetricName[skip:], part) {
			skip = len(part)
		}
	}
	promMetricName = strings.TrimPrefix(promMetricName[skip:], "_")
	sb.WriteString(promMetricName)

	if statistic != "" {
		sb.WriteString("_")
		PromStringToBuilder(statistic, &sb)
	}
	return sb.String()
}

// BuildNamespaceInfoMetrics appends one "<namespace>_info" metric (value 0)
// per discovered resource, labeled with the resource ARN, its tags and the
// scrape-context labels. It returns the extended metrics slice and the updated
// observed-label registry used later for label consistency.
func BuildNamespaceInfoMetrics(tagData []model.TaggedResourceResult, metrics []*PrometheusMetric, observedMetricLabels map[string]model.LabelSet, labelsSnakeCase bool, logger *slog.Logger) ([]*PrometheusMetric, map[string]model.LabelSet) {
	for _, tagResult := range tagData {
		contextLabels := contextToLabels(tagResult.Context, labelsSnakeCase, logger)
		for _, d := range tagResult.Data {
			metricName := BuildMetricName(d.Namespace, "info", "")

			promLabels := make(map[string]string, len(d.Tags)+len(contextLabels)+1)
			maps.Copy(promLabels, contextLabels)
			promLabels["name"] = d.ARN
			for _, tag := range d.Tags {
				ok, promTag := PromStringTag(tag.Key, labelsSnakeCase)
				if !ok {
					logger.Warn("tag name is an invalid prometheus label name", "tag", tag.Key)
					continue
				}
				labelName := "tag_" + promTag
				promLabels[labelName] = tag.Value
			}

			observedMetricLabels = recordLabelsForMetric(metricName, promLabels, observedMetricLabels)
			metrics = append(metrics, &PrometheusMetric{
				Name:   metricName,
				Labels: promLabels,
				Value:  0,
			})
		}
	}
	return metrics, observedMetricLabels
}

// BuildMetrics converts scraped CloudWatch results into Prometheus metrics,
// applying the migration options (NilToZero, AddCloudwatchTimestamp,
// ExportAllDataPoints) per metric. It returns the metrics, the observed-label
// registry, and any error raised while mapping data points.
func BuildMetrics(results []model.CloudwatchMetricResult, labelsSnakeCase bool, logger *slog.Logger) ([]*PrometheusMetric, map[string]model.LabelSet, error) {
	output := make([]*PrometheusMetric, 0)
	observedMetricLabels := make(map[string]model.LabelSet)

	for _, result := range results {
		contextLabels := contextToLabels(result.Context, labelsSnakeCase, logger)
		for _, metric := range result.Data {
			// This should not be possible but check just in case
			if metric.GetMetricStatisticsResult == nil && metric.GetMetricDataResult == nil {
				logger.Warn("Attempted to migrate metric with no result", "namespace", metric.Namespace, "metric_name", metric.MetricName, "resource_name", metric.ResourceName)
			}

			for _, statistic := range statisticsInCloudwatchData(metric) {
				dataPoints, err := getDataPoints(metric, statistic)
				// BUGFIX: this check used to live inside the loop below. On error
				// getDataPoints returns no data points, so the loop body never ran
				// and the error (e.g. an invalid statistic) was silently dropped.
				if err != nil {
					return nil, nil, err
				}
				for _, dataPoint := range dataPoints {
					ts := dataPoint.Timestamp
					value := dataPoint.Value

					var exportedDatapoint float64
					if value == nil && metric.MetricMigrationParams.AddCloudwatchTimestamp {
						// If we did not get a datapoint then the timestamp is a default value making it unusable in the
						// exported metric. Attempting to put a fake timestamp on the metric will likely conflict with
						// future CloudWatch timestamps which are always in the past.
						if metric.MetricMigrationParams.ExportAllDataPoints {
							// If we're exporting all data points, we can skip this one and check for a historical datapoint
							continue
						}
						// If we are not exporting all data points, we better have nothing exported
						break
					}

					if value == nil {
						exportedDatapoint = math.NaN()
					} else {
						exportedDatapoint = *value
					}
					if metric.MetricMigrationParams.NilToZero && math.IsNaN(exportedDatapoint) {
						exportedDatapoint = 0
					}

					name := BuildMetricName(metric.Namespace, metric.MetricName, statistic)

					promLabels := createPrometheusLabels(metric, labelsSnakeCase, contextLabels, logger)
					observedMetricLabels = recordLabelsForMetric(name, promLabels, observedMetricLabels)

					if !metric.MetricMigrationParams.AddCloudwatchTimestamp {
						// if we're not adding the original timestamp, we have to zero it so we can validate the data in the exporter via EnsureLabelConsistencyAndRemoveDuplicates
						ts = time.Time{}
					}
					output = append(output, &PrometheusMetric{
						Name:             name,
						Labels:           promLabels,
						Value:            exportedDatapoint,
						Timestamp:        ts,
						IncludeTimestamp: metric.MetricMigrationParams.AddCloudwatchTimestamp,
					})

					if !metric.MetricMigrationParams.ExportAllDataPoints {
						// If we're not exporting all data points, we can skip the rest of the data points for this metric
						break
					}
				}
			}
		}
	}

	return output, observedMetricLabels, nil
}

// statisticsInCloudwatchData returns the statistics present on the data,
// depending on which API populated it (GetMetricData carries exactly one).
func statisticsInCloudwatchData(d *model.CloudwatchData) []string {
	if d.GetMetricDataResult != nil {
		return []string{d.GetMetricDataResult.Statistic}
	}
	if d.GetMetricStatisticsResult != nil {
		return d.GetMetricStatisticsResult.Statistics
	}
	return []string{}
}

// getDataPoints extracts the data points for one statistic. GetMetricData
// results are returned as-is (or as a single nil point when empty and original
// timestamps are not kept); GetMetricStatistics results are reduced to the
// most recent matching point, averaging across points for "Average".
func getDataPoints(cwd *model.CloudwatchData, statistic string) ([]model.DataPoint, error) {
	// Not possible but for sanity
	if cwd.GetMetricStatisticsResult == nil && cwd.GetMetricDataResult == nil {
		return nil, fmt.Errorf("cannot map a data point with no results on %s", cwd.MetricName)
	}

	if cwd.GetMetricDataResult != nil {
		// If we have no dataPoints, we should return a single nil datapoint, which is then either dropped or converted to 0
		if len(cwd.GetMetricDataResult.DataPoints) == 0 && !cwd.MetricMigrationParams.AddCloudwatchTimestamp {
			return []model.DataPoint{{
				Value:     nil,
				Timestamp: time.Time{},
			}}, nil
		}
		return cwd.GetMetricDataResult.DataPoints, nil
	}

	var averageDataPoints []*model.MetricStatisticsResult

	// sorting by timestamps so we can consistently export the most updated datapoint
	// assuming Timestamp field in cloudwatch.Value struct is never nil
	for _, datapoint := range sortByTimestamp(cwd.GetMetricStatisticsResult.Results) {
		switch {
		case statistic == "Maximum":
			if datapoint.Maximum != nil {
				return []model.DataPoint{{Value: datapoint.Maximum, Timestamp: *datapoint.Timestamp}}, nil
			}
		case statistic == "Minimum":
			if datapoint.Minimum != nil {
				return []model.DataPoint{{Value: datapoint.Minimum, Timestamp: *datapoint.Timestamp}}, nil
			}
		case statistic == "Sum":
			if datapoint.Sum != nil {
				return []model.DataPoint{{Value: datapoint.Sum, Timestamp: *datapoint.Timestamp}}, nil
			}
		case statistic == "SampleCount":
			if datapoint.SampleCount != nil {
				return []model.DataPoint{{Value: datapoint.SampleCount, Timestamp: *datapoint.Timestamp}}, nil
			}
		case statistic == "Average":
			if datapoint.Average != nil {
				averageDataPoints = append(averageDataPoints, datapoint)
			}
		case Percentile.MatchString(statistic):
			if data, ok := datapoint.ExtendedStatistics[statistic]; ok {
				return []model.DataPoint{{Value: data, Timestamp: *datapoint.Timestamp}}, nil
			}
		default:
			return nil, fmt.Errorf("invalid statistic requested on metric %s: %s", cwd.MetricName, statistic)
		}
	}

	if len(averageDataPoints) > 0 {
		var total float64
		var timestamp time.Time

		for _, p := range averageDataPoints {
			if p.Timestamp.After(timestamp) {
				timestamp = *p.Timestamp
			}
			total += *p.Average
		}
		average := total / float64(len(averageDataPoints))
		// The averaged value is stamped with the newest contributing timestamp.
		return []model.DataPoint{{Value: &average, Timestamp: timestamp}}, nil
	}
	return nil, nil
}

// sortByTimestamp sorts the data points in place, newest first, and returns
// the same slice for convenience.
func sortByTimestamp(dataPoints []*model.MetricStatisticsResult) []*model.MetricStatisticsResult {
	sort.Slice(dataPoints, func(i, j int) bool {
		jTimestamp := *dataPoints[j].Timestamp
		return dataPoints[i].Timestamp.After(jTimestamp)
	})
	return dataPoints
}

// createPrometheusLabels builds the label map for one metric: the resource
// name, its dimensions ("dimension_*"), its tags ("tag_*"), and finally the
// scrape-context labels, which take precedence on key collisions.
func createPrometheusLabels(cwd *model.CloudwatchData, labelsSnakeCase bool, contextLabels map[string]string, logger *slog.Logger) map[string]string {
	labels := make(map[string]string, len(cwd.Dimensions)+len(cwd.Tags)+len(contextLabels))
	// Expose the resource name back as a label
	labels["name"] = cwd.ResourceName

	for _, dimension := range cwd.Dimensions {
		ok, promTag := PromStringTag(dimension.Name, labelsSnakeCase)
		if !ok {
			logger.Warn("dimension name is an invalid prometheus label name", "dimension", dimension.Name)
			continue
		}
		labels["dimension_"+promTag] = dimension.Value
	}

	for _, tag := range cwd.Tags {
		ok, promTag := PromStringTag(tag.Key, labelsSnakeCase)
		if !ok {
			logger.Warn("metric tag name is an invalid prometheus label name", "tag", tag.Key)
			continue
		}
		labels["tag_"+promTag] = tag.Value
	}

	maps.Copy(labels, contextLabels)
	return labels
}

// contextToLabels converts a scrape context into labels (region, account_id,
// optional account_alias, and "custom_tag_*" entries). A nil context yields an
// empty map.
func contextToLabels(context *model.ScrapeContext, labelsSnakeCase bool, logger *slog.Logger) map[string]string {
	if context == nil {
		return map[string]string{}
	}

	labels := make(map[string]string, 2+len(context.CustomTags))
	labels["region"] = context.Region
	labels["account_id"] = context.AccountID
	// If there's no account alias, omit adding an extra label in the series, it will work either way query wise
	if context.AccountAlias != "" {
		labels["account_alias"] = context.AccountAlias
	}

	for _, label := range context.CustomTags {
		ok, promTag := PromStringTag(label.Key, labelsSnakeCase)
		if !ok {
			logger.Warn("custom tag name is an invalid prometheus label name", "tag", label.Key)
			continue
		}
		labels["custom_tag_"+promTag] = label.Value
	}

	return labels
}

// recordLabelsForMetric adds any missing labels from promLabels in to the LabelSet for the metric name and returns
// the updated observedMetricLabels
func recordLabelsForMetric(metricName string, promLabels map[string]string, observedMetricLabels map[string]model.LabelSet) map[string]model.LabelSet {
	if _, ok := observedMetricLabels[metricName]; !ok {
		observedMetricLabels[metricName] = make(model.LabelSet, len(promLabels))
	}
	for label := range promLabels {
		if _, ok := observedMetricLabels[metricName][label]; !ok {
			observedMetricLabels[metricName][label] = struct{}{}
		}
	}

	return observedMetricLabels
}

// EnsureLabelConsistencyAndRemoveDuplicates ensures that every metric has the same set of labels based on the data
// in observedMetricLabels and that there are no duplicate metrics.
// Prometheus requires that all metrics with the same name have the same set of labels and that no duplicates are registered
func EnsureLabelConsistencyAndRemoveDuplicates(metrics []*PrometheusMetric, observedMetricLabels map[string]model.LabelSet) []*PrometheusMetric {
	metricKeys := make(map[string]struct{}, len(metrics))
	output := make([]*PrometheusMetric, 0, len(metrics))

	for _, metric := range metrics {
		// Backfill labels seen on other series of this metric name with "".
		for observedLabels := range observedMetricLabels[metric.Name] {
			if _, ok := metric.Labels[observedLabels]; !ok {
				metric.Labels[observedLabels] = ""
			}
		}

		// We are including the timestamp in the metric key to ensure that we don't have duplicate metrics
		// if we have AddCloudwatchTimestamp enabled its the real timestamp, otherwise its a zero value
		// the timestamp is needed to ensure valid date created by ExportAllDataPoints
		metricKey := fmt.Sprintf("%s-%d-%d", metric.Name, prom_model.LabelsToSignature(metric.Labels), metric.Timestamp.Unix())
		if _, exists := metricKeys[metricKey]; !exists {
			metricKeys[metricKey] = struct{}{}
			output = append(output, metric)
		} else {
			DuplicateMetricsFilteredCounter.Inc()
		}
	}

	return output
}

================================================
FILE: pkg/promutil/migrate_test.go
================================================
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package promutil import ( "math" "testing" "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model" ) func TestBuildNamespaceInfoMetrics(t *testing.T) { type testCase struct { name string resources []model.TaggedResourceResult metrics []*PrometheusMetric observedMetricLabels map[string]model.LabelSet labelsSnakeCase bool expectedMetrics []*PrometheusMetric expectedLabels map[string]model.LabelSet } testCases := []testCase{ { name: "metric with tag", resources: []model.TaggedResourceResult{ { Context: nil, Data: []*model.TaggedResource{ { ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", Namespace: "AWS/ElastiCache", Region: "us-east-1", Tags: []model.Tag{ { Key: "CustomTag", Value: "tag_Value", }, }, }, }, }, }, metrics: []*PrometheusMetric{}, observedMetricLabels: map[string]model.LabelSet{}, labelsSnakeCase: false, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_info", Labels: map[string]string{ "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "tag_CustomTag": "tag_Value", }, Value: 0, }, }, expectedLabels: map[string]model.LabelSet{ "aws_elasticache_info": map[string]struct{}{ "name": {}, "tag_CustomTag": {}, }, }, }, { name: "label snake case", resources: []model.TaggedResourceResult{ { Context: nil, Data: []*model.TaggedResource{ { ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", Namespace: "AWS/ElastiCache", Region: 
"us-east-1", Tags: []model.Tag{ { Key: "CustomTag", Value: "tag_Value", }, }, }, }, }, }, metrics: []*PrometheusMetric{}, observedMetricLabels: map[string]model.LabelSet{}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_info", Labels: map[string]string{ "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "tag_custom_tag": "tag_Value", }, Value: 0, }, }, expectedLabels: map[string]model.LabelSet{ "aws_elasticache_info": map[string]struct{}{ "name": {}, "tag_custom_tag": {}, }, }, }, { name: "with observed metrics and labels", resources: []model.TaggedResourceResult{ { Context: nil, Data: []*model.TaggedResource{ { ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", Namespace: "AWS/ElastiCache", Region: "us-east-1", Tags: []model.Tag{ { Key: "CustomTag", Value: "tag_Value", }, }, }, }, }, }, metrics: []*PrometheusMetric{ { Name: "aws_ec2_cpuutilization_maximum", Labels: map[string]string{ "name": "arn:aws:ec2:us-east-1:123456789012:instance/i-abc123", "dimension_InstanceId": "i-abc123", }, Value: 0, }, }, observedMetricLabels: map[string]model.LabelSet{ "aws_ec2_cpuutilization_maximum": map[string]struct{}{ "name": {}, "dimension_InstanceId": {}, }, }, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_ec2_cpuutilization_maximum", Labels: map[string]string{ "name": "arn:aws:ec2:us-east-1:123456789012:instance/i-abc123", "dimension_InstanceId": "i-abc123", }, Value: 0, }, { Name: "aws_elasticache_info", Labels: map[string]string{ "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "tag_custom_tag": "tag_Value", }, Value: 0, }, }, expectedLabels: map[string]model.LabelSet{ "aws_ec2_cpuutilization_maximum": map[string]struct{}{ "name": {}, "dimension_InstanceId": {}, }, "aws_elasticache_info": map[string]struct{}{ "name": {}, "tag_custom_tag": {}, }, }, }, { name: "context on info metrics", resources: []model.TaggedResourceResult{ { Context: 
&model.ScrapeContext{ Region: "us-east-2", AccountID: "12345", CustomTags: []model.Tag{{ Key: "billable-to", Value: "api", }}, }, Data: []*model.TaggedResource{ { ARN: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", Namespace: "AWS/ElastiCache", Region: "us-east-1", Tags: []model.Tag{ { Key: "cache_name", Value: "cache_instance_1", }, }, }, }, }, }, metrics: []*PrometheusMetric{}, observedMetricLabels: map[string]model.LabelSet{}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_info", Labels: map[string]string{ "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "tag_cache_name": "cache_instance_1", "account_id": "12345", "region": "us-east-2", "custom_tag_billable_to": "api", }, Value: 0, }, }, expectedLabels: map[string]model.LabelSet{ "aws_elasticache_info": map[string]struct{}{ "name": {}, "tag_cache_name": {}, "account_id": {}, "region": {}, "custom_tag_billable_to": {}, }, }, }, { name: "metric with nonstandard namespace", resources: []model.TaggedResourceResult{ { Context: nil, Data: []*model.TaggedResource{ { ARN: "arn:aws:sagemaker:us-east-1:123456789012:training-job/sagemaker-xgboost", Namespace: "/aws/sagemaker/TrainingJobs", Region: "us-east-1", Tags: []model.Tag{ { Key: "CustomTag", Value: "tag_Value", }, }, }, }, }, }, metrics: []*PrometheusMetric{}, observedMetricLabels: map[string]model.LabelSet{}, labelsSnakeCase: false, expectedMetrics: []*PrometheusMetric{ { Name: "aws_sagemaker_trainingjobs_info", Labels: map[string]string{ "name": "arn:aws:sagemaker:us-east-1:123456789012:training-job/sagemaker-xgboost", "tag_CustomTag": "tag_Value", }, Value: 0, }, }, expectedLabels: map[string]model.LabelSet{ "aws_sagemaker_trainingjobs_info": map[string]struct{}{ "name": {}, "tag_CustomTag": {}, }, }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { metrics, labels := BuildNamespaceInfoMetrics(tc.resources, tc.metrics, tc.observedMetricLabels, 
tc.labelsSnakeCase, promslog.NewNopLogger()) require.Equal(t, tc.expectedMetrics, metrics) require.Equal(t, tc.expectedLabels, labels) }) } } func TestBuildMetrics(t *testing.T) { ts := time.Date(2024, time.January, 1, 0, 0, 0, 0, time.UTC) nullTs := time.Time{} type testCase struct { name string data []model.CloudwatchMetricResult labelsSnakeCase bool expectedMetrics []*PrometheusMetric expectedLabels map[string]model.LabelSet expectedErr error } testCases := []testCase{ { name: "metric with GetMetricDataResult and non-nil datapoint", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: "us-east-1", AccountID: "123456789012", CustomTags: nil, }, Data: []*model.CloudwatchData{ { MetricName: "CPUUtilization", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}}, }, Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "FreeableMemory", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(2), Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkBytesIn", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", 
DataPoints: []model.DataPoint{{Value: aws.Float64(3), Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkBytesOut", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: true, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(4), Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkPacketsIn", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: true, ExportAllDataPoints: true, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{ {Value: aws.Float64(4), Timestamp: ts}, {Value: aws.Float64(5), Timestamp: ts.Add(-1 * time.Minute)}, {Value: aws.Float64(6), Timestamp: ts.Add(-2 * time.Minute)}, }, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkPacketsOut", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: true, ExportAllDataPoints: true, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{ {Value: nil, Timestamp: ts}, {Value: aws.Float64(5), Timestamp: ts.Add(-1 * time.Minute)}, {Value: aws.Float64(6), Timestamp: ts.Add(-2 * time.Minute)}, }, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkMaxBytesIn", MetricMigrationParams: model.MetricMigrationParams{ 
NilToZero: true, AddCloudwatchTimestamp: true, ExportAllDataPoints: false, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{ {Value: nil, Timestamp: ts}, {Value: aws.Float64(5), Timestamp: ts.Add(-1 * time.Minute)}, {Value: aws.Float64(6), Timestamp: ts.Add(-2 * time.Minute)}, }, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, }, }}, labelsSnakeCase: false, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_cpuutilization_average", Value: 1, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_freeable_memory_average", Value: 2, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_bytes_in_average", Value: 3, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_bytes_out_average", Value: 4, Timestamp: ts, IncludeTimestamp: true, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_packets_in_average", Value: 4, Timestamp: ts, IncludeTimestamp: true, Labels: map[string]string{ "account_id": "123456789012", "name": 
"arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_packets_in_average", Value: 5, Timestamp: ts.Add(-1 * time.Minute), IncludeTimestamp: true, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_packets_in_average", Value: 6, Timestamp: ts.Add(-2 * time.Minute), IncludeTimestamp: true, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_packets_out_average", Value: 5, Timestamp: ts.Add(-1 * time.Minute), IncludeTimestamp: true, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, { Name: "aws_elasticache_network_packets_out_average", Value: 6, Timestamp: ts.Add(-2 * time.Minute), IncludeTimestamp: true, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, }, }, expectedLabels: map[string]model.LabelSet{ "aws_elasticache_cpuutilization_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, "aws_elasticache_freeable_memory_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, "aws_elasticache_network_bytes_in_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, "aws_elasticache_network_bytes_out_average": { "account_id": {}, "name": {}, "region": {}, 
"dimension_CacheClusterId": {}, }, "aws_elasticache_network_packets_in_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, "aws_elasticache_network_packets_out_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, }, expectedErr: nil, }, { name: "metric with GetMetricDataResult and nil datapoint", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: "us-east-1", AccountID: "123456789012", CustomTags: nil, }, Data: []*model.CloudwatchData{ { MetricName: "CPUUtilization", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: nil, Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "FreeableMemory", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: nil, Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkBytesIn", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: nil, Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, { MetricName: "NetworkBytesOut", 
MetricMigrationParams: model.MetricMigrationParams{ NilToZero: true, AddCloudwatchTimestamp: true, }, Namespace: "AWS/ElastiCache", Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: nil, Timestamp: ts}}, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, }, }}, labelsSnakeCase: false, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_cpuutilization_average", Value: 0, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, IncludeTimestamp: false, }, { Name: "aws_elasticache_freeable_memory_average", Value: math.NaN(), Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, IncludeTimestamp: false, }, { Name: "aws_elasticache_network_bytes_in_average", Value: 0, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_CacheClusterId": "redis-cluster", }, IncludeTimestamp: false, }, }, expectedLabels: map[string]model.LabelSet{ "aws_elasticache_cpuutilization_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, "aws_elasticache_freeable_memory_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, "aws_elasticache_network_bytes_in_average": { "account_id": {}, "name": {}, "region": {}, "dimension_CacheClusterId": {}, }, }, expectedErr: nil, }, { name: "label snake case", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: 
"us-east-1", AccountID: "123456789012", CustomTags: nil, }, Data: []*model.CloudwatchData{ { MetricName: "CPUUtilization", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}}, }, Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, }, }}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_cpuutilization_average", Value: 1, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_cache_cluster_id": "redis-cluster", }, }, }, expectedLabels: map[string]model.LabelSet{ "aws_elasticache_cpuutilization_average": { "account_id": {}, "name": {}, "region": {}, "dimension_cache_cluster_id": {}, }, }, expectedErr: nil, }, { name: "metric with nonstandard namespace", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: "us-east-1", AccountID: "123456789012", CustomTags: nil, }, Data: []*model.CloudwatchData{ { MetricName: "CPUUtilization", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "/aws/sagemaker/TrainingJobs", GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}}, }, Dimensions: []model.Dimension{ { Name: "Host", Value: "sagemaker-xgboost", }, }, ResourceName: "arn:aws:sagemaker:us-east-1:123456789012:training-job/sagemaker-xgboost", }, }, }}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_sagemaker_trainingjobs_cpuutilization_average", Value: 1, Timestamp: nullTs, 
Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:sagemaker:us-east-1:123456789012:training-job/sagemaker-xgboost", "region": "us-east-1", "dimension_host": "sagemaker-xgboost", }, }, }, expectedLabels: map[string]model.LabelSet{ "aws_sagemaker_trainingjobs_cpuutilization_average": { "account_id": {}, "name": {}, "region": {}, "dimension_host": {}, }, }, expectedErr: nil, }, { name: "metric with metric name that does duplicates part of the namespace as a prefix", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: "us-east-1", AccountID: "123456789012", CustomTags: nil, }, Data: []*model.CloudwatchData{ { MetricName: "glue.driver.aggregate.bytesRead", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "Glue", GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}}, }, Dimensions: []model.Dimension{ { Name: "JobName", Value: "test-job", }, }, ResourceName: "arn:aws:glue:us-east-1:123456789012:job/test-job", }, }, }}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_glue_driver_aggregate_bytes_read_average", Value: 1, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:glue:us-east-1:123456789012:job/test-job", "region": "us-east-1", "dimension_job_name": "test-job", }, }, }, expectedLabels: map[string]model.LabelSet{ "aws_glue_driver_aggregate_bytes_read_average": { "account_id": {}, "name": {}, "region": {}, "dimension_job_name": {}, }, }, expectedErr: nil, }, { name: "metric with metric name that does not duplicate part of the namespace as a prefix", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: "us-east-1", AccountID: "123456789012", CustomTags: nil, }, Data: []*model.CloudwatchData{ { MetricName: "aggregate.glue.jobs.bytesRead", MetricMigrationParams: 
model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "Glue", GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}}, }, Dimensions: []model.Dimension{ { Name: "JobName", Value: "test-job", }, }, ResourceName: "arn:aws:glue:us-east-1:123456789012:job/test-job", }, }, }}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_glue_aggregate_glue_jobs_bytes_read_average", Value: 1, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:glue:us-east-1:123456789012:job/test-job", "region": "us-east-1", "dimension_job_name": "test-job", }, }, }, expectedLabels: map[string]model.LabelSet{ "aws_glue_aggregate_glue_jobs_bytes_read_average": { "account_id": {}, "name": {}, "region": {}, "dimension_job_name": {}, }, }, expectedErr: nil, }, { name: "custom tag", data: []model.CloudwatchMetricResult{{ Context: &model.ScrapeContext{ Region: "us-east-1", AccountID: "123456789012", CustomTags: []model.Tag{{ Key: "billable-to", Value: "api", }}, }, Data: []*model.CloudwatchData{ { MetricName: "CPUUtilization", MetricMigrationParams: model.MetricMigrationParams{ NilToZero: false, AddCloudwatchTimestamp: false, }, Namespace: "AWS/ElastiCache", GetMetricDataResult: &model.GetMetricDataResult{ Statistic: "Average", DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}}, }, Dimensions: []model.Dimension{ { Name: "CacheClusterId", Value: "redis-cluster", }, }, ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", }, }, }}, labelsSnakeCase: true, expectedMetrics: []*PrometheusMetric{ { Name: "aws_elasticache_cpuutilization_average", Value: 1, Timestamp: nullTs, Labels: map[string]string{ "account_id": "123456789012", "name": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster", "region": "us-east-1", "dimension_cache_cluster_id": "redis-cluster", 
						"custom_tag_billable_to": "api",
					},
				},
			},
			expectedLabels: map[string]model.LabelSet{
				"aws_elasticache_cpuutilization_average": {
					"account_id":                 {},
					"name":                       {},
					"region":                     {},
					"dimension_cache_cluster_id": {},
					"custom_tag_billable_to":     {},
				},
			},
			expectedErr: nil,
		},
		{
			// The AccountAlias set on the scrape context must surface as the
			// account_alias label on every produced metric.
			name: "scraping with aws account alias",
			data: []model.CloudwatchMetricResult{{
				Context: &model.ScrapeContext{
					Region:       "us-east-1",
					AccountID:    "123456789012",
					AccountAlias: "billingacct",
				},
				Data: []*model.CloudwatchData{
					{
						MetricName: "CPUUtilization",
						MetricMigrationParams: model.MetricMigrationParams{
							NilToZero:              false,
							AddCloudwatchTimestamp: false,
						},
						Namespace: "AWS/ElastiCache",
						GetMetricDataResult: &model.GetMetricDataResult{
							Statistic:  "Average",
							DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}},
						},
						Dimensions: []model.Dimension{
							{
								Name:  "CacheClusterId",
								Value: "redis-cluster",
							},
						},
						ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster",
					},
				},
			}},
			labelsSnakeCase: true,
			expectedMetrics: []*PrometheusMetric{
				{
					Name:      "aws_elasticache_cpuutilization_average",
					Value:     1,
					Timestamp: nullTs,
					Labels: map[string]string{
						"account_id":                 "123456789012",
						"account_alias":              "billingacct",
						"name":                       "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster",
						"region":                     "us-east-1",
						"dimension_cache_cluster_id": "redis-cluster",
					},
				},
			},
			expectedLabels: map[string]model.LabelSet{
				"aws_elasticache_cpuutilization_average": {
					"account_id":                 {},
					"account_alias":              {},
					"name":                       {},
					"region":                     {},
					"dimension_cache_cluster_id": {},
				},
			},
			expectedErr: nil,
		},
	}
	// Run every scenario. NaN expectations are normalized on both sides via
	// replaceNaNValues because require.Equal treats NaN != NaN.
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			res, labels, err := BuildMetrics(tc.data, tc.labelsSnakeCase, promslog.NewNopLogger())
			if tc.expectedErr != nil {
				require.Equal(t, tc.expectedErr, err)
			} else {
				require.NoError(t, err)
				require.Equal(t, replaceNaNValues(tc.expectedMetrics), replaceNaNValues(res))
				require.Equal(t, tc.expectedLabels, labels)
			}
		})
	}
}

// Benchmark_BuildMetrics measures BuildMetrics over a fixed four-metric
// ElastiCache result set (each resource carrying one tag) and verifies the
// label sets reported for the produced metric names.
func Benchmark_BuildMetrics(b *testing.B) {
	ts := time.Date(2024, time.January, 1, 0, 0, 0, 0, time.UTC)
	data := []model.CloudwatchMetricResult{{
		Context: &model.ScrapeContext{
			Region:     "us-east-1",
			AccountID:  "123456789012",
			CustomTags: nil,
		},
		Data: []*model.CloudwatchData{
			{
				MetricName: "CPUUtilization",
				MetricMigrationParams: model.MetricMigrationParams{
					NilToZero:              true,
					AddCloudwatchTimestamp: false,
				},
				Namespace: "AWS/ElastiCache",
				GetMetricDataResult: &model.GetMetricDataResult{
					Statistic:  "Average",
					DataPoints: []model.DataPoint{{Value: aws.Float64(1), Timestamp: ts}},
				},
				Dimensions: []model.Dimension{
					{
						Name:  "CacheClusterId",
						Value: "redis-cluster",
					},
				},
				ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster",
				Tags: []model.Tag{{
					Key:   "managed_by",
					Value: "terraform",
				}},
			},
			{
				MetricName: "FreeableMemory",
				MetricMigrationParams: model.MetricMigrationParams{
					NilToZero:              false,
					AddCloudwatchTimestamp: false,
				},
				Namespace: "AWS/ElastiCache",
				Dimensions: []model.Dimension{
					{
						Name:  "CacheClusterId",
						Value: "redis-cluster",
					},
				},
				GetMetricDataResult: &model.GetMetricDataResult{
					Statistic:  "Average",
					DataPoints: []model.DataPoint{{Value: aws.Float64(2), Timestamp: ts}},
				},
				ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster",
				Tags: []model.Tag{{
					Key:   "managed_by",
					Value: "terraform",
				}},
			},
			{
				MetricName: "NetworkBytesIn",
				MetricMigrationParams: model.MetricMigrationParams{
					NilToZero:              true,
					AddCloudwatchTimestamp: false,
				},
				Namespace: "AWS/ElastiCache",
				Dimensions: []model.Dimension{
					{
						Name:  "CacheClusterId",
						Value: "redis-cluster",
					},
				},
				GetMetricDataResult: &model.GetMetricDataResult{
					Statistic:  "Average",
					DataPoints: []model.DataPoint{{Value: aws.Float64(3), Timestamp: ts}},
				},
				ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster",
				Tags: []model.Tag{{
					Key:   "managed_by",
					Value: "terraform",
				}},
			},
			{
				MetricName: "NetworkBytesOut",
				MetricMigrationParams: model.MetricMigrationParams{
					NilToZero:              true,
					AddCloudwatchTimestamp: true,
				},
				Namespace: "AWS/ElastiCache",
				Dimensions: []model.Dimension{
					{
						Name:  "CacheClusterId",
						Value: "redis-cluster",
					},
				},
				GetMetricDataResult: &model.GetMetricDataResult{
					Statistic:  "Average",
					DataPoints: []model.DataPoint{{Value: aws.Float64(4), Timestamp: ts}},
				},
				ResourceName: "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-cluster",
				Tags: []model.Tag{{
					Key:   "managed_by",
					Value: "terraform",
				}},
			},
		},
	}}
	var labels map[string]model.LabelSet
	var err error
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, labels, err = BuildMetrics(data, false, promslog.NewNopLogger())
	}
	expectedLabels := map[string]model.LabelSet{
		"aws_elasticache_cpuutilization_average": {
			"account_id":               {},
			"name":                     {},
			"region":                   {},
			"dimension_CacheClusterId": {},
			"tag_managed_by":           {},
		},
		"aws_elasticache_freeable_memory_average": {
			"account_id":               {},
			"name":                     {},
			"region":                   {},
			"dimension_CacheClusterId": {},
			"tag_managed_by":           {},
		},
		"aws_elasticache_network_bytes_in_average": {
			"account_id":               {},
			"name":                     {},
			"region":                   {},
			"dimension_CacheClusterId": {},
			"tag_managed_by":           {},
		},
		"aws_elasticache_network_bytes_out_average": {
			"account_id":               {},
			"name":                     {},
			"region":                   {},
			"dimension_CacheClusterId": {},
			"tag_managed_by":           {},
		},
	}
	// Sanity-check the final iteration's output so the benchmark also acts as
	// a correctness guard.
	require.NoError(b, err)
	require.Equal(b, expectedLabels, labels)
}

// TestBuildMetricName checks how namespace, metric name and statistic are
// flattened into a single Prometheus metric name, including slash-containing
// namespaces and metric names that repeat the namespace as a prefix.
func TestBuildMetricName(t *testing.T) {
	type testCase struct {
		name      string
		namespace string
		metric    string
		statistic string
		expected  string
	}
	testCases := []testCase{
		{
			name:      "standard AWS namespace",
			namespace: "AWS/ElastiCache",
			metric:    "CPUUtilization",
			statistic: "Average",
			expected:  "aws_elasticache_cpuutilization_average",
		},
		{
			name:      "nonstandard namespace with slashes",
			namespace: "/aws/sagemaker/TrainingJobs",
			metric:    "CPUUtilization",
			statistic: "Average",
			expected:  "aws_sagemaker_trainingjobs_cpuutilization_average",
		},
		{
			name:      "metric name duplicating namespace",
			namespace: "Glue",
			metric:    "glue.driver.aggregate.bytesRead",
			statistic: "Average",
			expected:  "aws_glue_driver_aggregate_bytes_read_average",
		},
		{
			name:      "metric name not duplicating namespace",
			namespace: "Glue",
			metric:    "aggregate.glue.jobs.bytesRead",
			statistic: "Average",
			expected:  "aws_glue_aggregate_glue_jobs_bytes_read_average",
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			result := BuildMetricName(tc.namespace, tc.metric, tc.statistic)
			require.Equal(t, tc.expected, result)
		})
	}
}

// Benchmark_BuildMetricName runs one sub-benchmark per input combination,
// using the generated metric name as the sub-benchmark name.
func Benchmark_BuildMetricName(b *testing.B) {
	testCases := []struct {
		namespace string
		metric    string
		statistic string
	}{
		{
			namespace: "AWS/ElastiCache",
			metric:    "CPUUtilization",
			statistic: "Average",
		},
		{
			namespace: "/aws/sagemaker/TrainingJobs",
			metric:    "CPUUtilization",
			statistic: "Average",
		},
		{
			namespace: "Glue",
			metric:    "glue.driver.aggregate.bytesRead",
			statistic: "Average",
		},
		{
			namespace: "Glue",
			metric:    "aggregate.glue.jobs.bytesRead",
			statistic: "Average",
		},
	}
	for _, tc := range testCases {
		testName := BuildMetricName(tc.namespace, tc.metric, tc.statistic)
		b.ResetTimer()
		b.ReportAllocs()
		b.Run(testName, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				BuildMetricName(tc.namespace, tc.metric, tc.statistic)
			}
		})
	}
}

// replaceNaNValues replaces any NaN floating-point values with a marker value (54321.0)
// so that require.Equal() can compare them. By default, require.Equal() will fail if any
// struct values are NaN because NaN != NaN
func replaceNaNValues(metrics []*PrometheusMetric) []*PrometheusMetric {
	for _, metric := range metrics {
		if math.IsNaN(metric.Value) {
			metric.Value = 54321.0
		}
	}
	return metrics
}

// TestSortByTimeStamp validates that sortByTimestamp() sorts in descending order.
func TestSortByTimeStamp(t *testing.T) {
	ts := time.Date(2024, time.January, 1, 0, 0, 0, 0, time.UTC)
	// Three datapoints deliberately constructed out of chronological order.
	dataPointMiddle := &model.MetricStatisticsResult{
		Timestamp: aws.Time(ts.Add(time.Minute * 2 * -1)),
		Maximum:   aws.Float64(2),
	}
	dataPointNewest := &model.MetricStatisticsResult{
		Timestamp: aws.Time(ts.Add(time.Minute * -1)),
		Maximum:   aws.Float64(1),
	}
	dataPointOldest := &model.MetricStatisticsResult{
		Timestamp: aws.Time(ts.Add(time.Minute * 3 * -1)),
		Maximum:   aws.Float64(3),
	}
	cloudWatchDataPoints := []*model.MetricStatisticsResult{
		dataPointMiddle,
		dataPointNewest,
		dataPointOldest,
	}
	sortedDataPoints := sortByTimestamp(cloudWatchDataPoints)
	// Descending order: newest first.
	expectedDataPoints := []*model.MetricStatisticsResult{
		dataPointNewest,
		dataPointMiddle,
		dataPointOldest,
	}
	require.Equal(t, expectedDataPoints, sortedDataPoints)
}

// Test_EnsureLabelConsistencyAndRemoveDuplicates covers two behaviors:
// back-filling labels missing from a metric (using the observed label set for
// that metric name, with empty-string values) and dropping metrics whose
// name and label pairs are exact duplicates of an earlier one.
func Test_EnsureLabelConsistencyAndRemoveDuplicates(t *testing.T) {
	testCases := []struct {
		name           string
		metrics        []*PrometheusMetric
		observedLabels map[string]model.LabelSet
		output         []*PrometheusMetric
	}{
		{
			name: "adds missing labels",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
					Value:  1.0,
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label2": "value2"},
					Value:  2.0,
				},
				{
					Name:   "metric1",
					Labels: map[string]string{},
					Value:  3.0,
				},
			},
			observedLabels: map[string]model.LabelSet{"metric1": {"label1": {}, "label2": {}, "label3": {}}},
			output: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1", "label2": "", "label3": ""},
					Value:  1.0,
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "", "label3": "", "label2": "value2"},
					Value:  2.0,
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "", "label2": "", "label3": ""},
					Value:  3.0,
				},
			},
		},
		{
			name: "duplicate metric",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
			},
			observedLabels: map[string]model.LabelSet{},
			output: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
			},
		},
		{
			// Map iteration order must not affect duplicate detection.
			name: "duplicate metric, multiple labels",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1", "label2": "value2"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label2": "value2", "label1": "value1"},
				},
			},
			observedLabels: map[string]model.LabelSet{},
			output: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1", "label2": "value2"},
				},
			},
		},
		{
			name: "metric with different labels",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label2": "value2"},
				},
			},
			observedLabels: map[string]model.LabelSet{},
			output: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label2": "value2"},
				},
			},
		},
		{
			name: "two metrics",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric2",
					Labels: map[string]string{"label1": "value1"},
				},
			},
			observedLabels: map[string]model.LabelSet{},
			output: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric2",
					Labels: map[string]string{"label1": "value1"},
				},
			},
		},
		{
			name: "two metrics with different labels",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric2",
					Labels: map[string]string{"label2": "value2"},
				},
			},
			observedLabels: map[string]model.LabelSet{},
			output: []*PrometheusMetric{
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric2",
					Labels: map[string]string{"label2": "value2"},
				},
			},
		},
		{
			name: "multiple duplicates and non-duplicates",
			metrics: []*PrometheusMetric{
				{
					Name:   "metric2",
					Labels: map[string]string{"label2": "value2"},
				},
				{
					Name:   "metric2",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
			},
			observedLabels: map[string]model.LabelSet{},
			output: []*PrometheusMetric{
				{
					Name:   "metric2",
					Labels: map[string]string{"label2": "value2"},
				},
				{
					Name:   "metric2",
					Labels: map[string]string{"label1": "value1"},
				},
				{
					Name:   "metric1",
					Labels: map[string]string{"label1": "value1"},
				},
			},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			actual := EnsureLabelConsistencyAndRemoveDuplicates(tc.metrics, tc.observedLabels)
			// Order of the surviving metrics is not part of the contract.
			require.ElementsMatch(t, tc.output, actual)
		})
	}
}



================================================
FILE: pkg/promutil/prometheus.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package promutil

import (
	"strings"
	"time"
	"unicode"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"golang.org/x/exp/maps"
)

// Self-monitoring counters exposed by the exporter itself. Several of the
// per-API counters are deprecated in favor of yace_cloudwatch_requests_total
// with an api_name label (see the Help strings below).
var (
	CloudwatchAPIErrorCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "yace_cloudwatch_request_errors",
		Help: "Help is not implemented yet.",
	}, []string{"api_name"})

	CloudwatchAPICounter = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "yace_cloudwatch_requests_total",
		Help: "Number of calls made to the CloudWatch APIs",
	}, []string{"api_name"})

	CloudwatchGetMetricDataAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_getmetricdata_requests_total",
		Help: "DEPRECATED: replaced by yace_cloudwatch_requests_total with api_name label",
	})

	CloudwatchGetMetricDataAPIMetricsCounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_getmetricdata_metrics_requested_total",
		Help: "Number of metrics requested from the CloudWatch GetMetricData API which is how AWS bills",
	})

	CloudwatchGetMetricStatisticsAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_getmetricstatistics_requests_total",
		Help: "DEPRECATED: replaced by yace_cloudwatch_requests_total with api_name label",
	})

	ResourceGroupTaggingAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_resourcegrouptaggingapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	AutoScalingAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_autoscalingapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	TargetGroupsAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_targetgroupapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	APIGatewayAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_apigatewayapi_requests_total",
	})

	APIGatewayAPIV2Counter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_apigatewayapiv2_requests_total",
	})

	Ec2APICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_ec2api_requests_total",
		Help: "Help is not implemented yet.",
	})

	ShieldAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_shieldapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	ManagedPrometheusAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_managedprometheusapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	StoragegatewayAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_storagegatewayapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	DmsAPICounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_dmsapi_requests_total",
		Help: "Help is not implemented yet.",
	})

	DuplicateMetricsFilteredCounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "yace_cloudwatch_duplicate_metrics_filtered",
		Help: "Help is not implemented yet.",
	})
)

// replacer maps characters that are invalid in Prometheus names to an
// underscore; '%' expands to the string "_percent". Used by sanitize as the
// slow path for non-ASCII / multi-character replacements.
var replacer = strings.NewReplacer(
	" ", "_",
	",", "_",
	"\t", "_",
	"/", "_",
	"\\", "_",
	".", "_",
	"-", "_",
	":", "_",
	"=", "_",
	"“", "_",
	"@", "_",
	"<", "_",
	">", "_",
	"(", "_",
	")", "_",
	"%", "_percent",
)

// PrometheusMetric is an intermediate, renderable representation of one
// sample: name, label key/value pairs, a float value and an optional
// explicit timestamp (honored only when IncludeTimestamp is set).
type PrometheusMetric struct {
	Name             string
	Labels           map[string]string
	Value            float64
	IncludeTimestamp bool
	Timestamp        time.Time
}

// PrometheusCollector is a static prometheus.Collector over a pre-built
// slice of const metrics.
type PrometheusCollector struct {
	metrics []prometheus.Metric
}

// NewPrometheusCollector converts the given metrics to const metrics once,
// up front, and returns a collector that replays them on every Collect.
func NewPrometheusCollector(metrics []*PrometheusMetric) *PrometheusCollector {
	return &PrometheusCollector{
		metrics: toConstMetrics(metrics),
	}
}

func (p *PrometheusCollector) Describe(_ chan<- *prometheus.Desc) {
	// The exporter produces a dynamic set of metrics and the docs for prometheus.Collector Describe say
	// Sending no descriptor at all marks the Collector as “unchecked”,
	// i.e. no checks will be performed at registration time, and the
	// Collector may yield any Metric it sees fit in its Collect method.
	// Based on our use an "unchecked" collector is perfectly fine
}

// Collect emits the pre-computed const metrics.
func (p *PrometheusCollector) Collect(metrics chan<- prometheus.Metric) {
	for _, metric := range p.metrics {
		metrics <- metric
	}
}

func toConstMetrics(metrics []*PrometheusMetric) []prometheus.Metric {
	// We keep two fast lookup maps here one for the prometheus.Desc of a metric which can be reused for each metric with
	// the same name and the expected label key order of a particular metric name.
	// The prometheus.Desc object is expensive to create and being able to reuse it for all metrics with the same name
	// results in large performance gain. We use the other map because metrics created using the Desc only provide label
	// values and they must be provided in the exact same order as registered in the Desc.
	metricToDesc := map[string]*prometheus.Desc{}
	metricToExpectedLabelOrder := map[string][]string{}

	result := make([]prometheus.Metric, 0, len(metrics))
	for _, metric := range metrics {
		metricName := metric.Name
		if _, ok := metricToDesc[metricName]; !ok {
			labelKeys := maps.Keys(metric.Labels)
			metricToDesc[metricName] = prometheus.NewDesc(metricName, "Help is not implemented yet.", labelKeys, nil)
			metricToExpectedLabelOrder[metricName] = labelKeys
		}
		metricsDesc := metricToDesc[metricName]

		// Create the label values using the label order of the Desc
		labelValues := make([]string, 0, len(metric.Labels))
		for _, labelKey := range metricToExpectedLabelOrder[metricName] {
			labelValues = append(labelValues, metric.Labels[labelKey])
		}

		promMetric, err := prometheus.NewConstMetric(metricsDesc, prometheus.GaugeValue, metric.Value, labelValues...)
		if err != nil {
			// If for whatever reason the metric or metricsDesc is considered invalid this will ensure the error is
			// reported through the collector
			promMetric = prometheus.NewInvalidMetric(metricsDesc, err)
		} else if metric.IncludeTimestamp {
			promMetric = prometheus.NewMetricWithTimestamp(metric.Timestamp, promMetric)
		}
		result = append(result, promMetric)
	}
	return result
}

// PromString converts text to a snake_cased Prometheus-safe string.
func PromString(text string) string {
	var buf strings.Builder
	PromStringToBuilder(text, &buf)
	return buf.String()
}

// PromStringToBuilder writes the snake_cased form of text into buf:
// invalid characters become underscores, '%' becomes "_percent", and an
// underscore is inserted at each lower/digit-to-upper case boundary.
func PromStringToBuilder(text string, buf *strings.Builder) {
	buf.Grow(len(text))
	var prev rune
	for _, c := range text {
		switch c {
		case ' ', ',', '\t', '/', '\\', '.', '-', ':', '=', '@', '<', '>', '(', ')', '“':
			buf.WriteRune('_')
		case '%':
			buf.WriteString("_percent")
		default:
			if unicode.IsUpper(c) && (unicode.IsLower(prev) || unicode.IsDigit(prev)) {
				buf.WriteRune('_')
			}
			buf.WriteRune(unicode.ToLower(c))
		}
		prev = c
	}
}

// PromStringTag sanitizes text for use as a label name (snake_cased when
// labelsSnakeCase is set) and reports whether the result is a valid
// Prometheus label name.
func PromStringTag(text string, labelsSnakeCase bool) (bool, string) {
	var s string
	if labelsSnakeCase {
		s = PromString(text)
	} else {
		s = sanitize(text)
	}
	return model.LabelName(s).IsValid(), s //nolint:staticcheck
}

// sanitize replaces some invalid chars with an underscore
func sanitize(text string) string {
	if strings.ContainsAny(text, "“%") {
		// fallback to the replacer for complex cases:
		// - '“' is non-ascii rune
		// - '%' is replaced with a whole string
		return replacer.Replace(text)
	}
	b := []byte(text)
	for i := 0; i < len(b); i++ {
		switch b[i] {
		case ' ', ',', '\t', '/', '\\', '.', '-', ':', '=', '@', '<', '>', '(', ')':
			b[i] = '_'
		}
	}
	return string(b)
}


================================================
FILE: pkg/promutil/prometheus_test.go
================================================
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package promutil import ( "testing" "time" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestSanitize(t *testing.T) { testCases := []struct { input string output string }{ { input: "Global.Topic.Count", output: "Global_Topic_Count", }, { input: "Status.Check.Failed_Instance", output: "Status_Check_Failed_Instance", }, { input: "IHaveA%Sign", output: "IHaveA_percentSign", }, } for _, tc := range testCases { assert.Equal(t, tc.output, sanitize(tc.input)) } } func TestPromStringTag(t *testing.T) { originalValidationScheme := model.NameValidationScheme //nolint:staticcheck model.NameValidationScheme = model.LegacyValidation //nolint:staticcheck defer func() { model.NameValidationScheme = originalValidationScheme //nolint:staticcheck }() testCases := []struct { name string label string toSnakeCase bool ok bool out string }{ { name: "valid", label: "labelName", toSnakeCase: false, ok: true, out: "labelName", }, { name: "valid, convert to snake case", label: "labelName", toSnakeCase: true, ok: true, out: "label_name", }, { name: "valid (snake case)", label: "label_name", toSnakeCase: false, ok: true, out: "label_name", }, { name: "valid (snake case) unchanged", label: "label_name", toSnakeCase: true, ok: true, out: "label_name", }, { name: "invalid chars", label: "invalidChars@$", toSnakeCase: false, ok: false, out: "", }, { name: "invalid chars, convert to snake case", label: "invalidChars@$", 
toSnakeCase: true, ok: false, out: "", }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { ok, out := PromStringTag(tc.label, tc.toSnakeCase) assert.Equal(t, tc.ok, ok) if ok { assert.Equal(t, tc.out, out) } }) } } func TestNewPrometheusCollector_CanReportMetricsAndErrors(t *testing.T) { originalValidationScheme := model.NameValidationScheme //nolint:staticcheck model.NameValidationScheme = model.LegacyValidation //nolint:staticcheck defer func() { model.NameValidationScheme = originalValidationScheme //nolint:staticcheck }() metrics := []*PrometheusMetric{ { Name: "this*is*not*valid", Labels: map[string]string{}, Value: 0, IncludeTimestamp: false, }, { Name: "this_is_valid", Labels: map[string]string{"key": "value1"}, Value: 0, IncludeTimestamp: false, }, } collector := NewPrometheusCollector(metrics) registry := prometheus.NewRegistry() require.NoError(t, registry.Register(collector)) families, err := registry.Gather() assert.Error(t, err) assert.Len(t, families, 1) family := families[0] assert.Equal(t, "this_is_valid", family.GetName()) } func TestNewPrometheusCollector_CanReportMetrics(t *testing.T) { ts := time.Now() labelSet1 := map[string]string{"key1": "value", "key2": "value", "key3": "value"} labelSet2 := map[string]string{"key2": "out", "key3": "of", "key1": "order"} labelSet3 := map[string]string{"key2": "out", "key1": "of", "key3": "order"} metrics := []*PrometheusMetric{ { Name: "metric_with_labels", Labels: labelSet1, Value: 1, IncludeTimestamp: false, }, { Name: "metric_with_labels", Labels: labelSet2, Value: 2, IncludeTimestamp: false, }, { Name: "metric_with_labels", Labels: labelSet3, Value: 3, IncludeTimestamp: false, }, { Name: "metric_with_timestamp", Labels: map[string]string{}, Value: 1, IncludeTimestamp: true, Timestamp: ts, }, } collector := NewPrometheusCollector(metrics) registry := prometheus.NewRegistry() require.NoError(t, registry.Register(collector)) families, err := registry.Gather() assert.NoError(t, err) 
assert.Len(t, families, 2) var metricWithLabels *dto.MetricFamily var metricWithTs *dto.MetricFamily for _, metricFamily := range families { assert.Equal(t, dto.MetricType_GAUGE, metricFamily.GetType()) switch { case metricFamily.GetName() == "metric_with_labels": metricWithLabels = metricFamily case metricFamily.GetName() == "metric_with_timestamp": metricWithTs = metricFamily default: require.Failf(t, "Encountered an unexpected metric family %s", metricFamily.GetName()) } } require.NotNil(t, metricWithLabels) require.NotNil(t, metricWithTs) assert.Len(t, metricWithLabels.Metric, 3) for _, metric := range metricWithLabels.Metric { assert.Len(t, metric.Label, 3) var labelSetToMatch map[string]string switch *metric.Gauge.Value { case 1.0: labelSetToMatch = labelSet1 case 2.0: labelSetToMatch = labelSet2 case 3.0: labelSetToMatch = labelSet3 default: require.Fail(t, "Encountered an metric value value %v", *metric.Gauge.Value) } for _, labelPairs := range metric.Label { require.Contains(t, labelSetToMatch, *labelPairs.Name) require.Equal(t, labelSetToMatch[*labelPairs.Name], *labelPairs.Value) } } require.Len(t, metricWithTs.Metric, 1) tsMetric := metricWithTs.Metric[0] assert.Equal(t, ts.UnixMilli(), *tsMetric.TimestampMs) assert.Equal(t, 1.0, *tsMetric.Gauge.Value) }