Repository: aws-samples/aws-health-aware Branch: main Commit: 928494c70a90 Files: 15 Total size: 188.1 KB Directory structure: gitextract_sau1qmx8/ ├── .gitignore ├── CFN_DEPLOY_AHA.yml ├── CFN_MGMT_ROLE.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ExcludeAccountIDs(sample).csv ├── LICENSE ├── README.md ├── handler.py ├── messagegenerator.py ├── new_aha_event_schema.md └── terraform/ ├── Terraform_DEPLOY_AHA/ │ ├── Terraform_DEPLOY_AHA.tf │ └── terraform.tfvars └── Terraform_MGMT_ROLE/ ├── Terraform_MGMT_ROLE.tf └── terraform.tfvars ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # AHA Lambda Function package lambda_function.zip # ---> Terraform # Local .terraform directories **/.terraform/* # .tfstate files *.tfstate *.tfstate.* # tf lock file .terraform.lock.hcl # Crash log files crash.log # Ignore any .tfvars files that are generated automatically for each Terraform run. Most # .tfvars files are managed as part of configuration and so should be included in # version control. 
# # example.tfvars # Ignore override files as they are usually used to override resources locally and so # are not checked in override.tf override.tf.json *_override.tf *_override.tf.json # Include override files you do wish to add to version control using negated pattern # # !example_override.tf # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan # example: *tfplan* ================================================ FILE: CFN_DEPLOY_AHA.yml ================================================ AWSTemplateFormatVersion: '2010-09-09' Description: CloudFormation Template for AWS Health Aware (AHA) Metadata: 'AWS::CloudFormation::Interface': ParameterGroups: - Label: default: Customize Alerts/Notifications Parameters: - AWSOrganizationsEnabled - AWSHealthEventType - Label: default: Package Information Parameters: - S3Bucket - S3Key - Label: default: >- Communication Channels - Slack/Microsoft Teams/Amazon Chime And/or EventBridge Parameters: - SlackWebhookURL - MicrosoftTeamsWebhookURL - AmazonChimeWebhookURL - EventBusName - Label: default: Email Setup - For Alerting via Email Parameters: - FromEmail - ToEmail - Subject - Label: default: More Configurations - Optional Parameters: - EventSearchBack - Regions - ManagementAccountRoleArn - SecondaryRegion - AccountIDs ParameterLabels: AWSOrganizationsEnabled: default: AWS Organizations Enabled? 
ManagementAccountRoleArn: default: ARN of the AWS Organizations Management Account assume role (if using) AWSHealthEventType: default: The types of events to get alerted on S3Bucket: default: Name of S3 Bucket S3Key: default: Name of .zip file in S3 Bucket SlackWebhookURL: default: Slack Webhook URL MicrosoftTeamsWebhookURL: default: Microsoft Teams Webhook URL AmazonChimeWebhookURL: default: Amazon Chime Webhook URL FromEmail: default: Email From ToEmail: default: Email To Subject: default: Subject of Email HealthAPIFrequency: default: Hours back to search for events Regions: default: Which regions to search for events in SecondaryRegion: default: Deploy in secondary region? AccountIDs: default: Exclude any account numbers? Conditions: UsingSlack: !Not [!Equals [!Ref SlackWebhookURL, None]] UsingTeams: !Not [!Equals [!Ref MicrosoftTeamsWebhookURL, None]] UsingChime: !Not [!Equals [!Ref AmazonChimeWebhookURL, None]] UsingEventBridge: !Not [!Equals [!Ref EventBusName, None]] UsingSecrets: !Or [!Condition UsingSlack, !Condition UsingTeams, !Condition UsingChime, !Condition UsingEventBridge, !Condition UsingCrossAccountRole] UsingCrossAccountRole: !Not [!Equals [!Ref ManagementAccountRoleArn, None]] NotUsingMultiRegion: !Equals [!Ref SecondaryRegion, 'No'] UsingMultiRegion: !Not [!Equals [!Ref SecondaryRegion, 'No']] TestCondition: !Equals ['true', 'false'] UsingMultiRegionTeams: !And [!Condition UsingTeams, !Condition UsingMultiRegion] UsingMultiRegionSlack: !And [!Condition UsingSlack, !Condition UsingMultiRegion] UsingMultiRegionEventBridge: !And [!Condition UsingEventBridge, !Condition UsingMultiRegion] UsingMultiRegionChime: !And [!Condition UsingChime, !Condition UsingMultiRegion] UsingMultiRegionCrossAccountRole: !And [!Condition UsingCrossAccountRole, !Condition UsingMultiRegion] UsingAccountIds: !Not [!Equals [!Ref AccountIDs, None]] Parameters: AWSOrganizationsEnabled: Description: >- You can receive both PHD and SHD alerts if you're using AWS Organizations. 
If you are, make sure to enable Organizational Health View: (https://docs.aws.amazon.com/health/latest/ug/aggregate-events.html) to aggregate all PHD events in your AWS Organization. If not, you can still get SHD alerts. Default: 'No' AllowedValues: - 'Yes' - 'No' Type: String SecondaryRegion: Description: You can deploy this in a secondary region for resiliency. As a result, the DynamoDB table will become a Global DynamoDB table. Regions that support Global DynamoDB tables are listed Default: 'No' AllowedValues: - 'No' - us-east-1 - us-east-2 - us-west-1 - us-west-2 - ap-south-1 - ap-northeast-2 - ap-southeast-1 - ap-southeast-2 - ap-northeast-1 - ca-central-1 - eu-central-1 - eu-west-1 - eu-west-2 - eu-west-3 - sa-east-1 Type: String ManagementAccountRoleArn: Description: Arn of the IAM role in the top-level management account for collecting PHD Events. 'None' if deploying into the top-level management account. Type: String Default: None AWSHealthEventType: Description: >- Select the event type that you want AHA to report on. Refer to https://docs.aws.amazon.com/health/latest/APIReference/API_EventType.html for more information on EventType. Default: 'issue | accountNotification | scheduledChange' AllowedValues: - 'issue | accountNotification | scheduledChange' - 'issue' Type: String S3Bucket: Description: >- Name of your S3 Bucket where the AHA Package .zip resides. Just the name of the bucket (e.g. my-s3-bucket) Type: String S3Key: Description: >- Name of the .zip in your S3 Bucket. Just the name of the file (e.g. aha-v1.0.zip) Type: String EventBusName: Description: >- This is to ingest alerts into AWS EventBridge. Enter the event bus name if you wish to send the alerts to the AWS EventBridge. Note: By ingesting these alerts to AWS EventBridge, you can integrate with 35 SaaS vendors such as DataDog/NewRelic/PagerDuty. If you don't prefer to use EventBridge, leave the default (None). 
Type: String Default: None SlackWebhookURL: Description: >- Enter the Slack Webhook URL. If you don't prefer to use Slack, leave the default (None). Type: String Default: None MicrosoftTeamsWebhookURL: Description: >- Enter Microsoft Teams Webhook URL. If you don't prefer to use MS Teams, leave the default (None). Type: String Default: None AmazonChimeWebhookURL: Description: >- Enter the Chime Webhook URL, If you don't prefer to use Amazon Chime, leave the default (None). Type: String Default: None Regions: Description: >- By default, AHA reports events affecting all AWS regions. If you want to report on certain regions you can enter up to 10 in a comma separated format. Available Regions: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-3, ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2, eu-south-1,eu-south-3,eu-north-1,me-south-1,sa-east-1,global Default: all regions AllowedPattern: ".+" ConstraintDescription: No regions were entered, please read the documentation about selecting all regions or filtering on some. Type: String AccountIDs: Description: >- If you would like to EXCLUDE any accounts from alerting, upload a .csv file of comma-seperated account numbers to the same S3 bucket where the AHA.zip package is located. Sample AccountIDs file name: aha_account_ids.csv. If not, leave the default of None. Default: None Type: String AllowedPattern: (None)|(.+(\.csv))$ EventSearchBack: Description: How far back to search for events in hours. Default is 1 hour Default: '1' Type: Number FromEmail: Description: Enter FROM Email Address Type: String Default: none@domain.com AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$ ConstraintDescription: 'FromEmail is not a valid, please verify entry. If not sending to email, leave as the default, none@domain.com.' 
ToEmail: Description: >- Enter email addresses separated by commas (for ex: abc@amazon.com, bcd@amazon.com) Type: String Default: none@domain.com AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$ ConstraintDescription: 'ToEmail is not a valid, please verify entry. If not sending to email, leave as the default, none@domain.com.' Subject: Description: Enter the subject of the email address Type: String Default: AWS Health Alert Resources: GlobalDDBTable: Type: AWS::DynamoDB::GlobalTable Condition: UsingMultiRegion Properties: AttributeDefinitions: - AttributeName: arn AttributeType: S KeySchema: - AttributeName: arn KeyType: HASH Replicas: - Region: !Ref SecondaryRegion ReadProvisionedThroughputSettings: ReadCapacityUnits: 5 - Region: !Ref "AWS::Region" ReadProvisionedThroughputSettings: ReadCapacityUnits: 5 StreamSpecification: StreamViewType: "NEW_AND_OLD_IMAGES" TimeToLiveSpecification: AttributeName: ttl Enabled: true WriteProvisionedThroughputSettings: WriteCapacityAutoScalingSettings: MaxCapacity: 10 MinCapacity: 10 TargetTrackingScalingPolicyConfiguration: DisableScaleIn: false ScaleInCooldown: 30 ScaleOutCooldown: 30 TargetValue: 10 DynamoDBTable: Type: 'AWS::DynamoDB::Table' Condition: NotUsingMultiRegion Properties: AttributeDefinitions: - AttributeName: arn AttributeType: S KeySchema: - AttributeName: arn KeyType: HASH ProvisionedThroughput: ReadCapacityUnits: 5 WriteCapacityUnits: 5 TimeToLiveSpecification: AttributeName: ttl Enabled: TRUE AHASecondaryRegionStackSet: Condition: UsingMultiRegion DependsOn: GlobalDDBTable Type: AWS::CloudFormation::StackSet Properties: Description: Secondary Region CloudFormation Template for AWS Health Aware (AHA) PermissionModel: SELF_MANAGED Capabilities: [CAPABILITY_IAM] StackInstancesGroup: - Regions: - !Ref 'SecondaryRegion' DeploymentTargets: Accounts: - !Ref 'AWS::AccountId' StackSetName: 'aha-multi-region' TemplateBody: !Sub | Resources: AHA2ndRegionBucket: Type: AWS::S3::Bucket 
CopyAHA: Type: Custom::CopyAHA Properties: DestBucket: !Ref 'AHA2ndRegionBucket' ServiceToken: !GetAtt 'CopyAHAFunction.Arn' SourceBucket: ${S3Bucket} Object: - ${S3Key} CopyAHARole: Type: AWS::IAM::Role Properties: AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Principal: Service: lambda.amazonaws.com Action: sts:AssumeRole ManagedPolicyArns: - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole Path: / Policies: - PolicyName: aha-lambda-copier PolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Action: - s3:GetObject Resource: - 'arn:aws:s3:::${S3Bucket}*' - Effect: Allow Action: - s3:PutObject - s3:DeleteObject Resource: - !Join ['', [ 'arn:aws:s3:::', !Ref AHA2ndRegionBucket, '*']] CopyAHAFunction: Type: AWS::Lambda::Function DependsOn: AHA2ndRegionBucket Properties: Description: Copies AHA .zip from a source S3 bucket to a destination Handler: index.handler Runtime: python3.11 Role: !GetAtt 'CopyAHARole.Arn' Timeout: 240 Code: ZipFile: | import json import logging import threading import boto3 import cfnresponse def copy_object(source_bucket, dest_bucket, object): s3 = boto3.client('s3') for o in object: key = o copy_source = { 'Bucket': source_bucket, 'Key': key } print('copy_source: %s' % copy_source) print('dest_bucket = %s'%dest_bucket) print('key = %s' %key) s3.copy_object(CopySource=copy_source, Bucket=dest_bucket, Key=key) def delete_object(bucket, object): s3 = boto3.client('s3') objects = {'Objects': [{'Key': o} for o in object]} s3.delete_objects(Bucket=bucket, Delete=objects) def timeout(event, context): logging.error('Execution is about to time out, sending failure response to CloudFormation') cfnresponse.send(event, context, cfnresponse.FAILED, {}, None) def handler(event, context): # make sure we send a failure to CloudFormation if the function # is going to timeout timer = threading.Timer((context.get_remaining_time_in_millis() / 1000.00) - 0.5, timeout, args=[event, context]) timer.start() 
print('Received event: %s' % json.dumps(event)) status = cfnresponse.SUCCESS try: source_bucket = event['ResourceProperties']['SourceBucket'] dest_bucket = event['ResourceProperties']['DestBucket'] object = event['ResourceProperties']['Object'] if event['RequestType'] == 'Delete': delete_object(dest_bucket, object) else: copy_object(source_bucket, dest_bucket, object) except Exception as e: logging.error('Exception: %s' % e, exc_info=True) status = cfnresponse.FAILED finally: timer.cancel() cfnresponse.send(event, context, status, {}, None) LambdaSchedule: Type: AWS::Events::Rule Properties: Description: Lambda trigger Event ScheduleExpression: rate(1 minute) State: ENABLED Targets: - Arn: !GetAtt 'LambdaFunction.Arn' Id: LambdaSchedule LambdaSchedulePermission: Type: AWS::Lambda::Permission Properties: Action: lambda:InvokeFunction FunctionName: !GetAtt 'LambdaFunction.Arn' Principal: events.amazonaws.com SourceArn: !GetAtt 'LambdaSchedule.Arn' LambdaFunction: Type: AWS::Lambda::Function DependsOn: CopyAHA Properties: Description: Lambda function that runs AHA Code: S3Bucket: Ref: AHA2ndRegionBucket S3Key: "${S3Key}" Handler: handler.main MemorySize: 128 Timeout: 600 Role: ${LambdaExecutionRole.Arn} Runtime: python3.11 Environment: Variables: REGIONS: ${Regions} FROM_EMAIL: "${FromEmail}" TO_EMAIL: "${ToEmail}" EMAIL_SUBJECT: "${Subject}" DYNAMODB_TABLE: "${GlobalDDBTable}" EVENT_SEARCH_BACK: ${EventSearchBack} ORG_STATUS: ${AWSOrganizationsEnabled} HEALTH_EVENT_TYPE: "${AWSHealthEventType}" MANAGEMENT_ROLE_ARN: "${ManagementAccountRoleArn}" LambdaExecutionRole: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Principal: Service: - lambda.amazonaws.com Action: - 'sts:AssumeRole' Path: / Policies: - PolicyName: AHA-LambdaPolicy PolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Action: - logs:CreateLogGroup - logs:CreateLogStream - logs:PutLogEvents Resource: - !Sub 
'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*' - !If [UsingMultiRegion, !Sub 'arn:aws:logs:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] - !If - UsingSecrets - Effect: Allow Action: - 'secretsmanager:GetResourcePolicy' - 'secretsmanager:DescribeSecret' - 'secretsmanager:ListSecretVersionIds' - 'secretsmanager:GetSecretValue' Resource: - !If [UsingTeams, !Sub '${MicrosoftChannelSecret}', !Ref AWS::NoValue] - !If [UsingSlack, !Sub '${SlackChannelSecret}', !Ref AWS::NoValue] - !If [UsingEventBridge, !Sub '${EventBusNameSecret}', !Ref AWS::NoValue] - !If [UsingChime, !Sub '${ChimeChannelSecret}', !Ref AWS::NoValue] - !If [UsingCrossAccountRole, !Sub '${AssumeRoleSecret}', !Ref AWS::NoValue] - !If - UsingMultiRegionTeams - !Sub - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${MicrosoftChannelSecret}' ]]} - !Ref AWS::NoValue - !If - UsingMultiRegionSlack - !Sub - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${SlackChannelSecret}' ]]} - !Ref AWS::NoValue - !If - UsingMultiRegionEventBridge - !Sub - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${EventBusNameSecret}' ]]} - !Ref AWS::NoValue - !If - UsingMultiRegionChime - !Sub - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${ChimeChannelSecret}' ]]} - !Ref AWS::NoValue - !If - UsingMultiRegionCrossAccountRole - !Sub - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${AssumeRoleSecret}' ]]} - !Ref AWS::NoValue - !Ref 'AWS::NoValue' - Effect: Allow Action: - 
health:DescribeAffectedAccountsForOrganization - health:DescribeAffectedEntitiesForOrganization - health:DescribeEventDetailsForOrganization - health:DescribeEventsForOrganization - health:DescribeEventDetails - health:DescribeEvents - health:DescribeEventTypes - health:DescribeAffectedEntities - organizations:ListAccounts - organizations:DescribeAccount Resource: "*" - Effect: Allow Action: - dynamodb:ListTables Resource: - !Sub 'arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:*' - !If [UsingMultiRegion, !Sub 'arn:aws:dynamodb:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] - Effect: Allow Action: - ses:SendEmail Resource: - !Sub 'arn:aws:ses:${AWS::Region}:${AWS::AccountId}:*' - !If [UsingMultiRegion, !Sub 'arn:aws:ses:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] - Effect: Allow Action: - dynamodb:UpdateTimeToLive - dynamodb:PutItem - dynamodb:DeleteItem - dynamodb:GetItem - dynamodb:Scan - dynamodb:Query - dynamodb:UpdateItem - dynamodb:UpdateTable - dynamodb:GetRecords Resource: !If [UsingMultiRegion, !GetAtt GlobalDDBTable.Arn, !GetAtt DynamoDBTable.Arn] - Effect: Allow Action: - events:PutEvents Resource: - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:event-bus/${EventBusName}' - !If [UsingMultiRegion, !Sub 'arn:aws:events:${SecondaryRegion}:${AWS::AccountId}:event-bus/${EventBusName}', !Ref AWS::NoValue] - !If - UsingAccountIds - Effect: Allow Action: - s3:GetObject Resource: !Sub 'arn:aws:s3:::${S3Bucket}/${AccountIDs}' - !Ref 'AWS::NoValue' - !If - UsingCrossAccountRole - Effect: Allow Action: - sts:AssumeRole Resource: !Ref ManagementAccountRoleArn - !Ref 'AWS::NoValue' LambdaSchedule: Type: 'AWS::Events::Rule' Properties: Description: Lambda trigger Event ScheduleExpression: rate(1 minute) State: ENABLED Targets: - Arn: !GetAtt LambdaFunction.Arn Id: LambdaSchedule LambdaSchedulePermission: Type: 'AWS::Lambda::Permission' Properties: Action: 'lambda:InvokeFunction' FunctionName: !GetAtt LambdaFunction.Arn Principal: 
events.amazonaws.com SourceArn: !GetAtt LambdaSchedule.Arn MicrosoftChannelSecret: Type: 'AWS::SecretsManager::Secret' Condition: UsingTeams Properties: Name: MicrosoftChannelID Description: Microsoft Channel ID Secret ReplicaRegions: !If - UsingMultiRegion - [{ Region: !Sub '${SecondaryRegion}' }] - !Ref "AWS::NoValue" SecretString: Ref: MicrosoftTeamsWebhookURL Tags: - Key: HealthCheckMicrosoft Value: ChannelID SlackChannelSecret: Type: 'AWS::SecretsManager::Secret' Condition: UsingSlack Properties: Name: SlackChannelID Description: Slack Channel ID Secret ReplicaRegions: !If - UsingMultiRegion - [{ Region: !Sub '${SecondaryRegion}' }] - !Ref "AWS::NoValue" SecretString: Ref: SlackWebhookURL Tags: - Key: HealthCheckSlack Value: ChannelID EventBusNameSecret: Type: 'AWS::SecretsManager::Secret' Condition: UsingEventBridge Properties: Name: EventBusName Description: EventBus Name Secret ReplicaRegions: !If - UsingMultiRegion - [{ Region: !Sub '${SecondaryRegion}' }] - !Ref "AWS::NoValue" SecretString: Ref: EventBusName Tags: - Key: EventBusName Value: ChannelID ChimeChannelSecret: Type: 'AWS::SecretsManager::Secret' Condition: UsingChime Properties: Name: ChimeChannelID Description: Chime Channel ID Secret ReplicaRegions: !If - UsingMultiRegion - [{ Region: !Sub '${SecondaryRegion}' }] - !Ref "AWS::NoValue" SecretString: Ref: AmazonChimeWebhookURL Tags: - Key: HealthCheckChime Value: ChannelID AssumeRoleSecret: Type: 'AWS::SecretsManager::Secret' Condition: UsingCrossAccountRole Properties: Name: AssumeRoleArn Description: Management account role for AHA to assume ReplicaRegions: !If - UsingMultiRegion - [{ Region: !Sub '${SecondaryRegion}' }] - !Ref "AWS::NoValue" SecretString: Ref: ManagementAccountRoleArn Tags: - Key: AssumeRoleArn Value: ChannelID LambdaFunction: Type: 'AWS::Lambda::Function' Properties: Description: Lambda function that runs AHA Code: S3Bucket: Ref: S3Bucket S3Key: Ref: S3Key Handler: handler.main MemorySize: 128 Timeout: 600 Role: 'Fn::Sub': 
'${LambdaExecutionRole.Arn}' Runtime: python3.11 Environment: Variables: Slack: !If [UsingSlack, "True", !Ref 'AWS::NoValue'] Teams: !If [UsingTeams, "True", !Ref 'AWS::NoValue'] Chime: !If [UsingChime, "True", !Ref 'AWS::NoValue'] Eventbridge: !If [UsingEventBridge, "True", !Ref 'AWS::NoValue'] ACCOUNT_IDS: Ref: AccountIDs REGIONS: Ref: Regions S3_BUCKET: Ref: S3Bucket FROM_EMAIL: Ref: FromEmail TO_EMAIL: Ref: ToEmail EMAIL_SUBJECT: Ref: Subject DYNAMODB_TABLE: !If [UsingMultiRegion, !Ref GlobalDDBTable, !Ref DynamoDBTable] EVENT_SEARCH_BACK: Ref: EventSearchBack ORG_STATUS: Ref: AWSOrganizationsEnabled HEALTH_EVENT_TYPE: Ref: AWSHealthEventType MANAGEMENT_ROLE_ARN: Ref: ManagementAccountRoleArn ================================================ FILE: CFN_MGMT_ROLE.yml ================================================ AWSTemplateFormatVersion: "2010-09-09" Description: Deploy Cross-Account Role for PHD access Parameters: OrgMemberAccountId: Type: String AllowedPattern: '^\d{12}$' Description: AWS Account ID of the AWS Organizations Member Account that will run AWS Health Aware Resources: AWSHealthAwareRoleForPHDEvents: Type: "AWS::IAM::Role" Properties: Description: "Grants access to PHD events" Path: / AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Action: - sts:AssumeRole Effect: Allow Principal: AWS: !Sub 'arn:aws:iam::${OrgMemberAccountId}:root' Policies: - PolicyName: AllowHealthCalls PolicyDocument: Statement: - Effect: Allow Action: - health:DescribeAffectedAccountsForOrganization - health:DescribeAffectedEntitiesForOrganization - health:DescribeEventDetailsForOrganization - health:DescribeEventsForOrganization - health:DescribeEventDetails - health:DescribeEvents - health:DescribeEventTypes - health:DescribeAffectedEntities Resource: "*" - PolicyName: AllowsDescribeOrg PolicyDocument: Statement: - Effect: Allow Action: - organizations:ListAccounts - organizations:ListAWSServiceAccessForOrganization - organizations:DescribeAccount Resource: "*" 
Outputs: AWSHealthAwareRoleForPHDEventsArn: Value: !GetAtt AWSHealthAwareRoleForPHDEvents.Arn ================================================ FILE: CODE_OF_CONDUCT.md ================================================ ## Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing Guidelines Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. Please read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution. ## Reporting Bugs/Feature Requests We welcome you to use the GitHub issue tracker to report bugs or suggest features. When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: * A reproducible test case or series of steps * The version of our code being used * Any modifications you've made relevant to the bug * Anything unusual about your environment or deployment ## Contributing via Pull Requests Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 1. You are working against the latest source on the *main* branch. 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 3. 
You open an issue to discuss any significant work - we would hate for your time to be wasted. To send us a pull request, please: 1. Fork the repository. 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 3. Ensure local tests pass. 4. Commit to your fork using clear commit messages. 5. Send us a pull request, answering any default questions in the pull request interface. 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. ## Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. ## Security issue notifications If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. ## Licensing See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
================================================ FILE: ExcludeAccountIDs(sample).csv ================================================ 000000000000 111111111111 ================================================ FILE: LICENSE ================================================ Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ ![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha_banner.png?raw=1) **Table of Contents** - [Introduction](#introduction) - [What's New](#whats-new) - [Architecture](#architecture) - [Single Region](#single-region) - [Multi Region](#multi-region) - [Created AWS Resources](#created-aws-resources) - [Configuring an Endpoint](#configuring-an-endpoint) - [Creating a Amazon Chime Webhook URL](#creating-a-amazon-chime-webhook-url) - [Creating a Slack Webhook URL](#creating-a-slack-webhook-url) - [Creating a Microsoft Teams Webhook URL](#creating-a-microsoft-teams-webhook-url) - [Configuring an Email](#configuring-an-email) - [Creating a Amazon EventBridge Ingestion ARN](#creating-a-amazon-eventbridge-ingestion-arn) - [Deployment Options](#deployment-options) - [Using AWS Health Delegated Administrator with AHA](#using-aws-health-delegated-administrator-with-aha) - [CloudFormation](#cloudformation) - [AHA Without AWS Organizations using CloudFormation](#aha-without-aws-organizations-using-cloudformation) - [Prerequisites](#prerequisites) - [Deployment](#deployment) - [AHA With AWS Organizations on Management Account using CloudFormation](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-cloudformation) - [Prerequisites](#prerequisites-1) - [Deployment](#deployment-1) - [AHA With AWS Organizations on Member Account using CloudFormation](#aha-with-aws-organizations-on-member-account-using-cloudformation) - [Prerequisites](#prerequisites-2) - [Deployment](#deployment-2) - [Terraform](#terraform) - [AHA Without AWS Organizations using Terraform](#aha-without-aws-organizations-using-terraform) - [Prerequisites](#prerequisites-3) - [Deployment - Terraform](#deployment---terraform) - [AHA WITH AWS Organizations on Management Account using 
Terraform](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-terraform) - [Deployment - Terraform](#deployment---terraform-1) - [AHA WITH AWS Organizations on Member Account using Terraform](#aha-with-aws-organizations-on-member-account-using-terraform) - [Deployment - Terraform](#deployment---terraform-2) - [Updating using CloudFormation](#updating-using-cloudformation) - [Updating using Terraform](#updating-using-terraform) - [New Features](#new-features) - [Troubleshooting](#troubleshooting) # Introduction AWS Health Aware (AHA) is an automated notification tool for sending well-formatted AWS Health Alerts to Amazon Chime, Slack, Microsoft Teams, E-mail or an AWS Eventbridge compatible endpoint as long as you have Business or Enterprise Support. # What's New Release 2.3 introduces runtime performance improvements, terraform updates, allows use of Slack Workflow 2.0 webhooks (triggers), general fixes and documentation updates. # Architecture ## Single Region ![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha-arch-single-region.png?raw=1) ## Multi Region ![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha-arch-multi-region.png?raw=1) ## Created AWS Resources | Resource | Description | | ------------- | ------------------------------ | | `DynamoDBTable` | DynamoDB Table used to store Event ARNs, updates and TTL | | `ChimeChannelSecret` | Webhook URL for Amazon Chime stored in AWS Secrets Manager | | `EventBusNameSecret` | EventBus ARN for Amazon EventBridge stored in AWS Secrets Manager | | `LambdaExecutionRole` | IAM role used for LambdaFunction | | `LambdaFunction` | Main Lambda function that reads from AWS Health API, sends to endpoints and writes to DynamoDB | | `LambdaSchedule` | Amazon EventBridge rule that runs every min to invoke LambdaFunction | | `LambdaSchedulePermission` | IAM Role used for LambdaSchedule | | `MicrosoftChannelSecret` | Webhook URL for Microsoft 
Teams stored in AWS Secrets Manager | | `SlackChannelSecret` | Webhook URL for Slack stored in AWS Secrets Manager | # Configuring an Endpoint AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use any of these you'll need to set it up before-hand as some of these are done on 3rd party websites. We'll go over some of the common ones here. ## Creating a Amazon Chime Webhook URL **You will need to have access to create a Amazon Chime room and manage webhooks.** 1. Create a new [chat room](https://docs.aws.amazon.com/chime/latest/ug/chime-chat-room.html) for events (i.e. aws_events). 2. In the chat room created in step 1, **click** on the gear icon and **click** *manage webhooks and bots*. 3. **Click** *Add webhook*. 4. **Type** a name for the bot (e.g. AWS Health Bot) and **click** *Create*. 5. **Click** *Copy URL*, we will need it for the deployment. ## Creating a Slack Webhook URL **You will need to have access to add a new channel and app to your Slack Workspace**. *Webhook* 1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events) 2. In your browser go to: workspace-name.slack.com/apps where workspace-name is the name of your Slack Workspace. 3. In the search bar, search for: *Incoming Webhooks* and **click** on it. 4. **Click** on *Add to Slack*. 5. From the dropdown **click** on the channel you created in step 1 and **click** *Add Incoming Webhooks integration*. 6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc. 7. For the deployment we will need the *Webhook URL*. *Workflow* 1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events) 2. Within Slack **click** on your workspace name drop down arrow in the upper left. **click on Tools > Workflow Builder** 3. **Click** Create in the upper right hand corner of the Workflow Builder and give your workflow a name **click** next. 4. 
**Click** on *select* next to **Webhook** and then **click** *add variable* add the following variables one at a time in the *Key* section. All *data type* will be *text*: -text -accounts -resources -service -region -status -start_time -event_arn -updates 5. When done you should have 9 variables, double check them as they are case sensitive and will be referenced. When checked **click** on *done* and *next*. 6. **Click** on *add step* and then on the add a workflow step **click** *add* next to *send a message*. 7. Under *send this message to:* select the channel you created in Step 1 in *message text* you can should recreate this following: ![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images//workflow.png?raw=1) 8. **Click** *save* and the **click** *publish* 9. For the deployment we will need the *Webhook URL*. ## Creating a Microsoft Teams Webhook URL **You will need to have access to add a new channel and app to your Microsoft Teams channel**. 1. Create a new [channel](https://docs.microsoft.com/en-us/microsoftteams/get-started-with-teams-create-your-first-teams-and-channels) for events (i.e. aws_events) 2. Within your Microsoft Team go to *Apps* 3. In the search bar, search for: *Incoming Webhook* and **click** on it. 4. **Click** on *Add to team*. 5. **Type** in the name of your on the channel your created in step 1 and **click** *Set up a connector*. 6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc. **Click** *Create* when done. 7. For the deployment we will need the webhook *URL* that is presented. ## Configuring an Email 1. You'll be able to send email alerts to one or many addresses. However, you must first [verify](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/verify-email-addresses-procedure.html) the email(s) in the Simple Email Service (SES) console. 2. AHA utilizes Amazon SES so all you need is to enter in a To: address and a From: address. 3. 
You *may* have to allow a rule in your environment so that the emails don't get labeled as SPAM. This will be something you have to congfigure on your own. ## Creating a Amazon EventBridge Ingestion ARN **Only required if you are going to be using EventBridge, you can create new with the instructions below or use an existing one**. 1. In the AWS Console, search for **Amazon EventBridge**. 2. On the left hand side, **click** *Event buses*. 3. Under *Custom event* bus **click** *Create event bus* 4. Give your Event bus a name and **click** *Create*. 5. For the deployment we will need the *Name* of the Event bus **(not the ARN, e.g. aha-eb01)**. # Deployment Options ## Using AWS Health Delegated Administrator with AHA >NOTE: For users with company restrictions of use/deployment of resources in the organization management account. > >On 2023-07-27, AWS Health released the [Delegated Administrator feature](https://docs.aws.amazon.com/health/latest/ug/delegated-administrator-organizational-view.html). By enabling an account as a delegated administrator, you can use AHA in Organization Mode without the need to create and assume the management account IAM role. To enable this feature: 1. Know the AWS Account ID of your AWS account you want to enable as a delegated administrator for AWS Health (e.g. 123456789012) 1. In the Org Management Account, run the command `aws organizations register-delegated-administrator --account-id ACCOUNT_ID --service-principal health.amazonaws.com` replacing ACCOUNT_ID with the ID of your Member Account 1. Deploy AHA in your deletegated administrator account using the steps for: 1. [AHA for users who ARE using AWS Organizations (CloudFormation)](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-cloudformation) 1. 
[AHA for users who ARE using AWS Organizations (Terraform)](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-terraform) ## CloudFormation There are 3 available ways to deploy AHA, all are done via the same CloudFormation template to make deployment as easy as possible. The 3 deployment methods for AHA are: 1. [**AHA for users WITHOUT AWS Organizations**](#aha-without-aws-organizations-using-cloudformation): Users NOT using AWS Organizations. 2. [**AHA for users WITH AWS Organizations (Management Account)**](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-cloudformation): Users who ARE using AWS Organizations and deploying in the top-level management account. 3. [**AHA for users WITH AWS Organizations (Member Account)**](#aha-with-aws-organizations-on-member-account-using-cloudformation): Users who ARE using AWS Organizations and deploying in a member account in the organization to assume a role in the top-level management account. ## AHA Without AWS Organizations using CloudFormation ### Prerequisites 1. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) 2. Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager. 3. If using Multi-Region, you must deploy the following 2 CloudFormation templates to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allows CFT Stacksets to launch AHA in another region - Launch the stack. 
- In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml) - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) - Launch the stack. ### Deployment 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` 2. In the root of this package you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** 3. Upload the .zip you created in Step 1 to an S3 in the same region you plan to deploy this in. 4. In your AWS console go to *CloudFormation*. 5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. 6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_DEPLOY_AHA.yml` **Click** *Next*. - In *Stack name* type a stack name (i.e. AHA-Deployment). - In *AWSOrganizationsEnabled* leave it set to default which is `No`. If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the steps for [AHA for users who ARE using AWS Organizations (Management Account)](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-cloudformation) or [AHA for users WITH AWS Organizations (Member Account)](#aha-with-aws-organizations-on-member-account-using-cloudformation) - In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. - In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 3 (e.g. my-aha-bucket). 
- In *S3Key* type ***just*** the name of the .zip file you created in Step 2 (e.g. aha-v1.8.zip). - In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. - In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. - In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. - In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). - In *ARN of the AWS Organizations Management Account assume role* leave it set to default None as this is only for customers using AWS Organizations. - In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. 7. Scroll to the bottom and **click** *Next*. 8. Scroll to the bottom and **click** *Next* again. 9. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. 10. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). ## AHA With AWS Organizations on Management or Delegated Administrator Account using CloudFormation ### Prerequisites 1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view.html) from the console or CLI, so that you can aggregate Health events for all accounts in your AWS Organization. 2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) 3. Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager in the **AWS Organizations Master Account**. 4. 
If using Multi-Region, you must deploy the following 2 CloudFormation templates to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allows CFT Stacksets to launch AHA in another region - Launch the stack. - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml) - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) - Launch the stack. ### Deployment 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` 2. In the root of this package you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** 3. Upload the .zip you created in Step 1 to an S3 in the same region you plan to deploy this in. 4. In your AWS console go to *CloudFormation*. 5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. 6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_DEPLOY_AHA.yml` **Click** *Next*. - In *Stack name* type a stack name (i.e. AHA-Deployment). - In *AWSOrganizationsEnabled* change the dropdown to `Yes`. 
If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-cloudformation) - In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. - In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 3 (e.g. my-aha-bucket). - In *S3Key* type ***just*** the name of the .zip file you created in Step 2 (e.g. aha-v1.8.zip). - In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. - In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. - In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. - In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated with (e.g. us-east-1, us-east-2). - In *ARN of the AWS Organizations Management Account assume role* leave it set to default None. - In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. 7. Scroll to the bottom and **click** *Next*. 8. Scroll to the bottom and **click** *Next* again. 9. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. 10. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). ## AHA With AWS Organizations on Member Account using CloudFormation > Note: On 2023-07-27, AWS Health released the Delegated Admin feature which enables AHA deployments in member accounts without the extra steps below. See: [Using AWS Health Delegated Administrator with AHA](#using-aws-health-delegated-administrator-with-aha) ### Prerequisites 1. 
[Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view.html) from the console or CLI, so that you can aggregate Health events for all accounts in your AWS Organization. 2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) 3. Have access to deploy Cloudformation Templates with the following resource: AWS IAM policies in the **AWS Organizations Master Account**. 4. If using Multi-Region, you must deploy the following 2 CloudFormation templates in the **Member Account** to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allows CFT Stacksets to launch AHA in another region - Launch the stack. - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml) - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) - Launch the stack. ### Deployment 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` 2. In your top-level management account AWS console go to *CloudFormation* 3. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. 4. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_MGMT_ROLE.yml` **Click** *Next*. - In *Stack name* type a stack name (i.e. aha-assume-role). 
- In *OrgMemberAccountId* put in the account id of the member account you plan to run AHA in (e.g. 000123456789). 5. Scroll to the bottom and **click** *Next*. 6. Scroll to the bottom and **click** *Next* again. 7. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. 8. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 1-2 minutes). This will create an IAM role with the necessary AWS Organizations and AWS Health API permissions for the member account to assume. 9. In the *Outputs* tab, there will be a value for *AWSHealthAwareRoleForPHDEventsArn* (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201), copy that down as you will need it for step 14. 10. Back In the root of the package you downloaded/cloned you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** 11. Upload the .zip you created in Step 11 to an S3 in the same region you plan to deploy this in. 12. Login to the member account you plan to deploy this in and in your AWS console go to *CloudFormation*. 13. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. 14. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_DEPLOY_AHA.yml` **Click** *Next*. - In *Stack name* type a stack name (i.e. AHA-Deployment). - In *AWSOrganizationsEnabled* change the dropdown to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-cloudformation) - In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. - In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 12 (e.g. my-aha-bucket). 
- In *S3Key* type ***just*** the name of the .zip file you created in Step 11 (e.g. aha-v1.8.zip). - In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. - In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. - In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. - In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated with (e.g. us-east-1, us-east-2). - In *ManagementAccountRoleArn* enter in the full IAM arn from step 10 (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201) - In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. 15. Scroll to the bottom and **click** *Next*. 16. Scroll to the bottom and **click** *Next* again. 17. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. 18. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). ## Terraform There are 3 available ways to deploy AHA, all are done via the same Terraform template to make deployment as easy as possible. **NOTE: ** AHA code is tested with Terraform version v1.0.9, please make sure to have minimum terraform verson of v1.0.9 installed. The 3 deployment methods for AHA are: 1. [**AHA for users NOT using AWS Organizations using Terraform**](#aha-without-aws-organizations-using-terraform): Users NOT using AWS Organizations. 2. [**AHA for users WITH AWS Organizations using Terraform (Management Account)**](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-terraform): Users who ARE using AWS Organizations and deploying in the top-level management account. 3. 
[**AHA for users WITH AWS Organizations using Terraform (Member Account)**](#aha-with-aws-organizations-on-member-account-using-terraform): Users who ARE using AWS Organizations and deploying in a member account in the organization to assume a role in the top-level management account. ## AHA Without AWS Organizations using Terraform ### Prerequisites 1. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) 2. Have access to deploy Terraform Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager. **NOTE: ** For Multi region deployment, DynamoDB table will be created with PAY_PER_REQUEST billing mode insted of PROVISIONED due to limitation with terraform. ### Deployment - Terraform 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` ``` $ git clone https://github.com/aws-samples/aws-health-aware.git $ cd aws-health-aware/terraform/Terraform_DEPLOY_AHA ``` 2. Update parameters file **terraform.tfvars** as below - *aha_primary_region* - change to region where you want to deploy AHA solution - *aha_secondary_region* - Required if needed to deploy in AHA solution in multiple regions, change to another region (Secondary) where you want to deploy AHA solution, Otherwise leave to default empty value. - *AWSOrganizationsEnabled* - Leave it to default which is `No`. 
If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the steps for [AHA for users who ARE using AWS Organizations (Management Account)](#aha-with-aws-organizations-on-management-or-delegated-administrator-account-using-terraform)] or [AHA for users WITH AWS Organizations (Member Account)](#aha-with-aws-organizations-on-member-account-using-terraform) - *AWSHealthEventType* - select whether you want to receive *all* event types or *only* issues. - *Communications Channels* section - enter the URLs, Emails and/or ARN of the endpoints you configured previously. - *Email Setup* section - enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. - *EventSearchBack* - enter in the amount of hours you want to search back for events. Default is 1 hour. - *Regions* - enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). - *ManagementAccountRoleArn* - Leave it default empty value - *ExcludeAccountIDs* - type ***just*** the name of the .csv file you want to upload if needed to exclude accounts from monitoring, else leave it to empty. - *ManagementAccountRoleArn* - In ARN of the AWS Organizations Management Account assume role leave it set to default None as this is only for customers using AWS Organizations. 3. Deploy the solution using terraform commands below. ``` $ terraform init $ terraform plan $ terraform apply ``` ## AHA with AWS Organizations on Management or Delegated Administrator Account using Terraform 1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view.html) from the console or CLI, so that you can aggregate Health events for all accounts in your AWS Organization. 2. 
Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) **NOTE: ** For Multi region deployment, DynamoDB table will be created with PAY_PER_REQUEST billing mode insted of PROVISIONED due to limitation with terraform. ### Deployment - Terraform 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` ``` $ git clone https://github.com/aws-samples/aws-health-aware.git $ cd aws-health-aware/terraform/Terraform_DEPLOY_AHA ``` 5. Update parameters file **terraform.tfvars** as below - *aha_primary_region* - change to region where you want to deploy AHA solution - *aha_secondary_region* - Required if needed to deploy in AHA solution in multiple regions, change to another region (Secondary) where you want to deploy AHA solution, Otherwise leave to default empty value. - *AWSOrganizationsEnabled* - change the value to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-terraform) - *AWSHealthEventType* - select whether you want to receive *all* event types or *only* issues. - *Communications Channels* section - enter the URLs, Emails and/or ARN of the endpoints you configured previously. - *Email Setup* section - enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. - *EventSearchBack* - enter in the amount of hours you want to search back for events. Default is 1 hour. - *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). - *ManagementAccountRoleArn* - Leave it default empty value - *S3Bucket* - type ***just*** the name of the S3 bucket where exclude file .csv you upload. 
leave it empty if exclude Account feature is not used. - *ExcludeAccountIDs* - type ***just*** the name of the .csv file you want to upload if needed to exclude accounts from monitoring, else leave it to empty. - *ManagementAccountRoleArn* - In ARN of the AWS Organizations Management Account assume role leave it set to default None, unless you are using a member account instead of the management account. Instructions for this configuration are in the next section. 3. Deploy the solution using terraform commands below. ``` $ terraform init $ terraform plan $ terraform apply ``` ## AHA WITH AWS Organizations on Member Account using Terraform > Note: On 2023-07-27, AWS Health released the Delegated Admin feature which enables AHA deployments in member accounts without the extra steps below. See: [Using AWS Health Delegated Administrator with AHA](#using-aws-health-delegated-administrator-with-aha) 1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view.html) from the console or CLI, so that you can aggregate Health events for all accounts in your AWS Organization. 2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) **NOTE: ** For Multi region deployment, DynamoDB table will be created with PAY_PER_REQUEST billing mode insted of PROVISIONED due to limitation with terraform. ### Deployment - Terraform 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` ``` $ git clone https://github.com/aws-samples/aws-health-aware.git ``` 2. 
In your top-level management account deploy terraform module Terraform_MGMT_ROLE.tf to create Cross-Account Role for PHD access ``` $ cd aws-health-aware/terraform/Terraform_MGMT_ROLE $ terraform init $ terraform plan $ terraform apply Input *OrgMemberAccountId* Enter the account id of the member account you plan to run AHA in (e.g. 000123456789). ``` 3. Wait for deployment to complete. This will create an IAM role with the necessary AWS Organizations and AWS Health API permissions for the member account to assume. and note the **AWSHealthAwareRoleForPHDEventsArn** role name, this will be used during deploying solution in member account 4. In the *Outputs* section, there will be a value for *AWSHealthAwareRoleForPHDEventsArn* (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201), copy that down as you will need to update params file (variable ManagementAccountRoleArn). 4. Change directory to **terraform/Terraform_DEPLOY_AHA** to deploy the solution 5. Update parameters file **terraform.tfvars** as below - *aha_primary_region* - change to region where you want to deploy AHA solution - *aha_secondary_region* - Required if needed to deploy in AHA solution in multiple regions, change to another region (Secondary) where you want to deploy AHA solution, Otherwise leave to default empty value. - *AWSOrganizationsEnabled* - change the value to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-terraform) - *AWSHealthEventType* - select whether you want to receive *all* event types or *only* issues. - *Communications Channels* section - enter the URLs, Emails and/or ARN of the endpoints you configured previously. - *Email Setup* section - enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. 
- *EventSearchBack* - enter in the amount of hours you want to search back for events. Default is 1 hour. - *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). - *ManagementAccountRoleArn* - Enter in the full IAM arn from step 10 (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201) - *S3Bucket* - type ***just*** the name of the S3 bucket where exclude file .csv you upload. leave it empty if exclude Account feature is not used. - *ExcludeAccountIDs* - type ***just*** the name of the .csv file you want to upload if needed to exclude accounts from monitoring, else leave it to empty. 4. Deploy the solution using terraform commands below. ``` $ terraform init $ terraform plan $ terraform apply ``` # Updating using CloudFormation **Until this project is migrated to the AWS Serverless Application Model (SAM), updates will have to be done as described below:** 1. Download the updated CloudFormation Template .yml file and 2 `.py` files. 2. Zip up the 2 `.py` files and name the .zip with a different version number than before (e.g. if the .zip you originally uploaded is aha-v1.8.zip the new one should be aha-v1.9.zip) 3. In the AWS CloudFormation console **click** on the name of your stack, then **click** *Update*. 4. In the *Prepare template* section **click** *Replace current template*, **click** *Upload a template file*, **click** *Choose file*, select the newer `CFN_DEPLOY_AHA.yml` file you downloaded and finally **click** *Next*. 5. In the *S3Key* text box change the version number in the name of the .zip to match name of the .zip you uploaded in Step 2 (The name of the .zip has to be different for CloudFormation to recognize a change). **Click** *Next*. 6. At the next screen **click** *Next* and finally **click** *Update stack*. This will now upgrade your environment to the latest version you downloaded. 
**If for some reason, you still have issues after updating, you can easily just delete the stack and redeploy. The infrastructure can be destroyed and rebuilt within minutes through CloudFormation.** # Updating using Terraform **Until this project is migrated to the AWS Serverless Application Model (SAM), updates will have to be done as described below:** 1. Pull the latest code from git repository for AHA. 2. Update the parameters file terraform.tfvars per your requirement 3. Copy the terraform template files to directory where your previous state exists 4. Deploy the templates as below ``` $ cd aws-health-aware $ git pull https://github.com/aws-samples/aws-health-aware.git $ cd terraform/Terraform_DEPLOY_AHA $ terraform init $ terraform plan - This command should show any difference existing config and latest code. $ terraform apply ``` **If for some reason, you still have issues after updating, you can easily just delete the stack and redeploy. The infrastructure can be destroyed and rebuilt within minutes through Terraform.** # New Features *Release 2.2* We are happy to announce the launch of new enhancements to AHA. Please try them out and keep sending us your feedback! 1. A revised schema for AHA events sent to EventBridge which enables new filtering and routing options. See the [new AHA event schema readme](new_aha_event_schema.md) for more detail. 2. Multi-region deployment option 3. Updated file names for improved clarity 4. Ability to filter accounts (Refer to AccountIDs CFN parameter for more info on how to exclude accounts from AHA notifications) 4. Ability to view Account Names for a given Account ID in the PHD alerts 5. If you are running AHA with the Non-Org mode, AHA will send the Account #' and resource(s) impacts if applicable for a given alert 6. Ability to deploy AHA with the Org mode on a member account 7. Support for a new Health Event Type - "Investigation" 8. 
Terraform support to deploy the solution

# Troubleshooting

* If for whatever reason you need to update the Webhook URL, just update the CloudFormation or Terraform template with the new Webhook URL.
* If you are expecting an event and it did not show up, it may be an oddly formed event. Take a look at *CloudWatch > Log groups* and search for the name of your Lambda function. See what the error is and reach out to us by [email](mailto:aha-builders@amazon.com) for help.
* If you see errors related to duplicate secrets during deployment, try deleting the secrets manually and redeploying the solution. Example command to delete the SlackChannelID secret in the us-east-1 region:
```
$ aws secretsmanager delete-secret --secret-id SlackChannelID --force-delete-without-recovery --region us-east-1
```
* If you want to exclude certain accounts from notifications, confirm your exclusions file matches the format of the [sample ExcludeAccountIDs.csv file](ExcludeAccountIDs(sample).csv) with one account ID per line and no trailing commas (trailing commas indicate a null cell).
* If your accounts listed in the CSV file are not excluded, check the CloudWatch log group for the AHA Lambda function for the message "Key filename is not a .csv file" as an indicator of any issues with your file.
================================================ FILE: handler.py ================================================ import json import logging from functools import lru_cache import boto3 import os import socket from dateutil import parser from datetime import datetime, timedelta from urllib.request import Request, urlopen, URLError, HTTPError from botocore.config import Config from botocore.exceptions import ClientError from messagegenerator import ( get_message_for_slack, get_org_message_for_slack, get_message_for_chime, get_org_message_for_chime, get_message_for_teams, get_org_message_for_teams, get_message_for_email, get_org_message_for_email, get_detail_for_eventbridge, ) logger = logging.getLogger() logger.setLevel(os.environ.get("LOG_LEVEL", "INFO").upper()) class CachedSecrets: def __init__(self, client): self.client = client @lru_cache def get_secret_value(self, *args, **kwargs): logger.debug(f"Getting secret {kwargs}") return self.client.get_secret_value(*args, **kwargs) class AWSApi: @lru_cache def client(self, *args, **kwargs): logger.debug(f"Returning new boto3 client for: {args}") return boto3.client(*args, **kwargs) @lru_cache def resource(self, resource_name): logger.debug(f"Returning new boto3 resource for: {resource_name}") return boto3.resource(resource_name) def cache_clear(self): self.client.cache_clear() self.resource.cache_clear() @lru_cache def secretsmanager(self, **kwargs): client = boto3.client("secretsmanager", **kwargs) return CachedSecrets(client) print("boto3 version: ", boto3.__version__) # query active health API endpoint health_dns = socket.gethostbyname_ex("global.health.amazonaws.com") (current_endpoint, global_endpoint, ip_endpoint) = health_dns health_active_list = current_endpoint.split(".") health_active_region = health_active_list[1] print("current health region: ", health_active_region) # create a boto3 health client w/ backoff/retry config = Config( region_name=health_active_region, retries=dict( max_attempts=10 # org view 
apis have a lower tps than the single # account apis so we need to use larger # backoff/retry values than than the boto defaults ), ) aws_api = AWSApi() # TODO decide if account_name should be blank on error # Get Account Name def get_account_name(account_id): org_client = get_sts_token("organizations") try: account_name = org_client.describe_account(AccountId=account_id)["Account"][ "Name" ] except Exception: account_name = account_id return account_name def send_alert(event_details, affected_accounts, affected_entities, event_type): slack_url = get_secrets()["slack"] teams_url = get_secrets()["teams"] chime_url = get_secrets()["chime"] SENDER = os.environ["FROM_EMAIL"] RECIPIENT = os.environ["TO_EMAIL"] event_bus_name = get_secrets()["eventbusname"] # get the list of resources from the array of affected entities resources = get_resources_from_entities(affected_entities) if "None" not in event_bus_name: try: print("Sending the alert to Event Bridge") send_to_eventbridge( get_detail_for_eventbridge(event_details, affected_entities), event_type, resources, event_bus_name, ) except HTTPError as e: print( "Got an error while sending message to EventBridge: ", e.code, e.reason ) except URLError as e: print("Server connection failed: ", e.reason) pass #Slack Notification Handling if slack_url != "None": for slack_webhook_type in ["services", "triggers", "workflows"]: if ("hooks.slack.com/" + slack_webhook_type) in slack_url: print("Sending the alert to Slack Webhook Channel") try: send_to_slack( get_message_for_slack( event_details, event_type, affected_accounts, resources, slack_webhook_type, ), slack_url, ) break except HTTPError as e: print("Got an error while sending message to Slack: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass else: print("Unsupported format in Slack Webhook") if "office.com/webhook" in teams_url: try: print("Sending the alert to Teams") send_to_teams( get_message_for_teams( event_details, 
event_type, affected_accounts, resources ), teams_url, ) except HTTPError as e: print("Got an error while sending message to Teams: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass # validate sender and recipient's email addresses if "none@domain.com" not in SENDER and RECIPIENT: try: print("Sending the alert to the emails") send_email(event_details, event_type, affected_accounts, resources) except HTTPError as e: print("Got an error while sending message to Email: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass if "hooks.chime.aws/incomingwebhooks" in chime_url: try: print("Sending the alert to Chime channel") send_to_chime( get_message_for_chime( event_details, event_type, affected_accounts, resources ), chime_url, ) except HTTPError as e: print("Got an error while sending message to Chime: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass def send_org_alert( event_details, affected_org_accounts, affected_org_entities, event_type ): slack_url = get_secrets()["slack"] teams_url = get_secrets()["teams"] chime_url = get_secrets()["chime"] SENDER = os.environ["FROM_EMAIL"] RECIPIENT = os.environ["TO_EMAIL"] event_bus_name = get_secrets()["eventbusname"] # get the list of resources from the array of affected entities resources = get_resources_from_entities(affected_org_entities) if "None" not in event_bus_name: try: print("Sending the org alert to Event Bridge") send_to_eventbridge( get_detail_for_eventbridge(event_details, affected_org_entities), event_type, resources, event_bus_name, ) except HTTPError as e: print( "Got an error while sending message to EventBridge: ", e.code, e.reason ) except URLError as e: print("Server connection failed: ", e.reason) pass #Slack Notification Handling if slack_url != "None": for slack_webhook_type in ["services", "triggers", "workflows"]: if ("hooks.slack.com/" + slack_webhook_type) in slack_url: 
print("Sending the alert to Slack Webhook Channel") try: send_to_slack( get_message_for_slack( event_details, event_type, affected_org_accounts, resources, slack_webhook_type, ), slack_url, ) break except HTTPError as e: print("Got an error while sending message to Slack: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass else: print("Unsupported format in Slack Webhook") if "office.com/webhook" in teams_url: try: print("Sending the alert to Teams") send_to_teams( get_org_message_for_teams( event_details, event_type, affected_org_accounts, resources ), teams_url, ) except HTTPError as e: print("Got an error while sending message to Teams: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass # validate sender and recipient's email addresses if "none@domain.com" not in SENDER and RECIPIENT: try: print("Sending the alert to the emails") send_org_email(event_details, event_type, affected_org_accounts, resources) except HTTPError as e: print("Got an error while sending message to Email: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass if "hooks.chime.aws/incomingwebhooks" in chime_url: try: print("Sending the alert to Chime channel") send_to_chime( get_org_message_for_chime( event_details, event_type, affected_org_accounts, resources ), chime_url, ) except HTTPError as e: print("Got an error while sending message to Chime: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass def send_to_slack(message, webhookurl): slack_message = message req = Request( webhookurl, data=json.dumps(slack_message).encode("utf-8"), headers={"content-type": "application/json"}, ) try: response = urlopen(req) response.read() except HTTPError as e: print("Request failed : ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason, e.reason) def send_to_chime(message, webhookurl): chime_message = 
{"Content": message} req = Request( webhookurl, data=json.dumps(chime_message).encode("utf-8"), headers={"content-Type": "application/json"}, ) try: response = urlopen(req) response.read() except HTTPError as e: print("Request failed : ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason, e.reason) def send_to_teams(message, webhookurl): teams_message = message req = Request( webhookurl, data=json.dumps(teams_message).encode("utf-8"), headers={"content-type": "application/json"}, ) try: response = urlopen(req) response.read() except HTTPError as e: print("Request failed : ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason, e.reason) def send_email(event_details, eventType, affected_accounts, affected_entities): SENDER = os.environ["FROM_EMAIL"] RECIPIENT = os.environ["TO_EMAIL"].split(",") # AWS_REGIONS = "us-east-1" AWS_REGION = os.environ["AWS_REGION"] SUBJECT = os.environ["EMAIL_SUBJECT"] BODY_HTML = get_message_for_email( event_details, eventType, affected_accounts, affected_entities ) client = aws_api.client("ses", AWS_REGION) response = client.send_email( Source=SENDER, Destination={"ToAddresses": RECIPIENT}, Message={ "Body": { "Html": {"Data": BODY_HTML}, }, "Subject": { "Charset": "UTF-8", "Data": SUBJECT, }, }, ) def send_org_email( event_details, eventType, affected_org_accounts, affected_org_entities ): SENDER = os.environ["FROM_EMAIL"] RECIPIENT = os.environ["TO_EMAIL"].split(",") # AWS_REGION = "us-east-1" AWS_REGION = os.environ["AWS_REGION"] SUBJECT = os.environ["EMAIL_SUBJECT"] BODY_HTML = get_org_message_for_email( event_details, eventType, affected_org_accounts, affected_org_entities ) client = aws_api.client("ses", AWS_REGION) response = client.send_email( Source=SENDER, Destination={"ToAddresses": RECIPIENT}, Message={ "Body": { "Html": {"Data": BODY_HTML}, }, "Subject": { "Charset": "UTF-8", "Data": SUBJECT, }, }, ) # non-organization view affected accounts def 
get_health_accounts(health_client, event, event_arn): affected_accounts = [] event_accounts_paginator = health_client.get_paginator("describe_affected_entities") event_accounts_page_iterator = event_accounts_paginator.paginate( filter={"eventArns": [event_arn]} ) for event_accounts_page in event_accounts_page_iterator: json_event_accounts = json.dumps(event_accounts_page, default=myconverter) parsed_event_accounts = json.loads(json_event_accounts) try: affected_accounts.append( parsed_event_accounts["entities"][0]["awsAccountId"] ) except Exception: affected_accounts = [] return affected_accounts # organization view affected accounts def get_health_org_accounts(health_client, event, event_arn): affected_org_accounts = [] event_accounts_paginator = health_client.get_paginator( "describe_affected_accounts_for_organization" ) event_accounts_page_iterator = event_accounts_paginator.paginate(eventArn=event_arn) for event_accounts_page in event_accounts_page_iterator: json_event_accounts = json.dumps(event_accounts_page, default=myconverter) parsed_event_accounts = json.loads(json_event_accounts) affected_org_accounts = affected_org_accounts + ( parsed_event_accounts["affectedAccounts"] ) return affected_org_accounts # get the array of affected entities for all affected accounts and return as an array of JSON objects def get_affected_entities(health_client, event_arn, affected_accounts, is_org_mode): affected_entity_array = [] for account in affected_accounts: account_name = "" if is_org_mode: event_entities_paginator = health_client.get_paginator( "describe_affected_entities_for_organization" ) event_entities_page_iterator = event_entities_paginator.paginate( organizationEntityFilters=[ {"awsAccountId": account, "eventArn": event_arn} ] ) account_name = get_account_name(account) else: event_entities_paginator = health_client.get_paginator( "describe_affected_entities" ) event_entities_page_iterator = event_entities_paginator.paginate( filter={"eventArns": [event_arn]} ) 
for event_entities_page in event_entities_page_iterator: json_event_entities = json.dumps(event_entities_page, default=myconverter) parsed_event_entities = json.loads(json_event_entities) for entity in parsed_event_entities["entities"]: entity.pop( "entityArn" ) # remove entityArn to avoid confusion with the arn of the entityValue (not present) entity.pop("eventArn") # remove eventArn duplicate of detail.arn entity.pop("lastUpdatedTime") # remove for brevity if is_org_mode: entity["awsAccountName"] = account_name affected_entity_array.append(entity) return affected_entity_array # COMMON # get the entityValues from the array and return as an array (of strings) for use with chat channels # don't list entities which are accounts (handled separately for chat applications) def get_resources_from_entities(affected_entity_array): resources = [] for entity in affected_entity_array: if entity["entityValue"] == "UNKNOWN": # UNKNOWN indicates a public/non-accountspecific event, no resources pass elif ( entity["entityValue"] != "AWS_ACCOUNT" and entity["entityValue"] != entity["awsAccountId"] ): resources.append(entity["entityValue"]) return resources # For Customers using AWS Organizations def update_org_ddb( event_arn, str_update, status_code, event_details, affected_org_accounts, affected_org_entities, ): # open dynamoDB dynamodb = aws_api.resource("dynamodb") ddb_table = os.environ["DYNAMODB_TABLE"] aha_ddb_table = dynamodb.Table(ddb_table) event_latestDescription = event_details["successfulSet"][0]["eventDescription"][ "latestDescription" ] # set time parameters delta_hours = os.environ["EVENT_SEARCH_BACK"] delta_hours = int(delta_hours) delta_hours_sec = delta_hours * 3600 # formatting time in seconds srt_ddb_format_full = "%Y-%m-%d %H:%M:%S" str_ddb_format_sec = "%s" sec_now = datetime.strftime(datetime.now(), str_ddb_format_sec) # check if event arn already exists try: response = aha_ddb_table.get_item(Key={"arn": event_arn}) except ClientError as e: 
print(e.response["Error"]["Message"]) else: is_item_response = response.get("Item") if is_item_response == None: print(datetime.now().strftime(srt_ddb_format_full) + ": record not found") # write to dynamodb response = aha_ddb_table.put_item( Item={ "arn": event_arn, "lastUpdatedTime": str_update, "added": sec_now, "ttl": int(sec_now) + delta_hours_sec + 86400, "statusCode": status_code, "affectedAccountIDs": affected_org_accounts, "latestDescription": event_latestDescription # Cleanup: DynamoDB entry deleted 24 hours after last update } ) affected_org_accounts_details = [ f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts ] # send to configured endpoints if status_code != "closed": send_org_alert( event_details, affected_org_accounts_details, affected_org_entities, event_type="create", ) else: send_org_alert( event_details, affected_org_accounts_details, affected_org_entities, event_type="resolve", ) else: item = response["Item"] if item["lastUpdatedTime"] != str_update and ( item["statusCode"] != status_code or item["latestDescription"] != event_latestDescription or item["affectedAccountIDs"] != affected_org_accounts ): print( datetime.now().strftime(srt_ddb_format_full) + ": last Update is different" ) # write to dynamodb response = aha_ddb_table.put_item( Item={ "arn": event_arn, "lastUpdatedTime": str_update, "added": sec_now, "ttl": int(sec_now) + delta_hours_sec + 86400, "statusCode": status_code, "affectedAccountIDs": affected_org_accounts, "latestDescription": event_latestDescription # Cleanup: DynamoDB entry deleted 24 hours after last update } ) affected_org_accounts_details = [ f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts ] # send to configured endpoints if status_code != "closed": send_org_alert( event_details, affected_org_accounts_details, affected_org_entities, event_type="create", ) else: send_org_alert( event_details, affected_org_accounts_details, 
affected_org_entities, event_type="resolve", ) else: print("No new updates found, checking again in 1 minute.") # For Customers not using AWS Organizations def update_ddb( event_arn, str_update, status_code, event_details, affected_accounts, affected_entities, ): # open dynamoDB dynamodb = aws_api.resource("dynamodb") ddb_table = os.environ["DYNAMODB_TABLE"] aha_ddb_table = dynamodb.Table(ddb_table) event_latestDescription = event_details["successfulSet"][0]["eventDescription"][ "latestDescription" ] # set time parameters delta_hours = os.environ["EVENT_SEARCH_BACK"] delta_hours = int(delta_hours) delta_hours_sec = delta_hours * 3600 # formatting time in seconds srt_ddb_format_full = "%Y-%m-%d %H:%M:%S" str_ddb_format_sec = "%s" sec_now = datetime.strftime(datetime.now(), str_ddb_format_sec) # check if event arn already exists try: response = aha_ddb_table.get_item(Key={"arn": event_arn}) except ClientError as e: print(e.response["Error"]["Message"]) else: is_item_response = response.get("Item") if is_item_response == None: print(datetime.now().strftime(srt_ddb_format_full) + ": record not found") # write to dynamodb response = aha_ddb_table.put_item( Item={ "arn": event_arn, "lastUpdatedTime": str_update, "added": sec_now, "ttl": int(sec_now) + delta_hours_sec + 86400, "statusCode": status_code, "affectedAccountIDs": affected_accounts, "latestDescription": event_latestDescription # Cleanup: DynamoDB entry deleted 24 hours after last update } ) affected_accounts_details = affected_accounts # send to configured endpoints if status_code != "closed": send_alert( event_details, affected_accounts_details, affected_entities, event_type="create", ) else: send_alert( event_details, affected_accounts_details, affected_entities, event_type="resolve", ) else: item = response["Item"] if item["lastUpdatedTime"] != str_update and ( item["statusCode"] != status_code or item["latestDescription"] != event_latestDescription or item["affectedAccountIDs"] != affected_accounts ): 
print( datetime.now().strftime(srt_ddb_format_full) + ": last Update is different" ) # write to dynamodb response = aha_ddb_table.put_item( Item={ "arn": event_arn, "lastUpdatedTime": str_update, "added": sec_now, "ttl": int(sec_now) + delta_hours_sec + 86400, "statusCode": status_code, "affectedAccountIDs": affected_accounts, "latestDescription": event_latestDescription # Cleanup: DynamoDB entry deleted 24 hours after last update } ) affected_accounts_details = [ f"{get_account_name(account_id)} ({account_id})" for account_id in affected_accounts ] # send to configured endpoints if status_code != "closed": send_alert( event_details, affected_accounts_details, affected_entities, event_type="create", ) else: send_alert( event_details, affected_accounts_details, affected_entities, event_type="resolve", ) else: print("No new updates found, checking again in 1 minute.") def get_secrets(): secret_teams_name = "MicrosoftChannelID" secret_slack_name = "SlackChannelID" secret_chime_name = "ChimeChannelID" region_name = os.environ["AWS_REGION"] event_bus_name = "EventBusName" secret_assumerole_name = "AssumeRoleArn" secrets = {} # create a Secrets Manager client client = aws_api.secretsmanager(region_name=region_name) # Iteration through the configured AWS Secrets secrets["teams"] = ( get_secret(secret_teams_name, client) if "Teams" in os.environ else "None" ) secrets["slack"] = ( get_secret(secret_slack_name, client) if "Slack" in os.environ else "None" ) secrets["chime"] = ( get_secret(secret_chime_name, client) if "Chime" in os.environ else "None" ) secrets["ahaassumerole"] = ( get_secret(secret_assumerole_name, client) if os.environ["MANAGEMENT_ROLE_ARN"] != "None" else "None" ) secrets["eventbusname"] = ( get_secret(event_bus_name, client) if "Eventbridge" in os.environ else "None" ) # uncomment below to verify secrets values # print("Secrets: ",secrets) return secrets def get_secret(secret_name, client): try: get_secret_value_response = 
client.get_secret_value(SecretId=secret_name) except ClientError as e: print(f"There was an error with the {secret_name} secret: ", e.response) return "None" finally: if "SecretString" not in get_secret_value_response: return "None" return get_secret_value_response["SecretString"] def describe_events(health_client): str_ddb_format_sec = "%s" # set hours to search back in time for events delta_hours = os.environ["EVENT_SEARCH_BACK"] health_event_type = os.environ["HEALTH_EVENT_TYPE"] delta_hours = int(delta_hours) time_delta = datetime.now() - timedelta(hours=delta_hours) print("Searching for events and updates made after: ", time_delta) dict_regions = os.environ["REGIONS"] str_filter = {"lastUpdatedTimes": [{"from": time_delta}]} if health_event_type == "issue": event_type_filter = {"eventTypeCategories": ["issue", "investigation"]} print( "AHA will be monitoring events with event type categories as 'issue' only!" ) str_filter.update(event_type_filter) if dict_regions != "all regions": dict_regions = [region.strip() for region in dict_regions.split(",")] print( "AHA will monitor for events only in the selected regions: ", dict_regions ) region_filter = {"regions": dict_regions} str_filter.update(region_filter) event_paginator = health_client.get_paginator("describe_events") event_page_iterator = event_paginator.paginate(filter=str_filter) for response in event_page_iterator: events = response.get("events", []) aws_events = json.dumps(events, default=myconverter) aws_events = json.loads(aws_events) print("Event(s) Received: ", json.dumps(aws_events)) if len(aws_events) > 0: # if there are new event(s) from AWS for event in aws_events: event_arn = event["arn"] status_code = event["statusCode"] str_update = parser.parse((event["lastUpdatedTime"])) str_update = str_update.strftime(str_ddb_format_sec) # get non-organizational view requirements affected_accounts = get_health_accounts(health_client, event, event_arn) affected_entities = get_affected_entities( 
health_client, event_arn, affected_accounts, is_org_mode=False ) # get event details event_details = json.dumps( describe_event_details(health_client, event_arn), default=myconverter, ) event_details = json.loads(event_details) print("Event Details: ", event_details) if event_details["successfulSet"] == []: print( "An error occured with account:", event_details["failedSet"][0]["awsAccountId"], "due to:", event_details["failedSet"][0]["errorName"], ":", event_details["failedSet"][0]["errorMessage"], ) continue else: # write to dynamoDB for persistence update_ddb( event_arn, str_update, status_code, event_details, affected_accounts, affected_entities, ) else: print("No events found in time frame, checking again in 1 minute.") def describe_org_events(health_client): str_ddb_format_sec = "%s" # set hours to search back in time for events delta_hours = os.environ["EVENT_SEARCH_BACK"] health_event_type = os.environ["HEALTH_EVENT_TYPE"] dict_regions = os.environ["REGIONS"] delta_hours = int(delta_hours) time_delta = datetime.now() - timedelta(hours=delta_hours) print("Searching for events and updates made after: ", time_delta) str_filter = {"lastUpdatedTime": {"from": time_delta}} if health_event_type == "issue": event_type_filter = {"eventTypeCategories": ["issue", "investigation"]} print( "AHA will be monitoring events with event type categories as 'issue' only!" 
) str_filter.update(event_type_filter) if dict_regions != "all regions": dict_regions = [region.strip() for region in dict_regions.split(",")] print( "AHA will monitor for events only in the selected regions: ", dict_regions ) region_filter = {"regions": dict_regions} str_filter.update(region_filter) org_event_paginator = health_client.get_paginator( "describe_events_for_organization" ) org_event_page_iterator = org_event_paginator.paginate(filter=str_filter) for response in org_event_page_iterator: events = response.get("events", []) aws_events = json.dumps(events, default=myconverter) aws_events = json.loads(aws_events) print("Event(s) Received: ", json.dumps(aws_events)) if len(aws_events) > 0: for event in aws_events: event_arn = event["arn"] status_code = event["statusCode"] str_update = parser.parse((event["lastUpdatedTime"])) str_update = str_update.strftime(str_ddb_format_sec) # get organizational view requirements affected_org_accounts = get_health_org_accounts( health_client, event, event_arn ) if ( os.environ["ACCOUNT_IDS"] == "None" or os.environ["ACCOUNT_IDS"] == "" ): affected_org_accounts = affected_org_accounts update_org_ddb_flag = True else: account_ids_to_filter = getAccountIDs() if affected_org_accounts != []: focused_org_accounts = [ i for i in affected_org_accounts if i not in account_ids_to_filter ] print("Focused list is ", focused_org_accounts) if focused_org_accounts != []: update_org_ddb_flag = True affected_org_accounts = focused_org_accounts else: update_org_ddb_flag = False print("Focused Organization Account list is empty") else: update_org_ddb_flag = True affected_org_entities = get_affected_entities( health_client, event_arn, affected_org_accounts, is_org_mode=True ) # get event details event_details = json.dumps( describe_org_event_details( health_client, event_arn, affected_org_accounts ), default=myconverter, ) event_details = json.loads(event_details) print("Event Details: ", event_details) if event_details["successfulSet"] == 
[]: print( "An error occured with account:", event_details["failedSet"][0]["awsAccountId"], "due to:", event_details["failedSet"][0]["errorName"], ":", event_details["failedSet"][0]["errorMessage"], ) continue else: # write to dynamoDB for persistence if update_org_ddb_flag: update_org_ddb( event_arn, str_update, status_code, event_details, affected_org_accounts, affected_org_entities, ) else: print("No events found in time frame, checking again in 1 minute.") def myconverter(json_object): if isinstance(json_object, datetime): return json_object.__str__() def describe_event_details(health_client, event_arn): response = health_client.describe_event_details( eventArns=[event_arn], ) return response def describe_org_event_details(health_client, event_arn, affected_org_accounts): if len(affected_org_accounts) >= 1: affected_account_ids = affected_org_accounts[0] response = health_client.describe_event_details_for_organization( organizationEventDetailFilters=[ {"awsAccountId": affected_account_ids, "eventArn": event_arn} ] ) else: response = describe_event_details(health_client, event_arn) return response def eventbridge_generate_entries(message, resources, event_bus): return [ { "Source": "aha", "DetailType": "AHA Event", "Resources": resources, "Detail": json.dumps(message), "EventBusName": event_bus, }, ] def send_to_eventbridge(message, event_type, resources, event_bus): print( "Sending response to Eventbridge - event_type, event_bus", event_type, event_bus ) client = aws_api.client("events") entries = eventbridge_generate_entries(message, resources, event_bus) print("Sending entries: ", entries) response = client.put_events(Entries=entries) print("Response from eventbridge is:", response) def getAccountIDs(): account_ids = "" key_file_name = os.environ["ACCOUNT_IDS"] print("Key filename is - ", key_file_name) if os.path.splitext(os.path.basename(key_file_name))[1] == ".csv": s3 = aws_api.client("s3") data = s3.get_object(Bucket=os.environ["S3_BUCKET"], 
Key=key_file_name) account_ids = [account.decode("utf-8") for account in data["Body"].iter_lines()] else: print("Key filename is not a .csv file") print(account_ids) return account_ids def get_sts_token(service): assumeRoleArn = get_secrets()["ahaassumerole"] boto3_client = None if "arn:aws:iam::" in assumeRoleArn: ACCESS_KEY = [] SECRET_KEY = [] SESSION_TOKEN = [] sts_connection = aws_api.client("sts") ct = datetime.now() role_session_name = "cross_acct_aha_session" acct_b = sts_connection.assume_role( RoleArn=assumeRoleArn, RoleSessionName=role_session_name, DurationSeconds=900, ) ACCESS_KEY = acct_b["Credentials"]["AccessKeyId"] SECRET_KEY = acct_b["Credentials"]["SecretAccessKey"] SESSION_TOKEN = acct_b["Credentials"]["SessionToken"] # create service client using the assumed role credentials, e.g. S3 boto3_client = aws_api.client( service, config=config, aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, aws_session_token=SESSION_TOKEN, ) print("Running in member account deployment mode") else: boto3_client = aws_api.client(service, config=config) print("Running in management account deployment mode") return boto3_client def main(event, context): aws_api.cache_clear() print("THANK YOU FOR CHOOSING AWS HEALTH AWARE!") health_client = get_sts_token("health") org_status = os.environ["ORG_STATUS"] # str_ddb_format_sec = '%s' # check for AWS Organizations Status if org_status == "No": # TODO update text below to reflect current functionality print( "AWS Organizations is not enabled. Only Service Health Dashboard messages will be alerted." ) describe_events(health_client) else: print( "AWS Organizations is enabled. Personal Health Dashboard and Service Health Dashboard messages will be alerted." 
) describe_org_events(health_client) if __name__ == "__main__": main("", "") ================================================ FILE: messagegenerator.py ================================================ import json from datetime import datetime import sys import logging logger = logging.getLogger() def get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook): message = "" summary = "" if slack_webhook == "services": #Handle "Incoming Webhook" webhooks if len(affected_entities) >= 1: affected_entities = "\n".join(affected_entities) if affected_entities == "UNKNOWN": affected_entities = "All resources\nin region" else: affected_entities = "All resources\nin region" if len(affected_accounts) >= 1: affected_accounts = "\n".join(affected_accounts) else: affected_accounts = "All accounts\nin region" if event_type == "create": summary += ( f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" ) message = { "text": summary, "attachments": [ { "color": "danger", "fields": [ { "title": "Account(s)", "value": affected_accounts, "short": True }, { "title": "Resource(s)", "value": affected_entities, "short": True }, { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } ], } ] } elif event_type == "resolve": summary 
+= ( f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" ) message = { "text": summary, "attachments": [ { "color": "00ff00", "fields": [ { "title": "Account(s)", "value": affected_accounts, "short": True }, { "title": "Resource(s)", "value": affected_entities, "short": True }, { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event'].get('endTime')), "short": True }, { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } ], } ] } else: #Handle 'workflows' or 'triggers' webhooks if len(affected_entities) >= 1: affected_entities = "\n".join(affected_entities) if affected_entities == "UNKNOWN": affected_entities = "All resources\nin region" else: affected_entities = "All resources in region" if len(affected_accounts) >= 1: affected_accounts = "\n".join(affected_accounts) else: affected_accounts = "All accounts in region" if event_type == "create": summary += ( f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" ) message = { "text": summary, "accounts": affected_accounts, "resources": affected_entities, "service": 
event_details['successfulSet'][0]['event']['service'], "region": event_details['successfulSet'][0]['event']['region'], "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "status": event_details['successfulSet'][0]['event']['statusCode'], "event_arn": event_details['successfulSet'][0]['event']['arn'], "updates": get_last_aws_update(event_details) } elif event_type == "resolve": summary += ( f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" ) message = { "text": summary, "accounts": affected_accounts, "resources": affected_entities, "service": event_details['successfulSet'][0]['event']['service'], "region": event_details['successfulSet'][0]['event']['region'], "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "status": event_details['successfulSet'][0]['event']['statusCode'], "event_arn": event_details['successfulSet'][0]['event']['arn'], "updates": get_last_aws_update(event_details) } print("Message sent to Slack: ", message) return message # COMMON compose the event detail field for org and non-org def get_detail_for_eventbridge(event_details, affected_entities): message = {} #replace the key "arn" with eventArn to match event format from aws.health message["eventArn"] = "" message.update(event_details['successfulSet'][0]['event']) message["eventArn"] = message.pop("arn") #message = event_details['successfulSet'][0]['event'] message["eventDescription"] = event_details["successfulSet"][0]["eventDescription"] message["affectedEntities"] = affected_entities # Log length of json message for debugging if eventbridge may reject the message as messages # are limited in size to 256KB json_message = json.dumps(message) print("PHD/SHD Message generated for EventBridge with estimated size ", str(sys.getsizeof(json_message) / 
1024), "KB: ", message) return message def get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook): message = "" summary = "" if slack_webhook == "webhook": if len(affected_org_entities) >= 1: affected_org_entities = "\n".join(affected_org_entities) else: affected_org_entities = "All resources\nin region" if len(affected_org_accounts) >= 1: affected_org_accounts = "\n".join(affected_org_accounts) else: affected_org_accounts = "All accounts\nin region" if event_type == "create": summary += ( f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" ) message = { "text": summary, "attachments": [ { "color": "danger", "fields": [ { "title": "Account(s)", "value": affected_org_accounts, "short": True }, { "title": "Resource(s)", "value": affected_org_entities, "short": True }, { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } ], } ] } elif event_type == "resolve": summary += ( f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" ) message = { "text": summary, "attachments": [ { "color": "00ff00", "fields": [ { 
"title": "Account(s)", "value": affected_org_accounts, "short": True }, { "title": "Resource(s)", "value": affected_org_entities, "short": True }, { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event'].get('endTime')), "short": True }, { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } ], } ] } else: if len(affected_org_entities) >= 1: affected_org_entities = "\n".join(affected_org_entities) else: affected_org_entities = "All resources in region" if len(affected_org_accounts) >= 1: affected_org_accounts = "\n".join(affected_org_accounts) else: affected_org_accounts = "All accounts in region" if event_type == "create": summary += ( f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" ) message = { "text": summary, "accounts": affected_org_accounts, "resources": affected_org_entities, "service": event_details['successfulSet'][0]['event']['service'], "region": event_details['successfulSet'][0]['event']['region'], "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "status": event_details['successfulSet'][0]['event']['statusCode'], "event_arn": event_details['successfulSet'][0]['event']['arn'], "updates": get_last_aws_update(event_details) } elif event_type == "resolve": 
summary += ( f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" ) message = { "text": summary, "accounts": affected_org_accounts, "resources": affected_org_entities, "service": event_details['successfulSet'][0]['event']['service'], "region": event_details['successfulSet'][0]['event']['region'], "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "status": event_details['successfulSet'][0]['event']['statusCode'], "event_arn": event_details['successfulSet'][0]['event']['arn'], "updates": get_last_aws_update(event_details) } json.dumps(message) print("Message sent to Slack: ", message) return message def get_message_for_chime(event_details, event_type, affected_accounts, affected_entities): message = "" if len(affected_entities) >= 1: affected_entities = "\n".join(affected_entities) if affected_entities == "UNKNOWN": affected_entities = "All resources\nin region" else: affected_entities = "All resources\nin region" if len(affected_accounts) >= 1: affected_accounts = "\n".join(affected_accounts) else: affected_accounts = "All accounts\nin region" summary = "" if event_type == "create": message = str("/md" + "\n" + "**:rotating_light:\[NEW\] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region.**" + "\n" "---" + "\n" "**Account(s)**: " + affected_accounts + "\n" "**Resource(s)**: " + affected_entities + "\n" "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" "**Status**: " + 
event_details['successfulSet'][0]['event']['statusCode'] + "\n" "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" "**Updates:**" + "\n" + get_last_aws_update(event_details) ) elif event_type == "resolve": message = str("/md" + "\n" + "**:heavy_check_mark:\[RESOLVED\] The AWS Health issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region is now resolved.**" + "\n" "---" + "\n" "**Account(s)**: " + affected_accounts + "\n" "**Resource(s)**: " + affected_entities + "\n" "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" "**End Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event'].get('endTime')) + "\n" "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" "**Updates:**" + "\n" + get_last_aws_update(event_details) ) message = truncate_message_if_needed(message, 4096) print("Message sent to Chime: ", message) return message def get_org_message_for_chime(event_details, event_type, affected_org_accounts, affected_org_entities): message = "" summary = "" if len(affected_org_entities) >= 1: affected_org_entities = "\n".join(affected_org_entities) else: affected_org_entities = "All resources in region" if len(affected_org_accounts) >= 1: affected_org_accounts = "\n".join(affected_org_accounts) else: affected_org_accounts = "All accounts in region" if event_type == "create": message = str("/md" + "\n" + "**:rotating_light:\[NEW\] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event']['service'].upper()) + " service in " + 
str(event_details['successfulSet'][0]['event']['region'].upper() + " region**" + "\n" "---" + "\n" "**Account(s)**: " + affected_org_accounts + "\n" "**Resource(s)**: " + affected_org_entities + "\n" "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" "**Updates:**" + "\n" + get_last_aws_update(event_details) ) elif event_type == "resolve": message = str("/md" + "\n" + "**:heavy_check_mark:\[RESOLVED\] The AWS Health issue with the " + event_details['successfulSet'][0]['event']['service'].upper()) + " service in " + str(event_details['successfulSet'][0]['event']['region'].upper() + " region is now resolved.**" + "\n" "---" + "\n" "**Account(s)**: " + affected_org_accounts + "\n" "**Resource(s)**: " + affected_org_entities + "\n" "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" "**End Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event'].get('endTime')) + "\n" "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" "**Updates:**" + "\n" + get_last_aws_update(event_details) ) message = truncate_message_if_needed(message, 4096) print("Message sent to Chime: ", message) return message def get_message_for_teams(event_details, event_type, affected_accounts, affected_entities): message = "" if len(affected_entities) >= 1: affected_entities = "\n".join(affected_entities) if 
affected_entities == "UNKNOWN": affected_entities = "All resources\nin region" else: affected_entities = "All resources\nin region" if len(affected_accounts) >= 1: affected_accounts = "\n".join(affected_accounts) else: affected_accounts = "All accounts\nin region" summary = "" if event_type == "create": title = "🚨 [NEW] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event'][ 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ 'region'].upper() + " region." message = { "@type": "MessageCard", "@context": "http://schema.org/extensions", "themeColor": "FF0000", "summary": "AWS Health Aware Alert", "sections": [ { "activityTitle": str(title), "markdown": False, "facts": [ {"name": "Account(s)", "value": affected_accounts}, {"name": "Resource(s)", "value": affected_entities}, {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, {"name": "Updates", "value": get_last_aws_update(event_details)} ], } ] } elif event_type == "resolve": title = "✅ [RESOLVED] The AWS Health issue with the " + event_details['successfulSet'][0]['event'][ 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ 'region'].upper() + " region is now resolved." 
message = { "@type": "MessageCard", "@context": "http://schema.org/extensions", "themeColor": "00ff00", "summary": "AWS Health Aware Alert", "sections": [ { "activityTitle": str(title), "markdown": False, "facts": [ {"name": "Account(s)", "value": affected_accounts}, {"name": "Resource(s)", "value": affected_entities}, {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, {"name": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event'].get('endTime'))}, {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, {"name": "Updates", "value": get_last_aws_update(event_details)} ], } ] } print("Message sent to Teams: ", message) return message def get_org_message_for_teams(event_details, event_type, affected_org_accounts, affected_org_entities): message = "" summary = "" if len(affected_org_entities) >= 1: affected_org_entities = "\n".join(affected_org_entities) else: affected_org_entities = "All resources in region" if len(affected_org_accounts) >= 1: affected_org_accounts = "\n".join(affected_org_accounts) else: affected_org_accounts = "All accounts in region" if event_type == "create": title = "🚨 [NEW] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event'][ 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ 'region'].upper() + " region." 
message = { "@type": "MessageCard", "@context": "http://schema.org/extensions", "themeColor": "FF0000", "summary": "AWS Health Aware Alert", "sections": [ { "activityTitle": title, "markdown": False, "facts": [ {"name": "Account(s)", "value": affected_org_accounts}, {"name": "Resource(s)", "value": affected_org_entities}, {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, {"name": "Updates", "value": event_details['successfulSet'][0]['eventDescription']['latestDescription']} ], } ] } elif event_type == "resolve": title = "✅ [RESOLVED] The AWS Health issue with the " + event_details['successfulSet'][0]['event'][ 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ 'region'].upper() + " region is now resolved." 
message = { "@type": "MessageCard", "@context": "http://schema.org/extensions", "themeColor": "00ff00", "summary": "AWS Health Aware Alert", "sections": [ { "activityTitle": title, "markdown": False, "facts": [ {"name": "Account(s)", "value": affected_org_accounts}, {"name": "Resource(s)", "value": affected_org_entities}, {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, {"name": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event'].get('endTime'))}, {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, {"name": "Updates", "value": event_details['successfulSet'][0]['eventDescription']['latestDescription']} ], } ] } return message print("Message sent to Teams: ", message) def get_message_for_email(event_details, event_type, affected_accounts, affected_entities): # Not srue why we have the new line in the affected entities code here if len(affected_entities) >= 1: affected_entities = "\n".join(affected_entities) if affected_entities == "UNKNOWN": affected_entities = "All resources\nin region" else: affected_entities = "All resources\nin region" if len(affected_accounts) >= 1: affected_accounts = "\n".join(affected_accounts) else: affected_accounts = "All accounts\nin region" if event_type == "create": BODY_HTML = f""" Greetings from AWS Health Aware,

There is an AWS incident that is in effect which may likely impact your resources. Here are the details:

Account(s): {affected_accounts}
Resource(s): {affected_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
Status: {event_details['successfulSet'][0]['event']['statusCode']}
Event ARN: {event_details['successfulSet'][0]['event']['arn']}
Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

For updates, please visit the AWS Service Health Dashboard
If you are experiencing issues related to this event, please open an AWS Support case within your account.

Thanks,

AHA: AWS Health Aware

""" else: BODY_HTML = f""" Greetings again from AWS Health Aware,

Good news! The AWS Health incident from earlier has now been marked as resolved.

Account(s): {affected_accounts}
Resource(s): {affected_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
End Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event'].get('endTime'))}
Status: {event_details['successfulSet'][0]['event']['statusCode']}
Event ARN: {event_details['successfulSet'][0]['event']['arn']}
Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

If you are still experiencing issues related to this event, please open an AWS Support case within your account.



Thanks,

AHA: AWS Health Aware

""" print("Message sent to Email: ", BODY_HTML) return BODY_HTML def get_org_message_for_email(event_details, event_type, affected_org_accounts, affected_org_entities): if len(affected_org_entities) >= 1: affected_org_entities = "\n".join(affected_org_entities) else: affected_org_entities = "All services related resources in region" if len(affected_org_accounts) >= 1: affected_org_accounts = "\n".join(affected_org_accounts) else: affected_org_accounts = "All accounts in region" if event_type == "create": BODY_HTML = f""" Greetings from AWS Health Aware,

There is an AWS incident that is in effect which may likely impact your resources. Here are the details:

Account(s): {affected_org_accounts}
Resource(s): {affected_org_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
Status: {event_details['successfulSet'][0]['event']['statusCode']}
Event ARN: {event_details['successfulSet'][0]['event']['arn']}
Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

For updates, please visit the AWS Service Health Dashboard
If you are experiencing issues related to this event, please open an AWS Support case within your account.

Thanks,

AHA: AWS Health Aware

""" else: BODY_HTML = f""" Greetings again from AWS Health Aware,

Good news! The AWS Health incident from earlier has now been marked as resolved.

Account(s): {affected_org_accounts}
Resource(s): {affected_org_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
End Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event'].get('endTime'))}
Status: {event_details['successfulSet'][0]['event']['statusCode']}
Event ARN: {event_details['successfulSet'][0]['event']['arn']}
Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

If you are still experiencing issues related to this event, please open an AWS Support case within your account.

Thanks,

AHA: AWS Health Aware

""" print("Message sent to Email: ", BODY_HTML) return BODY_HTML def cleanup_time(event_time): """ Takes as input a datetime string as received from The AWS Health event_detail call. It converts this string to a datetime object, changes the timezone to EST and then formats it into a readable string to display in Slack. :param event_time: datetime string :type event_time: str :return: A formatted string that includes the month, date, year and 12-hour time. :rtype: str """ if not event_time: return "Unknown" event_time = datetime.strptime(event_time[:16], '%Y-%m-%d %H:%M') return event_time.strftime("%Y-%m-%d %H:%M:%S") def get_last_aws_update(event_details): """ Takes as input the event_details and returns the last update from AWS (instead of the entire timeline) :param event_details: Detailed information about a specific AWS health event. :type event_details: dict :return: the last update message from AWS :rtype: str """ aws_message = event_details['successfulSet'][0]['eventDescription']['latestDescription'] return aws_message def format_date(event_time): """ Takes as input a datetime string as received from The AWS Health event_detail call. It converts this string to a datetime object, changes the timezone to EST and then formats it into a readable string to display in Slack. :param event_time: datetime string :type event_time: str :return: A formatted string that includes the month, date, year and 12-hour time. :rtype: str """ event_time = datetime.strptime(event_time[:16], '%Y-%m-%d %H:%M') return event_time.strftime('%B %d, %Y at %I:%M %p') def truncate_message_if_needed(message, max_length): """ Truncates the message if it exceeds the specified maximum length. :param message: Message you want to truncate. :type message: str :param max_length: Length at which to truncate the message. :type max_length: int :return: Possibly truncated message. 
:rtype: str """ message_length = len(message) if message_length > max_length: print(f"Message length of {message_length} is too long, truncating to {max_length}.") message = message[:(max_length - 3)] + "..." return message ================================================ FILE: new_aha_event_schema.md ================================================ # Readme for new AHA Event schema ## New AHA Event Schema With release X.Y.Z, AHA includes an updated format for events published to EventBridge. Building on the [existing event format](https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#health-event-types) published by AWS Health, AHA enriches it with additional data from the Health API and AWS Organizations to enable new options for filtering in EventBridge. >Note: If you used the previous { "title": "Title", "value": "Value" } schema in your rules, you must update your rules to reflect the new schema when deploying the new version of AHA ### Schema: ``` { "version": "0", "id": "7bf73129-1428-4cd3-a780-95db273d1602", "detail-type": "AHA Event", "source": "aha", "account": "123456789012", "time": "2022-07-14T03:56:10Z", "region": "region of the eventbus", "resources": [ "i-1234567890abcdef0" ], "detail": { "eventArn": "arn:aws:health:region::event/id", "service": "service", "eventTypeCode": "typecode", "eventTypeCategory": "category", "region": "region of the Health event", "startTime": "2022-07-02 12:33:26.951000+00:00", "endTime": "2022-07-02 12:33:26.951000+00:00", "lastUpdatedTime": "2022-07-02 12:36:18.576000+00:00", "statusCode": "status", "eventScopeCode": "scopecode", "eventDescription": { "latestDescription": "description" }, "affectedEntities": [{ "entityValue": "i-1234567890abcdef0", "awsAccountId": "account number", "awsAccountName": "account name" }] } } ``` ### AHA added properties **eventScopeCode:** Specifies if the Health event is a public AWS service event or an account-specific event. 
Values: *string -* `PUBLIC | ACCOUNT_SPECIFIC` **statusCode:** Reflects whether the event is ongoing, resolved or in the case of scheduled maintenance, upcoming. Values: *string -* `open | closed | upcoming` **affectedEntities:** For ACCOUNT_SPECIFIC events, AHA includes expanded detail on resources. **affectedEntities** includes the listed **resources**, each as an **entityValue** with the resource ID (as it appears in events for single accounts). AHA adds the related **awsAccountId** and, in AWS Organizations, the **awsAccountName** of the resource. Values: *entity object(s). May be empty if no resources are listed* ## EventBridge pattern examples As a primer we recommend you review the [EventBridge EventPatterns](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns.html) documentation and examples on [Content filtering in Amazon EventBridge event patterns](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns-content-based-filtering.html) Use the following sample event published by AWS Health Aware to test matching in the provided examples: ``` { "version": "0", "id": "e47c4390-b295-ce6f-7e94-f13083d7bb90", "detail-type": "AHA Event", "source": "aha", "account": "234567890123", "time": "2022-07-20T18:26:17Z", "region": "us-east-1", "resources": [ "vpn-0d0e3eeefe6aabb0d" ], "detail": { "arn": "arn:aws:health:us-east-1::event/VPN/AWS_VPN_REDUNDANCY_LOSS/AWS_VPN_REDUNDANCY_LOSS-1656151378267-7672191-IAD", "service": "VPN", "eventTypeCode": "AWS_VPN_REDUNDANCY_LOSS", "eventTypeCategory": "accountNotification", "region": "us-east-1", "startTime": "2022-06-25 10:00:48.868000+00:00", "lastUpdatedTime": "2022-06-25 10:02:58.371000+00:00", "statusCode": "open", "eventScopeCode": "ACCOUNT_SPECIFIC", "eventDescription": { "latestDescription": "Your VPN Connection associated with this event in the us-east-1 Region had a momentary lapse of redundancy as one of two tunnel endpoints was replaced. 
Connectivity on the second tunnel was not affected during this time. Both tunnels are now operating normally.\n\nReplacements can occur for several reasons, including health, software upgrades, customer-initiated modifications, and when underlying hardware is retired. If you have configured your VPN Customer Gateway to use both tunnels, then your VPN Connection will have utilized the alternate tunnel during the replacement process. For more on tunnel endpoint replacements, please see our documentation [1].\n\nIf you have not configured your VPN Customer Gateway to use both tunnels, then your VPN Connection may have been interrupted during the replacement. We encourage you to configure your router to use both tunnels. You can obtain the VPN Connection configuration recommendations for several types of VPN devices from the AWS Management Console [2]. On the \"Amazon VPC\" tab, select \"VPN Connections\". Then highlight the VPN Connection and choose \"Download Configuration\".\n\n[1] https://docs.aws.amazon.com/vpn/latest/s2svpn/monitoring-vpn-health-events.html\n[2] https://console.aws.amazon.com" }, "affectedEntities": [{ "entityValue": "vpn-0d0e3eeefe6aabb0d", "awsAccountId": "987654321987", "awsAccountName": "Prod-Apps" }] } } ``` To write a rule that matches resources found in the event, reference the **resources** key of the JSON event, and provide an event pattern to the EventBridge rule. Example 1 matches on an exact resource - “*vpn-0d0e3eeefe6aabb0d*”. Example 2 matches any resource starting with "*vpn-*" **Example 1:** ``` { "resources": [ "vpn-0d0e3eeefe6aabb0d" ] } ``` **Example 2:** ``` { "resources": [ {"prefix": "vpn-"} ] } ``` To match based on a specific service, note that **service** is nested within the **detail** key in the JSON structure, so we reference both **detail** and **service**. Example 3 matches the VPN service, and Example 4 matches EC2 OR S3 (will not match the sample event). 
To get a list of all service names used by AWS Health you can use the cli command - `aws health describe-event-types` **Example 3:** ``` { "detail": { "service": ["VPN"] } } ``` **Example 4:** ``` { "detail": { "service": ["EC2", "S3"] } } ``` To match events based on an AWS account name or number, use the following patterns. Take note of the additional levels of nesting based on the sample event. Example 5 matches a specific account number as **awsAccountId**. Example 6 matches a specific account name, and Example 7 adds an additional field of **eventTypeCategory** along with the “prefix” filter pattern which will match any value in the **awsAccountName** field that starts with “*Prod*” similar to a wildcard match of “*Prod**” **Example 5:** ``` { "detail": { "affectedEntities": { "awsAccountId": ["987654321987"] } } } ``` **Example 6:** ``` { "detail": { "affectedEntities": { "awsAccountName": ["Prod-Apps"] } } } ``` **Example 7:** ``` { "detail": { "eventTypeCategory": ["accountNotification"], "affectedEntities": { "awsAccountName": [{"prefix": "Prod"}] } } } ``` Combine any of the patterns listed to create more specific rules. Note that all patterns in the rule must match for the EventBridge rule to trigger. Example 8 will only match when all 3 conditions exist in an AHA event - **service** = *VPN*, **region** = *us-east-1* and **awsAccountId** = *987654321987* **Example 8:** ``` { "detail": { "service": ["VPN"], "region": ["us-east-1"], "affectedEntities": { "awsAccountId": ["987654321987"] } } } ``` As a best practice, also include `"source": ["aha"]` in your pattern if the event bus contains events generated by other sources. 
**Example 9:** ``` { "source": ["aha"], "detail": { "service": ["VPN"], "region": ["us-east-1"], "affectedEntities": { "awsAccountId": ["987654321987"] } } } ``` ================================================ FILE: terraform/Terraform_DEPLOY_AHA/Terraform_DEPLOY_AHA.tf ================================================ # Terraform script to deploy AHA Solution # 1.0 - Initial version # Variables defined below, you can overwrite them using tfvars or imput variables data "aws_caller_identity" "current" {} provider "aws" { region = var.aha_primary_region default_tags { tags = "${var.default_tags}" } } # Secondary region - provider config locals { secondary_region = "${var.aha_secondary_region == "" ? var.aha_primary_region : var.aha_secondary_region}" } provider "aws" { alias = "secondary_region" region = local.secondary_region default_tags { tags = "${var.default_tags}" } } # Comment below - if needed to use s3_bucket, s3_key for consistency with cf locals { source_files = ["${path.module}/../../handler.py", "${path.module}/../../messagegenerator.py"] } data "archive_file" "lambda_zip" { type = "zip" output_path = "${path.module}/lambda_function.zip" source { filename = "${basename(local.source_files[0])}" content = file("${local.source_files[0]}") } source { filename = "${basename(local.source_files[1])}" content = file("${local.source_files[1]}") } } variable "aha_primary_region" { description = "Primary region where AHA solution will be deployed" type = string default = "us-east-1" } variable "aha_secondary_region" { description = "Secondary region where AHA solution will be deployed" type = string default = "" } variable "default_tags" { description = "Tags used for the AWS resources created by this template" type = map default = { Application = "AHA-Solution" } } variable "dynamodbtable" { type = string default = "AHA-DynamoDBTable" } variable "AWSOrganizationsEnabled" { type = string default = "No" description = "You can receive both PHD and SHD alerts if 
you're using AWS Organizations. \n If you are, make sure to enable Organizational Health View: \n (https://docs.aws.amazon.com/health/latest/ug/aggregate-events.html) to \n aggregate all PHD events in your AWS Organization. If not, you can still \n get SHD alerts." validation { condition = ( var.AWSOrganizationsEnabled == "Yes" || var.AWSOrganizationsEnabled == "No" ) error_message = "AWSOrganizationsEnabled variable can only accept Yes or No as values." } } variable "ManagementAccountRoleArn" { type = string default = "" description = "Arn of the IAM role in the top-level management account for collecting PHD Events. 'None' if deploying into the top-level management account." } variable "AWSHealthEventType" { type = string default = "issue | accountNotification | scheduledChange" description = "Select the event type that you want AHA to report on. Refer to \n https://docs.aws.amazon.com/health/latest/APIReference/API_EventType.html for more information on EventType." validation { condition = ( var.AWSHealthEventType == "issue | accountNotification | scheduledChange" || var.AWSHealthEventType == "issue" ) error_message = "AWSHealthEventType variable can only accept issue | accountNotification | scheduledChange or issue as values." } } #variable "S3Bucket" { # type = string # description = "Name of your S3 Bucket where the AHA Package .zip resides. Just the name of the bucket (e.g. my-s3-bucket)" # validation { # condition = length(var.S3Bucket) > 0 # error_message = "The S3Bucket cannot be empty." # } #} # #variable "S3Key" { # type = string # description = "Name of the .zip in your S3 Bucket. Just the name of the file (e.g. aha-v1.0.zip)" # validation { # condition = length(var.S3Key) > 0 # error_message = "The S3Key cannot be empty." # } #} variable "EventBusName" { type = string default = "" description = "This is to ingest alerts into AWS EventBridge. Enter the event bus name if you wish to send the alerts to the AWS EventBridge. 
Note: By ingesting these alerts to AWS EventBridge, you can integrate with 35 SaaS vendors such as DataDog/NewRelic/PagerDuty. If you don't prefer to use EventBridge, leave the default (None)." } variable "SlackWebhookURL" { type = string default = "" description = "Enter the Slack Webhook URL. If you don't prefer to use Slack, leave the default (empty)." } variable "MicrosoftTeamsWebhookURL" { type = string default = "" description = "Enter Microsoft Teams Webhook URL. If you don't prefer to use MS Teams, leave the default (empty)." } variable "AmazonChimeWebhookURL" { type = string default = "" description = "Enter the Chime Webhook URL. If you don't prefer to use Amazon Chime, leave the default (empty)." } variable "Regions" { type = string default = "all regions" description = "By default, AHA reports events affecting all AWS regions. \n If you want to report on certain regions you can enter up to 10 in a comma separated format. \n Available Regions: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-3, \n ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2, \n eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1,global" } variable "EventSearchBack" { type = number default = "1" description = "How far back to search for events in hours. Default is 1 hour" } variable "FromEmail" { type = string default = "none@domain.com" description = "Enter FROM Email Address" } variable "ToEmail" { type = string default = "none@domain.com" description = "Enter email addresses separated by commas (for ex: abc@amazon.com, bcd@amazon.com)" } variable "Subject" { type = string default = "AWS Health Alert" description = "Enter the subject of the email" } #variable "S3Bucket" { # type = string # description = "Name of your S3 Bucket where the AHA Package .zip resides. Just the name of the bucket (e.g. 
my-s3-bucket)" # default = "" #} variable "ExcludeAccountIDs" { type = string default = "" description = "If you would like to EXCLUDE any accounts from alerting, enter a .csv filename created with comma-separated account numbers. Sample AccountIDs file name: aha_account_ids.csv. If not, leave the default empty." } ##### Resources for AHA Solution created below. # Random id generator resource "random_string" "resource_code" { length = 8 special = false upper = false } # S3 buckets creation resource "aws_s3_bucket" "AHA-S3Bucket-PrimaryRegion" { count = "${var.ExcludeAccountIDs != "" ? 1 : 0}" bucket = "aha-bucket-${var.aha_primary_region}-${random_string.resource_code.result}" tags = { Name = "aha-bucket" } } resource "aws_s3_bucket_acl" "AHA-S3Bucket-PrimaryRegion" { count = var.ExcludeAccountIDs != "" ? 1 : 0 bucket = aws_s3_bucket.AHA-S3Bucket-PrimaryRegion[0].id acl = "private" } resource "aws_s3_bucket" "AHA-S3Bucket-SecondaryRegion" { count = "${var.aha_secondary_region != "" && var.ExcludeAccountIDs != "" ? 1 : 0}" provider = aws.secondary_region bucket = "aha-bucket-${var.aha_secondary_region}-${random_string.resource_code.result}" tags = { Name = "aha-bucket" } } resource "aws_s3_bucket_acl" "AHA-S3Bucket-SecondaryRegion" { count = "${var.aha_secondary_region != "" && var.ExcludeAccountIDs != "" ? 1 : 0}" provider = aws.secondary_region bucket = aws_s3_bucket.AHA-S3Bucket-SecondaryRegion[0].id acl = "private" } resource "aws_s3_object" "AHA-S3Object-PrimaryRegion" { count = "${var.ExcludeAccountIDs != "" ? 1 : 0}" key = var.ExcludeAccountIDs bucket = aws_s3_bucket.AHA-S3Bucket-PrimaryRegion[0].bucket source = var.ExcludeAccountIDs tags = { Name = "${var.ExcludeAccountIDs}" } } resource "aws_s3_object" "AHA-S3Object-SecondaryRegion" { count = "${var.aha_secondary_region != "" && var.ExcludeAccountIDs != "" ? 
1 : 0}" provider = aws.secondary_region key = var.ExcludeAccountIDs bucket = aws_s3_bucket.AHA-S3Bucket-SecondaryRegion[0].bucket source = var.ExcludeAccountIDs tags = { Name = "${var.ExcludeAccountIDs}" } } # DynamoDB table - Create if secondary region not set resource "aws_dynamodb_table" "AHA-DynamoDBTable" { count = "${var.aha_secondary_region == "" ? 1 : 0}" billing_mode = "PROVISIONED" hash_key = "arn" name = "${var.dynamodbtable}-${random_string.resource_code.result}" read_capacity = 5 write_capacity = 5 stream_enabled = false tags = { Name = "${var.dynamodbtable}" } attribute { name = "arn" type = "S" } point_in_time_recovery { enabled = false } timeouts {} ttl { attribute_name = "ttl" enabled = true } } # DynamoDB table - Multi region Global Table - Create if secondary region is set resource "aws_dynamodb_table" "AHA-GlobalDynamoDBTable" { count = "${var.aha_secondary_region == "" ? 0 : 1}" billing_mode = "PAY_PER_REQUEST" hash_key = "arn" name = "${var.dynamodbtable}-${random_string.resource_code.result}" stream_enabled = true stream_view_type = "NEW_AND_OLD_IMAGES" tags = { Name = "${var.dynamodbtable}" } attribute { name = "arn" type = "S" } point_in_time_recovery { enabled = false } replica { region_name = var.aha_secondary_region } timeouts {} ttl { attribute_name = "ttl" enabled = true } } # Tags for DynamoDB - secondary region resource "aws_dynamodb_tag" "AHA-GlobalDynamoDBTable" { count = "${var.aha_secondary_region == "" ? 
0 : 1}" provider = aws.secondary_region resource_arn = replace(aws_dynamodb_table.AHA-GlobalDynamoDBTable[count.index].arn, var.aha_primary_region, var.aha_secondary_region) key = "Name" value = "${var.dynamodbtable}" } # Tags for DynamoDB - secondary region - default_tags resource "aws_dynamodb_tag" "AHA-GlobalDynamoDBTable-Additional-tags" { for_each = { for key, value in var.default_tags : key => value if var.aha_secondary_region != "" } provider = aws.secondary_region resource_arn = replace(aws_dynamodb_table.AHA-GlobalDynamoDBTable[0].arn, var.aha_primary_region, var.aha_secondary_region) key = each.key value = each.value } # Secrets - SlackChannelSecret resource "aws_secretsmanager_secret" "SlackChannelID" { count = "${var.SlackWebhookURL == "" ? 0 : 1}" name = "SlackChannelID" description = "Slack Channel ID Secret" recovery_window_in_days = 0 tags = { "HealthCheckSlack" = "ChannelID" } dynamic "replica" { for_each = var.aha_secondary_region == "" ? [] : [1] content { region = var.aha_secondary_region } } } resource "aws_secretsmanager_secret_version" "SlackChannelID" { count = "${var.SlackWebhookURL == "" ? 0 : 1}" secret_id = "${aws_secretsmanager_secret.SlackChannelID.*.id[count.index]}" secret_string = "${var.SlackWebhookURL}" } # Secrets - MicrosoftChannelSecret resource "aws_secretsmanager_secret" "MicrosoftChannelID" { count = "${var.MicrosoftTeamsWebhookURL == "" ? 0 : 1}" name = "MicrosoftChannelID" description = "Microsoft Channel ID Secret" recovery_window_in_days = 0 tags = { "HealthCheckMicrosoft" = "ChannelID" "Name" = "AHA-MicrosoftChannelID" } dynamic "replica" { for_each = var.aha_secondary_region == "" ? [] : [1] content { region = var.aha_secondary_region } } } resource "aws_secretsmanager_secret_version" "MicrosoftChannelID" { count = "${var.MicrosoftTeamsWebhookURL == "" ? 
0 : 1}" secret_id = "${aws_secretsmanager_secret.MicrosoftChannelID.*.id[count.index]}" secret_string = "${var.MicrosoftTeamsWebhookURL}" } # Secrets - EventBusNameSecret resource "aws_secretsmanager_secret" "EventBusName" { count = "${var.EventBusName == "" ? 0 : 1}" name = "EventBusName" description = "EventBus Name Secret" recovery_window_in_days = 0 tags = { "EventBusName" = "ChannelID" "Name" = "AHA-EventBusName" } dynamic "replica" { for_each = var.aha_secondary_region == "" ? [] : [1] content { region = var.aha_secondary_region } } } resource "aws_secretsmanager_secret_version" "EventBusName" { count = "${var.EventBusName == "" ? 0 : 1}" secret_id = "${aws_secretsmanager_secret.EventBusName.*.id[count.index]}" secret_string = "${var.EventBusName}" } # Secrets - ChimeChannelSecret resource "aws_secretsmanager_secret" "ChimeChannelID" { count = "${var.AmazonChimeWebhookURL == "" ? 0 : 1}" name = "ChimeChannelID" description = "Chime Channel ID Secret" recovery_window_in_days = 0 tags = { "HealthCheckChime" = "ChannelID" "Name" = "AHA-ChimeChannelID-${random_string.resource_code.result}" } dynamic "replica" { for_each = var.aha_secondary_region == "" ? [] : [1] content { region = var.aha_secondary_region } } } resource "aws_secretsmanager_secret_version" "ChimeChannelID" { count = "${var.AmazonChimeWebhookURL == "" ? 0 : 1}" secret_id = "${aws_secretsmanager_secret.ChimeChannelID.*.id[count.index]}" secret_string = "${var.AmazonChimeWebhookURL}" } # Secrets - AssumeRoleSecret resource "aws_secretsmanager_secret" "AssumeRoleArn" { count = "${var.ManagementAccountRoleArn == "" ? 0 : 1}" name = "AssumeRoleArn" description = "Management account role for AHA to assume" recovery_window_in_days = 0 tags = { "AssumeRoleArn" = "" "Name" = "AHA-AssumeRoleArn" } dynamic "replica" { for_each = var.aha_secondary_region == "" ? 
[] : [1] content { region = var.aha_secondary_region } } } resource "aws_secretsmanager_secret_version" "AssumeRoleArn" { count = "${var.ManagementAccountRoleArn == "" ? 0 : 1}" secret_id = "${aws_secretsmanager_secret.AssumeRoleArn.*.id[count.index]}" secret_string = "${var.ManagementAccountRoleArn}" } # IAM Role for Lambda function execution resource "aws_iam_role" "AHA-LambdaExecutionRole" { name = "AHA-LambdaExecutionRole-${random_string.resource_code.result}" path = "/" assume_role_policy = jsonencode( { Version = "2012-10-17" Statement = [ { Action = "sts:AssumeRole" Effect = "Allow" Principal = { Service = "lambda.amazonaws.com" } }, ] } ) inline_policy { name = "AHA-LambdaPolicy" policy = data.aws_iam_policy_document.AHA-LambdaPolicy-Document.json } tags = { "Name" = "AHA-LambdaExecutionRole" } } data "aws_iam_policy_document" "AHA-LambdaPolicy-Document" { version = "2012-10-17" statement { effect = "Allow" actions = [ "logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents", ] resources = [ "arn:aws:logs:${var.aha_primary_region}:${data.aws_caller_identity.current.account_id}:*", "arn:aws:logs:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:*" ] } statement { effect = "Allow" actions = [ "health:DescribeAffectedAccountsForOrganization", "health:DescribeAffectedEntitiesForOrganization", "health:DescribeEventDetailsForOrganization", "health:DescribeEventsForOrganization", "health:DescribeEventDetails", "health:DescribeEvents", "health:DescribeEventTypes", "health:DescribeAffectedEntities", "organizations:ListAccounts", "organizations:DescribeAccount", ] resources = [ "*" ] } statement { effect = "Allow" actions = [ "dynamodb:ListTables", ] resources = [ "arn:aws:dynamodb:${var.aha_primary_region}:${data.aws_caller_identity.current.account_id}:*", "arn:aws:dynamodb:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:*", ] } statement { effect = "Allow" actions = [ "ses:SendEmail", ] resources = [ 
"arn:aws:ses:${var.aha_primary_region}:${data.aws_caller_identity.current.account_id}:*", "arn:aws:ses:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:*", ] } statement { effect = "Allow" actions = [ "dynamodb:UpdateTimeToLive", "dynamodb:PutItem", "dynamodb:DeleteItem", "dynamodb:GetItem", "dynamodb:Scan", "dynamodb:Query", "dynamodb:UpdateItem", "dynamodb:UpdateTable", "dynamodb:GetRecords", ] resources = [ #aws_dynamodb_table.AHA-DynamoDBTable.arn "arn:aws:dynamodb:${var.aha_primary_region}:${data.aws_caller_identity.current.account_id}:table/${var.dynamodbtable}-${random_string.resource_code.result}", "arn:aws:dynamodb:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:table/${var.dynamodbtable}-${random_string.resource_code.result}", ] } dynamic "statement" { for_each = var.SlackWebhookURL == "" ? [] : [1] content { effect = "Allow" actions = [ "secretsmanager:GetResourcePolicy", "secretsmanager:DescribeSecret", "secretsmanager:ListSecretVersionIds", "secretsmanager:GetSecretValue", ] resources = [ aws_secretsmanager_secret.SlackChannelID[0].arn, "arn:aws:secretsmanager:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:secret:${element(split(":", aws_secretsmanager_secret.SlackChannelID[0].arn),6)}" # var.aha_secondary_region != "" ? "arn:aws:secretsmanager:${var.aha_secondary_region}:${data.aws_caller_identity.current.account_id}:secret:${element(split(":", aws_secretsmanager_secret.SlackChannelID[0].arn),6)}" : null ] } } dynamic "statement" { for_each = var.MicrosoftTeamsWebhookURL == "" ? 
[] : [1] content { effect = "Allow" actions = [ "secretsmanager:GetResourcePolicy", "secretsmanager:DescribeSecret", "secretsmanager:ListSecretVersionIds", "secretsmanager:GetSecretValue", ] resources = [ aws_secretsmanager_secret.MicrosoftChannelID[0].arn, "arn:aws:secretsmanager:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:secret:${element(split(":", aws_secretsmanager_secret.MicrosoftChannelID[0].arn),6)}" ] } } dynamic "statement" { for_each = var.AmazonChimeWebhookURL == "" ? [] : [1] content { effect = "Allow" actions = [ "secretsmanager:GetResourcePolicy", "secretsmanager:DescribeSecret", "secretsmanager:ListSecretVersionIds", "secretsmanager:GetSecretValue", ] resources = [ aws_secretsmanager_secret.ChimeChannelID[0].arn, "arn:aws:secretsmanager:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:secret:${element(split(":", aws_secretsmanager_secret.ChimeChannelID[0].arn),6)}" ] } } dynamic "statement" { for_each = var.EventBusName == "" ? [] : [1] content { effect = "Allow" actions = [ "secretsmanager:GetResourcePolicy", "secretsmanager:DescribeSecret", "secretsmanager:ListSecretVersionIds", "secretsmanager:GetSecretValue", ] resources = [ aws_secretsmanager_secret.EventBusName[0].arn, "arn:aws:secretsmanager:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:secret:${element(split(":", aws_secretsmanager_secret.EventBusName[0].arn),6)}" ] } } dynamic "statement" { for_each = var.ManagementAccountRoleArn == "" ? 
[] : [1] content { effect = "Allow" actions = [ "secretsmanager:GetResourcePolicy", "secretsmanager:DescribeSecret", "secretsmanager:ListSecretVersionIds", "secretsmanager:GetSecretValue", ] resources = [ aws_secretsmanager_secret.AssumeRoleArn[0].arn, "arn:aws:secretsmanager:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:secret:${element(split(":", aws_secretsmanager_secret.AssumeRoleArn[0].arn),6)}" ] } } dynamic "statement" { for_each = var.EventBusName == "" ? [] : [1] content { effect = "Allow" actions = [ "events:PutEvents", ] resources = [ "arn:aws:events:${var.aha_primary_region}:${data.aws_caller_identity.current.account_id}:event-bus/${var.EventBusName}", "arn:aws:events:${local.secondary_region}:${data.aws_caller_identity.current.account_id}:event-bus/${var.EventBusName}" ] } } dynamic "statement" { for_each = var.ManagementAccountRoleArn == "" ? [] : [1] content { effect = "Allow" actions = [ "sts:AssumeRole", ] resources = [ "${var.ManagementAccountRoleArn}", ] } } dynamic "statement" { for_each = var.ExcludeAccountIDs == "" ? 
[] : [1] content { effect = "Allow" actions = [ "s3:GetObject", ] resources = [ "arn:aws:s3:::aha-bucket-${var.aha_primary_region}-${random_string.resource_code.result}/${var.ExcludeAccountIDs}", "arn:aws:s3:::aha-bucket-${local.secondary_region}-${random_string.resource_code.result}/${var.ExcludeAccountIDs}", ] } } } # aws_lambda_function - AHA-LambdaFunction - Primary region resource "aws_lambda_function" "AHA-LambdaFunction-PrimaryRegion" { description = "Lambda function that runs AHA" function_name = "AHA-LambdaFunction-${random_string.resource_code.result}" handler = "handler.main" memory_size = 128 timeout = 600 filename = data.archive_file.lambda_zip.output_path source_code_hash = data.archive_file.lambda_zip.output_base64sha256 # s3_bucket = var.S3Bucket # s3_key = var.S3Key reserved_concurrent_executions = -1 role = aws_iam_role.AHA-LambdaExecutionRole.arn runtime = "python3.11" environment { variables = { "Slack" = var.SlackWebhookURL != "" ? "True" : null "Team" = var.MicrosoftTeamsWebhookURL != "" ? "True" : null "Chime" = var.AmazonChimeWebhookURL != "" ? "True" : null "Eventbridge" = var.EventBusName != "" ? "True" : null "DYNAMODB_TABLE" = "${var.dynamodbtable}-${random_string.resource_code.result}" "EMAIL_SUBJECT" = var.Subject "EVENT_SEARCH_BACK" = var.EventSearchBack "FROM_EMAIL" = var.FromEmail "HEALTH_EVENT_TYPE" = var.AWSHealthEventType "ORG_STATUS" = var.AWSOrganizationsEnabled "REGIONS" = var.Regions "TO_EMAIL" = var.ToEmail "MANAGEMENT_ROLE_ARN" = var.ManagementAccountRoleArn == "" ? 
"None" : var.ManagementAccountRoleArn "ACCOUNT_IDS" = var.ExcludeAccountIDs "S3_BUCKET" = join("",aws_s3_bucket.AHA-S3Bucket-PrimaryRegion[*].bucket) } } timeouts {} tracing_config { mode = "PassThrough" } tags = { "Name" = "AHA-LambdaFunction" } depends_on = [ aws_dynamodb_table.AHA-DynamoDBTable, aws_dynamodb_table.AHA-GlobalDynamoDBTable, ] } # aws_lambda_function - AHA-LambdaFunction - Secondary region resource "aws_lambda_function" "AHA-LambdaFunction-SecondaryRegion" { count = "${var.aha_secondary_region == "" ? 0 : 1}" provider = aws.secondary_region description = "Lambda function that runs AHA" function_name = "AHA-LambdaFunction-${random_string.resource_code.result}" handler = "handler.main" memory_size = 128 timeout = 600 filename = data.archive_file.lambda_zip.output_path source_code_hash = data.archive_file.lambda_zip.output_base64sha256 # s3_bucket = var.S3Bucket # s3_key = var.S3Key reserved_concurrent_executions = -1 role = aws_iam_role.AHA-LambdaExecutionRole.arn runtime = "python3.11" environment { variables = { "Slack" = var.SlackWebhookURL != "" ? "True" : null "Team" = var.MicrosoftTeamsWebhookURL != "" ? "True" : null "Chime" = var.AmazonChimeWebhookURL != "" ? "True" : null "Eventbridge" = var.EventBusName != "" ? 
"True" : null "DYNAMODB_TABLE" = "${var.dynamodbtable}-${random_string.resource_code.result}" "EMAIL_SUBJECT" = var.Subject "EVENT_SEARCH_BACK" = var.EventSearchBack "FROM_EMAIL" = var.FromEmail "HEALTH_EVENT_TYPE" = var.AWSHealthEventType "ORG_STATUS" = var.AWSOrganizationsEnabled "REGIONS" = var.Regions "TO_EMAIL" = var.ToEmail "MANAGEMENT_ROLE_ARN" = var.ManagementAccountRoleArn == "" ? "None" : var.ManagementAccountRoleArn /* normalize empty ARN to the literal "None", matching the primary-region Lambda's environment so both functions hand the shared handler the same sentinel */ "ACCOUNT_IDS" = var.ExcludeAccountIDs "S3_BUCKET" = join("",aws_s3_bucket.AHA-S3Bucket-SecondaryRegion[*].bucket) } } timeouts {} tracing_config { mode = "PassThrough" } tags = { "Name" = "AHA-LambdaFunction" } depends_on = [ aws_dynamodb_table.AHA-DynamoDBTable, aws_dynamodb_table.AHA-GlobalDynamoDBTable, ] } # EventBridge - Schedule to run lambda resource "aws_cloudwatch_event_rule" "AHA-LambdaSchedule-PrimaryRegion" { description = "Lambda trigger Event" event_bus_name = "default" state = "ENABLED" name = "AHA-LambdaSchedule-${random_string.resource_code.result}" schedule_expression = "rate(1 minute)" tags = { "Name" = "AHA-LambdaSchedule" } } resource "aws_cloudwatch_event_rule" "AHA-LambdaSchedule-SecondaryRegion" { description = "Lambda trigger Event" count = "${var.aha_secondary_region == "" ? 0 : 1}" provider = aws.secondary_region event_bus_name = "default" state = "ENABLED" name = "AHA-LambdaSchedule-${random_string.resource_code.result}" schedule_expression = "rate(1 minute)" tags = { "Name" = "AHA-LambdaSchedule" } } resource "aws_cloudwatch_event_target" "AHA-LambdaFunction-PrimaryRegion" { arn = aws_lambda_function.AHA-LambdaFunction-PrimaryRegion.arn rule = aws_cloudwatch_event_rule.AHA-LambdaSchedule-PrimaryRegion.name } resource "aws_cloudwatch_event_target" "AHA-LambdaFunction-SecondaryRegion" { count = "${var.aha_secondary_region == "" ? 
0 : 1}" provider = aws.secondary_region arn = aws_lambda_function.AHA-LambdaFunction-SecondaryRegion[0].arn rule = aws_cloudwatch_event_rule.AHA-LambdaSchedule-SecondaryRegion[0].name } resource "aws_lambda_permission" "AHA-LambdaSchedulePermission-PrimaryRegion" { action = "lambda:InvokeFunction" principal = "events.amazonaws.com" function_name = aws_lambda_function.AHA-LambdaFunction-PrimaryRegion.arn source_arn = aws_cloudwatch_event_rule.AHA-LambdaSchedule-PrimaryRegion.arn } resource "aws_lambda_permission" "AHA-LambdaSchedulePermission-SecondaryRegion" { count = "${var.aha_secondary_region == "" ? 0 : 1}" provider = aws.secondary_region action = "lambda:InvokeFunction" principal = "events.amazonaws.com" function_name = aws_lambda_function.AHA-LambdaFunction-SecondaryRegion[0].arn source_arn = aws_cloudwatch_event_rule.AHA-LambdaSchedule-SecondaryRegion[0].arn } ================================================ FILE: terraform/Terraform_DEPLOY_AHA/terraform.tfvars ================================================ # Input variables for Terraform_DEPLOY_AHA.tf (AHA Solution deploy using terraform) # # Customize Alerts/Notifications aha_primary_region="us-east-1" aha_secondary_region="" AWSOrganizationsEnabled="No" AWSHealthEventType="issue | accountNotification | scheduledChange" # Communication Channels - Slack/Microsoft Teams/Amazon Chime And/or EventBridge SlackWebhookURL="" MicrosoftTeamsWebhookURL="" AmazonChimeWebhookURL="" EventBusName="" # Email Setup - For Alerting via Email FromEmail="none@domain.com" ToEmail="none@domain.com" Subject="AWS Health Alert" # More Configurations - Optional # By default, AHA reports events affecting all AWS regions. # If you want to report on certain regions you can enter up to 10 in a comma separated format. EventSearchBack="1" Regions="all regions" ManagementAccountRoleArn="" ExcludeAccountIDs="" # Tags applied to all resources - using module provider. Update them per your requirement. 
default_tags = { Application = "AHA-Solution" Environment = "PROD" auto-delete = "no" } # commands to apply changes # terraform init # terraform plan # terraform apply ================================================ FILE: terraform/Terraform_MGMT_ROLE/Terraform_MGMT_ROLE.tf ================================================ # Deploy Cross-Account Role for PHD access # Parameters provider "aws" { region = var.aha_primary_region default_tags { tags = "${var.default_tags}" } } variable "aha_primary_region" { description = "Primary region where AHA solution will be deployed" type = string default = "us-east-1" } variable "default_tags" { description = "Tags used for the AWS resources created by this template" type = map default = { Application = "AHA-Solution" } } variable "OrgMemberAccountId" { type = string description = "AWS Account ID of the AWS Organizations Member Account that will run AWS Health Aware" validation { condition = length(var.OrgMemberAccountId) == 12 error_message = "The OrgMemberAccountId must be a valid AWS Account ID." 
} } # Random id generator resource "random_string" "resource_code" { length = 8 special = false upper = false } # aws_iam_role.AWSHealthAwareRoleForPHDEvents: resource "aws_iam_role" "AWSHealthAwareRoleForPHDEvents" { assume_role_policy = jsonencode( { Statement = [ { Action = "sts:AssumeRole" Effect = "Allow" Principal = { AWS = "arn:aws:iam::${var.OrgMemberAccountId}:root" } }, ] Version = "2012-10-17" } ) name = "AWSHealthAwareRoleForPHDEvents-${random_string.resource_code.result}" description = "Grants access to PHD event" path = "/" inline_policy { name = "AllowHealthCalls" policy = jsonencode( { Statement = [ { Action = [ "health:DescribeAffectedAccountsForOrganization", "health:DescribeAffectedEntitiesForOrganization", "health:DescribeEventDetailsForOrganization", "health:DescribeEventsForOrganization", "health:DescribeEventDetails", "health:DescribeEvents", "health:DescribeEventTypes", "health:DescribeAffectedEntities", ] Effect = "Allow" Resource = "*" }, ] } ) } inline_policy { name = "AllowsDescribeOrg" policy = jsonencode( { Statement = [ { Action = [ "organizations:ListAccounts", "organizations:ListAWSServiceAccessForOrganization", "organizations:DescribeAccount", ] Effect = "Allow" Resource = "*" }, ] } ) } } output "AWSHealthAwareRoleForPHDEventsArn" { value = aws_iam_role.AWSHealthAwareRoleForPHDEvents.arn } ================================================ FILE: terraform/Terraform_MGMT_ROLE/terraform.tfvars ================================================ # Region for IAM role creation - it's global service, so we will be just using one region. aha_primary_region="us-east-1" # Tags applied to all resources - using module provider. Update them per your requirement. default_tags = { Application = "AHA-Solution" Environment = "DEV" auto-delete = "no" }