Repository: BigDataBoutique/elasticsearch-cloud-deploy Branch: master Commit: ecc771a84e20 Files: 83 Total size: 157.2 KB Directory structure: gitextract_vjdmnds9/ ├── .gitignore ├── LICENSE ├── README.md ├── assets/ │ ├── ec2-role-trust-policy.json │ ├── elasticsearch.yml │ ├── node-init.json │ ├── s3-backup.json │ └── scripts/ │ ├── aws/ │ │ ├── autoattach-disk.sh │ │ ├── config-bootstrap-node.sh │ │ ├── config-cluster.sh │ │ └── config-es-discovery.sh │ ├── bootstrap.sh │ ├── client.sh │ ├── common/ │ │ ├── config-beats.sh │ │ ├── config-clients.sh │ │ ├── config-cluster.sh │ │ ├── config-es.sh │ │ └── env.sh │ ├── data.sh │ ├── gcp/ │ │ ├── autoattach-disk.sh │ │ ├── config-bootstrap-node.sh │ │ ├── config-cluster.sh │ │ ├── config-es-discovery.sh │ │ ├── config-es.sh │ │ └── env.sh │ ├── master.sh │ └── singlenode.sh ├── packer/ │ ├── README.md │ ├── elasticsearch7-node.packer.json │ ├── install-beats.sh │ ├── install-cloud-plugin.sh │ ├── install-cloudwatch-agent.sh │ ├── install-custom.sh │ ├── install-elasticsearch7.sh │ ├── install-kibana7.sh │ ├── kibana7-node.packer.json │ ├── update-machine.sh │ └── variables.json ├── templates/ │ ├── aws_user_data.sh │ ├── gcp_user_data.sh │ └── user_data.sh ├── terraform-aws/ │ ├── README.md │ ├── alb.tf │ ├── ami.tf │ ├── certs.tf │ ├── client.tf │ ├── datas-voters.tf │ ├── datas.tf │ ├── dev.tf │ ├── disks.tf │ ├── iam.tf │ ├── main.tf │ ├── masters.tf │ ├── outputs.tf │ ├── singlenode.tf │ ├── terraform.tfvars.example │ ├── variables.tf │ ├── versions.tf │ └── vpc.tf ├── terraform-azure/ │ ├── README.md │ ├── clients.tf │ ├── datas.tf │ ├── images.tf │ ├── lb.tf │ ├── main.tf │ ├── masters.tf │ ├── outputs.tf │ ├── single-node.tf │ └── variables.tf └── terraform-gcp/ ├── certs.tf ├── client.tf ├── datas-voters.tf ├── datas.tf ├── dev.tf ├── disks.tf ├── image.tf ├── lb.tf ├── main.tf ├── masters.tf ├── outputs.tf ├── singlenode.tf ├── terraform.tfvars.example └── variables.tf 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ terraform.tfstate terraform.tfvars .idea/ *.pem *.backup .terraform/ .gcp* cluster_bootstrap_state gcp-account.json *.iml id_rsa.pub packer-es-manifest.json packer-kb-manifest.json .terraform.lock.hcl plan.* ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Deploy Elasticsearch on the cloud easily This repository contains a set of tools and scripts to deploy an Elasticsearch cluster on the cloud, using best-practices and state of the art tooling. Need to monitor and optimize your cluster after setting it up? Consider using [Pulse](https://pulse.support/). ***Note:*** This branch supports Elasticsearch 7.x only. For other Elasticsearch versions see [feat-8x](https://github.com/BigDataBoutique/elasticsearch-cloud-deploy/tree/feat-8x), [elasticsearch-5.x](https://github.com/BigDataBoutique/elasticsearch-cloud-deploy/tree/elasticsearch-5.x) and [elasticsearch-6.x](https://github.com/BigDataBoutique/elasticsearch-cloud-deploy/tree/elasticsearch-6.x) branches. You need to use the latest version of Terraform and Packer for all features to work correctly. 
Features:

* Deployment of data and master nodes as separate nodes, as well as data-voters
* Client node with Kibana and authenticated Elasticsearch access
* Single node cluster support
* DNS and load-balancing access to client nodes
* Sealed from external access, only accessible via password-protected external-facing client nodes
* AWS deployment support (under `terraform-aws`)
* Google Cloud Platform deployment (under `terraform-gcp`)
* Packer scripts for both GCP and AWS (under `packer`)
* Azure deployment - not maintained at the moment (under `terraform-azure`)

## Usage

Clone this repo to work locally. You might want to fork it in case you need to apply some additional configurations or commit changes to the variables file.

Create images with Packer (see `packer` folder in this repo), and then go into the terraform folder and run `terraform plan`. See README files in each respective folder.

## tfstate

Once you run `terraform apply` on any of the terraform folders in this repo, a file `terraform.tfstate` will be created. This file contains the mapping between your cloud elements and the terraform configuration. Make sure to keep this file safe.

See [this guide](https://blog.gruntwork.io/how-to-manage-terraform-state-28f5697e68fa#.fbb2nalw6) for a discussion on tfstate management and locking between team members.
================================================ FILE: assets/ec2-role-trust-policy.json ================================================ { "Version": "2008-10-17", "Statement": [ { "Action": "sts:AssumeRole", "Principal": { "Service": ["ec2.amazonaws.com"] }, "Effect": "Allow" } ] } ================================================ FILE: assets/elasticsearch.yml ================================================ bootstrap.memory_lock: true node.name: ${HOSTNAME} action.destructive_requires_name: true indices.fielddata.cache.size: 1% # default is unbounded ================================================ FILE: assets/node-init.json ================================================ { "Statement": [ { "Action": [ "ec2:DescribeInstances", "ec2:DescribeVolumes", "ec2:AttachVolume", "ec2:DescribeTags", "autoscaling:DescribeAutoScalingGroups" ], "Effect": "Allow", "Resource": [ "*" ] } ], "Version": "2012-10-17" } ================================================ FILE: assets/s3-backup.json ================================================ { "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::${s3_backup_bucket}"] }, { "Effect": "Allow", "Action": [ "s3:PutObject", "s3:GetObject", "s3:DeleteObject" ], "Resource": ["arn:aws:s3:::${s3_backup_bucket}/*"] } ] } ================================================ FILE: assets/scripts/aws/autoattach-disk.sh ================================================ # Required variables # - aws_region # - es_cluster # - elasticsearch_data_dir AV_ZONE="$(ec2metadata --availability-zone)" INSTANCE_ROLE="$(aws ec2 describe-tags --region $aws_region --filters Name=resource-id,Values=$(ec2metadata --instance-id) | jq -r '.Tags[] | select(.Key == "Role") | .Value')" echo "AV_ZONE: $AV_ZONE" echo "INSTANCE_ROLE: $INSTANCE_ROLE" while true; do UNATTACHED_VOLUME_ID="$(aws ec2 describe-volumes --region $aws_region --filters Name=tag:ClusterName,Values=$es_cluster 
Name=tag:AutoAttachGroup,Values=$INSTANCE_ROLE Name=availability-zone,Values=$AV_ZONE | jq -r '.Volumes[] | select(.Attachments | length == 0) | .VolumeId' | shuf -n 1)" echo "UNATTACHED_VOLUME_ID: $UNATTACHED_VOLUME_ID" aws ec2 attach-volume --device "/dev/xvdh" --instance-id=$(ec2metadata --instance-id) --volume-id $UNATTACHED_VOLUME_ID --region "$aws_region" if [ "$?" != "0" ]; then sleep 10 continue fi sleep 30 ATTACHMENTS_COUNT="$(aws ec2 describe-volumes --region $aws_region --filters Name=volume-id,Values=$UNATTACHED_VOLUME_ID | jq -r '.Volumes[0].Attachments | length')" if [ "$ATTACHMENTS_COUNT" != "0" ]; then break; fi done echo 'Waiting for 30 seconds for the disk to become mountable...' sleep 30 sudo mkdir -p $elasticsearch_data_dir export DEVICE_NAME=$(lsblk -ip | tail -n +2 | awk '{print $1 " " ($7? "MOUNTEDPART" : "") }' | sed ':a;N;$!ba;s/\n`/ /g' | grep -v MOUNTEDPART) if sudo mount -o defaults -t ext4 $DEVICE_NAME $elasticsearch_data_dir; then echo 'Successfully mounted existing disk' else echo 'Trying to mount a fresh disk' sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard $DEVICE_NAME sudo mount -o defaults -t ext4 $DEVICE_NAME $elasticsearch_data_dir && echo 'Successfully mounted a fresh disk' fi echo "$DEVICE_NAME $elasticsearch_data_dir ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab sudo chown -R elasticsearch:elasticsearch $elasticsearch_data_dir ================================================ FILE: assets/scripts/aws/config-bootstrap-node.sh ================================================ # Required variables # - aws_region # - es_environment # - masters_count while true do echo "Fetching masters..." 
MASTER_INSTANCES="$(aws ec2 describe-instances --region=$aws_region --filters Name=instance-state-name,Values=running Name=tag:Role,Values=master,data-voters Name=tag:Cluster,Values=$es_environment | jq -r '.Reservations | map(.Instances[].InstanceId) | .[]' | sort)" COUNT=`echo "$MASTER_INSTANCES" | wc -l` if [ "$COUNT" -eq "$masters_count" ]; then echo "Masters count is correct... Rechecking in 60 sec" sleep 60 MASTER_INSTANCES_RECHECK="$(aws ec2 describe-instances --region=$aws_region --filters Name=instance-state-name,Values=running Name=tag:Role,Values=master,data-voters Name=tag:Cluster,Values=$es_environment | jq -r '.Reservations | map(.Instances[].InstanceId) | .[]' | sort)" if [ "$MASTER_INSTANCES" = "$MASTER_INSTANCES_RECHECK" ]; then break fi fi sleep 5 done echo "Fetched masters" MASTER_IPS="$(aws ec2 describe-instances --region $aws_region --instance-ids $MASTER_INSTANCES | jq -r '.Reservations[].Instances[].PrivateIpAddress')" SEED_HOSTS=`echo "$MASTER_IPS" | paste -sd ',' -` INITIAL_MASTER_NODES=`echo "$MASTER_IPS" | awk '{print "ip-" $0}' | tr . 
- | paste -sd ',' -`
echo "discovery.seed_hosts: $SEED_HOSTS" >>/etc/elasticsearch/elasticsearch.yml
echo "cluster.initial_master_nodes: $(hostname),$INITIAL_MASTER_NODES" >>/etc/elasticsearch/elasticsearch.yml

================================================
FILE: assets/scripts/aws/config-cluster.sh
================================================
# Required variables
# - security_enabled
# - client_pwd
# - s3_backup_bucket
# - ES_HOST
# - CURL_AUTH

# Registers the S3 snapshot repository, but only when a backup bucket is configured.
if [ "${s3_backup_bucket}" != "" ]; then
    curl $CURL_AUTH -k -X PUT "$ES_HOST/_snapshot/s3_repo" -H 'Content-Type: application/json' -d'
    {
      "type": "s3",
      "settings": {
        "bucket": "'"$s3_backup_bucket"'"
      }
    }
    '
    sleep 1
    curl $CURL_AUTH -k -X POST "$ES_HOST/_nodes/reload_secure_settings"
fi

================================================
FILE: assets/scripts/aws/config-es-discovery.sh
================================================
# Required variables
# - aws_region
# - security_groups
# - es_environment

# Appends the EC2 discovery settings to elasticsearch.yml.
# The heredoc delimiter is unquoted on purpose: $security_groups, $es_environment
# and $aws_region must be expanded by the shell before the text is written.
cat <<EOF >>/etc/elasticsearch/elasticsearch.yml

network.host: _ec2:privateIpv4_,localhost
plugin.mandatory: discovery-ec2
cloud.node.auto_attributes: true
cluster.routing.allocation.awareness.attributes: aws_availability_zone
discovery:
    seed_providers: ec2
    ec2.groups: $security_groups
    ec2.host_type: private_ip
    ec2.tag.Cluster: $es_environment
    ec2.protocol: http # no need in HTTPS for internal AWS calls

    # manually set the endpoint because of auto-discovery issues
    # https://github.com/elastic/elasticsearch/issues/27464
    ec2.endpoint: ec2.$aws_region.amazonaws.com
EOF

================================================
FILE: assets/scripts/bootstrap.sh
================================================
#!/bin/bash

set +e

. /opt/cloud-deploy-scripts/common/env.sh
.
/opt/cloud-deploy-scripts/$cloud_provider/env.sh /opt/cloud-deploy-scripts/common/config-es.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es.sh /opt/cloud-deploy-scripts/$cloud_provider/config-bootstrap-node.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es-discovery.sh cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml node.roles: [ master ] EOF # add bootstrap.password to the keystore, so that config-cluster scripts can run # only done on bootstrap and singlenode nodes, before starting ES if [ "${security_enabled}" == "true" ]; then echo "${client_pwd}" | /usr/share/elasticsearch/bin/elasticsearch-keystore add --stdin bootstrap.password fi # Start Elasticsearch systemctl daemon-reload systemctl enable elasticsearch.service systemctl start elasticsearch.service set -e /opt/cloud-deploy-scripts/common/config-cluster.sh set +e /opt/cloud-deploy-scripts/$cloud_provider/config-cluster.sh while true do HEALTH="$(curl $CURL_AUTH --silent -k "$ES_HOST/_cluster/health" | jq -r '.status')" if [ "$HEALTH" == "green" ]; then break fi sleep 5 done /opt/cloud-deploy-scripts/$cloud_provider/config-cluster.sh if [ "$auto_shut_down_bootstrap_node" == "true" ] then if [ "$cloud_provider" == "aws" ]; then shutdown -h now elif [ "$cloud_provider" == "gcp" ]; then gcloud compute instances delete $HOSTNAME --zone $GCP_ZONE --quiet fi fi ================================================ FILE: assets/scripts/client.sh ================================================ #!/bin/bash set +e . /opt/cloud-deploy-scripts/common/env.sh . 
/opt/cloud-deploy-scripts/$cloud_provider/env.sh

# It is required to bind to all interfaces for load balancer on GCP to work
if [ "$cloud_provider" == "gcp" ]; then
    export BIND_TO_ALL="true"
fi

/opt/cloud-deploy-scripts/common/config-es.sh
/opt/cloud-deploy-scripts/common/config-beats.sh
/opt/cloud-deploy-scripts/$cloud_provider/config-es.sh
/opt/cloud-deploy-scripts/$cloud_provider/config-es-discovery.sh

cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml
node.roles: [ ingest, remote_cluster_client ]
EOF

# Start Elasticsearch
systemctl daemon-reload
systemctl enable elasticsearch.service
systemctl start elasticsearch.service

/opt/cloud-deploy-scripts/common/config-clients.sh

================================================
FILE: assets/scripts/common/config-beats.sh
================================================
# Required variables
# - filebeat_monitoring_host

# Writes the Filebeat elasticsearch module config and filebeat.yml, but only
# when a monitoring host is configured. Both files are overwritten (single '>').
if [ "${filebeat_monitoring_host}" != "" ]; then
    cat <<EOF >/etc/filebeat/modules.d/elasticsearch.yml
# Module: elasticsearch
# Docs: https://www.elastic.co/guide/en/beats/filebeat/7.6/filebeat-module-elasticsearch.html

- module: elasticsearch
  server:
    enabled: true
  gc:
    enabled: false
  audit:
    enabled: false
  slowlog:
    enabled: true
  deprecation:
    enabled: true
EOF

    # Unquoted delimiter: $filebeat_monitoring_host is expanded by the shell.
    cat <<EOF >/etc/filebeat/filebeat.yml
filebeat.config.modules.path: /etc/filebeat/modules.d/*.yml
output.elasticsearch:
  hosts: ["$filebeat_monitoring_host"]
setup.ilm.enabled: false
EOF

    systemctl daemon-reload
fi

================================================
FILE: assets/scripts/common/config-clients.sh
================================================
# Required variables
# - client_user
# - client_pwd
# - security_enabled
# - monitoring_enabled
# - BIND_TO_ALL
# - ES_HOST
# - CURL_AUTH
# - security_encryption_key
# - reporting_encryption_key

# Setup x-pack security also on Kibana configs where applicable
if [ -f "/etc/kibana/kibana.yml" ]; then
    if [ "$BIND_TO_ALL" == "true" ]; then
        echo "server.host: 0.0.0.0" | sudo tee -a /etc/kibana/kibana.yml
    else
        echo "server.host: $(hostname -i)" | sudo tee -a /etc/kibana/kibana.yml
    fi

    # Encryption keys are optional; each is stored in the Kibana keystore only when provided.
    if [ ! -z "$security_encryption_key" ]; then
        echo "$security_encryption_key" | /usr/share/kibana/bin/kibana-keystore add --stdin xpack.security.encryptionKey
    fi
    if [ ! -z "$reporting_encryption_key" ]; then
        echo "$reporting_encryption_key" | /usr/share/kibana/bin/kibana-keystore add --stdin xpack.reporting.encryptionKey
    fi

    echo "xpack.security.enabled: $security_enabled" | sudo tee -a /etc/kibana/kibana.yml
    echo "xpack.monitoring.enabled: $monitoring_enabled" | sudo tee -a /etc/kibana/kibana.yml
    if [ "$security_enabled" == "true" ]; then
        echo "elasticsearch.username: \"kibana\"" | sudo tee -a /etc/kibana/kibana.yml
        echo "${client_pwd}" | /usr/share/kibana/bin/kibana-keystore add --stdin elasticsearch.password
    fi

    systemctl daemon-reload
    systemctl enable kibana.service
    sudo service kibana restart
fi

================================================
FILE: assets/scripts/common/config-cluster.sh
================================================
# Required variables
# - security_enabled
# - client_pwd
# - ES_HOST
# - CURL_AUTH

# Poll until the cluster reports green AND at least one node whose role string
# contains d, h or c is listed by _cat/nodes.
i=1
while true
do
    echo "Checking cluster health, attempt $i"
    HEALTH="$(curl $CURL_AUTH --silent -k "$ES_HOST/_cluster/health" | jq -r '.status')"
    DATA_NODE_COUNT="$(curl $CURL_AUTH --silent -k "$ES_HOST/_cat/nodes?h=node.role" | grep 'd\|h\|c' | wc -l)"
    if [ "$HEALTH" == "green" ] && [ "$DATA_NODE_COUNT" != "0" ]; then
        break
    fi
    sleep 5
    i=$((i+1))
done

# if any of the below fail, bootstrap failed - exit on error
# NOTE(review): curl exits 0 on HTTP 4xx/5xx unless --fail is passed, so with
# the flags used here `set -e` only catches transport-level failures.
set -e

# Set the same password ($client_pwd) on the built-in users.
if [ "$security_enabled" == "true" ]; then
    curl $CURL_AUTH \
        -X PUT -H 'Content-Type: application/json' -k \
        "$ES_HOST/_security/user/elastic/_password" -d '{ "password": "'"$client_pwd"'" }'

    curl $CURL_AUTH \
        -X PUT -H 'Content-Type: application/json' -k \
        "$ES_HOST/_security/user/kibana/_password" -d '{ "password": "'"$client_pwd"'" }'

    curl $CURL_AUTH \
        -X PUT -H 'Content-Type: application/json' -k \
        "$ES_HOST/_security/user/logstash_system/_password" -d '{ "password": "'"$client_pwd"'" }'

    curl $CURL_AUTH \
        -X PUT -H 'Content-Type: application/json' -k \
        "$ES_HOST/_security/user/remote_monitoring_user/_password" -d '{ "password": "'"$client_pwd"'" }'
fi

================================================
FILE: assets/scripts/common/config-es.sh
================================================
# Required variables
# - es_cluster
# - monitoring_enabled
# - elasticsearch_data_dir
# - elasticsearch_logs_dir
# - security_enabled
# - ca_cert
# - node_cert
# - node_key
# - xpack_monitoring_host
# - heap_size
# - use_g1gc
# - log_size
# - log_level

# Configure elasticsearch
# (unquoted heredoc delimiter: the shell expands the variables below)
cat <<EOF >>/etc/elasticsearch/elasticsearch.yml
cluster.name: $es_cluster

xpack.monitoring.enabled: $monitoring_enabled
xpack.monitoring.collection.enabled: $monitoring_enabled
path.data: $elasticsearch_data_dir
path.logs: $elasticsearch_logs_dir
xpack.security.enabled: $security_enabled
EOF

# Configure log4j retention and level
# These overwrite fixed line numbers (21 and 55) of log4j2.properties.
sudo sed -i "21 s,.*,appender.rolling.policies.size.size=${log_size}MB," /etc/elasticsearch/log4j2.properties
sudo sed -i "55 s,.*,rootLogger.level = $log_level," /etc/elasticsearch/log4j2.properties

# If security enabled
if [ "$security_enabled" == "true" ]; then
    mkdir -p /etc/elasticsearch/config/certs/
    echo -n "$ca_cert" > /etc/elasticsearch/config/certs/ca.crt
    echo -n "$node_cert" > /etc/elasticsearch/config/certs/tls.crt
    echo -n "$node_key" > /etc/elasticsearch/config/certs/tls.key

    cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml
xpack.security.transport.ssl.enabled: true
xpack.security.transport.ssl.verification_mode: "certificate"
xpack.security.transport.ssl.key: "/etc/elasticsearch/config/certs/tls.key"
xpack.security.transport.ssl.certificate: "/etc/elasticsearch/config/certs/tls.crt"
xpack.security.transport.ssl.certificate_authorities: "/etc/elasticsearch/config/certs/ca.crt"
EOF
fi

# Any value other than "self" adds an HTTP monitoring exporter pointing at that host.
if [ "$xpack_monitoring_host" != "self" ]; then
    cat <<EOF >>/etc/elasticsearch/elasticsearch.yml
xpack.monitoring.exporters.xpack_remote:
  type: http
  host: "$xpack_monitoring_host"
EOF
fi

cat <<'EOF' >>/etc/security/limits.conf

# allow user 'elasticsearch' mlockall
elasticsearch soft memlock unlimited
elasticsearch hard memlock unlimited
EOF

sudo mkdir -p /etc/systemd/system/elasticsearch.service.d
cat <<'EOF' >>/etc/systemd/system/elasticsearch.service.d/override.conf
[Service]
LimitMEMLOCK=infinity
Restart=always
RestartSec=10
EOF

# Setup heap size and memory locking
sudo sed -i 's/#MAX_LOCKED_MEMORY=.*$/MAX_LOCKED_MEMORY=unlimited/' /etc/init.d/elasticsearch
sudo sed -i 's/#MAX_LOCKED_MEMORY=.*$/MAX_LOCKED_MEMORY=unlimited/' /etc/default/elasticsearch

# Set java heap size
if [ -d "/etc/elasticsearch/jvm.options.d" ]
then
    # For versions 7.11 and newer, heap settings are saved in a dedicated file in jvm.options.d
    cat <<EOF >>/etc/elasticsearch/jvm.options.d/heap.options
-Xms${heap_size}
-Xmx${heap_size}
EOF
    # Mitigate log4j lookup exploit
    cat <<EOF >>/etc/elasticsearch/jvm.options.d/log4j.options
-Dlog4j2.formatMsgNoLookups=true
-XX:-HeapDumpOnOutOfMemoryError
EOF
else
    # Pre 7.11
    sudo sed -i "s/^-Xms.*/-Xms$heap_size/" /etc/elasticsearch/jvm.options
    sudo sed -i "s/^-Xmx.*/-Xmx$heap_size/" /etc/elasticsearch/jvm.options
    echo "-Dlog4j2.formatMsgNoLookups=true" >> /etc/elasticsearch/jvm.options
    # Disable heap dumps
    echo "-XX:-HeapDumpOnOutOfMemoryError" | sudo tee -a /etc/elasticsearch/jvm.options
fi

# Setup GC
if [ "$use_g1gc" = "true" ]; then
    sudo sed -i -re 's/# ([0-9]+-[0-9]+:-XX:-UseConcMarkSweepGC)/\1/ig' /etc/elasticsearch/jvm.options
    sudo sed -i -re 's/# ([0-9]+-[0-9]+:-XX:-UseCMSInitiatingOccupancyOnly)/\1/ig' /etc/elasticsearch/jvm.options
    sudo sed -i 's/[0-9]\+-:-XX:+UseG1GC/10-:-XX:+UseG1GC/ig' /etc/elasticsearch/jvm.options
    sudo sed -i 's/[0-9]\+-:-XX:G1ReservePercent/10-:-XX:G1ReservePercent/ig' /etc/elasticsearch/jvm.options
    sudo sed -i 's/[0-9]\+-:-XX:InitiatingHeapOccupancyPercent/10-:-XX:InitiatingHeapOccupancyPercent/ig' /etc/elasticsearch/jvm.options
fi

# Create log and data dirs
sudo mkdir -p
$elasticsearch_logs_dir sudo mkdir -p $elasticsearch_data_dir sudo chown -R elasticsearch:elasticsearch $elasticsearch_logs_dir sudo chown -R elasticsearch:elasticsearch $elasticsearch_data_dir ================================================ FILE: assets/scripts/common/env.sh ================================================ export ES_HOST="http://localhost:9200" if [ "$https_enabled" == "true" ]; then export ES_HOST="https://localhost:9200" fi export CURL_AUTH="" if [ "$security_enabled" == "true" ]; then export CURL_AUTH=" --user elastic:$client_pwd " fi ================================================ FILE: assets/scripts/data.sh ================================================ #!/bin/bash set +e . /opt/cloud-deploy-scripts/common/env.sh . /opt/cloud-deploy-scripts/$cloud_provider/env.sh /opt/cloud-deploy-scripts/$cloud_provider/autoattach-disk.sh /opt/cloud-deploy-scripts/common/config-es.sh /opt/cloud-deploy-scripts/common/config-beats.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es-discovery.sh if [ "$is_voting_only" == "true" ] then cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml node.roles: [ data_hot, data_content, ingest, transform, master, voting_only, remote_cluster_client] EOF else cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml node.roles: [ data_hot, data_content, ingest, transform, remote_cluster_client ] EOF fi # Start Elasticsearch systemctl daemon-reload systemctl enable elasticsearch.service systemctl start elasticsearch.service ================================================ FILE: assets/scripts/gcp/autoattach-disk.sh ================================================ # Required variables # - GCP_ZONE # - es_cluster # - elasticsearch_data_dir while true; do INSTANCE_ROLE="$(gcloud compute instances describe $HOSTNAME --zone $GCP_ZONE --format json | jq -r ".labels.role")" echo "INSTANCE_ROLE: $INSTANCE_ROLE" UNATTACHED_VOLUME_ID="$(gcloud compute disks list 
--filter="zone=$GCP_ZONE AND labels.cluster-name=$es_cluster AND labels.auto-attach-group=$INSTANCE_ROLE" --format json | jq -r '.[] | .name' | shuf -n 1)"
    echo "UNATTACHED_VOLUME_ID: $UNATTACHED_VOLUME_ID"

    # NOTE(review): the disk filter above selects by zone and labels only; a disk
    # that is already attached elsewhere is presumably rejected by attach-disk,
    # in which case the loop picks another random candidate after 30 seconds.
    gcloud compute instances attach-disk $HOSTNAME --disk $UNATTACHED_VOLUME_ID --device-name "espersistent" --zone $GCP_ZONE
    if [ "$?" == "0" ]; then
        break
    fi
    sleep 30
done

echo 'Waiting for 30 seconds for the disk to become mountable...'
sleep 30

sudo mkdir -p $elasticsearch_data_dir

# Pick the block device that has no mounted partition: every lsblk row is
# tagged with MOUNTEDPART when its 7th column (mount point) is set, child rows
# are folded onto their parent's line, and tagged lines are dropped.
export DEVICE_NAME=$(lsblk -ip | tail -n +2 | grep -v " rom" | awk '{print $1 " " ($7? "MOUNTEDPART" : "") }' | sed ':a;N;$!ba;s/\n`/ /g' | sed ':a;N;$!ba;s/\n|-/ /g' | grep -v MOUNTEDPART)

# Try to mount as-is first so an existing filesystem is reused; format only
# when that mount fails.
if sudo mount -o defaults -t ext4 $DEVICE_NAME $elasticsearch_data_dir; then
    echo 'Successfully mounted existing disk'
else
    echo 'Trying to mount a fresh disk'
    sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard $DEVICE_NAME
    sudo mount -o defaults -t ext4 $DEVICE_NAME $elasticsearch_data_dir && echo 'Successfully mounted a fresh disk'
fi
echo "$DEVICE_NAME $elasticsearch_data_dir ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab
sudo chown -R elasticsearch:elasticsearch $elasticsearch_data_dir

================================================
FILE: assets/scripts/gcp/config-bootstrap-node.sh
================================================
# Waits until the expected number of master-eligible instances is listed by
# gcloud (and the list is unchanged 60 seconds later), then appends
# discovery.seed_hosts and cluster.initial_master_nodes to elasticsearch.yml.
#
# Required variables
# - es_environment
# - masters_count

i=1
while true
do
    echo "Fetching masters..."
    MASTER_INSTANCES="$(gcloud compute instances list --filter="labels.cluster=$es_environment AND labels.role=(master OR data-voters)" --format 'get(networkInterfaces[0].networkIP)' | sort)"

    # Count non-empty lines only. `echo "" | wc -l` reports 1, so an empty
    # result used to be counted as one instance and could satisfy the check
    # when masters_count is 1. grep -c exits non-zero on a zero count, hence
    # the `|| true`.
    COUNT=$(printf '%s\n' "$MASTER_INSTANCES" | grep -c . || true)
    echo "Found $COUNT instances, expecting $masters_count, attempt $i"
    if [ "$COUNT" -eq "$masters_count" ]; then
        echo "Masters count is correct... Rechecking in 60 sec"
        sleep 60
        # Accept the list only if it is identical on a second query
        MASTER_INSTANCES_RECHECK="$(gcloud compute instances list --filter="labels.cluster=$es_environment AND labels.role=(master OR data-voters)" --format 'get(networkInterfaces[0].networkIP)' | sort)"

        if [ "$MASTER_INSTANCES" = "$MASTER_INSTANCES_RECHECK" ]; then
            break
        fi
    fi

    sleep 5
    i=$((i+1))
done

echo "Fetched masters"

MASTER_IPS="$MASTER_INSTANCES"
# One IP per line -> comma-separated list
SEED_HOSTS=`echo "$MASTER_IPS" | paste -sd ',' -`

echo "discovery.seed_hosts: $SEED_HOSTS" >>/etc/elasticsearch/elasticsearch.yml
echo "cluster.initial_master_nodes: $(hostname -I),$SEED_HOSTS" >>/etc/elasticsearch/elasticsearch.yml

================================================
FILE: assets/scripts/gcp/config-cluster.sh
================================================
# Registers the GCS snapshot repository "gcs_repo" on the local node when a
# bucket name is configured.
#
# Required variables
# - security_enabled
# - client_pwd
# - gcs_snapshots_bucket
# - ES_HOST
# - CURL_AUTH

if [ "${gcs_snapshots_bucket}" != "" ]; then
    # $CURL_AUTH is intentionally unquoted: it holds several curl arguments
    curl $CURL_AUTH -X PUT "$ES_HOST/_snapshot/gcs_repo" -H 'Content-Type: application/json' -d'
    {
      "type": "gcs",
      "settings": {
        "bucket": "'$gcs_snapshots_bucket'"
      }
    }
    '
fi

================================================
FILE: assets/scripts/gcp/config-es-discovery.sh
================================================
# Appends the GCE discovery settings and the network.host binding to
# elasticsearch.yml.
#
# Required variables
# - gcp_zones
# - gcp_project_id
# - BIND_TO_ALL

# Unquoted delimiter: ${gcp_project_id} and ${gcp_zones} are expanded by the shell
cat <<EOF >>/etc/elasticsearch/elasticsearch.yml
plugin.mandatory: discovery-gce
cloud.gce.project_id: ${gcp_project_id}
cloud.gce.zone: ${gcp_zones}
discovery.seed_providers: gce
EOF

if [ "$BIND_TO_ALL" == "true" ]; then
    echo "network.host: 0.0.0.0" >> /etc/elasticsearch/elasticsearch.yml
else
    echo "network.host: _gce_,_gce:hostname_,localhost" >> /etc/elasticsearch/elasticsearch.yml
fi

================================================
FILE: assets/scripts/gcp/config-es.sh
================================================
if [ "${gcs_snapshots_bucket}" != "" ]; then
    echo "$gcs_service_account_key" | base64 -d > /tmp/gcs-snapshots-service-account.json
    /usr/share/elasticsearch/bin/elasticsearch-keystore
add-file gcs.client.default.credentials_file /tmp/gcs-snapshots-service-account.json rm /tmp/gcs-snapshots-service-account.json fi ================================================ FILE: assets/scripts/gcp/env.sh ================================================ # gcloud cli sometimes fails if you use it right after the instance has started up # adding a retry for that case while true; do export GCP_ZONE="$(gcloud compute instances list --filter="name=('"$HOSTNAME"')" --format "value(zone)")" if [ "$GCP_ZONE" != "" ]; then break fi echo "Failed to detect GCP_ZONE. Retrying in 5 seconds..." sleep 5 done ================================================ FILE: assets/scripts/master.sh ================================================ #!/bin/bash set +e . /opt/cloud-deploy-scripts/common/env.sh . /opt/cloud-deploy-scripts/$cloud_provider/env.sh /opt/cloud-deploy-scripts/$cloud_provider/autoattach-disk.sh /opt/cloud-deploy-scripts/common/config-es.sh /opt/cloud-deploy-scripts/common/config-beats.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es-discovery.sh cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml node.roles: [ master ] EOF # Start Elasticsearch systemctl daemon-reload systemctl enable elasticsearch.service systemctl start elasticsearch.service ================================================ FILE: assets/scripts/singlenode.sh ================================================ #!/bin/bash set +e . /opt/cloud-deploy-scripts/common/env.sh . 
/opt/cloud-deploy-scripts/$cloud_provider/env.sh # It is required to bind to all interfaces for load balancer on GCP to work if [ "$cloud_provider" == "gcp" ]; then export BIND_TO_ALL="true" fi /opt/cloud-deploy-scripts/$cloud_provider/autoattach-disk.sh /opt/cloud-deploy-scripts/common/config-es.sh /opt/cloud-deploy-scripts/common/config-beats.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es.sh /opt/cloud-deploy-scripts/$cloud_provider/config-es-discovery.sh cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml node.roles: [ data_hot, data_content, ingest, transform, master, remote_cluster_client ] discovery.type: single-node EOF /opt/cloud-deploy-scripts/common/config-clients.sh # add bootstrap.password to the keystore, so that config-cluster scripts can run # only done on bootstrap and singlenode nodes, before starting ES if [ "${security_enabled}" == "true" ]; then echo "${client_pwd}" | /usr/share/elasticsearch/bin/elasticsearch-keystore add --stdin bootstrap.password fi # Start Elasticsearch systemctl daemon-reload systemctl enable elasticsearch.service systemctl start elasticsearch.service /opt/cloud-deploy-scripts/common/config-cluster.sh /opt/cloud-deploy-scripts/$cloud_provider/config-cluster.sh ================================================ FILE: packer/README.md ================================================ # Elasticsearch and Kibana machine images This Packer configuration will generate Ubuntu images with Elasticsearch, Kibana and other important tools for deploying and managing Elasticsearch clusters on the cloud. The output of running Packer here would be two machine images, as below: * elasticsearch node image, containing latest Elasticsearch installed (latest version 7.x) and configured with best-practices. * kibana node image, based on the elasticsearch node image, and with Kibana (7.x, latest). ## On Amazon Web Services (AWS) Using the AWS builder will create the two images and store them as AMIs. 
As a convention, the Packer builders use a dedicated IAM role, which you will need to have present.

```bash
aws iam create-role --role-name packer --assume-role-policy-document '{
  "Version": "2012-10-17",
  "Statement": {
    "Effect": "Allow",
    "Principal": {"Service": "ec2.amazonaws.com"},
    "Action": "sts:AssumeRole",
    "Sid": ""
  }
}'
```

The response will look something like this:

```json
{
    "Role": {
        "AssumeRolePolicyDocument": {
            "Version": "2012-10-17",
            "Statement": {
                "Action": "sts:AssumeRole",
                "Effect": "Allow",
                "Principal": {
                    "Service": "ec2.amazonaws.com"
                }
            }
        },
        "RoleId": "AROAJ7Q2L7NZJHZBB6JKY",
        "CreateDate": "2016-12-16T13:22:47.254Z",
        "RoleName": "packer",
        "Path": "/",
        "Arn": "arn:aws:iam::611111111117:role/packer"
    }
}
```

Follow up by executing the following:

```bash
aws iam create-instance-profile --instance-profile-name packer
aws iam add-role-to-instance-profile --instance-profile-name packer --role-name packer
```

By default, the AWS builder will pick a subnet from the default VPC for running the builder instance. That subnet must have public IP auto-assignment enabled. Otherwise, Packer won't be able to make an SSH connection to the instance and will hang on `Waiting for SSH to become available...`

If you don't want to enable public IP auto-assignment on your default VPC subnets, you can explicitly set the subnet by setting the `vpc_id` and `subnet_id` keys in the `amazon-ebs` builder definitions of the *.packer.json files.

## On Microsoft Azure

Before running Packer for the first time you will need to do a one-time initial setup.

Use PowerShell, and log in to AzureRm. See here for more details: https://docs.microsoft.com/en-us/powershell/azure/authenticate-azureps. Once logged in, take note of the subscription and tenant IDs which will be printed out. Alternatively, you can retrieve them by running `Get-AzureRmSubscription` once logged in.
```Powershell $rgName = "packer-elasticsearch-images" $location = "East US" New-AzureRmResourceGroup -Name $rgName -Location $location $Password = ([char[]]([char]33..[char]95) + ([char[]]([char]97..[char]126)) + 0..9 | sort {Get-Random})[0..8] -join '' "Password: " + $Password $sp = New-AzureRmADServicePrincipal -DisplayName "Azure Packer IKF" -Password $Password New-AzureRmRoleAssignment -RoleDefinitionName Contributor -ServicePrincipalName $sp.ApplicationId $sp.ApplicationId ``` Note the resource group name, location, password, sp.ApplicationId as used in the script and emitted as output and update `variables.json`. To learn more about using Packer on Azure see https://docs.microsoft.com/en-us/azure/virtual-machines/windows/build-image-with-packer Similarly, using the Azure CLI is going to look something like below: ```bash export rgName=packer-elasticsearch-images az group create -n ${rgName} -l eastus az ad sp create-for-rbac --query "{ client_id: appId, client_secret: password, tenant_id: tenant }" # outputs client_id, client_secret and tenant_id az account show --query "{ subscription_id: id }" # outputs subscription_id ``` ## Building Install packer. https://developer.hashicorp.com/packer/tutorials/docker-get-started/get-started-install-cli Alternatively, install pkenv which allows better control over the installed version, and then install packer. https://github.com/iamhsa/pkenv Install the relevant plugin by running one of the following: ``` packer plugins install github.com/hashicorp/amazon packer plugins install github.com/hashicorp/googlecompute packer plugins install github.com/hashicorp/azure ``` Building the AMIs is done using the following commands: ```bash packer build -only=aws -var-file=variables.json elasticsearch7-node.packer.json packer build -only=aws -var-file=variables.json kibana7-node.packer.json ``` Replace the `-only` parameter to `azure` to build images for Azure instead of AWS. 
For creating the Kibana image in azure, make sure you update "azure_elasticsearch_image_name" in variables.json. You can see the value in the output for the creation of the Elasticsearch image. ================================================ FILE: packer/elasticsearch7-node.packer.json ================================================ { "description": "Elasticsearch Image", "builders": [ { "name": "aws", "type": "amazon-ebs", "ami_name": "elasticsearch7-{{isotime | clean_resource_name}}", "availability_zone": "{{user `aws_az`}}", "iam_instance_profile": "packer", "instance_type": "t2.micro", "region": "{{user `aws_region`}}", "run_tags": { "role": "packer" }, "source_ami_filter": { "filters": { "virtualization-type": "hvm", "name": "*ubuntu-jammy-22.04-amd64-server-*", "root-device-type": "ebs" }, "owners": ["099720109477"], "most_recent": true }, "ssh_timeout": "10m", "ssh_username": "ubuntu", "tags": { "ImageType": "elasticsearch7-packer-image" } }, { "name": "azure", "type": "azure-arm", "client_id": "{{user `azure_client_id`}}", "client_secret": "{{user `azure_client_secret`}}", "tenant_id": "{{user `azure_tenant_id`}}", "subscription_id": "{{user `azure_subscription_id`}}", "managed_image_resource_group_name": "{{user `azure_resource_group_name`}}", "managed_image_name": "elasticsearch7-{{isotime \"2006-01-02T030405\"}}", "os_type": "Linux", "image_publisher": "Canonical", "image_offer": "UbuntuServer", "image_sku": "18.04-LTS", "location": "{{user `azure_location`}}", "vm_size": "Standard_DS2_v2" }, { "name": "gcp", "type": "googlecompute", "account_file": "{{user `gcp_account_file`}}", "project_id": "{{user `gcp_project_id`}}", "source_image_family": "ubuntu-2204-lts", "zone": "{{user `gcp_zone`}}", "image_family": "elasticsearch-7", "image_name": "elasticsearch7-{{isotime | clean_resource_name}}", "preemptible": true, "ssh_username": "ubuntu" } ], "provisioners": [ { "type": "file", "source": "../assets/scripts", "destination": "/tmp", "only": ["aws", 
"gcp"] }, { "type": "shell", "inline": [ "sudo mkdir -p /opt/cloud-deploy-scripts", "sudo mv /tmp/scripts/* /opt/cloud-deploy-scripts", "sudo chmod +x -R /opt/cloud-deploy-scripts" ], "only": ["aws", "gcp"] }, { "type": "shell", "script": "update-machine.sh", "execute_command": "echo '' | {{ .Vars }} sudo -E -S bash '{{ .Path }}'" }, { "type": "file", "source": "../assets/elasticsearch.yml", "destination": "elasticsearch.yml" }, { "type": "shell", "script": "install-elasticsearch7.sh", "environment_vars": [ "ES_VERSION={{user `elasticsearch_version`}}" ], "execute_command": "echo '' | {{ .Vars }} sudo -E -S bash '{{ .Path }}'" }, { "type": "shell", "script": "install-cloud-plugin.sh", "execute_command": "echo '' | {{ .Vars }} sudo -E -S bash '{{ .Path }}'" }, { "type": "shell", "environment_vars": [ "ES_VERSION={{user `elasticsearch_version`}}" ], "script": "install-beats.sh" }, { "type": "shell", "script": "install-custom.sh" } ], "post-processors": [ { "type": "manifest", "output": "packer-es-manifest.json" } ] } ================================================ FILE: packer/install-beats.sh ================================================ #!/bin/bash set -e ES_VERSION="${ES_VERSION:-7.9.0}" curl -L -O "https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-$ES_VERSION-amd64.deb" sudo dpkg -i "filebeat-$ES_VERSION-amd64.deb" rm "filebeat-$ES_VERSION-amd64.deb" curl -L -O "https://artifacts.elastic.co/downloads/beats/heartbeat/heartbeat-$ES_VERSION-amd64.deb" sudo dpkg -i "heartbeat-$ES_VERSION-amd64.deb" rm "heartbeat-$ES_VERSION-amd64.deb" curl -L -O "https://artifacts.elastic.co/downloads/beats/metricbeat/metricbeat-$ES_VERSION-amd64.deb" sudo dpkg -i "metricbeat-$ES_VERSION-amd64.deb" rm "metricbeat-$ES_VERSION-amd64.deb" ================================================ FILE: packer/install-cloud-plugin.sh ================================================ #!/bin/bash set -e cd /usr/share/elasticsearch/ if [[ $PACKER_BUILD_NAME == "aws" ]]; then sudo 
bin/elasticsearch-plugin install --batch discovery-ec2 sudo bin/elasticsearch-plugin install --batch repository-s3 elif [[ $PACKER_BUILD_NAME == "azure" ]]; then sudo bin/elasticsearch-plugin install --batch repository-azure elif [[ $PACKER_BUILD_NAME == "gcp" ]]; then sudo bin/elasticsearch-plugin install --batch discovery-gce sudo bin/elasticsearch-plugin install --batch repository-gcs fi ================================================ FILE: packer/install-cloudwatch-agent.sh ================================================ curl https://s3.amazonaws.com/aws-cloudwatch/downloads/latest/awslogs-agent-setup.py -O chmod +x awslogs-agent-setup.py sudo mv awslogs-agent-setup.py /usr/bin #sudo python ./awslogs-agent-setup.py --region us-east-2 ================================================ FILE: packer/install-custom.sh ================================================ #!/bin/bash set -e ================================================ FILE: packer/install-elasticsearch7.sh ================================================ #!/bin/bash set -e # Get the PGP Key wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - echo "deb https://artifacts.elastic.co/packages/7.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-7.x.list apt-get update if [ -z "$ES_VERSION" ]; then echo "Installing the latest Elasticsearch version" apt-get install elasticsearch else echo "Installing Elasticsearch version $ES_VERSION" apt-get install elasticsearch=$ES_VERSION fi mkdir /usr/share/elasticsearch/logs mkdir /usr/share/elasticsearch/data chown elasticsearch:elasticsearch /usr/share/elasticsearch/logs chown elasticsearch:elasticsearch /usr/share/elasticsearch/data mv elasticsearch.yml /etc/elasticsearch/elasticsearch.yml chown elasticsearch:elasticsearch /etc/elasticsearch/elasticsearch.yml ================================================ FILE: packer/install-kibana7.sh ================================================ #!/bin/bash set -e # Get the PGP Key # 
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - # echo "deb https://artifacts.elastic.co/packages/7.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-7.x.list # Fix for the apt lock issue sleep 100 apt-get update if [ -z "$ES_VERSION" ]; then echo "Installing latest Kibana version" apt-get install kibana else echo "Installing Kibana version $ES_VERSION" apt-get install kibana=$ES_VERSION fi # This needs to be here explicitly because of a long first-initialization time of Kibana systemctl daemon-reload systemctl enable kibana.service sudo service kibana start ================================================ FILE: packer/kibana7-node.packer.json ================================================ { "description": "Kibana Image", "builders": [ { "name": "aws", "type": "amazon-ebs", "ami_name": "kibana7-{{isotime | clean_resource_name}}", "availability_zone": "{{user `aws_az`}}", "iam_instance_profile": "packer", "instance_type": "t2.medium", "region": "{{user `aws_region`}}", "run_tags": { "role": "packer" }, "source_ami_filter": { "filters": { "virtualization-type": "hvm", "name": "elasticsearch7-*", "root-device-type": "ebs" }, "owners": [ "self" ], "most_recent": true }, "ssh_timeout": "10m", "ssh_username": "ubuntu", "tags": { "ImageType": "kibana7-packer-image" } }, { "name": "azure", "type": "azure-arm", "client_id": "{{user `azure_client_id`}}", "client_secret": "{{user `azure_client_secret`}}", "tenant_id": "{{user `azure_tenant_id`}}", "subscription_id": "{{user `azure_subscription_id`}}", "managed_image_resource_group_name": "{{user `azure_resource_group_name`}}", "managed_image_name": "kibana7-{{isotime \"2006-01-02T030405\"}}", "os_type": "Linux", "custom_managed_image_name": "{{user `azure_elasticsearch_image_name`}}", "custom_managed_image_resource_group_name":"{{user `azure_resource_group_name`}}", "location": "{{user `azure_location`}}", "vm_size": "Standard_DS2_v2" }, { "name": "gcp", "type": "googlecompute", 
"account_file": "{{user `gcp_account_file`}}", "project_id": "{{user `gcp_project_id`}}", "source_image_family": "elasticsearch-7", "zone": "{{user `gcp_zone`}}", "image_family": "kibana-7", "image_name": "kibana7-{{isotime | clean_resource_name}}", "preemptible": true, "ssh_username": "ubuntu" } ], "provisioners": [ { "type": "shell", "script": "install-kibana7.sh", "environment_vars": [ "ES_VERSION={{user `elasticsearch_version`}}" ], "execute_command": "echo '' | {{ .Vars }} sudo -E -S sh '{{ .Path }}'" } ], "post-processors": [ { "type": "manifest", "output": "packer-kb-manifest.json" } ] } ================================================ FILE: packer/update-machine.sh ================================================ #!/bin/bash export DEBIAN_FRONTEND=noninteractive sudo rm /boot/grub/menu.lst # https://github.com/hashicorp/packer/issues/2639 echo "Waiting 100 seconds for cloud-init to finish..." sleep 100 sudo apt-get update sudo -E apt-get upgrade -y sudo -E apt-get install -y software-properties-common git python3-dev htop ntp jq apt-transport-https unzip if [[ $PACKER_BUILD_NAME == "aws" ]]; then sudo -E apt-get install -y awscli fi # Disable daily apt unattended updates. 
echo 'APT::Periodic::Enable "0";' >> /etc/apt/apt.conf.d/10periodic ================================================ FILE: packer/variables.json ================================================ { "elasticsearch_version": "", "aws_region": "us-east-1", "aws_az": "us-east-1d", "azure_client_id": "", "azure_client_secret": "", "azure_subscription_id": "", "azure_tenant_id": "", "gcp_zone": "us-central1-a", "gcp_project_id": "elasticsearch", "gcp_account_file": ".gcp_account.json", "azure_location": "East US", "azure_resource_group_name": "packer-elasticsearch-images", "azure_elasticsearch_image_name": "" } ================================================ FILE: templates/aws_user_data.sh ================================================ #!/bin/bash exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 if [ "${DEV_MODE_scripts_s3_bucket}" != "" ]; then sudo aws s3 cp --recursive "s3://${DEV_MODE_scripts_s3_bucket}" /opt/cloud-deploy-scripts/ sudo chmod -R +x /opt/cloud-deploy-scripts fi export cloud_provider="${cloud_provider}" export elasticsearch_data_dir="${elasticsearch_data_dir}" export elasticsearch_logs_dir="${elasticsearch_logs_dir}" export heap_size="${heap_size}" export is_voting_only="${is_voting_only}" export es_cluster="${es_cluster}" export es_environment="${es_environment}" export security_groups="${security_groups}" export aws_region="${aws_region}" export use_g1gc="${use_g1gc}" export security_enabled="${security_enabled}" export monitoring_enabled="${monitoring_enabled}" export masters_count="${masters_count}" export client_user="${client_user}" export s3_backup_bucket="${s3_backup_bucket}" export xpack_monitoring_host="${xpack_monitoring_host}" export filebeat_monitoring_host="${filebeat_monitoring_host}" export client_pwd="${client_pwd}" export master="${master}" export data="${data}" export bootstrap_node="${bootstrap_node}" export ca_cert="${ca_cert}" export node_cert="${node_cert}" export node_key="${node_key}" export 
log_level="${log_level}" export log_size="${log_size}" export security_encryption_key="${security_encryption_key}" export reporting_encryption_key="${reporting_encryption_key}" export auto_shut_down_bootstrap_node="${auto_shut_down_bootstrap_node}" /opt/cloud-deploy-scripts/${startup_script} ================================================ FILE: templates/gcp_user_data.sh ================================================ #!/bin/bash exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 if [ "${DEV_MODE_scripts_gcs_bucket}" != "" ]; then sudo gsutil cp -r "gs://${DEV_MODE_scripts_gcs_bucket}/*" /opt/cloud-deploy-scripts/ sudo chmod -R +x /opt/cloud-deploy-scripts fi export cloud_provider="${cloud_provider}" export gcp_zones="${gcp_zones}" export gcp_project_id="${gcp_project_id}" export gcs_snapshots_bucket="${gcs_snapshots_bucket}" export gcs_service_account_key="${gcs_service_account_key}" export elasticsearch_data_dir="${elasticsearch_data_dir}" export elasticsearch_logs_dir="${elasticsearch_logs_dir}" export heap_size="${heap_size}" export is_voting_only="${is_voting_only}" export es_cluster="${es_cluster}" export es_environment="${es_environment}" export use_g1gc="${use_g1gc}" export security_enabled="${security_enabled}" export monitoring_enabled="${monitoring_enabled}" export masters_count="${masters_count}" export client_user="${client_user}" export xpack_monitoring_host="${xpack_monitoring_host}" export filebeat_monitoring_host="${filebeat_monitoring_host}" export client_pwd="${client_pwd}" export master="${master}" export data="${data}" export bootstrap_node="${bootstrap_node}" export ca_cert="${ca_cert}" export node_cert="${node_cert}" export node_key="${node_key}" export log_level="${log_level}" export log_size="${log_size}" export security_encryption_key="${security_encryption_key}" export reporting_encryption_key="${reporting_encryption_key}" export auto_shut_down_bootstrap_node="${auto_shut_down_bootstrap_node}" 
/opt/cloud-deploy-scripts/${startup_script} ================================================ FILE: templates/user_data.sh ================================================ #!/bin/bash exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 function fetch_master_nodes_ips() { if [ "${cloud_provider}" == "aws" ]; then local master_instance_ids="$(aws ec2 describe-instances --region=${aws_region} --filters Name=instance-state-name,Values=running Name=tag:Role,Values=master Name=tag:Cluster,Values=${es_environment} | jq -r '.Reservations | map(.Instances[].InstanceId) | .[]' | sort)" echo "$(aws ec2 describe-instances --region ${aws_region} --instance-ids $master_instance_ids | jq -r '.Reservations[].Instances[].PrivateIpAddress' | sort)" fi if [ "${cloud_provider}" == "gcp" ]; then echo "$(gcloud compute instances list --filter 'tags.items=es-master-node AND tags.items=${es_cluster}' --format 'get(networkInterfaces[0].networkIP)' | sort)" fi } if [ "${cloud_provider}" == "azure" ] || [ "${cloud_provider}" == "gcp" ]; then # Change node name to AWS-like hostname sudo sed -i -e "s/node.name: .*$/node.name: ip-$(hostname -I | tr . -)/ig" /etc/elasticsearch/elasticsearch.yml fi if [ "${bootstrap_node}" == "true" ]; then while true do echo "Fetching masters..." MASTER_INSTANCES="$(fetch_master_nodes_ips)" COUNT=`echo "$MASTER_INSTANCES" | wc -l` if [ "$COUNT" -eq "${masters_count}" ]; then echo "Masters count is correct... Rechecking in 60 sec" sleep 60 MASTER_INSTANCES_RECHECK="$(fetch_master_nodes_ips)" if [ "$MASTER_INSTANCES" = "$MASTER_INSTANCES_RECHECK" ]; then break fi fi sleep 5 done echo "Fetched masters" MASTER_IPS="$MASTER_INSTANCES" SEED_HOSTS=`echo "$MASTER_IPS" | paste -sd ',' -` INITIAL_MASTER_NODES=`echo "$MASTER_IPS" | awk '{print "ip-" $0}' | tr . 
- | paste -sd ',' -` fi # Configure elasticsearch cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml cluster.name: ${es_cluster} # only data nodes should have ingest and http capabilities node.master: ${master} node.data: ${data} node.ingest: ${data} xpack.security.enabled: ${security_enabled} xpack.monitoring.enabled: false path.data: ${elasticsearch_data_dir} path.logs: ${elasticsearch_logs_dir} EOF if [ "${bootstrap_node}" == "true" ]; then echo "discovery.seed_hosts: $SEED_HOSTS" >>/etc/elasticsearch/elasticsearch.yml echo "cluster.initial_master_nodes: $(hostname -I),$INITIAL_MASTER_NODES" >>/etc/elasticsearch/elasticsearch.yml fi if [ "${master}" == "true" ] && [ "${data}" == "true" ]; then echo "discovery.type: single-node" >>/etc/elasticsearch/elasticsearch.yml fi if [ "${monitoring_enabled}" == "true" ]; then cat <<'EOF' >/etc/metricbeat/metricbeat.yml metricbeat.modules: - module: elasticsearch period: 10s hosts: ["http://localhost:9200"] #username: "elastic" #password: "changeme" #ssl.certificate_authorities: ["/etc/pki/root/ca.pem"] # Set to true to send data collected by module to X-Pack # Monitoring instead of metricbeat-* indices. xpack.enabled: true - module: system metricsets: - cpu # CPU usage - load # CPU load averages - memory # Memory usage - network # Network IO #- process # Per process metrics #- process_summary # Process summary #- uptime # System Uptime - socket_summary # Socket summary #- core # Per CPU core usage - diskio # Disk IO #- filesystem # File system usage for each mountpoint #- fsstat # File system summary metrics #- raid # Raid #- socket # Sockets and connection info (linux only) #- service # systemd service information enabled: true period: 10s processes: ['.*'] output.elasticsearch: enabled: true # Array of hosts to connect to. 
# Scheme and port can be left out and will be set to the default (http and 9200) # In case you specify and additional path, the scheme is required: http://localhost:9200/path # IPv6 addresses should always be defined as: https://[2001:db8::1]:9200 hosts: ["${xpack_monitoring_host}"] EOF fi if [ "${cloud_provider}" == "aws" ]; then cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml network.host: _ec2:privateIpv4_,localhost plugin.mandatory: discovery-ec2 cloud.node.auto_attributes: true cluster.routing.allocation.awareness.attributes: aws_availability_zone discovery: seed_providers: ec2 ec2.groups: ${security_groups} ec2.host_type: private_ip ec2.tag.Cluster: ${es_environment} ec2.availability_zones: ${availability_zones} ec2.protocol: http # no need in HTTPS for internal AWS calls # manually set the endpoint because of auto-discovery issues # https://github.com/elastic/elasticsearch/issues/27464 ec2.endpoint: ec2.${aws_region}.amazonaws.com EOF fi if [ "${cloud_provider}" == "gcp" ]; then cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml network.host: _gce_,localhost plugin.mandatory: discovery-gce cloud.gce.project_id: ${gcp_project_id} cloud.gce.zone: ${gcp_zone} discovery.seed_providers: gce EOF fi # Azure doesn't have a proper discovery plugin, hence we are going old-school and relying on scaleset name prefixes if [ "${cloud_provider}" == "azure" ]; then cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml network.host: _site_,localhost # For discovery we are using predictable hostnames (thanks for the computer name prefix), but could just as well use the # predictable subnet addresses starting at 10.1.0.5. 
EOF # avoiding discovery noise in single-node scenario if [ "${master}" == "true" ] && [ "${data}" == "true" ]; then cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml discovery.seed_hosts: ["${es_cluster}-master000000", "${es_cluster}-data000000"] EOF else cat <<'EOF' >>/etc/elasticsearch/elasticsearch.yml discovery.seed_hosts: ["${es_cluster}-master000000", "${es_cluster}-master000001", "${es_cluster}-master000002", "${es_cluster}-data000000", "${es_cluster}-data000001"] EOF fi fi cat <<'EOF' >>/etc/security/limits.conf # allow user 'elasticsearch' mlockall elasticsearch soft memlock unlimited elasticsearch hard memlock unlimited EOF sudo mkdir -p /etc/systemd/system/elasticsearch.service.d cat <<'EOF' >>/etc/systemd/system/elasticsearch.service.d/override.conf [Service] LimitMEMLOCK=infinity Restart=always RestartSec=10 EOF # Setup heap size and memory locking sudo sed -i 's/#MAX_LOCKED_MEMORY=.*$/MAX_LOCKED_MEMORY=unlimited/' /etc/init.d/elasticsearch sudo sed -i 's/#MAX_LOCKED_MEMORY=.*$/MAX_LOCKED_MEMORY=unlimited/' /etc/default/elasticsearch sudo sed -i "s/^-Xms.*/-Xms${heap_size}/" /etc/elasticsearch/jvm.options sudo sed -i "s/^-Xmx.*/-Xmx${heap_size}/" /etc/elasticsearch/jvm.options # Setup GC sudo sed -i "s/^-XX:+UseConcMarkSweepGC/-XX:+UseG1GC/" /etc/elasticsearch/jvm.options # Storage sudo mkdir -p ${elasticsearch_logs_dir} sudo chown -R elasticsearch:elasticsearch ${elasticsearch_logs_dir} # # we are assuming volume is declared and attached when data_dir is passed to the script if { [ "${master}" == "true" ] || [ "${data}" == "true" ]; } && [ "${bootstrap_node}" != "true" ]; then sudo mkdir -p ${elasticsearch_data_dir} export DEVICE_NAME=$(lsblk -ip | tail -n +2 | grep -v " rom" | awk '{print $1 " " ($7? 
"MOUNTEDPART" : "") }' | sed ':a;N;$!ba;s/\n`/ /g' | sed ':a;N;$!ba;s/\n|-/ /g' | grep -v MOUNTEDPART) if sudo mount -o defaults -t ext4 $DEVICE_NAME ${elasticsearch_data_dir}; then echo 'Successfully mounted existing disk' else echo 'Trying to mount a fresh disk' sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard $DEVICE_NAME sudo mount -o defaults -t ext4 $DEVICE_NAME ${elasticsearch_data_dir} && echo 'Successfully mounted a fresh disk' fi echo "$DEVICE_NAME ${elasticsearch_data_dir} ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab sudo chown -R elasticsearch:elasticsearch ${elasticsearch_data_dir} fi if [ -f "/etc/nginx/nginx.conf" ]; then # Setup basic auth for nginx web front and start the service if exists sudo htpasswd -bc /etc/nginx/conf.d/search.htpasswd ${client_user} "${client_pwd}" sudo service nginx start fi # Start Elasticsearch systemctl daemon-reload systemctl enable elasticsearch.service systemctl start elasticsearch.service if [ "${bootstrap_node}" == "true" ]; then while true do echo "Checking cluster health" HEALTH="$(curl --silent http://localhost:9200/_cluster/health | jq -r '.status')" if [ "$HEALTH" = "green" ]; then break fi sleep 5 done if [ "${cloud_provider}" == "aws" ]; then # AWS instance is set to terminate after shutdown automatically shutdown -h now fi if [ "${cloud_provider}" == "gcp" ]; then INSTANCE_NAME="$(gcloud compute instances list --filter 'tags.items=es-bootstrap-node AND tags.items=${es_cluster}' --format 'get(name)')" gcloud compute instances delete $INSTANCE_NAME --zone ${gcp_zone} --quiet fi else # Setup x-pack security also on Kibana configs where applicable if [ -f "/etc/kibana/kibana.yml" ]; then echo "xpack.security.enabled: ${security_enabled}" | sudo tee -a /etc/kibana/kibana.yml echo "xpack.monitoring.enabled: ${monitoring_enabled}" | sudo tee -a /etc/kibana/kibana.yml systemctl daemon-reload systemctl enable kibana.service sudo service kibana restart fi if [ -f "/etc/nginx/nginx.conf" ]; 
then sudo rm /etc/grafana/grafana.ini cat <<'EOF' >>/etc/grafana/grafana.ini [security] admin_user = ${client_user} admin_password = ${client_pwd} EOF sudo /bin/systemctl daemon-reload sudo /bin/systemctl enable grafana-server.service sudo service grafana-server start fi sleep 60 if [ `systemctl is-failed elasticsearch.service` == 'failed' ]; then echo "Elasticsearch unit failed to start" exit 1 fi fi ================================================ FILE: terraform-aws/README.md ================================================ # AWS deployment ## Create the AMIs with Packer Go to the packer folder and see the README there. Once you have the AMI IDs, return here and continue with the next steps. ## Create key-pair ```bash aws ec2 create-key-pair --key-name elasticsearch --query 'KeyMaterial' --output text > elasticsearch.pem ``` ## VPC Create a VPC, or use an existing one. You will need the VPC ID; the available subnets within that VPC will be used. ## Configurations Edit `terraform.tfvars` (syntax `var_name = value` per line) to specify the following: * `aws_region` - the region in which to launch the cluster. * `availability_zones` - at least 2 availability zones in that region. * `es_cluster` - the name of the Elasticsearch cluster to launch. * `key_name` - the name of the key to use - that key needs to be handy so you can access the machines if needed. * `vpc_id` - the ID of the VPC to launch the cluster in. The rest of the configurations are mostly around cluster topology and machine types and sizes. 
* define clients_subnet_ids/cluster_subnet_ids with a map of availability zones to a list of subnets: ``` cluster_subnet_ids = {us-east-1b=["subnet-xxxxxxxx","subnet-yyyyyyyy"]} ``` * define instance counts with a map of availability zones to counts: ``` masters_count = { "us-east-1a" = 2, "us-east-1b" = 1 } datas_count = { "us-east-1a" = 1 } clients_count = { "us-east-1a" = 1 } ``` single nodes are created by having empty maps for all counts (the default) * an example for the single node availability zone: ``` singlenode_az = "us-east-1b" ``` ### Cluster topology Two modes of deployment are supported: * A recommended configuration, with dedicated master-eligible nodes, data nodes, and client nodes. This is a production-ready and best-practice configuration. See more details in the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html). * Single node mode - mostly useful for experimentation At this point we consider the role `ingest` as synonymous with `data`, so all data nodes are also ingest nodes. The default mode is the single-node mode. To change it to the recommended configuration, edit `terraform.tfvars` and set the number of master nodes to 3, data nodes to at least 2, and client nodes to at least 1. All nodes with the `client` role will be attached to an ALB, so access to all client nodes can be done via the DNS it exposes. ### Cluster bootstrap Deploying a cluster in non single-node mode requires [bootstrapping the cluster](https://www.elastic.co/guide/en/elasticsearch/reference/master/modules-discovery-bootstrap-cluster.html). We do this automatically, by spinning up a special bootstrap node, and terminating it once finished. This only happens once, the first time you deploy the cluster. 
State information on whether the cluster is bootstrapped or not is kept in a local file `cluster_bootstrap_state` which is used on later `terraform apply` runs (use `printf 0 > cluster_bootstrap_state` to reset it). After the bootstrap node has terminated, you can start using the cluster. ### Security groups By default we create two security groups - one for the internal cluster nodes (data and master), and one for the client nodes. Your applications need to be in the latter only, and communicate with the cluster via the client nodes only. If you prefer using a security group of your own, you can add it to `additional_security_groups` in `terraform.tfvars`. ## Launch the cluster with Terraform On first usage, you will need to execute `terraform init` to initialize the terraform providers used. To deploy the cluster, or apply any changes to an existing cluster deployed using this project, run: ```bash terraform plan terraform apply ``` When terraform is done, you should see a lot of output ending with something like this: ``` Apply complete! Resources: 14 added, 0 changed, 0 destroyed. The state of your infrastructure has been saved to the path below. This state is required to modify and destroy your infrastructure, so keep it safe. To inspect the complete state use the `terraform show` command. State path: terraform.tfstate Outputs: clients_dns = internal-es-test-client-lb-963348710.eu-central-1.elb.amazonaws.com ``` Note `clients_dns` - that's your entry point to the cluster. ### Look around The client nodes are the ones exposed to external networks. They provide Kibana, Grafana, Cerebro and direct Elasticsearch access. Client nodes are accessible via their public IPs (depending on your security group / VPC settings) and the DNS of the ALB they are attached to (see above). Client nodes listen on port 9200 and are password protected depending on your choice (variable `security_enabled`). 
The user is defined in the variable `client_user`; make note of the password when you run terraform. On client nodes you will find: * Kibana access is direct on port 5601 (http://host:5601) * [Cerebro](https://github.com/lmenezes/cerebro) (a cluster management UI) is available on http://host:9000/cerebro/ * For direct Elasticsearch access, go to host:9200 You can pull the list of instances by their state and role using aws-cli: ```bash aws ec2 describe-instances --filters Name=instance-state-name,Values=running aws ec2 describe-instances --filters Name=instance-state-name,Values=running,Name=tag:Role,Values=client ``` To log in to one of the instances: ```bash ssh -i elasticsearch.pem ubuntu@{public IP / DNS of the instance} ``` ### Changing cluster size after deployment Terraform is smart enough to make the least amount of changes possible and resize resources when possible instead of destroying them. When you want to change the cluster configuration (e.g. add more client nodes, data nodes, resize disk or instances, etc) just edit `terraform.tfvars` and run `terraform plan` followed by `terraform apply`. 
================================================
FILE: terraform-aws/alb.tf
================================================
# Security group attached to the application load balancer.
resource "aws_security_group" "elasticsearch-alb-sg" {
  name        = "${var.es_cluster}-alb-sg"
  description = "ElasticSearch Ports for ALB Access"
  vpc_id      = var.vpc_id
}

# allow ES port access
resource "aws_security_group_rule" "elasticsearch-alb-sg-ingress-rule-es" {
  type              = "ingress"
  protocol          = "tcp"
  cidr_blocks       = ["0.0.0.0/0"]
  from_port         = 9200
  to_port           = 9200
  security_group_id = aws_security_group.elasticsearch-alb-sg.id
}

# allow egress
resource "aws_security_group_rule" "elasticsearch-alb-sg-egress-rule-all" {
  type              = "egress"
  protocol          = "-1"
  cidr_blocks       = ["0.0.0.0/0"]
  from_port         = 0
  to_port           = 0
  security_group_id = aws_security_group.elasticsearch-alb-sg.id
}

# allow Kibana port access
# Only created when client nodes are configured or when running in single-node mode.
resource "aws_security_group_rule" "elasticsearch-alb-sg-ingress-rule-kibana" {
  count             = length(keys(var.clients_count)) > 0 || local.singlenode_mode ? 1 : 0
  type              = "ingress"
  protocol          = "tcp"
  cidr_blocks       = ["0.0.0.0/0"]
  from_port         = 5601
  to_port           = 5601
  security_group_id = aws_security_group.elasticsearch-alb-sg.id
}

# Target Groups
#-----------------------------------------------------
resource "aws_lb_target_group" "esearch-p9200-tg" {
  name     = "${var.es_cluster}-p9200-tg"
  port     = 9200
  protocol = "HTTP"
  vpc_id   = var.vpc_id

  health_check {
    healthy_threshold   = 5
    unhealthy_threshold = 2
    timeout             = 5
    path                = "/"
    port                = 9200
    interval            = 15
    # An unauthenticated GET / yields 401 when x-pack security is enabled and
    # 200 when it is disabled; accept both so targets are not marked unhealthy
    # when var.security_enabled is false.
    matcher = "200,401"
  }
}

# Kibana target group; exists only when client nodes or the single node exist.
resource "aws_lb_target_group" "kibana-p5601-tg" {
  count    = length(keys(var.clients_count)) > 0 || local.singlenode_mode ? 1 : 0
  name     = "${var.es_cluster}-p5601-tg"
  port     = 5601
  protocol = "HTTP"
  vpc_id   = var.vpc_id

  health_check {
    healthy_threshold   = 5
    unhealthy_threshold = 2
    timeout             = 5
    path                = "/"
    port                = 5601
    interval            = 15
    matcher             = "302"
  }
}

# The load balancer itself. It is internal unless var.public_facing is true and
# falls back to all VPC subnets when var.alb_subnets is empty.
resource "aws_lb" "elasticsearch-alb" {
  name                       = "${var.es_cluster}-alb"
  internal                   = !var.public_facing
  load_balancer_type         = "application"
  security_groups            = [aws_security_group.elasticsearch-alb-sg.id]
  subnets                    = coalescelist(var.alb_subnets, tolist(data.aws_subnets.all-subnets.ids))
  enable_deletion_protection = false
}

#-----------------------------------------------------
# ALB Listeners and Listener Rules
#-----------------------------------------------------
resource "aws_lb_listener" "esearch" {
  load_balancer_arn = aws_lb.elasticsearch-alb.arn
  port              = "9200"
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.esearch-p9200-tg.arn
  }
}

resource "aws_lb_listener" "kibana" {
  count             = length(keys(var.clients_count)) > 0 || local.singlenode_mode ? 1 : 0
  load_balancer_arn = aws_lb.elasticsearch-alb.arn
  port              = "5601"
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.kibana-p5601-tg[0].arn
  }
}

================================================
FILE: terraform-aws/ami.tf
================================================
// Find the latest available AMI for Elasticsearch
data "aws_ami" "elasticsearch" {
  filter {
    name   = "state"
    values = ["available"]
  }
  filter {
    name   = "tag:ImageType"
    values = [var.elasticsearch_packer_image]
  }
  most_recent = true
  owners      = ["self"]
}

// Find the latest available AMI for the Kibana client node
data "aws_ami" "kibana_client" {
  filter {
    name   = "state"
    values = ["available"]
  }
  filter {
    name   = "tag:ImageType"
    values = [var.kibana_packer_image]
  }
  most_recent = true
  owners      = ["self"]
}

================================================
FILE: terraform-aws/certs.tf
================================================
# Self-signed CA plus one node certificate signed by it.
# Every resource here is created only when var.security_enabled is true.
locals {
  cert_common_name      = "elasticsearch-cloud-deploy autogenerated CA"
  validity_period_hours = 365 * 24
  early_renewal_hours   = 30 * 24
}

resource "tls_private_key" "ca" {
  count     = var.security_enabled ? 1 : 0
  algorithm = "RSA"
}

resource "tls_self_signed_cert" "ca" {
  count = var.security_enabled ? 1 : 0
  #key_algorithm = "RSA"
  # join("", [...]) collapses the 0-or-1 element splat into a plain string.
  private_key_pem = join("", tls_private_key.ca[*].private_key_pem)

  subject {
    common_name = local.cert_common_name
  }

  validity_period_hours = local.validity_period_hours
  early_renewal_hours   = local.early_renewal_hours
  is_ca_certificate     = true

  allowed_uses = [
    "server_auth",
    "cert_signing",
    "crl_signing",
    "client_auth"
  ]
}

resource "tls_private_key" "node" {
  count     = var.security_enabled ? 1 : 0
  algorithm = "RSA"
}

resource "tls_cert_request" "node" {
  count = var.security_enabled ? 1 : 0
  #key_algorithm = "RSA"
  private_key_pem = join("", tls_private_key.node[*].private_key_pem)

  subject {
    common_name = local.cert_common_name
  }
}

resource "tls_locally_signed_cert" "node" {
  count = var.security_enabled ? 1 : 0
  #ca_key_algorithm = "RSA"
  cert_request_pem   = join("", tls_cert_request.node[*].cert_request_pem)
  ca_private_key_pem = join("", tls_private_key.ca[*].private_key_pem)
  ca_cert_pem        = join("", tls_self_signed_cert.ca[*].cert_pem)

  validity_period_hours = local.validity_period_hours
  early_renewal_hours   = local.early_renewal_hours

  allowed_uses = [
    "key_encipherment",
    "digital_signature",
    "server_auth",
    "client_auth"
  ]
}

================================================
FILE: terraform-aws/client.tf
================================================
# User data for client nodes: common variables plus the client startup script.
data "template_file" "client_userdata_script" {
  template = file("${path.module}/../templates/aws_user_data.sh")
  vars = merge(local.user_data_common, {
    startup_script = "client.sh",
    heap_size      = var.client_heap_size
  })
}

resource "aws_launch_template" "client" {
  name_prefix = "elasticsearch-${var.es_cluster}-client-nodes"
  image_id    = data.aws_ami.kibana_client.id
  # NOTE(review): client nodes use the master instance type; confirm this is intended.
  instance_type = var.master_instance_type
  user_data     = base64encode(data.template_file.client_userdata_script.rendered)
  key_name      = var.key_name

  iam_instance_profile {
    arn = aws_iam_instance_profile.elasticsearch.arn
  }

  network_interfaces {
    delete_on_termination       = true
    associate_public_ip_address = false
    security_groups = concat(
      [aws_security_group.elasticsearch_security_group.id],
      [aws_security_group.elasticsearch_clients_security_group.id],
      var.additional_security_groups,
    )
  }

  lifecycle {
    create_before_destroy = true
  }
}

# One fixed-size autoscaling group per availability zone key in var.clients_count.
resource "aws_autoscaling_group" "client_nodes" {
  count = length(keys(var.clients_count))

  name             = "elasticsearch-${var.es_cluster}-client-nodes-${keys(var.clients_count)[count.index]}"
  max_size         = var.clients_count[keys(var.clients_count)[count.index]]
  min_size         = var.clients_count[keys(var.clients_count)[count.index]]
  desired_capacity = var.clients_count[keys(var.clients_count)[count.index]]
  default_cooldown = 30
  force_delete     = true

  vpc_zone_identifier = local.clients_subnet_ids[keys(var.clients_count)[count.index]]

  target_group_arns = [
    aws_lb_target_group.esearch-p9200-tg.arn,
    aws_lb_target_group.kibana-p5601-tg[0].arn,
  ]

  launch_template {
    id      = aws_launch_template.client.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = format("%s-client-node", var.es_cluster)
    propagate_at_launch = true
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }

  tag {
    key                 = "Cluster"
    value               = "${var.environment}-${var.es_cluster}"
    propagate_at_launch = true
  }

  tag {
    key                 = "Role"
    value               = "client"
    propagate_at_launch = true
  }

  tag {
    key                 = "AutoAttachDiskDisabled"
    value               = "true"
    propagate_at_launch = true
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-aws/datas-voters.tf
================================================
# User data for voting-only data nodes: same startup script as data nodes,
# with is_voting_only overridden to "true".
data "template_file" "data_voters_userdata_script" {
  template = file("${path.module}/../templates/aws_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.data_heap_size
    is_voting_only = "true"
    startup_script = "data.sh"
  })
}

resource "aws_launch_template" "data_voters" {
  name_prefix   = "elasticsearch-${var.es_cluster}-data-voters-nodes"
  image_id      = data.aws_ami.elasticsearch.id
  instance_type = var.data_instance_type
  user_data     = base64encode(data.template_file.data_voters_userdata_script.rendered)
  key_name      = var.key_name

  ebs_optimized = var.ebs_optimized

  iam_instance_profile {
    arn = aws_iam_instance_profile.elasticsearch.arn
  }

  network_interfaces {
    delete_on_termination       = true
    associate_public_ip_address = false
    security_groups = concat(
      [aws_security_group.elasticsearch_security_group.id],
      var.additional_security_groups,
    )
  }

  lifecycle {
    create_before_destroy = true
  }
}

# One fixed-size autoscaling group per availability zone key in var.data_voters_count.
resource "aws_autoscaling_group" "data_voters_nodes" {
  count = length(keys(var.data_voters_count))

  name             = "elasticsearch-${var.es_cluster}-data-voters-nodes-${keys(var.data_voters_count)[count.index]}"
  max_size         = var.data_voters_count[keys(var.data_voters_count)[count.index]]
  min_size         = var.data_voters_count[keys(var.data_voters_count)[count.index]]
  desired_capacity = var.data_voters_count[keys(var.data_voters_count)[count.index]]
  default_cooldown = 30
  force_delete     = true

  vpc_zone_identifier = local.cluster_subnet_ids[keys(var.data_voters_count)[count.index]]

  # Created after the master groups and after the EBS volumes exist.
  depends_on = [
    aws_autoscaling_group.master_nodes,
    aws_ebs_volume.data-voter
  ]

  target_group_arns = [
    aws_lb_target_group.esearch-p9200-tg.arn,
  ]

  launch_template {
    id      = aws_launch_template.data_voters.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = format("%s-data-voter-node", var.es_cluster)
    propagate_at_launch = true
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }

  tag {
    key                 = "Cluster"
    value               = "${var.environment}-${var.es_cluster}"
    propagate_at_launch = true
  }

  tag {
    key                 = "Role"
    value               = "data-voters"
    propagate_at_launch = true
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-aws/datas.tf
================================================
# User data for data nodes: common variables plus the data startup script.
data "template_file" "data_userdata_script" {
  template = file("${path.module}/../templates/aws_user_data.sh")
  vars = merge(local.user_data_common, {
    startup_script = "data.sh",
    heap_size      = var.data_heap_size
  })
}

resource "aws_launch_template" "data" {
  name_prefix   = "elasticsearch-${var.es_cluster}-data-nodes"
  image_id      = data.aws_ami.elasticsearch.id
  instance_type = var.data_instance_type
  user_data     = base64encode(data.template_file.data_userdata_script.rendered)
  key_name      = var.key_name

  ebs_optimized = var.ebs_optimized

  iam_instance_profile {
    arn = aws_iam_instance_profile.elasticsearch.arn
  }

  network_interfaces {
    delete_on_termination       = true
    associate_public_ip_address = false
    security_groups = concat(
      [aws_security_group.elasticsearch_security_group.id],
      var.additional_security_groups,
    )
  }

  lifecycle {
    create_before_destroy = true
  }
}

# One fixed-size autoscaling group per availability zone key in var.datas_count.
resource "aws_autoscaling_group" "data_nodes" {
  count = length(keys(var.datas_count))

  name             = "elasticsearch-${var.es_cluster}-data-nodes-${keys(var.datas_count)[count.index]}"
  max_size         = var.datas_count[keys(var.datas_count)[count.index]]
  min_size         = var.datas_count[keys(var.datas_count)[count.index]]
  desired_capacity = var.datas_count[keys(var.datas_count)[count.index]]
  default_cooldown = 30
  force_delete     = true

  vpc_zone_identifier = local.cluster_subnet_ids[keys(var.datas_count)[count.index]]

  # Created after the master groups and after the EBS volumes exist.
  depends_on = [
    aws_autoscaling_group.master_nodes,
    aws_ebs_volume.data
  ]

  target_group_arns = [
    aws_lb_target_group.esearch-p9200-tg.arn,
  ]

  launch_template {
    id      = aws_launch_template.data.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = format("%s-data-node", var.es_cluster)
    propagate_at_launch = true
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }

  tag {
    key                 = "Cluster"
    value               = "${var.environment}-${var.es_cluster}"
    propagate_at_launch = true
  }

  tag {
    key                 = "Role"
    value               = "data"
    propagate_at_launch = true
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-aws/dev.tf
================================================
# data "template_file" "dev-s3" {
#   template = file("${path.module}/../assets/s3-backup.json")
#   vars = {
#     s3_backup_bucket = var.DEV_MODE_scripts_s3_bucket
#   }
# }

# resource "aws_s3_bucket" "dev" {
#   count = var.DEV_MODE_scripts_s3_bucket == "" ? 0 : 1
#   bucket = "${var.DEV_MODE_scripts_s3_bucket}"
#   region = var.aws_region
#   acl = "private"
# }

# resource "aws_iam_role_policy" "dev-s3" {
#   count = var.DEV_MODE_scripts_s3_bucket != "" ? 1 : 0
#   name = "${var.es_cluster}-elasticsearch-s3-devmode-policy"
#   role = aws_iam_role.elasticsearch.id
#   policy = data.template_file.dev-s3.rendered
# }

================================================
FILE: terraform-aws/disks.tf
================================================
# Expand each "availability zone => count" map into a set with one element per
# volume. Elements are JSON-encoded objects ({az, index, name}) so they can be
# used as for_each keys and decoded again inside the resources below.
locals {
  master_az_flattened = toset(flatten([
    for az, count in var.masters_count : [
      for i in range(0, count) : jsonencode({ "az" = az, "index" = i, "name" = "${az}-${i}" })
    ]
  ]))
  data_az_flattened = toset(flatten([
    for az, count in var.datas_count : [
      for i in range(0, count) : jsonencode({ "az" = az, "index" = i, "name" = "${az}-${i}" })
    ]
  ]))
  data_voters_az_flattened = toset(flatten([
    for az, count in var.data_voters_count : [
      for i in range(0, count) : jsonencode({ "az" = az, "index" = i, "name" = "${az}-${i}" })
    ]
  ]))
}

resource "aws_ebs_volume" "master" {
  for_each = local.master_az_flattened

  availability_zone = jsondecode(each.value)["az"]
  size              = 10
  type              = var.disk_type
  encrypted         = var.volume_encryption

  tags = {
    Name            = "elasticsearch-${var.es_cluster}-master-${jsondecode(each.value)["name"]}"
    ClusterName     = var.es_cluster
    VolumeIndex     = jsondecode(each.value)["index"]
    AutoAttachGroup = "master"
  }
}

resource "aws_ebs_volume" "data" {
  for_each = local.data_az_flattened

  availability_zone = jsondecode(each.value)["az"]
  size              = var.elasticsearch_volume_size
  type              = var.disk_type
  encrypted         = var.volume_encryption

  tags = {
    Name            = "elasticsearch-${var.es_cluster}-data-${jsondecode(each.value)["name"]}"
    ClusterName     = var.es_cluster
    VolumeIndex     = jsondecode(each.value)["index"]
    AutoAttachGroup = "data"
  }
}

resource "aws_ebs_volume" "data-voter" {
  for_each = local.data_voters_az_flattened

  availability_zone = jsondecode(each.value)["az"]
  size              = var.elasticsearch_volume_size
  type              = var.disk_type
  encrypted         = var.volume_encryption

  tags = {
    Name            = "elasticsearch-${var.es_cluster}-data-voters-${jsondecode(each.value)["name"]}"
    ClusterName     = var.es_cluster
    VolumeIndex     = jsondecode(each.value)["index"]
    AutoAttachGroup = "data-voters"
  }
}

# Single volume, only in single-node mode.
resource "aws_ebs_volume" "singlenode" {
  count = local.singlenode_mode ? 1 : 0

  availability_zone = var.singlenode_az
  size              = var.elasticsearch_volume_size
  type              = var.disk_type
  encrypted         = var.volume_encryption

  tags = {
    Name            = "elasticsearch-${var.es_cluster}-singlenode"
    ClusterName     = var.es_cluster
    VolumeIndex     = "0"
    AutoAttachGroup = "singlenode"
  }
}

================================================
FILE: terraform-aws/iam.tf
================================================
# S3 backup policy document rendered with the configured bucket name.
data "template_file" "data_s3_backup" {
  template = file("${path.module}/../assets/s3-backup.json")
  vars = {
    s3_backup_bucket = var.s3_backup_bucket
  }
}

resource "aws_iam_role" "elasticsearch" {
  name               = "${var.es_cluster}-elasticsearch-discovery-role"
  assume_role_policy = file("${path.module}/../assets/ec2-role-trust-policy.json")
}

resource "aws_iam_role_policy" "elasticsearch" {
  name = "${var.es_cluster}-elasticsearch-node-init-policy"
  policy = file(
    "${path.module}/../assets/node-init.json",
  )
  role = aws_iam_role.elasticsearch.id
}

# Attached only when a backup bucket is configured.
resource "aws_iam_role_policy" "s3_backup" {
  count  = var.s3_backup_bucket != "" ? 1 : 0
  name   = "${var.es_cluster}-elasticsearch-backup-policy"
  policy = data.template_file.data_s3_backup.rendered
  role   = aws_iam_role.elasticsearch.id
}

resource "aws_iam_instance_profile" "elasticsearch" {
  name = "${var.es_cluster}-elasticsearch-discovery-profile"
  path = "/"
  role = aws_iam_role.elasticsearch.name
}

================================================
FILE: terraform-aws/main.tf
================================================
provider "aws" {
  region = var.aws_region
}

# Generated secrets; passed to the nodes through local.user_data_common.
resource "random_string" "vm-login-password" {
  length  = 16
  special = false
}

resource "random_string" "security-encryption-key" {
  length  = 32
  special = false
}

resource "random_string" "reporting-encryption-key" {
  length  = 32
  special = false
}

locals {
  # Every AZ mentioned in any count map plus the single-node AZ; compact()
  # drops empty strings.
  all_availability_zones = compact(tolist(setunion(
    keys(var.masters_count),
    keys(var.datas_count),
    keys(var.clients_count),
    keys(var.data_voters_count),
    toset([var.singlenode_az])
  )))

  # AZ => subnet list; user-supplied subnets win, otherwise the subnets
  # discovered for that AZ are used.
  cluster_subnet_ids = {
    for i, az in local.all_availability_zones : az => lookup(var.cluster_subnet_ids, az, element(data.aws_subnets.subnets-per-az.*.ids, i))
  }

  clients_subnet_ids = {
    for i, az in local.all_availability_zones : az => lookup(var.clients_subnet_ids, az, element(data.aws_subnets.subnets-per-az.*.ids, i))
  }

  flat_cluster_subnet_ids = flatten(values(local.cluster_subnet_ids))
  flat_clients_subnet_ids = flatten(values(local.clients_subnet_ids))

  # Explicit subnet if given, else the first cluster subnet ("" when there is none).
  bootstrap_node_subnet_id = var.bootstrap_node_subnet_id != "" ? var.bootstrap_node_subnet_id : coalescelist(local.flat_cluster_subnet_ids, [""])[0]

  # Single-node mode is active when all four count maps are empty.
  singlenode_mode      = (length(keys(var.masters_count)) + length(keys(var.datas_count)) + length(keys(var.data_voters_count)) + length(keys(var.clients_count))) == 0
  singlenode_subnet_id = local.singlenode_mode ? local.cluster_subnet_ids[var.singlenode_az][0] : ""

  # Masters plus voting-only data nodes. The leading [0] keeps sum() from
  # failing on an empty list when neither masters_count nor data_voters_count
  # has entries (e.g. only datas_count/clients_count are set).
  masters_count = local.singlenode_mode ? 0 : sum(concat([0], values(var.masters_count), values(var.data_voters_count)))

  is_cluster_bootstrapped = data.local_file.cluster_bootstrap_state.content == "1" || !var.requires_bootstrapping

  # Variables shared by every rendered user-data script; the per-role
  # template_file data sources merge their own overrides on top.
  user_data_common = {
    cloud_provider                = "aws"
    elasticsearch_data_dir        = var.elasticsearch_data_dir
    elasticsearch_logs_dir        = var.elasticsearch_logs_dir
    es_cluster                    = var.es_cluster
    es_environment                = "${var.environment}-${var.es_cluster}"
    security_groups               = aws_security_group.elasticsearch_security_group.id
    aws_region                    = var.aws_region
    security_enabled              = var.security_enabled
    monitoring_enabled            = var.monitoring_enabled
    masters_count                 = local.masters_count
    client_user                   = var.client_user
    xpack_monitoring_host         = var.xpack_monitoring_host
    filebeat_monitoring_host      = var.filebeat_monitoring_host
    s3_backup_bucket              = var.s3_backup_bucket
    use_g1gc                      = var.use_g1gc
    client_pwd                    = random_string.vm-login-password.result
    master                        = false
    data                          = false
    bootstrap_node                = false
    log_level                     = var.log_level
    log_size                      = var.log_size
    is_voting_only                = false
    ca_cert                       = var.security_enabled ? join("", tls_self_signed_cert.ca[*].cert_pem) : ""
    node_cert                     = var.security_enabled ? join("", tls_locally_signed_cert.node[*].cert_pem) : ""
    node_key                      = var.security_enabled ? join("", tls_private_key.node[*].private_key_pem) : ""
    DEV_MODE_scripts_s3_bucket    = var.DEV_MODE_scripts_s3_bucket
    security_encryption_key       = random_string.security-encryption-key.result
    reporting_encryption_key      = random_string.reporting-encryption-key.result
    auto_shut_down_bootstrap_node = var.auto_shut_down_bootstrap_node
  }
}

##############################################################################
# Elasticsearch
##############################################################################

resource "aws_security_group" "elasticsearch_security_group" {
  name        = "elasticsearch-${var.es_cluster}-security-group"
  description = "Elasticsearch ports with ssh"
  vpc_id      = var.vpc_id

  tags = {
    Name    = "${var.es_cluster}-elasticsearch"
    cluster = var.es_cluster
  }

  # ssh access from everywhere
  # NOTE(review): port 22 is open to 0.0.0.0/0; consider restricting the CIDR.
  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  # inter-cluster communication over ports 9200-9400
  ingress {
    from_port = 9200
    to_port   = 9400
    protocol  = "tcp"
    self      = true
  }

  # allow inter-cluster ping
  ingress {
    from_port = 8
    to_port   = 0
    protocol  = "icmp"
    self      = true
  }

  # allow alb sg access
  ingress {
    from_port       = 9200
    to_port         = 9200
    protocol        = "tcp"
    security_groups = [aws_security_group.elasticsearch-alb-sg.id]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_security_group" "elasticsearch_clients_security_group" {
  name        = "elasticsearch-${var.es_cluster}-clients-security-group"
  description = "Kibana HTTP access from outside"
  vpc_id      = var.vpc_id

  tags = {
    Name    = "${var.es_cluster}-kibana"
    cluster = var.es_cluster
  }

  # allow alb sg access
  ingress {
    from_port       = 9200
    to_port         = 9200
    protocol        = "tcp"
    security_groups = [aws_security_group.elasticsearch-alb-sg.id]
  }

  ingress {
    from_port       = 5601
    to_port         = 5601
    protocol        = "tcp"
    security_groups = [aws_security_group.elasticsearch-alb-sg.id]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}
================================================
FILE: terraform-aws/masters.tf
================================================
# Local marker file recording whether the cluster has already been bootstrapped.
data "local_file" "cluster_bootstrap_state" {
  filename = "${path.module}/cluster_bootstrap_state"
}

data "template_file" "master_userdata_script" {
  template = file("${path.module}/../templates/aws_user_data.sh")
  vars = merge(local.user_data_common, {
    startup_script = "master.sh",
    heap_size      = var.master_heap_size
  })
}

data "template_file" "bootstrap_userdata_script" {
  template = file("${path.module}/../templates/aws_user_data.sh")
  vars = merge(local.user_data_common, {
    startup_script = "bootstrap.sh",
    heap_size      = var.master_heap_size
  })
}

resource "aws_launch_template" "master" {
  name_prefix   = "elasticsearch-${var.es_cluster}-master-nodes"
  image_id      = data.aws_ami.elasticsearch.id
  instance_type = var.master_instance_type
  user_data     = base64encode(data.template_file.master_userdata_script.rendered)
  key_name      = var.key_name

  iam_instance_profile {
    arn = aws_iam_instance_profile.elasticsearch.arn
  }

  network_interfaces {
    delete_on_termination       = true
    associate_public_ip_address = false
    security_groups = concat(
      [aws_security_group.elasticsearch_security_group.id],
      var.additional_security_groups,
    )
  }

  lifecycle {
    create_before_destroy = true
  }
}

# One fixed-size autoscaling group per availability zone key in var.masters_count.
resource "aws_autoscaling_group" "master_nodes" {
  count = length(keys(var.masters_count))

  name             = "elasticsearch-${var.es_cluster}-master-nodes-${keys(var.masters_count)[count.index]}"
  max_size         = var.masters_count[keys(var.masters_count)[count.index]]
  min_size         = var.masters_count[keys(var.masters_count)[count.index]]
  desired_capacity = var.masters_count[keys(var.masters_count)[count.index]]
  default_cooldown = 30
  force_delete     = true

  vpc_zone_identifier = local.cluster_subnet_ids[keys(var.masters_count)[count.index]]

  launch_template {
    id      = aws_launch_template.master.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = format("%s-master-node", var.es_cluster)
    propagate_at_launch = true
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }

  tag {
    key                 = "Cluster"
    value               = "${var.environment}-${var.es_cluster}"
    propagate_at_launch = true
  }

  tag {
    key                 = "Role"
    value               = "master"
    propagate_at_launch = true
  }

  lifecycle {
    create_before_destroy = true
  }

  depends_on = [aws_ebs_volume.master]
}

# Temporary bootstrap instance: skipped in single-node mode and once the
# cluster is recorded as bootstrapped. Shutting the instance down terminates it.
resource "aws_instance" "bootstrap_node" {
  count = local.singlenode_mode || local.is_cluster_bootstrapped ? 0 : 1

  ami                                  = data.aws_ami.elasticsearch.id
  instance_type                        = var.master_instance_type
  instance_initiated_shutdown_behavior = "terminate"
  vpc_security_group_ids = concat(
    [aws_security_group.elasticsearch_security_group.id],
    var.additional_security_groups,
  )
  iam_instance_profile = aws_iam_instance_profile.elasticsearch.id
  user_data            = data.template_file.bootstrap_userdata_script.rendered
  key_name             = var.key_name
  subnet_id            = local.bootstrap_node_subnet_id

  associate_public_ip_address = false

  tags = {
    Name        = "${var.es_cluster}-bootstrap-node"
    Environment = var.environment
    Cluster     = "${var.environment}-${var.es_cluster}"
    Role        = "bootstrap"
  }
}

# Writes "1" to the marker file on create and "0" on destroy.
resource "null_resource" "cluster_bootstrap_state" {
  provisioner "local-exec" {
    command = "printf 1 > ${path.module}/cluster_bootstrap_state"
  }
  provisioner "local-exec" {
    when    = destroy
    command = "printf 0 > ${path.module}/cluster_bootstrap_state"
  }

  depends_on = [aws_instance.bootstrap_node]
}

================================================
FILE: terraform-aws/outputs.tf
================================================
output "clients_dns" {
  value = aws_lb.elasticsearch-alb.*.dns_name
}

output "vm_password" {
  value = random_string.vm-login-password.result
}

================================================
FILE: terraform-aws/singlenode.tf
================================================
data "template_file" "singlenode_userdata_script" {
  template = file("${path.module}/../templates/aws_user_data.sh")
  vars = merge(local.user_data_common, {
    startup_script = "singlenode.sh",
    heap_size      = var.master_heap_size
  })
}

resource "aws_launch_template" "single_node" {
  name_prefix   = "elasticsearch-${var.es_cluster}-single-node"
  image_id      = data.aws_ami.kibana_client.id
  instance_type = var.data_instance_type
  user_data     = base64encode(data.template_file.singlenode_userdata_script.rendered)
  key_name      = var.key_name

  ebs_optimized = var.ebs_optimized

  iam_instance_profile {
    arn = aws_iam_instance_profile.elasticsearch.arn
  }

  network_interfaces {
    delete_on_termination       = true
    associate_public_ip_address = false
    # Include var.additional_security_groups, as the master launch template and
    # the bootstrap instance do; it defaults to an empty list.
    security_groups = concat(
      [aws_security_group.elasticsearch_security_group.id],
      [aws_security_group.elasticsearch_clients_security_group.id],
      var.additional_security_groups,
    )
  }

  lifecycle {
    create_before_destroy = true
  }
}

# Exactly one instance, created only in single-node mode.
resource "aws_autoscaling_group" "singlenode" {
  count = local.singlenode_mode ? 1 : 0

  name             = "elasticsearch-${var.es_cluster}-singlenode"
  min_size         = 1
  max_size         = 1
  desired_capacity = 1
  default_cooldown = 30
  force_delete     = true

  vpc_zone_identifier = [local.singlenode_subnet_id]

  target_group_arns = [
    aws_lb_target_group.esearch-p9200-tg.arn,
    aws_lb_target_group.kibana-p5601-tg[0].arn,
  ]

  launch_template {
    id      = aws_launch_template.single_node.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = format("%s-elasticsearch", var.es_cluster)
    propagate_at_launch = true
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }

  tag {
    key                 = "Cluster"
    value               = "${var.environment}-${var.es_cluster}"
    propagate_at_launch = true
  }

  tag {
    key                 = "Role"
    value               = "singlenode"
    propagate_at_launch = true
  }

  lifecycle {
    create_before_destroy = true
  }

  depends_on = [aws_ebs_volume.singlenode]
}

================================================
FILE: terraform-aws/terraform.tfvars.example
================================================
aws_region="us-east-1"
es_cluster="elastic-cluster"
vpc_id="vpc-somevpc"
key_name="keyname"
masters_count = { "us-east-1a" = 3 }
datas_count = { "us-east-1a" = 1 }
clients_count = { "us-east-1a" = 1 }
data_voters_count = { "us-east-1a" = 2 }
security_enabled = true
monitoring_enabled = false
client_user = "someuser"
public_facing = false
auto_shut_down_bootstrap_node = true

================================================
FILE: terraform-aws/variables.tf
================================================
### MANDATORY ###
variable "es_cluster" {
  description = "Name of the elasticsearch cluster, used in node discovery"
}

variable "aws_region" {
  type = string
}

variable "vpc_id" {
  description = "VPC ID to create the Elasticsearch cluster in"
  type        = string
}

variable "clients_subnet_ids" {
  description = "Subnets to run client nodes in, defined as availability zone -> subnets mapping. Will autofill to all available subnets in AZ when left empty."
  type        = map(list(string))
  default     = {}
}

variable "cluster_subnet_ids" {
  description = "Subnets to run cluster nodes in, defined as availability zone -> subnets mapping. Will autofill to all available subnets in AZ when left empty."
  type        = map(list(string))
  default     = {}
}

variable "key_name" {
  description = "Key name to be used with the launched EC2 instances."
  default     = "elasticsearch"
}

variable "environment" {
  default = "default"
}

variable "data_instance_type" {
  type    = string
  default = "c5.2xlarge"
}

variable "master_instance_type" {
  type    = string
  default = "c5.large"
}

variable "elasticsearch_volume_size" {
  type    = string
  default = "100" # gb
}

variable "volume_encryption" {
  default = true
}

variable "elasticsearch_data_dir" {
  default = "/opt/elasticsearch/data"
}

variable "elasticsearch_logs_dir" {
  default = "/var/log/elasticsearch"
}

# default elasticsearch heap size
variable "data_heap_size" {
  type    = string
  default = "8g"
}

variable "master_heap_size" {
  type    = string
  default = "2g"
}

variable "client_heap_size" {
  type    = string
  default = "1g"
}

# Node counts are maps keyed by availability zone, e.g. { "us-east-1a" = 3 }.
variable "masters_count" {
  type        = map(number)
  default     = {}
  description = "Master nodes count per availability zone. If all node counts are empty, will run in singlenode mode."
}

variable "datas_count" {
  type        = map(number)
  default     = {}
  description = "Data nodes count per availability zone. If all node counts are empty, will run in singlenode mode."
}

variable "data_voters_count" {
  type        = map(number)
  default     = {}
  description = "Data voter nodes count per availability zone. If all node counts are empty, will run in singlenode mode."
}

variable "clients_count" {
  type        = map(number)
  default     = {}
  description = "Client nodes count per availability zone. If all node counts are empty, will run in singlenode mode."
}

variable "security_enabled" {
  description = "Whether or not to enable x-pack security on the cluster"
  default     = false
}

variable "monitoring_enabled" {
  description = "Whether or not to enable x-pack monitoring on the cluster"
  default     = false
}

variable "client_user" {
  description = "The username to use when setting up basic auth on Grafana and Cerebro."
  default     = "elastic"
}

variable "public_facing" {
  description = "Whether or not the created cluster should be accessible from the public internet"
  type        = bool
  default     = true
}

# the ability to add additional existing security groups. In our case
# we have consul running as agents on the box
variable "additional_security_groups" {
  type    = list(string)
  default = []
}

# NOTE(review): the default is the string "true" rather than a boolean;
# presumably relies on Terraform's string-to-bool conversion - confirm.
variable "ebs_optimized" {
  description = "Whether data instances are EBS optimized or not"
  default     = "true"
}

variable "xpack_monitoring_host" {
  description = "ES host to send monitoring data"
  default     = "http://localhost:9200"
}

# NOTE(review): the default is the boolean false although the description asks
# for a host; presumably "false" means "disabled" to the init scripts - confirm.
variable "filebeat_monitoring_host" {
  description = "ES host to send filebeat data"
  default     = false
}

variable "s3_backup_bucket" {
  description = "S3 bucket for backups"
  default     = ""
}

variable "alb_subnets" {
  description = "Subnets to run the ALB in. Defaults to all VPC subnets."
  default     = []
}

variable "singlenode_az" {
  description = "This variable is required when running in singlenode mode. Singlenode mode is enabled when masters_count, datas_count and clients_count are all empty."
  default     = ""
}

variable "bootstrap_node_subnet_id" {
  description = "Use to override which subnet the bootstrap node is created in."
  default     = ""
}

variable "use_g1gc" {
  description = "Whether or not to enable G1GC in jvm.options ES config. Left in for backwards compatibility, deployments with Elasticsearch 7.7 and above should not use this."
  default     = false
}

variable "DEV_MODE_scripts_s3_bucket" {
  description = "S3 bucket to override init scripts from. Should not be used on production."
  default     = ""
}

variable "requires_bootstrapping" {
  description = "Overrides cluster bootstrap state"
  default     = true
}

variable "elasticsearch_packer_image" {
  description = "The name of the image family for elasticsearch"
  default     = "elasticsearch7-packer-image"
}

variable "kibana_packer_image" {
  description = "The name of the image family for kibana"
  default     = "kibana7-packer-image"
}

variable "ec2_vpc_endpoint_id" {
  description = "Use to skip creation of ec2 VPC endpoint and reference your own"
  default     = ""
}

variable "s3_vpc_endpoint_id" {
  description = "Use to skip creation of s3 VPC endpoint and reference your own"
  default     = ""
}

variable "autoscaling_vpc_endpoint_id" {
  description = "Use to skip creation of autoscaling VPC endpoint and reference your own"
  default     = ""
}

variable "log_size" {
  description = "Retained log4j log size in MB"
  default     = "128"
}

variable "log_level" {
  description = "log4j log level"
  default     = "INFO"
}

variable "auto_shut_down_bootstrap_node" {
  description = "disable to prevent bootstrap node from shutting down"
  default     = true
}

variable "disk_type" {
  description = "disk type"
  default     = "gp2"
}

================================================
FILE: terraform-aws/versions.tf
================================================
terraform {
  required_version = ">= 0.12"
}

================================================
FILE: terraform-aws/vpc.tf
================================================
# The VPC the cluster is deployed into.
data "aws_vpc" "selected" {
  id = var.vpc_id
}

# Every subnet of the VPC.
data "aws_subnets" "all-subnets" {
  filter {
    name   = "vpc-id"
    values = [var.vpc_id]
  }
}

# Route tables of the VPC; the S3 gateway endpoint is attached to all of them.
data "aws_route_tables" "vpc_route_tables" {
  vpc_id = var.vpc_id
}

# One lookup per entry of local.all_availability_zones: the VPC's subnets in that AZ.
data "aws_subnets" "subnets-per-az" {
  count = length(local.all_availability_zones)

  filter {
    name   = "availability-zone"
    values = [local.all_availability_zones[count.index]]
  }

  filter {
    name   = "vpc-id"
    values = [var.vpc_id]
  }
}

# Security group attached to the interface VPC endpoints below.
# Ingress on 80/443 is limited to the VPC's own CIDR blocks (primary plus any
# associated blocks) instead of 0.0.0.0/0: the endpoints exist to serve nodes
# inside this VPC, so nothing outside it needs to reach them.
# NOTE(review): if peered or otherwise connected networks must use these
# endpoints, their CIDRs have to be added here.
resource "aws_security_group" "vpc-endpoint" {
  vpc_id = var.vpc_id

  ingress {
    from_port = 80
    to_port   = 80
    protocol  = "tcp"
    cidr_blocks = distinct(concat(
      [data.aws_vpc.selected.cidr_block],
      data.aws_vpc.selected.cidr_block_associations[*].cidr_block,
    ))
  }

  ingress {
    from_port = 443
    to_port   = 443
    protocol  = "tcp"
    cidr_blocks = distinct(concat(
      [data.aws_vpc.selected.cidr_block],
      data.aws_vpc.selected.cidr_block_associations[*].cidr_block,
    ))
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# Interface endpoint for the EC2 API; skipped when an existing endpoint id is supplied.
resource "aws_vpc_endpoint" "ec2" {
  vpc_id              = var.vpc_id
  count               = var.ec2_vpc_endpoint_id == "" ? 1 : 0
  service_name        = "com.amazonaws.${var.aws_region}.ec2"
  vpc_endpoint_type   = "Interface"
  private_dns_enabled = true
  security_group_ids  = [aws_security_group.vpc-endpoint.id]

  # compact() drops the empty strings left by unset singlenode/bootstrap subnet ids.
  subnet_ids = compact(setunion(
    local.flat_cluster_subnet_ids,
    local.flat_clients_subnet_ids,
    [local.singlenode_subnet_id],
    [local.bootstrap_node_subnet_id]
  ))
}

# Interface endpoint for the Auto Scaling API; skipped when an existing endpoint id is supplied.
resource "aws_vpc_endpoint" "autoscaling" {
  vpc_id              = var.vpc_id
  count               = var.autoscaling_vpc_endpoint_id == "" ? 1 : 0
  service_name        = "com.amazonaws.${var.aws_region}.autoscaling"
  vpc_endpoint_type   = "Interface"
  private_dns_enabled = true
  security_group_ids  = [aws_security_group.vpc-endpoint.id]

  subnet_ids = compact(setunion(
    local.flat_cluster_subnet_ids,
    local.flat_clients_subnet_ids,
    [local.singlenode_subnet_id],
    [local.bootstrap_node_subnet_id]
  ))
}

resource "aws_vpc_endpoint" "s3" {
  vpc_id = var.vpc_id
  count = var.s3_vpc_endpoint_id == "" ?
1 : 0 service_name = "com.amazonaws.${var.aws_region}.s3" vpc_endpoint_type = "Gateway" route_table_ids = data.aws_route_tables.vpc_route_tables.ids } ================================================ FILE: terraform-azure/README.md ================================================ # Azure deployment ## Create the machine images with Packer Go to the packer folder and see the README there. Once you have the machine image IDs, return here and continue with the next steps. ## Create key-pair or use your own This deployment is configured to use your default SSH keys as machine credentials. If you want to use other keys, change the path to the keys you want to use (look for `key_path` in variables.tf). Use [this guide](https://help.github.com/articles/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent/) to generate new keys if needed. ## Configurations Edit `variables.tf` to specify the following: * `azure_location` - the Azure location where to launch the cluster in. * `azure_subscription_id`, `azure_client_id`, `azure_client_secret`, `azure_tenant_id` - the same credentials used in the Packer step. See the README there for instructions on how to retrieve them. * `es_cluster` - the name of the Elasticsearch cluster to launch. * `key_path` - the filesystem path to the SSH key to use as virtual machines login credentials. * `data_instance_type`, `master_instance_type`, `client_instance_type` - Azure machine instance types to use for each machine type in the cluster. * `security_enabled`, `monitoring_enabled` - whether to enable X-Pack Security and Monitoring features, respectively. * `client_user` - the username to use for HTTP basic authentication that is enabled on the client nodes. Password is generated automatically and can be accessed after deployment by running `terraform output`. The rest of the configurations are mostly around cluster topology and machine types and sizes. 
### Cluster topology Two modes of deployment are supported: * A recommended configuration, with dedicated master-eligible nodes, data nodes, and client nodes. This is a production-ready and best-practice configuration. See more details in the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html). * Single node mode - mostly useful for experimentation At this point we consider the role `ingest` as synonymous with `data`, so all data nodes are also ingest nodes. Single-node mode is selected by setting both `masters_count` and `datas_count` to `0` in `variables.tf`. For the recommended configuration, edit `variables.tf` and set the number of master nodes to 3, data nodes to at least 2, and client nodes to at least 1. All nodes with the `client` role will be attached to an Azure load balancer, so access to all client nodes can be done via the DNS it exposes. ## Launch the cluster with Terraform ```bash terraform plan terraform apply ``` When terraform is done, you should see a lot of output ending with something like this: ``` Apply complete! Resources: 14 added, 0 changed, 0 destroyed. The state of your infrastructure has been saved to the path below. This state is required to modify and destroy your infrastructure, so keep it safe. To inspect the complete state use the `terraform show` command. State path: terraform.tfstate Outputs: public_dns = elasticsearch-cluster-foo.eastus.cloudapp.azure.com vm_password = rBTKoLsf7x8ODZVd ``` Note `public_dns` (or `clients_public_dns` when client nodes are deployed) and `vm_password` - that's your entry point to the cluster and the password for the `exampleuser` default user. ### Look around The client nodes are the ones exposed to external networks. They provide endpoints for Kibana, Grafana, Cerebro and direct Elasticsearch access. By default client nodes are accessible via their public IPs and the DNS of the load balancer they are attached to (see above). Client nodes listen on port 8080 and are password protected.
Access is managed by nginx, which expects a username and password pair. The default user name is `exampleuser` and the password is generated automatically when deploying. You can change those defaults by editing [this file](https://github.com/synhershko/elasticsearch-cloud-deploy/blob/master/packer/install-nginx.sh) and running Packer again. On client nodes you will find: * Kibana access is direct on port 80 of the load balancer host (http://host) * [Cerebro](https://github.com/lmenezes/cerebro) (a cluster management UI) is available on http://host/cerebro/ * For direct Elasticsearch access, go to http://host/es/ * In the single-node deployment mode, the default port is 8080 and the host is the machine host (not the load balancer) * Grafana is accessible on port 3000 - http://host:3000/ The default credentials are `exampleuser` as username, and the password generated by Terraform during the deployment (it shows up as `vm_password` when you run `terraform output` after deployment). Elastic's X-Pack is deployed on the cluster out of the box with monitoring enabled but security disabled - you should enable and set up X-Pack Security for any production deployment. To ssh to one of the instances: ```bash ssh ubuntu@{public IP / DNS of the instance or load balancer} ``` ## Backups The Azure repository plugin is installed on the cluster and ready to be used for index snapshots and (should you ever need) a restore. Official documentation is available here: https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository-azure-usage.html ### Auto- and manual- scale out The entire stack is deployed using Azure scale-sets, which are easy to scale up and down manually (from the Azure portal, from the command line, or using the same Terraform scripts), or automatically based on host metrics and application metrics using [Azure scale-set features](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-autoscale-overview).
## Elastic Discovery on Azure Unfortunately, the story of cluster discovery on Azure is practically non-existent. There is an Azure "Classic" discovery plugin that has been deprecated since circa 5.0 and Elastic are yet to release a properly working discovery plugin (there is [a PR for one](https://github.com/elastic/elasticsearch/pull/22679) which is open for over a year now if you want to track it). A discovery plugin on a public cloud is important because it takes a lot of complexity off you, and manages the initial cluster nodes discovery using the available cloud APIs. Having none available, I defaulted to using vnet and naming conventions. Another viable option is using file-based discovery, which is a file describing your cluster you can upload to the images and use as a seed. ================================================ FILE: terraform-azure/clients.tf ================================================ data "template_file" "client_userdata_script" { template = "${file("${path.module}/../templates/user_data.sh")}" vars { cloud_provider = "azure" volume_name = "" elasticsearch_data_dir = "/var/lib/elasticsearch" elasticsearch_logs_dir = "${var.elasticsearch_logs_dir}" heap_size = "1g" es_cluster = "${var.es_cluster}" es_environment = "${var.environment}-${var.es_cluster}" security_groups = "" availability_zones = "" minimum_master_nodes = "${format("%d", var.masters_count / 2 + 1)}" master = "false" data = "false" http_enabled = "true" security_enabled = "${var.security_enabled}" monitoring_enabled = "${var.monitoring_enabled}" client_user = "${var.client_user}" client_pwd = "${random_string.vm-login-password.result}" } } resource "azurerm_virtual_machine_scale_set" "client-nodes" { count = "${var.clients_count == "0" ? 
"0" : "1"}" name = "es-${var.es_cluster}-client-nodes" resource_group_name = "${azurerm_resource_group.elasticsearch.name}" location = "${var.azure_location}" "sku" { name = "${var.client_instance_type}" tier = "Standard" capacity = "${var.clients_count}" } upgrade_policy_mode = "Manual" overprovision = false "os_profile" { computer_name_prefix = "${var.es_cluster}-client" admin_username = "ubuntu" admin_password = "${random_string.vm-login-password.result}" custom_data = "${data.template_file.client_userdata_script.rendered}" } "network_profile" { name = "es-${var.es_cluster}-net-profile" primary = true "ip_configuration" { name = "es-${var.es_cluster}-ip-profile" subnet_id = "${azurerm_subnet.elasticsearch_subnet.id}" load_balancer_backend_address_pool_ids = ["${azurerm_lb_backend_address_pool.clients-lb-backend.id}"] } } storage_profile_image_reference { id = "${data.azurerm_image.kibana.id}" } "storage_profile_os_disk" { caching = "ReadWrite" create_option = "FromImage" managed_disk_type = "Standard_LRS" } os_profile_linux_config { disable_password_authentication = true ssh_keys { path = "/home/ubuntu/.ssh/authorized_keys" key_data = "${file(var.key_path)}" } } } ================================================ FILE: terraform-azure/datas.tf ================================================ data "template_file" "data_userdata_script" { template = "${file("${path.module}/../templates/user_data.sh")}" vars { cloud_provider = "azure" volume_name = "" elasticsearch_data_dir = "${var.elasticsearch_data_dir}" elasticsearch_logs_dir = "${var.elasticsearch_logs_dir}" heap_size = "${var.data_heap_size}" es_cluster = "${var.es_cluster}" es_environment = "${var.environment}-${var.es_cluster}" security_groups = "" availability_zones = "" minimum_master_nodes = "${format("%d", var.masters_count / 2 + 1)}" master = "false" data = "true" http_enabled = "true" security_enabled = "${var.security_enabled}" monitoring_enabled = "${var.monitoring_enabled}" client_user = "" 
client_pwd = "" } } resource "azurerm_virtual_machine_scale_set" "data-nodes" { count = "${var.datas_count == "0" ? "0" : "1"}" name = "es-${var.es_cluster}-data-nodes" resource_group_name = "${azurerm_resource_group.elasticsearch.name}" location = "${var.azure_location}" "sku" { name = "${var.data_instance_type}" tier = "Standard" capacity = "${var.datas_count}" } upgrade_policy_mode = "Manual" overprovision = false "os_profile" { computer_name_prefix = "${var.es_cluster}-data" admin_username = "ubuntu" admin_password = "${random_string.vm-login-password.result}" custom_data = "${data.template_file.data_userdata_script.rendered}" } "network_profile" { name = "es-${var.es_cluster}-net-profile" primary = true accelerated_networking = true "ip_configuration" { name = "es-${var.es_cluster}-ip-profile" subnet_id = "${azurerm_subnet.elasticsearch_subnet.id}" } } storage_profile_image_reference { id = "${data.azurerm_image.elasticsearch.id}" } "storage_profile_os_disk" { caching = "ReadWrite" create_option = "FromImage" managed_disk_type = "Standard_LRS" } os_profile_linux_config { disable_password_authentication = true ssh_keys { path = "/home/ubuntu/.ssh/authorized_keys" key_data = "${file(var.key_path)}" } } // storage_profile_data_disk { // lun = 0 // caching = "ReadWrite" // create_option = "Empty" // disk_size_gb = "${var.elasticsearch_volume_size}" // managed_disk_type = "Standard_LRS" // } } ================================================ FILE: terraform-azure/images.tf ================================================ data "azurerm_image" "elasticsearch" { resource_group_name = "packer-elasticsearch-images" name_regex = "^elasticsearch6-\\d{4,4}-\\d{2,2}-\\d{2,2}T\\d{6,6}" sort_descending = true } data "azurerm_image" "kibana" { resource_group_name = "packer-elasticsearch-images" name_regex = "^kibana6-\\d{4,4}-\\d{2,2}-\\d{2,2}T\\d{6,6}" sort_descending = true } ================================================ FILE: terraform-azure/lb.tf 
================================================
// Static public IP for the client load balancer.
resource "azurerm_public_ip" "clients" {
  count                        = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  name                         = "es-${var.es_cluster}-public-ip"
  location                     = "${var.azure_location}"
  resource_group_name          = "${azurerm_resource_group.elasticsearch.name}"
  public_ip_address_allocation = "static"
  domain_name_label            = "${azurerm_resource_group.elasticsearch.name}"
}

// Internal (subnet-facing) load balancer.
// NOTE(review): this LB is only selected by the conditionals below when
// associate_public_ip is NOT "true", yet its count is non-zero only when it IS
// "true" - confirm the intended condition.
resource "azurerm_lb" "clients" {
  count               = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  location            = "${var.azure_location}"
  name                = "es-${var.es_cluster}-clients-lb"
  resource_group_name = "${azurerm_resource_group.elasticsearch.name}"

  frontend_ip_configuration {
    name                          = "es-${var.es_cluster}-ip"
    subnet_id                     = "${azurerm_subnet.elasticsearch_subnet.id}"
    private_ip_address_allocation = "dynamic"
  }
}

// Public load balancer, fronted by the static public IP above.
resource "azurerm_lb" "clients-public" {
  count               = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  location            = "${var.azure_location}"
  name                = "es-${var.es_cluster}-clients-public-lb"
  resource_group_name = "${azurerm_resource_group.elasticsearch.name}"

  frontend_ip_configuration {
    name                 = "es-${var.es_cluster}-public-ip"
    public_ip_address_id = "${azurerm_public_ip.clients.id}"
  }
}

// associate_public_ip is a string variable (default "true"); every conditional
// below compares against the string "true", exactly like the count expressions,
// so the choice of load balancer can never disagree with what was created.
resource "azurerm_lb_backend_address_pool" "clients-lb-backend" {
  count               = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  name                = "es-${var.es_cluster}-clients-lb-backend"
  resource_group_name = "${azurerm_resource_group.elasticsearch.name}"
  loadbalancer_id     = "${var.associate_public_ip == "true" ? azurerm_lb.clients-public.id : azurerm_lb.clients.id}"
}

// HTTP health probe against /status on port 8080.
resource "azurerm_lb_probe" "clients-httpprobe" {
  count               = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  name                = "es-${var.es_cluster}-clients-lb-probe"
  port                = 8080
  protocol            = "Http"
  request_path        = "/status"
  resource_group_name = "${azurerm_resource_group.elasticsearch.name}"
  loadbalancer_id     = "${var.associate_public_ip == "true" ? azurerm_lb.clients-public.id : azurerm_lb.clients.id}"
}

// Kibana, Cerebro and Elasticsearch access - protected by default by the nginx proxy
resource "azurerm_lb_rule" "clients-lb-rule" {
  count                          = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  name                           = "es-${var.es_cluster}-clients-lb-rule"
  backend_port                   = 8080
  frontend_port                  = 80
  frontend_ip_configuration_name = "${var.associate_public_ip == "true" ? "es-${var.es_cluster}-public-ip" : "es-${var.es_cluster}-ip"}"
  backend_address_pool_id        = "${azurerm_lb_backend_address_pool.clients-lb-backend.id}"
  protocol                       = "Tcp"
  loadbalancer_id                = "${var.associate_public_ip == "true" ? azurerm_lb.clients-public.id : azurerm_lb.clients.id}"
  resource_group_name            = "${azurerm_resource_group.elasticsearch.name}"
}

// Grafana instance, protected by default by their own login screen
resource "azurerm_lb_rule" "clients-lb-rule2" {
  count                          = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  name                           = "es-${var.es_cluster}-clients-lb-rule2"
  backend_port                   = 3000
  frontend_port                  = 3000
  frontend_ip_configuration_name = "${var.associate_public_ip == "true" ? "es-${var.es_cluster}-public-ip" : "es-${var.es_cluster}-ip"}"
  backend_address_pool_id        = "${azurerm_lb_backend_address_pool.clients-lb-backend.id}"
  protocol                       = "Tcp"
  loadbalancer_id                = "${var.associate_public_ip == "true" ? azurerm_lb.clients-public.id : azurerm_lb.clients.id}"
  resource_group_name            = "${azurerm_resource_group.elasticsearch.name}"
}

// SSH access
resource "azurerm_lb_rule" "clients-lb-rule-ssh" {
  count                          = "${var.associate_public_ip == "true" && var.clients_count != "0" ? "1" : "0"}"
  name                           = "es-${var.es_cluster}-clients-lb-rule-ssh"
  backend_port                   = 22
  frontend_port                  = 22
  frontend_ip_configuration_name = "${var.associate_public_ip == "true" ? "es-${var.es_cluster}-public-ip" : "es-${var.es_cluster}-ip"}"
  backend_address_pool_id        = "${azurerm_lb_backend_address_pool.clients-lb-backend.id}"
  protocol                       = "Tcp"
  loadbalancer_id                = "${var.associate_public_ip == "true" ? azurerm_lb.clients-public.id : azurerm_lb.clients.id}"
  resource_group_name            = "${azurerm_resource_group.elasticsearch.name}"
}

================================================
FILE: terraform-azure/main.tf
================================================
provider "azurerm" {
  subscription_id = "${var.azure_subscription_id}"
  client_id       = "${var.azure_client_id}"
  client_secret   = "${var.azure_client_secret}"
  tenant_id       = "${var.azure_tenant_id}"
}

// Generated password: used as the VM admin password and as the client password.
resource "random_string" "vm-login-password" {
  length           = 16
  special          = true
  override_special = "!@#%&-_"
}

resource "azurerm_resource_group" "elasticsearch" {
  location = "${var.azure_location}"
  name     = "elasticsearch-cluster-${var.es_cluster}"
}

resource "azurerm_virtual_network" "elasticsearch_vnet" {
  name                = "es-${var.es_cluster}-vnet"
  location            = "${var.azure_location}"
  resource_group_name = "${azurerm_resource_group.elasticsearch.name}"
  address_space       = ["10.1.0.0/24"]
}

// Single subnet spanning the whole vnet address space.
resource "azurerm_subnet" "elasticsearch_subnet" {
  name                 = "es-${var.es_cluster}-subnet"
  resource_group_name  = "${azurerm_resource_group.elasticsearch.name}"
  virtual_network_name = "${azurerm_virtual_network.elasticsearch_vnet.name}"
  address_prefix       = "10.1.0.0/24"
}

================================================
FILE: terraform-azure/masters.tf
================================================
data "template_file" "master_userdata_script" {
  template = "${file("${path.module}/../templates/user_data.sh")}"

  vars {
    cloud_provider = "azure"
    volume_name = ""
    elasticsearch_data_dir = "/var/lib/elasticsearch"
    elasticsearch_logs_dir = "${var.elasticsearch_logs_dir}"
    heap_size =
"${var.master_heap_size}" es_cluster = "${var.es_cluster}" es_environment = "${var.environment}-${var.es_cluster}" security_groups = "" availability_zones = "" minimum_master_nodes = "${format("%d", var.masters_count / 2 + 1)}" master = "true" data = "false" http_enabled = "false" security_enabled = "${var.security_enabled}" monitoring_enabled = "${var.monitoring_enabled}" client_user = "" client_pwd = "" } } resource "azurerm_virtual_machine_scale_set" "master-nodes" { count = "${var.masters_count == "0" ? "0" : "1"}" name = "es-${var.es_cluster}-master-nodes" resource_group_name = "${azurerm_resource_group.elasticsearch.name}" location = "${var.azure_location}" "sku" { name = "${var.master_instance_type}" tier = "Standard" capacity = "${var.masters_count}" } upgrade_policy_mode = "Manual" overprovision = false "os_profile" { computer_name_prefix = "${var.es_cluster}-master" admin_username = "ubuntu" admin_password = "${random_string.vm-login-password.result}" custom_data = "${data.template_file.master_userdata_script.rendered}" } "network_profile" { name = "es-${var.es_cluster}-net-profile" primary = true "ip_configuration" { name = "es-${var.es_cluster}-ip-profile" subnet_id = "${azurerm_subnet.elasticsearch_subnet.id}" } } storage_profile_image_reference { id = "${data.azurerm_image.elasticsearch.id}" } "storage_profile_os_disk" { caching = "ReadWrite" create_option = "FromImage" managed_disk_type = "Standard_LRS" } os_profile_linux_config { disable_password_authentication = true ssh_keys { path = "/home/ubuntu/.ssh/authorized_keys" key_data = "${file(var.key_path)}" } } } ================================================ FILE: terraform-azure/outputs.tf ================================================ output "es_image_id" { value = "${data.azurerm_image.elasticsearch.name}" } output "kibana_image_id" { value = "${data.azurerm_image.kibana.name}" } output "clients_public_dns" { value = "${azurerm_public_ip.clients.*.fqdn}" } output "clients_public_ip_address" { 
value = "${azurerm_public_ip.clients.*.ip_address}" } output "public_dns" { value = "${azurerm_public_ip.single-node.*.fqdn}" } output "public_ip_address" { value = "${azurerm_public_ip.single-node.*.ip_address}" } output "vm_password" { value = "${random_string.vm-login-password.result}" } ================================================ FILE: terraform-azure/single-node.tf ================================================ data "template_file" "singlenode_userdata_script" { template = "${file("${path.module}/../templates/user_data.sh")}" vars { cloud_provider = "azure" volume_name = "" elasticsearch_data_dir = "${var.elasticsearch_data_dir}" elasticsearch_logs_dir = "${var.elasticsearch_logs_dir}" heap_size = "${var.data_heap_size}" es_cluster = "${var.es_cluster}" es_environment = "${var.environment}-${var.es_cluster}" security_groups = "" availability_zones = "" minimum_master_nodes = "${format("%d", var.masters_count / 2 + 1)}" master = "true" data = "true" http_enabled = "true" security_enabled = "${var.security_enabled}" monitoring_enabled = "${var.monitoring_enabled}" client_user = "${var.client_user}" client_pwd = "${random_string.vm-login-password.result}" } } resource "azurerm_public_ip" "single-node" { count = "${var.masters_count == "0" && var.datas_count == "0" ? "1" : "0"}" name = "es-${var.es_cluster}-single-node-public-ip" location = "${var.azure_location}" resource_group_name = "${azurerm_resource_group.elasticsearch.name}" public_ip_address_allocation = "static" domain_name_label = "${azurerm_resource_group.elasticsearch.name}" } resource "azurerm_network_interface" "single-node" { // Only create if it's a single-node configuration count = "${var.masters_count == "0" && var.datas_count == "0" ? 
"1" : "0"}" name = "es-${var.es_cluster}-singlenode-nic" location = "${var.azure_location}" resource_group_name = "${azurerm_resource_group.elasticsearch.name}" ip_configuration { name = "es-${var.es_cluster}-singlenode-ip" subnet_id = "${azurerm_subnet.elasticsearch_subnet.id}" private_ip_address_allocation = "dynamic" public_ip_address_id = "${azurerm_public_ip.single-node.id}" } } resource "azurerm_virtual_machine" "single-node" { // Only create if it's a single-node configuration count = "${var.masters_count == "0" && var.datas_count == "0" ? "1" : "0"}" name = "es-${var.es_cluster}-singlenode" location = "${var.azure_location}" resource_group_name = "${azurerm_resource_group.elasticsearch.name}" network_interface_ids = ["${azurerm_network_interface.single-node.id}"] vm_size = "${var.data_instance_type}" storage_image_reference { id = "${data.azurerm_image.kibana.id}" } storage_os_disk { name = "es-${var.es_cluster}-singlenode-osdisk" caching = "ReadWrite" create_option = "FromImage" managed_disk_type = "Standard_LRS" } "os_profile" { computer_name = "es-${var.es_cluster}-singlenode" admin_username = "ubuntu" admin_password = "${random_string.vm-login-password.result}" custom_data = "${data.template_file.singlenode_userdata_script.rendered}" } os_profile_linux_config { disable_password_authentication = true ssh_keys { path = "/home/ubuntu/.ssh/authorized_keys" key_data = "${file(var.key_path)}" } } } ================================================ FILE: terraform-azure/variables.tf ================================================ variable "azure_location" { type = "string" default = "East US" } variable "azure_client_id" { type = "string" } variable "azure_client_secret" { type = "string" } variable "azure_subscription_id" { type = "string" } variable "azure_tenant_id" { type = "string" } variable "es_cluster" { description = "Name of the elasticsearch cluster, used in node discovery" default = "my-cluster" } variable "key_path" { description = "Key name to be 
used with the launched EC2 instances." default = "~/.ssh/id_rsa.pub" } variable "environment" { default = "default" } variable "data_instance_type" { type = "string" default = "Standard_D12_v2" } variable "master_instance_type" { type = "string" default = "Standard_A2_v2" } variable "client_instance_type" { type = "string" default = "Standard_A2_v2" } variable "elasticsearch_volume_size" { type = "string" default = "100" # gb } variable "use_instance_storage" { default = "true" } variable "associate_public_ip" { default = "true" } variable "elasticsearch_data_dir" { default = "/mnt/elasticsearch/data" } variable "elasticsearch_logs_dir" { default = "/var/log/elasticsearch" } # default elasticsearch heap size variable "data_heap_size" { type = "string" default = "7g" } variable "master_heap_size" { type = "string" default = "2g" } variable "masters_count" { default = "1" } variable "datas_count" { default = "1" } variable "clients_count" { default = "1" } # whether or not to enable x-pack security on the cluster variable "security_enabled" { default = "false" } # whether or not to enable x-pack monitoring on the cluster variable "monitoring_enabled" { default = "true" } # client nodes have nginx installed on them, these credentials are used for basic auth variable "client_user" { default = "exampleuser" } ================================================ FILE: terraform-gcp/certs.tf ================================================ locals { cert_common_name = "elasticsearch-cloud-deploy autogenerated CA" validity_period_hours = 365 * 24 early_renewal_hours = 30 * 24 } resource "tls_private_key" "ca" { count = var.security_enabled ? 1 : 0 algorithm = "RSA" } resource "tls_self_signed_cert" "ca" { count = var.security_enabled ? 
1 : 0

  key_algorithm     = "RSA"
  private_key_pem   = join("", tls_private_key.ca[*].private_key_pem)
  is_ca_certificate = true

  subject {
    common_name = local.cert_common_name
  }

  validity_period_hours = local.validity_period_hours
  early_renewal_hours   = local.early_renewal_hours

  allowed_uses = [
    "server_auth",
    "cert_signing",
    "crl_signing",
    "client_auth"
  ]
}

# Key pair, CSR and CA-signed certificate for the nodes; all three are
# created only when security is enabled.
resource "tls_private_key" "node" {
  count = var.security_enabled ? 1 : 0

  algorithm = "RSA"
}

resource "tls_cert_request" "node" {
  count = var.security_enabled ? 1 : 0

  key_algorithm   = "RSA"
  private_key_pem = join("", tls_private_key.node[*].private_key_pem)

  subject {
    common_name = local.cert_common_name
  }
}

resource "tls_locally_signed_cert" "node" {
  count = var.security_enabled ? 1 : 0

  # Sign the node CSR with the autogenerated CA.
  cert_request_pem   = join("", tls_cert_request.node[*].cert_request_pem)
  ca_key_algorithm   = "RSA"
  ca_private_key_pem = join("", tls_private_key.ca[*].private_key_pem)
  ca_cert_pem        = join("", tls_self_signed_cert.ca[*].cert_pem)

  validity_period_hours = local.validity_period_hours
  early_renewal_hours   = local.early_renewal_hours

  allowed_uses = [
    "key_encipherment",
    "digital_signature",
    "server_auth",
    "client_auth"
  ]
}

================================================
FILE: terraform-gcp/client.tf
================================================
# Startup script for client nodes: the common user-data variables plus the
# client heap size and the client.sh entry point.
data "template_file" "client_userdata_script" {
  template = file("${path.module}/../templates/gcp_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.client_heap_size
    startup_script = "client.sh"
  })
}

resource "google_compute_target_pool" "client" {
  name = "${var.es_cluster}-client-targetpool"
}

# One managed instance group per zone listed in var.clients_count.
resource "google_compute_instance_group_manager" "client" {
  for_each = toset(keys(var.clients_count))

  provider = google
  project  = var.gcp_project_id
  zone     = each.value
  name     = "${var.es_cluster}-igm-client-${each.value}"

  named_port {
    name = "nginx"
    port = 8080
  }

  named_port {
    name = "es"
    port = 9200
  }

  version {
    instance_template = google_compute_instance_template.client.self_link
    name              = "primary"
  }

  base_instance_name = "${var.es_cluster}-client"
  target_pools       = [google_compute_target_pool.client.self_link]
}

# min_replicas == max_replicas pins each zone's group at the configured count.
resource "google_compute_autoscaler" "client" {
  for_each = toset(keys(var.clients_count))

  name   = "${var.es_cluster}-autoscaler-client-${each.value}"
  zone   = each.value
  target = google_compute_instance_group_manager.client[each.value].self_link

  autoscaling_policy {
    min_replicas    = var.clients_count[each.value]
    max_replicas    = var.clients_count[each.value]
    cooldown_period = 60
  }
}

resource "google_compute_instance_template" "client" {
  provider    = google
  project     = var.gcp_project_id
  name_prefix = "${var.es_cluster}-instance-template-client"

  # Client nodes reuse the master machine type.
  machine_type   = var.master_machine_type
  can_ip_forward = true

  tags = [
    var.es_cluster,
    "es-client-node",
    "http-server",
    "https-server"
  ]

  metadata_startup_script = data.template_file.client_userdata_script.rendered

  labels = {
    environment = var.environment
    cluster     = "${var.environment}-${var.es_cluster}"
    role        = "client"
  }

  # Client nodes boot from the kibana image family.
  disk {
    source_image = data.google_compute_image.kibana.self_link
    boot         = true
  }

  network_interface {
    network = var.cluster_network
  }

  service_account {
    scopes = ["userinfo-email", "compute-rw", "storage-ro"]
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-gcp/datas-voters.tf
================================================
# Startup script for data nodes that are started with is_voting_only = "true".
data "template_file" "data_voting_userdata_script" {
  template = file("${path.module}/../templates/gcp_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.data_heap_size
    is_voting_only = "true"
    startup_script = "data.sh"
  })
}

# One managed instance group per zone listed in var.data_voters_count.
resource "google_compute_instance_group_manager" "data-voters" {
  for_each = toset(keys(var.data_voters_count))

  provider = google
  project  = var.gcp_project_id
  zone     = each.value
  name     = "${var.es_cluster}-igm-data-voters-${each.value}"

  version {
    instance_template = google_compute_instance_template.data-voters.self_link
    name              = "primary"
  }

  named_port {
    name = "es"
port = 9200
  }

  base_instance_name = "${var.es_cluster}-data-voting"

  # Join the client target pool only when direct data access is enabled.
  target_pools = var.enable_direct_data_access ? [google_compute_target_pool.client.self_link] : []
}

# min_replicas == max_replicas pins each zone's group at the configured count.
resource "google_compute_autoscaler" "data-voters" {
  for_each = toset(keys(var.data_voters_count))

  name   = "${var.es_cluster}-autoscaler-data-voters-${each.value}"
  zone   = each.value
  target = google_compute_instance_group_manager.data-voters[each.value].self_link

  autoscaling_policy {
    min_replicas    = var.data_voters_count[each.value]
    max_replicas    = var.data_voters_count[each.value]
    cooldown_period = 60
  }
}

resource "google_compute_instance_template" "data-voters" {
  provider    = google
  project     = var.gcp_project_id
  name_prefix = "${var.es_cluster}-instance-template-data-voters"

  machine_type   = var.data_machine_type
  can_ip_forward = false

  # Tagged as both a data node and a master node.
  tags = [var.es_cluster, "es-data-node", "es-master-node"]

  metadata_startup_script = data.template_file.data_voting_userdata_script.rendered

  labels = {
    environment = var.environment
    cluster     = "${var.environment}-${var.es_cluster}"
    role        = "data-voters"
  }

  disk {
    source_image = data.google_compute_image.elasticsearch.self_link
    boot         = true
  }

  network_interface {
    network = var.cluster_network
  }

  service_account {
    scopes = ["userinfo-email", "compute-rw", "storage-ro"]
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-gcp/datas.tf
================================================
# Startup script for regular data nodes.
data "template_file" "data_userdata_script" {
  template = file("${path.module}/../templates/gcp_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.data_heap_size
    startup_script = "data.sh"
  })
}

# One managed instance group per zone listed in var.datas_count.
resource "google_compute_instance_group_manager" "data" {
  for_each = toset(keys(var.datas_count))

  provider = google
  project  = var.gcp_project_id
  zone     = each.value
  name     = "${var.es_cluster}-igm-data-${each.value}"

  version {
    instance_template = google_compute_instance_template.data.self_link
    name              = "primary"
  }

  named_port {
    name = "es"
port = 9200
  }

  base_instance_name = "${var.es_cluster}-data"

  # Join the client target pool only when direct data access is enabled.
  target_pools = var.enable_direct_data_access ? [google_compute_target_pool.client.self_link] : []
}

# min_replicas == max_replicas pins each zone's group at the configured count.
resource "google_compute_autoscaler" "data" {
  for_each = toset(keys(var.datas_count))

  name   = "${var.es_cluster}-autoscaler-data-${each.value}"
  zone   = each.value
  target = google_compute_instance_group_manager.data[each.value].self_link

  autoscaling_policy {
    min_replicas    = var.datas_count[each.value]
    max_replicas    = var.datas_count[each.value]
    cooldown_period = 60
  }
}

resource "google_compute_instance_template" "data" {
  provider    = google
  project     = var.gcp_project_id
  name_prefix = "${var.es_cluster}-instance-template-data"

  machine_type   = var.data_machine_type
  can_ip_forward = false

  tags = [var.es_cluster, "es-data-node"]

  metadata_startup_script = data.template_file.data_userdata_script.rendered

  labels = {
    environment = var.environment
    cluster     = "${var.environment}-${var.es_cluster}"
    role        = "data"
  }

  disk {
    source_image = data.google_compute_image.elasticsearch.self_link
    boot         = true
  }

  network_interface {
    network = var.cluster_network
  }

  service_account {
    scopes = ["userinfo-email", "compute-rw", "storage-ro"]
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-gcp/dev.tf
================================================
# Bucket for overriding init scripts; created only when a bucket name is set.
resource "google_storage_bucket" "dev" {
  count = var.DEV_MODE_scripts_gcs_bucket != "" ?
1 : 0

  name          = var.DEV_MODE_scripts_gcs_bucket
  location      = var.gcp_region
  force_destroy = true
}

================================================
FILE: terraform-gcp/disks.tf
================================================
# Expand each "zone => node count" map into one JSON-encoded entry per disk
# ({zone, index, name}); JSON strings are used so the result can be a set
# suitable for for_each.
locals {
  master_zone_flattened = toset(flatten([
    for zone_name, node_count in var.masters_count : [
      for idx in range(0, node_count) : jsonencode({
        zone  = zone_name
        index = idx
        name  = "${zone_name}-${idx}"
      })
    ]
  ]))

  data_voters_zone_flattened = toset(flatten([
    for zone_name, node_count in var.data_voters_count : [
      for idx in range(0, node_count) : jsonencode({
        zone  = zone_name
        index = idx
        name  = "${zone_name}-${idx}"
      })
    ]
  ]))

  data_zone_flattened = toset(flatten([
    for zone_name, node_count in var.datas_count : [
      for idx in range(0, node_count) : jsonencode({
        zone  = zone_name
        index = idx
        name  = "${zone_name}-${idx}"
      })
    ]
  ]))
}

# One fixed-size (10) disk per master node.
resource "google_compute_disk" "master" {
  for_each = local.master_zone_flattened

  name = "elasticsearch-${var.es_cluster}-master-${jsondecode(each.value)["name"]}"
  zone = jsondecode(each.value)["zone"]
  size = 10

  labels = {
    cluster-name      = var.es_cluster
    volume-index      = jsondecode(each.value)["index"]
    auto-attach-group = "master"
  }
}

# One disk of var.elasticsearch_volume_size per data node.
resource "google_compute_disk" "data" {
  for_each = local.data_zone_flattened

  name = "elasticsearch-${var.es_cluster}-data-${jsondecode(each.value)["name"]}"
  zone = jsondecode(each.value)["zone"]
  size = var.elasticsearch_volume_size

  labels = {
    cluster-name      = var.es_cluster
    volume-index      = jsondecode(each.value)["index"]
    auto-attach-group = "data"
  }
}

# One disk of var.elasticsearch_volume_size per data-voters node.
resource "google_compute_disk" "data_voters" {
  for_each = local.data_voters_zone_flattened

  name = "elasticsearch-${var.es_cluster}-data-voters-${jsondecode(each.value)["name"]}"
  zone = jsondecode(each.value)["zone"]
  size = var.elasticsearch_volume_size

  labels = {
    cluster-name      = var.es_cluster
    volume-index      = jsondecode(each.value)["index"]
    auto-attach-group = "data-voters"
  }
}

resource "google_compute_disk" "singlenode" {
  count = local.singlenode_mode ?
1 : 0

  name = "elasticsearch-${var.es_cluster}-singlenode"
  zone = var.singlenode_zone
  size = var.elasticsearch_volume_size

  labels = {
    cluster-name      = var.es_cluster
    volume-index      = "0"
    auto-attach-group = "singlenode"
  }
}

================================================
FILE: terraform-gcp/image.tf
================================================
# Images are resolved by image family name.
data "google_compute_image" "elasticsearch" {
  family = "elasticsearch-7"
}

data "google_compute_image" "kibana" {
  family = "kibana-7"
}

================================================
FILE: terraform-gcp/lb.tf
================================================
# Public LB

locals {
  # Ports forwarded by the external LB; empty when the cluster is not public.
  external_ports = var.public_facing ? toset(["9200", "5601"]) : toset([])

  load_balance_data_nodes   = !local.singlenode_mode && var.enable_direct_data_access
  load_balance_client_nodes = !local.singlenode_mode && length(var.clients_count) > 0
}

## Address
resource "google_compute_address" "external-lb" {
  count = var.public_facing ? 1 : 0

  name = "${var.es_cluster}-external-lb"
}

## Single node mode
# One forwarding rule per external port, targeting the singlenode pool.
resource "google_compute_forwarding_rule" "singlenode" {
  for_each = local.singlenode_mode ? local.external_ports : []

  name       = "${var.es_cluster}-external-singlenode-${each.value}"
  ip_address = join("", google_compute_address.external-lb[*].address)
  port_range = each.value
  target     = google_compute_target_pool.singlenode.self_link
}

## cluster mode
resource "google_compute_forwarding_rule" "client" {
  for_each = local.load_balance_client_nodes ?
local.external_ports : []

  name       = "${var.es_cluster}-external-client-${each.value}"
  ip_address = join("", google_compute_address.external-lb[*].address)
  port_range = each.value
  target     = google_compute_target_pool.client.self_link
}

# Internal LB

## Healthcheck
# TCP check against port 9200, shared by both internal backend services.
resource "google_compute_health_check" "internal" {
  name = "${var.es_cluster}-internal-healthcheck"

  check_interval_sec = 1
  timeout_sec        = 1

  tcp_health_check {
    port               = "9200"
    port_specification = "USE_FIXED_PORT"
  }

  log_config {
    enable = true
  }
}

## Single node
resource "google_compute_region_backend_service" "internal-singlenode" {
  count = local.singlenode_mode ? 1 : 0

  name          = "${var.es_cluster}-internal-singlenode"
  region        = var.gcp_region
  protocol      = "TCP"
  health_checks = [google_compute_health_check.internal.self_link]

  backend {
    group = google_compute_instance_group_manager.singlenode.instance_group
  }
}

resource "google_compute_forwarding_rule" "internal-singlenode" {
  count = local.singlenode_mode ? 1 : 0

  name                  = "${var.es_cluster}-internal-singlenode"
  region                = var.gcp_region
  service_label         = "${var.es_cluster}-internal"
  load_balancing_scheme = "INTERNAL"
  all_ports             = true
  backend_service       = join("", google_compute_region_backend_service.internal-singlenode[*].self_link)
}

## Client nodes
# Created when either client nodes or data nodes are load balanced; each
# enabled group of nodes contributes one backend per zone.
resource "google_compute_region_backend_service" "internal-client" {
  count = (local.load_balance_client_nodes || local.load_balance_data_nodes) ? 1 : 0

  name          = "${var.es_cluster}-internal-client"
  region        = var.gcp_region
  protocol      = "TCP"
  health_checks = [google_compute_health_check.internal.self_link]

  dynamic "backend" {
    for_each = local.load_balance_client_nodes ? toset(keys(var.clients_count)) : []
    content {
      group = google_compute_instance_group_manager.client[backend.value].instance_group
    }
  }

  dynamic "backend" {
    for_each = local.load_balance_data_nodes ?
toset(keys(var.datas_count)) : []
    content {
      group = google_compute_instance_group_manager.data[backend.value].instance_group
    }
  }

  dynamic "backend" {
    for_each = local.load_balance_data_nodes ? toset(keys(var.data_voters_count)) : []
    content {
      group = google_compute_instance_group_manager.data-voters[backend.value].instance_group
    }
  }
}

## forwarding rule
# Created under the same condition as the "internal-client" backend service.
# Gating it only on "not singlenode mode" would create the rule with an empty
# backend_service when there are neither client nodes nor direct data access.
resource "google_compute_forwarding_rule" "internal-client" {
  count = local.load_balance_client_nodes || local.load_balance_data_nodes ? 1 : 0

  name                  = "${var.es_cluster}-internal-client"
  region                = var.gcp_region
  service_label         = "${var.es_cluster}-internal"
  load_balancing_scheme = "INTERNAL"
  backend_service       = join("", google_compute_region_backend_service.internal-client[*].self_link)
  all_ports             = true
}

================================================
FILE: terraform-gcp/main.tf
================================================
terraform {
  required_providers {
    tls = {
      source  = "hashicorp/tls"
      version = "3.1.0"
    }
  }
}

provider "google" {
  # comment out to use environment credentials
  credentials = var.gcp_credentials_path
  project     = var.gcp_project_id
  region      = var.gcp_region
  zone        = var.gcp_zone
}

# Generated password; passed to nodes as client_pwd and exposed as the
# vm_password output.
resource "random_string" "vm-login-password" {
  length  = 16
  special = false
}

resource "random_string" "security-encryption-key" {
  length  = 32
  special = false
}

resource "random_string" "reporting-encryption-key" {
  length  = 32
  special = false
}

# Allow TCP 9200-9400 between instances carrying the cluster tag.
resource "google_compute_firewall" "internode" {
  name    = "${var.es_cluster}-firewall-allow-internode"
  network = var.cluster_network

  allow {
    protocol = "tcp"
    ports    = ["9200-9400"]
  }

  source_tags = [var.es_cluster]
}

resource "google_compute_firewall" "external" {
  count = var.public_facing ?
1 : 0

  name    = "${var.es_cluster}-firewall-allow-external"
  network = var.cluster_network

  allow {
    protocol = "tcp"
    ports    = ["9200", "5601"]
  }

  # Public access: state the source explicitly instead of relying on the
  # implicit default for ingress rules.
  source_ranges = ["0.0.0.0/0"]

  # Only open these ports on this cluster's instances (every instance and
  # instance template in this module carries the var.es_cluster tag), not on
  # every instance attached to the network.
  target_tags = [var.es_cluster]
}

resource "google_compute_router" "router" {
  name    = "${var.es_cluster}-router"
  network = var.cluster_network
}

# NAT on the cluster router, covering all subnetworks and IP ranges.
resource "google_compute_router_nat" "nat" {
  name                               = "${var.es_cluster}-router-nat"
  router                             = google_compute_router.router.name
  nat_ip_allocate_option             = "AUTO_ONLY"
  source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
}

# Service account (and key) that is granted access to the snapshots bucket.
resource "google_service_account" "gcs" {
  account_id   = "${var.es_cluster}-gcs"
  display_name = "${var.es_cluster}-gcs-service-account"
}

resource "google_service_account_key" "gcs" {
  service_account_id = google_service_account.gcs.name
  public_key_type    = "TYPE_X509_PEM_FILE"
}

# Snapshot bucket and its IAM bindings exist only when a bucket name is set.
resource "google_storage_bucket" "snapshots" {
  count = var.gcs_snapshots_bucket != "" ? 1 : 0

  name     = var.gcs_snapshots_bucket
  location = var.gcp_region
}

resource "google_storage_bucket_iam_member" "legacy-bucket-reader" {
  count = var.gcs_snapshots_bucket != "" ? 1 : 0

  bucket = join("", google_storage_bucket.snapshots[*].name)
  role   = "roles/storage.legacyBucketReader"
  member = "serviceAccount:${google_service_account.gcs.email}"
}

resource "google_storage_bucket_iam_member" "object-admin" {
  count = var.gcs_snapshots_bucket != "" ?
1 : 0

  bucket = join("", google_storage_bucket.snapshots[*].name)
  role   = "roles/storage.objectAdmin"
  member = "serviceAccount:${google_service_account.gcs.email}"
}

locals {
  # Total number of master-eligible nodes: dedicated masters plus data-voters.
  # The leading [0] keeps sum() from failing on an empty list, which is what
  # both maps produce in singlenode mode (their defaults are {}).
  masters_count = sum(concat([0], values(var.masters_count), values(var.data_voters_count)))

  # Every zone referenced by any node group, plus the singlenode zone;
  # compact() drops the empty string left by an unset singlenode_zone.
  all_zones = compact(tolist(setunion(
    keys(var.masters_count),
    keys(var.datas_count),
    keys(var.data_voters_count),
    keys(var.clients_count),
    toset([var.singlenode_zone])
  )))

  # Singlenode mode: no masters, datas or clients configured
  # (data_voters_count is not part of this check).
  singlenode_mode = (length(keys(var.masters_count)) + length(keys(var.datas_count)) + length(keys(var.clients_count))) == 0

  # Bootstrapped when the local state file contains "1", or when
  # bootstrapping is disabled via var.requires_bootstrapping.
  is_cluster_bootstrapped = data.local_file.cluster_bootstrap_state.content == "1" || !var.requires_bootstrapping

  # Variables shared by all user-data templates; the per-role template_file
  # data sources merge their own overrides on top of this map.
  user_data_common = {
    cloud_provider           = "gcp"
    gcs_snapshots_bucket     = var.gcs_snapshots_bucket
    elasticsearch_data_dir   = var.elasticsearch_data_dir
    elasticsearch_logs_dir   = var.elasticsearch_logs_dir
    es_cluster               = var.es_cluster
    gcp_project_id           = var.gcp_project_id
    gcp_zones                = join(",", tolist(local.all_zones))
    es_environment           = "${var.environment}-${var.es_cluster}"
    security_enabled         = var.security_enabled
    monitoring_enabled       = var.monitoring_enabled
    masters_count            = local.masters_count
    client_user              = var.client_user
    xpack_monitoring_host    = var.xpack_monitoring_host
    filebeat_monitoring_host = var.filebeat_monitoring_host
    use_g1gc                 = var.use_g1gc
    client_pwd               = random_string.vm-login-password.result
    master                   = false
    data                     = false
    bootstrap_node           = false
    log_level                = var.log_level
    log_size                 = var.log_size
    is_voting_only           = false
    gcs_service_account_key  = join("", google_service_account_key.gcs[*].private_key)

    # Certificate material is empty when security is disabled.
    ca_cert   = var.security_enabled ? join("", tls_self_signed_cert.ca[*].cert_pem) : ""
    node_cert = var.security_enabled ? join("", tls_locally_signed_cert.node[*].cert_pem) : ""
    node_key  = var.security_enabled ?
join("", tls_private_key.node[*].private_key_pem) : ""

    DEV_MODE_scripts_gcs_bucket   = var.DEV_MODE_scripts_gcs_bucket
    security_encryption_key       = random_string.security-encryption-key.result
    reporting_encryption_key      = random_string.reporting-encryption-key.result
    auto_shut_down_bootstrap_node = var.auto_shut_down_bootstrap_node
  }
}

================================================
FILE: terraform-gcp/masters.tf
================================================
# Local marker file; its content is compared with "1" to decide whether the
# cluster has already been bootstrapped.
data "local_file" "cluster_bootstrap_state" {
  filename = "${path.module}/cluster_bootstrap_state"
}

# Startup script for master nodes.
data "template_file" "master_userdata_script" {
  template = file("${path.module}/../templates/gcp_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.master_heap_size
    startup_script = "master.sh"
  })
}

# Startup script for the bootstrap node.
data "template_file" "bootstrap_userdata_script" {
  template = file("${path.module}/../templates/gcp_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.master_heap_size
    startup_script = "bootstrap.sh"
  })
}

# One managed instance group per zone listed in var.masters_count.
resource "google_compute_instance_group_manager" "master" {
  for_each = toset(keys(var.masters_count))

  provider = google-beta
  project  = var.gcp_project_id
  zone     = each.value
  name     = "${var.es_cluster}-igm-master-${each.value}"

  version {
    instance_template = google_compute_instance_template.master.self_link
    name              = "primary"
  }

  base_instance_name = "${var.es_cluster}-master"
}

# min_replicas == max_replicas pins each zone's group at the configured count.
resource "google_compute_autoscaler" "master" {
  for_each = toset(keys(var.masters_count))

  name   = "${var.es_cluster}-autoscaler-master-${each.value}"
  zone   = each.value
  target = google_compute_instance_group_manager.master[each.value].self_link

  autoscaling_policy {
    min_replicas    = var.masters_count[each.value]
    max_replicas    = var.masters_count[each.value]
    cooldown_period = 60
  }
}

resource "google_compute_instance" "bootstrap_node" {
  count = local.singlenode_mode || local.is_cluster_bootstrapped ?
0 : 1

  name         = "${var.es_cluster}-bootstrap-node"
  zone         = var.gcp_zone
  machine_type = var.master_machine_type

  tags = [var.es_cluster, "es-bootstrap-node"]

  boot_disk {
    initialize_params {
      image = data.google_compute_image.elasticsearch.self_link
    }
  }

  network_interface {
    network = var.cluster_network
  }

  metadata_startup_script = data.template_file.bootstrap_userdata_script.rendered

  service_account {
    scopes = ["userinfo-email", "compute-rw", "storage-ro"]
  }
}

resource "google_compute_instance_template" "master" {
  provider    = google-beta
  project     = var.gcp_project_id
  name_prefix = "${var.es_cluster}-instance-template-master"

  machine_type   = var.master_machine_type
  can_ip_forward = false

  tags = [var.es_cluster, "es-master-node"]

  metadata_startup_script = data.template_file.master_userdata_script.rendered

  labels = {
    environment = var.environment
    cluster     = "${var.environment}-${var.es_cluster}"
    role        = "master"
  }

  disk {
    source_image = data.google_compute_image.elasticsearch.self_link
    boot         = true
  }

  network_interface {
    network = var.cluster_network
  }

  service_account {
    scopes = ["userinfo-email", "compute-rw", "storage-ro"]
  }

  lifecycle {
    create_before_destroy = true
  }
}

# Writes "1" to the local bootstrap-state file after the bootstrap node
# resource has been handled, and resets it to "0" on destroy.
resource "null_resource" "cluster_bootstrap_state" {
  provisioner "local-exec" {
    command = "printf 1 > ${path.module}/cluster_bootstrap_state"
  }

  provisioner "local-exec" {
    when    = destroy
    command = "printf 0 > ${path.module}/cluster_bootstrap_state"
  }

  depends_on = [google_compute_instance.bootstrap_node]
}

================================================
FILE: terraform-gcp/outputs.tf
================================================
# External LB address, or an empty string when the cluster is not public.
output "external_lb" {
  value = var.public_facing ? join("", google_compute_address.external-lb[*].address) : ""
}

output "internal_lb" {
  value = local.singlenode_mode ?
join("", google_compute_forwarding_rule.internal-singlenode[*].service_name) : join("", google_compute_forwarding_rule.internal-client[*].service_name)
}

output "vm_password" {
  value = random_string.vm-login-password.result
}

================================================
FILE: terraform-gcp/singlenode.tf
================================================
# Startup script for the single node; uses the master heap size.
data "template_file" "singlenode_userdata_script" {
  template = file("${path.module}/../templates/gcp_user_data.sh")
  vars = merge(local.user_data_common, {
    heap_size      = var.master_heap_size
    startup_script = "singlenode.sh"
  })
}

resource "google_compute_target_pool" "singlenode" {
  name = "${var.es_cluster}-singlenode-targetpool"
}

# NOTE(review): this group manager, the target pool above and the instance
# template below carry no count, so they are declared in cluster mode as well;
# only the autoscaler is gated on singlenode_mode.
resource "google_compute_instance_group_manager" "singlenode" {
  provider = google
  project  = var.gcp_project_id
  zone     = var.singlenode_zone
  name     = "${var.es_cluster}-igm-singlenode"

  version {
    instance_template = google_compute_instance_template.singlenode.self_link
    name              = "primary"
  }

  base_instance_name = "${var.es_cluster}-singlenode"
  target_pools       = [google_compute_target_pool.singlenode.self_link]
}

resource "google_compute_autoscaler" "singlenode" {
  count = local.singlenode_mode ?
1 : 0

  name   = "${var.es_cluster}-autoscaler-singlenode"
  zone   = var.singlenode_zone
  target = google_compute_instance_group_manager.singlenode.self_link

  # Exactly one replica.
  autoscaling_policy {
    min_replicas    = 1
    max_replicas    = 1
    cooldown_period = 60
  }
}

resource "google_compute_instance_template" "singlenode" {
  provider    = google
  project     = var.gcp_project_id
  name_prefix = "${var.es_cluster}-instance-template-single"

  machine_type = var.data_machine_type

  tags = [var.es_cluster, "es-singlenode-node", "http-server", "https-server"]

  # Installs the given public key for the "ubuntu" user.
  metadata = {
    sshKeys = "ubuntu:${file(var.gcp_ssh_pub_key_file)}"
  }

  metadata_startup_script = data.template_file.singlenode_userdata_script.rendered

  labels = {
    environment = var.environment
    cluster     = "${var.environment}-${var.es_cluster}"
    role        = "singlenode"
  }

  # The single node boots from the kibana image family.
  disk {
    source_image = data.google_compute_image.kibana.self_link
    boot         = true
  }

  network_interface {
    network = var.cluster_network
  }

  service_account {
    scopes = ["userinfo-email", "compute-rw", "storage-ro"]
  }

  lifecycle {
    create_before_destroy = true
  }
}

================================================
FILE: terraform-gcp/terraform.tfvars.example
================================================
es_cluster     = "elastic-cluster"
gcp_project_id = "some_project"

# see main.tf for using environment credentials
gcp_credentials_path = "credentials.json"
gcp_zone             = "us-east1-b"
gcp_region           = "us-east1"
environment          = "test"

masters_count = {
  "us-east1-b" = 1
}

datas_count = {
  "us-east1-b" = 2
}

data_voters_count = {
  "us-east1-b" = 2
}

clients_count = {
  "us-east1-b" = 1
}

security_enabled          = true
monitoring_enabled        = false
client_user               = "someuser"
public_facing             = false
data_machine_type         = "n2-highmem-2"
elasticsearch_volume_size = "200"
data_heap_size            = "8g"
gcp_ssh_pub_key_file      = "id_rsa.pub"
enable_direct_data_access = true

================================================
FILE: terraform-gcp/variables.tf
================================================
### MANDATORY ###

variable "es_cluster" {
description = "Name of the elasticsearch cluster, used in node discovery"
}

variable "gcp_project_id" {
  type = string
}

variable "gcp_credentials_path" {
  type    = string
  default = ""
}

variable "gcp_zone" {
  type    = string
  default = "us-central1-a"
}

variable "gcp_region" {
  type    = string
  default = "us-central1"
}

variable "environment" {
  default = "default"
}

# Node counts are maps of "GCP zone => number of nodes".
variable "masters_count" {
  type        = map(number)
  default     = {}
  description = "Master nodes count per GCP zone. If all node counts are empty, will run in singlenode mode."
}

variable "datas_count" {
  type        = map(number)
  default     = {}
  description = "Data nodes count per GCP zone. If all node counts are empty, will run in singlenode mode."
}

# These nodes are started with is_voting_only = "true" (see datas-voters.tf)
# and are added to the masters_count passed to the nodes. The singlenode_mode
# check in main.tf does not look at this variable.
variable "data_voters_count" {
  type        = map(number)
  default     = {}
  description = "Voting-only data nodes count per GCP zone. Not considered when deciding whether to run in singlenode mode."
}

variable "clients_count" {
  type        = map(number)
  default     = {}
  description = "Client nodes count per GCP zone. If all node counts are empty, will run in singlenode mode."
}

variable "security_enabled" {
  description = "Whether or not to enable x-pack security on the cluster"
  default     = true
}

variable "singlenode_zone" {
  description = "This variable is required when running in singlenode mode. Singlenode mode is enabled when masters_count, datas_count and clients_count are all empty,"
  default     = ""
}

variable "monitoring_enabled" {
  description = "Whether or not to enable x-pack monitoring on the cluster"
  default     = "true"
}

variable "client_user" {
  description = "The username to use when setting up basic auth on Grafana and Cerebro."
default = "elastic"
}

variable "public_facing" {
  description = "Whether or not the created cluster should be accessible from the public internet"
  type        = bool
  default     = true
}

variable "gcs_snapshots_bucket" {
  description = "GCS bucket for backups"
  default     = ""
}

variable "cluster_network" {
  default = "default"
}

variable "master_machine_type" {
  default = "n1-standard-1"
}

variable "data_machine_type" {
  default = "n1-standard-4"
}

variable "elasticsearch_volume_size" {
  type    = string
  default = "100" # gb
}

variable "elasticsearch_data_dir" {
  default = "/opt/elasticsearch/data"
}

variable "elasticsearch_logs_dir" {
  default = "/var/log/elasticsearch"
}

# Heap sizes handed to the startup script of each node role.
variable "data_heap_size" {
  type    = string
  default = "8g"
}

variable "master_heap_size" {
  type    = string
  default = "2g"
}

variable "client_heap_size" {
  type    = string
  default = "1g"
}

variable "xpack_monitoring_host" {
  description = "ES host to send monitoring data"
  default     = "http://localhost:9200"
}

variable "filebeat_monitoring_host" {
  description = "ES host to send filebeat data"
  default     = ""
}

variable "use_g1gc" {
  description = "Whether or not to enable G1GC in jvm.options ES config. . Left in for backwards compatibility, deployments with Elasticsearch 7.7 and above should not use this."
  default     = false
}

variable "DEV_MODE_scripts_gcs_bucket" {
  description = "GCS bucket to override init scripts from. Should not be used on production."
  default     = ""
}

variable "gcp_ssh_pub_key_file" {
  default = "id_rsa.pub"
}

variable "enable_direct_data_access" {
  description = "Enable attaching load balancer directly to data nodes"
  default     = false
}

variable "requires_bootstrapping" {
  description = "Overrides cluster bootstrap state"
  default     = true
}

variable "log_size" {
  description = "Retained log4j log size in MB"
  default     = "128"
}

variable "log_level" {
  description = "log4j log level"
  default     = "INFO"
}

variable "auto_shut_down_bootstrap_node" {
  description = "disable to prevent bootstrap node from shutting down"
  default     = true
}